Actual source code: vscatfce.c
 
   petsc-3.10.3 2018-12-18
   
  1:  #include <petsc/private/vecscatterimpl.h>
  2: #if defined(PETSC_HAVE_VECCUDA)
  3:  #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
  4: #endif
  5: /* ------------------------------------------------------------------*/
  6: /*@
  7:    VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
  8:       and the VecScatterEnd() does nothing
 10:    Not Collective
 12:    Input Parameter:
 13: .   ctx - scatter context created with VecScatterCreate()
 15:    Output Parameter:
 16: .   flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()
 18:    Level: developer
 20: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
 21: @*/
 22: PetscErrorCode  VecScatterGetMerged(VecScatter ctx,PetscBool  *flg)
 23: {
 26:   *flg = ctx->beginandendtogether;
 27:   return(0);
 28: }
 30: /*@
 31:    VecScatterBegin - Begins a generalized scatter from one vector to
 32:    another. Complete the scattering phase with VecScatterEnd().
 34:    Neighbor-wise Collective on VecScatter and Vec
 36:    Input Parameters:
 37: +  ctx - scatter context generated by VecScatterCreate()
 38: .  x - the vector from which we scatter
 39: .  y - the vector to which we scatter
 40: .  addv - either ADD_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
 41:           not scattered to retains its old value; i.e. the vector is NOT first zeroed.
 42: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
 43:     SCATTER_FORWARD or SCATTER_REVERSE
 46:    Level: intermediate
 48:    Options Database: See VecScatterCreate()
 50:    Notes:
 51:    The vectors x and y need not be the same vectors used in the call
 52:    to VecScatterCreate(), but x must have the same parallel data layout
 53:    as that passed in as the x to VecScatterCreate(), similarly for the y.
 54:    Most likely they have been obtained from VecDuplicate().
 56:    You cannot change the values in the input vector between the calls to VecScatterBegin()
 57:    and VecScatterEnd().
 59:    If you use SCATTER_REVERSE the two arguments x and y should be reversed, from
 60:    the SCATTER_FORWARD.
 62:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1
 64:    This scatter is far more general than the conventional
 65:    scatter, since it can be a gather or a scatter or a combination,
 66:    depending on the indices ix and iy.  If x is a parallel vector and y
 67:    is sequential, VecScatterBegin() can serve to gather values to a
 68:    single processor.  Similarly, if y is parallel and x sequential, the
 69:    routine can scatter from one processor to many processors.
 71:    Concepts: scatter^between vectors
 72:    Concepts: gather^between vectors
 74: .seealso: VecScatterCreate(), VecScatterEnd()
 75: @*/
 76: PetscErrorCode  VecScatterBegin(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 77: {
 79: #if defined(PETSC_USE_DEBUG)
 80:   PetscInt       to_n,from_n;
 81: #endif
 86:   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
 88: #if defined(PETSC_USE_DEBUG)
 89:   /*
 90:      Error checking to make sure these vectors match the vectors used
 91:    to create the vector scatter context. -1 in the from_n and to_n indicate the
 92:    vector lengths are unknown (for example with mapped scatters) and thus
 93:    no error checking is performed.
 94:   */
 95:   if (ctx->from_n >= 0 && ctx->to_n >= 0) {
 96:     VecGetLocalSize(x,&from_n);
 97:     VecGetLocalSize(y,&to_n);
 98:     if (mode & SCATTER_REVERSE) {
 99:       if (to_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,ctx->from_n);
100:       if (from_n != ctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,ctx->to_n);
101:     } else {
102:       if (to_n != ctx->to_n)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,ctx->to_n);
103:       if (from_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,ctx->from_n);
104:     }
105:   }
106: #endif
108:   ctx->inuse = PETSC_TRUE;
109:   PetscLogEventBegin(VEC_ScatterBegin,ctx,x,y,0);
110:   (*ctx->ops->begin)(ctx,x,y,addv,mode);
111:   if (ctx->beginandendtogether && ctx->ops->end) {
112:     ctx->inuse = PETSC_FALSE;
113:     (*ctx->ops->end)(ctx,x,y,addv,mode);
114:   }
115:   PetscLogEventEnd(VEC_ScatterBegin,ctx,x,y,0);
116:   return(0);
117: }
119: /* --------------------------------------------------------------------*/
120: /*@
121:    VecScatterEnd - Ends a generalized scatter from one vector to another.  Call
122:    after first calling VecScatterBegin().
124:    Neighbor-wise Collective on VecScatter and Vec
126:    Input Parameters:
127: +  ctx - scatter context generated by VecScatterCreate()
128: .  x - the vector from which we scatter
129: .  y - the vector to which we scatter
130: .  addv - either ADD_VALUES or INSERT_VALUES.
131: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
132:      SCATTER_FORWARD, SCATTER_REVERSE
134:    Level: intermediate
136:    Notes:
137:    If you use SCATTER_REVERSE the arguments x and y should be reversed, from the SCATTER_FORWARD.
139:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1
141: .seealso: VecScatterBegin(), VecScatterCreate()
142: @*/
143: PetscErrorCode  VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
144: {
151:   ctx->inuse = PETSC_FALSE;
152:   if (!ctx->ops->end) return(0);
153:   if (!ctx->beginandendtogether) {
154:     PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
155:     (*(ctx)->ops->end)(ctx,x,y,addv,mode);
156:     PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
157:   }
158:   return(0);
159: }
161: /*@
162:    VecScatterDestroy - Destroys a scatter context created by
163:    VecScatterCreate().
165:    Collective on VecScatter
167:    Input Parameter:
168: .  ctx - the scatter context
170:    Level: intermediate
172: .seealso: VecScatterCreate(), VecScatterCopy()
173: @*/
174: PetscErrorCode VecScatterDestroy(VecScatter *ctx)
175: {
179:   if (!*ctx) return(0);
181:   if ((*ctx)->inuse && ((PetscObject)(*ctx))->refct == 1) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
182:   if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}
184:   /* if memory was published with SAWs then destroy it */
185:   PetscObjectSAWsViewOff((PetscObject)(*ctx));
186:   if ((*ctx)->ops->destroy) {(*(*ctx)->ops->destroy)(*ctx);}
187: #if defined(PETSC_HAVE_VECCUDA)
188:   VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
189: #endif
190:   PetscHeaderDestroy(ctx);
191:   return(0);
192: }
194: /*@
195:    VecScatterCopy - Makes a copy of a scatter context.
197:    Collective on VecScatter
199:    Input Parameter:
200: .  sctx - the scatter context
202:    Output Parameter:
203: .  ctx - the context copy
205:    Level: advanced
207: .seealso: VecScatterCreate(), VecScatterDestroy()
208: @*/
209: PetscErrorCode  VecScatterCopy(VecScatter sctx,VecScatter *ctx)
210: {
212:   VecScatterType type;
217:   if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
218:   PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
219:   (*ctx)->to_n   = sctx->to_n;
220:   (*ctx)->from_n = sctx->from_n;
221:   (*sctx->ops->copy)(sctx,*ctx);
223:   VecScatterGetType(sctx,&type);
224:   PetscObjectChangeTypeName((PetscObject)(*ctx),type);
225:   return(0);
226: }
228: /* ------------------------------------------------------------------*/
229: /*@C
230:    VecScatterView - Views a vector scatter context.
232:    Collective on VecScatter
234:    Input Parameters:
235: +  ctx - the scatter context
236: -  viewer - the viewer for displaying the context
238:    Level: intermediate
240: @*/
241: PetscErrorCode  VecScatterView(VecScatter ctx,PetscViewer viewer)
242: {
247:   if (!viewer) {
248:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
249:   }
251:   if (ctx->ops->view) {
252:     (*ctx->ops->view)(ctx,viewer);
253:   }
254:   return(0);
255: }
257: /*@C
258:    VecScatterRemap - Remaps the "from" and "to" indices in a
259:    vector scatter context. FOR EXPERTS ONLY!
261:    Collective on VecScatter
263:    Input Parameters:
264: +  scat    - vector scatter context
265: .  tomap   - remapping plan for "to" indices (may be NULL).
266: -  frommap - remapping plan for "from" indices (may be NULL)
268:    Level: developer
270:    Notes:
271:      In the parallel case the todata contains indices from where the data is taken
272:      (and then sent to others)! The fromdata contains indices from where the received
273:      data is finally put locally.
275:      In the sequential case the todata contains indices from where the data is put
276:      and the fromdata contains indices from where the data is taken from.
277:      This is backwards from the paralllel case!
279: @*/
280: PetscErrorCode  VecScatterRemap(VecScatter scat,PetscInt tomap[],PetscInt frommap[])
281: {
282:   VecScatter_MPI_General *to,*from;
283:   VecScatter_Seq_General *sgto,*sgfrom;
284:   VecScatter_Seq_Stride  *ssto;
285:   PetscInt               i,ierr;
292:   to     = (VecScatter_MPI_General*)scat->todata;
293:   from   = (VecScatter_MPI_General*)scat->fromdata;
294:   ssto   = (VecScatter_Seq_Stride*)scat->todata;
295:   sgto   = (VecScatter_Seq_General*)scat->todata;
296:   sgfrom = (VecScatter_Seq_General*)scat->fromdata;
298:   /* remap indices from where we take/read data */
299:   if (tomap) {
300:     if (to->format == VEC_SCATTER_MPI_TOALL) {
301:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");
302:     } else if (to->format == VEC_SCATTER_MPI_GENERAL) {
303:       /* handle off processor parts */
304:       for (i=0; i<to->starts[to->n]; i++) to->indices[i] = tomap[to->indices[i]];
306:       /* handle local part */
307:       for (i=0; i<to->local.n; i++) to->local.vslots[i] = tomap[to->local.vslots[i]];
309:       /* the memcpy optimizations in vecscatter was based on index patterns it has.
310:          They need to be recalculated when indices are changed (remapped).
311:        */
312:       VecScatterMemcpyPlanDestroy_PtoP(to,from);
313:       VecScatterMemcpyPlanCreate_PtoP(to,from);
314:     } else if (sgfrom->format == VEC_SCATTER_SEQ_GENERAL) {
315:       /* remap indices*/
316:       for (i=0; i<sgfrom->n; i++) sgfrom->vslots[i] = tomap[sgfrom->vslots[i]];
317:       /* update optimizations, which happen when it is a Stride1toSG, SGtoStride1 or SGToSG vecscatter */
318:       if (ssto->format == VEC_SCATTER_SEQ_STRIDE && ssto->step == 1) {
319:         PetscInt tmp[2];
320:         tmp[0] = 0; tmp[1] = sgfrom->n;
321:         VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
322:         VecScatterMemcpyPlanCreate_Index(1,tmp,sgfrom->vslots,1/*bs*/,&sgfrom->memcpy_plan);
323:       } else if (sgto->format == VEC_SCATTER_SEQ_GENERAL) {
324:         VecScatterMemcpyPlanDestroy(&sgto->memcpy_plan);;
325:         VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
326:         VecScatterMemcpyPlanCreate_SGToSG(1/*bs*/,sgto,sgfrom);
327:       }
328:     } else if (sgfrom->format == VEC_SCATTER_SEQ_STRIDE) {
329:       VecScatter_Seq_Stride *ssto = (VecScatter_Seq_Stride*)sgfrom;
331:       /* if the remapping is the identity and stride is identity then skip remap */
332:       if (ssto->step == 1 && ssto->first == 0) {
333:         for (i=0; i<ssto->n; i++) {
334:           if (tomap[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
335:         }
336:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
337:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
338:   }
340:   if (frommap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");
342:   /*
343:     Mark then vector lengths as unknown because we do not know the
344:     lengths of the remapped vectors
345:   */
346:   scat->from_n = -1;
347:   scat->to_n   = -1;
348:   return(0);
349: }
351: /*
352:  VecScatterGetTypes_Private - Returns the scatter types.
354:  scatter - The scatter.
355:  from    - Upon exit this contains the type of the from scatter.
356:  to      - Upon exit this contains the type of the to scatter.
357: */
358: PetscErrorCode VecScatterGetTypes_Private(VecScatter scatter,VecScatterFormat *from,VecScatterFormat *to)
359: {
360:   VecScatter_Common* fromdata = (VecScatter_Common*)scatter->fromdata;
361:   VecScatter_Common* todata   = (VecScatter_Common*)scatter->todata;
364:   *from = fromdata->format;
365:   *to = todata->format;
366:   return(0);
367: }
370: /*
371:   VecScatterIsSequential_Private - Returns true if the scatter is sequential.
373:   scatter - The scatter.
374:   flag    - Upon exit flag is true if the scatter is of type VecScatter_Seq_General
375:             or VecScatter_Seq_Stride; otherwise flag is false.
376: */
377: PetscErrorCode VecScatterIsSequential_Private(VecScatter_Common *scatter,PetscBool *flag)
378: {
379:   VecScatterFormat scatterType = scatter->format;
382:   if (scatterType == VEC_SCATTER_SEQ_GENERAL || scatterType == VEC_SCATTER_SEQ_STRIDE) {
383:     *flag = PETSC_TRUE;
384:   } else {
385:     *flag = PETSC_FALSE;
386:   }
387:   return(0);
388: }
390: #if defined(PETSC_HAVE_VECCUDA)
392: /*@C
393:    VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
394:    to another for GPU based computation.
396:    Input Parameters:
397: +  inctx - scatter context generated by VecScatterCreate()
398: .  x - the vector from which we scatter
399: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
400:     SCATTER_FORWARD or SCATTER_REVERSE
402:   Level: intermediate
404:   Notes:
405:    Effectively, this function creates all the necessary indexing buffers and work
406:    vectors needed to move data only those data points in a vector which need to
407:    be communicated across ranks. This is done at the first time this function is
408:    called. Currently, this only used in the context of the parallel SpMV call in
409:    MatMult_MPIAIJCUSPARSE.
411:    This function is executed before the call to MatMult. This enables the memory
412:    transfers to be overlapped with the MatMult SpMV kernel call.
414: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
415: @*/
416: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
417: {
419:   VecScatter_MPI_General *to,*from;
420:   PetscErrorCode         ierr;
421:   PetscInt               i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;
422:   PetscBool              isSeq1,isSeq2;
425:   VecScatterIsSequential_Private((VecScatter_Common*)inctx->fromdata,&isSeq1);
426:   VecScatterIsSequential_Private((VecScatter_Common*)inctx->todata,&isSeq2);
427:   if (isSeq1 || isSeq2) {
428:     return(0);
429:   }
430:   if (mode & SCATTER_REVERSE) {
431:     to     = (VecScatter_MPI_General*)inctx->fromdata;
432:     from   = (VecScatter_MPI_General*)inctx->todata;
433:   } else {
434:     to     = (VecScatter_MPI_General*)inctx->todata;
435:     from   = (VecScatter_MPI_General*)inctx->fromdata;
436:   }
437:   bs           = to->bs;
438:   nrecvs       = from->n;
439:   nsends       = to->n;
440:   indices      = to->indices;
441:   sstartsSends = to->starts;
442:   sstartsRecvs = from->starts;
443:   if (x->valid_GPU_array != PETSC_OFFLOAD_UNALLOCATED && (nsends>0 || nrecvs>0)) {
444:     if (!inctx->spptr) {
445:       PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
446:       PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
447:       /* Here we create indices for both the senders and receivers. */
448:       PetscMalloc1(ns,&tindicesSends);
449:       PetscMalloc1(nr,&tindicesRecvs);
451:       PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));
452:       PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));
454:       PetscSortRemoveDupsInt(&ns,tindicesSends);
455:       PetscSortRemoveDupsInt(&nr,tindicesRecvs);
457:       PetscMalloc1(bs*ns,&sindicesSends);
458:       PetscMalloc1(from->bs*nr,&sindicesRecvs);
460:       /* sender indices */
461:       for (i=0; i<ns; i++) {
462:         for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
463:       }
464:       PetscFree(tindicesSends);
466:       /* receiver indices */
467:       for (i=0; i<nr; i++) {
468:         for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
469:       }
470:       PetscFree(tindicesRecvs);
472:       /* create GPU indices, work vectors, ... */
473:       VecScatterCUDAIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
474:       PetscFree(sindicesSends);
475:       PetscFree(sindicesRecvs);
476:     }
477:   }
478:   return(0);
479: }
481: /*@C
482:    VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
483:    another for GPU based computation.
485:    Input Parameter:
486: +  inctx - scatter context generated by VecScatterCreate()
488:   Level: intermediate
490:   Notes:
491:    Effectively, this function resets the temporary buffer flags. Currently, this
492:    only used in the context of the parallel SpMV call in in MatMult_MPIAIJCUDA
493:    or MatMult_MPIAIJCUDAARSE. Once the MatMultAdd is finished, the GPU temporary
494:    buffers used for messaging are no longer valid.
496: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
497: @*/
498: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
499: {
501:   return(0);
502: }
504: #endif