Actual source code: cuspvecimpl.h
 
   petsc-3.6.2 2015-10-02
   
  4: #include <petsccusp.h>
  5: #include <petsc/private/vecimpl.h>
  7: #include <algorithm>
  8: #include <vector>
  9: #include <string>
 11: #include <cublas.h>
 12: #if defined(CUSP_VERSION) && CUSP_VERSION >= 500
 13: #include <cusp/blas/blas.h>
 14: #else
 15: #include <cusp/blas.h>
 16: #endif
 17: #include <thrust/host_vector.h>
 18: #include <thrust/device_vector.h>
 19: #include <thrust/iterator/constant_iterator.h>
 20: #include <thrust/transform.h>
 21: #include <thrust/iterator/permutation_iterator.h>
 /* Shorthand names for the cusp::array1d instantiations used with this header.
    The element type is PetscScalar (CUSPARRAY, CUSPARRAYCPU) or PetscInt
    (CUSPINTARRAYGPU, CUSPINTARRAYCPU); the memory space is cusp::device_memory
    (CUSPARRAY, CUSPINTARRAYGPU) or cusp::host_memory (the *CPU variants). */
 23: #define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory>
 24: #define CUSPARRAYCPU cusp::array1d<PetscScalar,cusp::host_memory>
 25: #define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory>
 26: #define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory>
 /* Prototypes for the SeqCUSP vector routines. Only declarations appear here; the
    definitions live elsewhere. Every routine returns a PetscErrorCode.
    All are declared PETSC_INTERN except VecCreate_SeqCUSP, which is PETSC_EXTERN.
    NOTE(review): the names suggest each *_SeqCUSP routine is the CUSP-backed
    counterpart of the Vec operation with the same stem -- confirm against the
    implementation file. */
 28: PETSC_INTERN PetscErrorCode VecDotNorm2_SeqCUSP(Vec,Vec,PetscScalar*, PetscScalar*);
 29: PETSC_INTERN PetscErrorCode VecPointwiseDivide_SeqCUSP(Vec,Vec,Vec);
 30: PETSC_INTERN PetscErrorCode VecWAXPY_SeqCUSP(Vec,PetscScalar,Vec,Vec);
 31: PETSC_INTERN PetscErrorCode VecMDot_SeqCUSP(Vec,PetscInt,const Vec[],PetscScalar*);
 32: PETSC_INTERN PetscErrorCode VecSet_SeqCUSP(Vec,PetscScalar);
 33: PETSC_INTERN PetscErrorCode VecMAXPY_SeqCUSP(Vec,PetscInt,const PetscScalar*,Vec*);
 34: PETSC_INTERN PetscErrorCode VecAXPBYPCZ_SeqCUSP(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec);
 35: PETSC_INTERN PetscErrorCode VecPointwiseMult_SeqCUSP(Vec,Vec,Vec);
 36: PETSC_INTERN PetscErrorCode VecPlaceArray_SeqCUSP(Vec,const PetscScalar*);
 37: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUSP(Vec);
 38: PETSC_INTERN PetscErrorCode VecReplaceArray_SeqCUSP(Vec,const PetscScalar*);
 39: PETSC_INTERN PetscErrorCode VecDot_SeqCUSP(Vec,Vec,PetscScalar*);
 40: PETSC_INTERN PetscErrorCode VecTDot_SeqCUSP(Vec,Vec,PetscScalar*);
 41: PETSC_INTERN PetscErrorCode VecScale_SeqCUSP(Vec,PetscScalar);
 42: PETSC_INTERN PetscErrorCode VecCopy_SeqCUSP(Vec,Vec);
 43: PETSC_INTERN PetscErrorCode VecSwap_SeqCUSP(Vec,Vec);
 44: PETSC_INTERN PetscErrorCode VecAXPY_SeqCUSP(Vec,PetscScalar,Vec);
 45: PETSC_INTERN PetscErrorCode VecAXPBY_SeqCUSP(Vec,PetscScalar,PetscScalar,Vec);
 46: PETSC_INTERN PetscErrorCode VecDuplicate_SeqCUSP(Vec,Vec*);
 47: PETSC_INTERN PetscErrorCode VecNorm_SeqCUSP(Vec,NormType,PetscReal*);
 48: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU(Vec);
 49: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck(Vec);
 50: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheckHost(Vec);
 51: PETSC_EXTERN PetscErrorCode VecCreate_SeqCUSP(Vec);
 /* NOTE(review): VecView_Seq carries no CUSP suffix; presumably the plain Seq viewer
    is reused for CUSP vectors -- confirm. */
 52: PETSC_INTERN PetscErrorCode VecView_Seq(Vec,PetscViewer);
 53: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUSP(Vec);
 54: PETSC_INTERN PetscErrorCode VecAYPX_SeqCUSP(Vec,PetscScalar,Vec);
 55: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUSP(Vec,PetscRandom);
 56: PETSC_INTERN PetscErrorCode VecGetLocalVector_SeqCUSP(Vec,Vec);
 57: PETSC_INTERN PetscErrorCode VecRestoreLocalVector_SeqCUSP(Vec,Vec);
 /* NOTE(review): the *_Public variants take the same argument as VecCUSPCopyToGPU and
    VecCUSPAllocateCheck above; presumably thin wrappers around them -- confirm. */
 59: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU_Public(Vec);
 60: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck_Public(Vec);
 62: #define CHKERRCUSP(err) if (((int)err) != (int)CUBLAS_STATUS_SUCCESS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",err)
 /* VecCUSPCastToRawPtr(x): passes the address of element 0 of x to
    thrust::raw_pointer_cast and yields the result.
    NOTE(review): the expansion indexes x[0], so x is presumably required to be
    non-empty -- confirm at the call sites. */
 64: #define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&(x)[0])
 /* WaitForGPU(): an expression that synchronizes with the device when PetscCUSPSynchronize
    is nonzero and evaluates to 0 otherwise; the result is meant to be checked by the
    caller (e.g. ierr = WaitForGPU();CHKERRCUSP(ierr);).
    The whole conditional is parenthesized so it stays one operand when the macro is used
    inside a larger expression; unparenthesized, a surrounding operator would bind to
    PetscCUSPSynchronize or to the trailing 0 instead of to the result.
    cudaDeviceSynchronize() replaces cudaThreadSynchronize(), which the CUDA runtime
    documents as deprecated in favour of it. */
 66: #define WaitForGPU() (PetscCUSPSynchronize ? cudaDeviceSynchronize() : 0)
 /* Vec_CUSP: GPU-side data kept for a CUSP vector. It holds a pointer to the device
    array (CUSPARRAY, i.e. cusp::array1d<PetscScalar,cusp::device_memory>), a CUDA
    stream, and one flag describing the host buffer. */
 68: struct Vec_CUSP {
 69:   CUSPARRAY *GPUarray;        /* this always holds the GPU data */
 70:   cudaStream_t stream;        /* A stream for doing asynchronous data transfers */
 71:   PetscBool hostDataRegisteredAsPageLocked; /* NOTE(review): presumably PETSC_TRUE once the host array has been registered as page-locked (pinned) memory -- confirm against the code that sets it */
 72: };
 /* Prototypes for creating and destroying PetscCUSPIndices objects and for the StoS
    scatter that consumes one. Definitions are not in this header.
    NOTE(review): the _PtoP / _StoS suffixes presumably correspond to the
    VEC_SCATTER_CUSP_PTOP / VEC_SCATTER_CUSP_STOS scatter types and to the
    _p_VecScatterCUSPIndices_PtoP / _StoS structs declared in this header; the meaning
    of the individual PetscInt arguments is not visible from here -- confirm against
    the implementation. */
 74: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscCUSPIndices*);
 75: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*);
 76: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*);
 77: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode);
 /* VecCUSPScatterType: tag stored in _p_PetscCUSPIndices::scatterType.
    VecCUSPSequentialScatterMode: tag stored in the fromMode / toMode fields of
    _p_VecScatterCUSPIndices_StoS.
    NOTE(review): GENERAL presumably means an explicit index list and STRIDED a
    first/step description -- confirm against the scatter implementation. */
 79: typedef enum {VEC_SCATTER_CUSP_STOS, VEC_SCATTER_CUSP_PTOP} VecCUSPScatterType;
 80: typedef enum {VEC_SCATTER_CUSP_GENERAL, VEC_SCATTER_CUSP_STRIDED} VecCUSPSequentialScatterMode;
 /* _p_VecScatterCUSPIndices_PtoP: four PetscInt values describing a PtoP scatter.
    NOTE(review): the field meanings below are inferred from the names only -- confirm
    against VecScatterCUSPIndicesCreate_PtoP. */
 82: struct  _p_VecScatterCUSPIndices_PtoP {
 83:   PetscInt ns;              /* presumably the number of send indices -- TODO confirm */
 84:   PetscInt sendLowestIndex; /* presumably the smallest send index -- TODO confirm */
 85:   PetscInt nr;              /* presumably the number of receive indices -- TODO confirm */
 86:   PetscInt recvLowestIndex; /* presumably the smallest receive index -- TODO confirm */
 87: };
 /* _p_VecScatterCUSPIndices_StoS: index description for an StoS scatter. The "from" and
    "to" sides each carry an index pointer, a first/step pair and a
    VecCUSPSequentialScatterMode tag; the remaining fields are a count, two limits and a
    CUDA stream.
    NOTE(review): presumably the *slots pointer is used in VEC_SCATTER_CUSP_GENERAL mode
    and the First/Step pair in VEC_SCATTER_CUSP_STRIDED mode; whether the slot arrays
    live in host or device memory is not visible here -- confirm against
    VecScatterCUSPIndicesCreate_StoS. */
 89: struct  _p_VecScatterCUSPIndices_StoS {
 90:   /* from indices data */
 91:   PetscInt *fslots;
 92:   PetscInt fromFirst;
 93:   PetscInt fromStep;
 94:   VecCUSPSequentialScatterMode fromMode;
 96:   /* to indices data */
 97:   PetscInt *tslots;
 98:   PetscInt toFirst;
 99:   PetscInt toStep;
100:   VecCUSPSequentialScatterMode toMode;
102:   PetscInt n;                      /* presumably the number of entries scattered -- TODO confirm */
103:   PetscInt MAX_BLOCKS;             /* presumably an upper bound on thread blocks for the scatter kernel launch -- TODO confirm */
104:   PetscInt MAX_CORESIDENT_THREADS; /* presumably an upper bound on concurrently resident device threads -- TODO confirm */
105:   cudaStream_t stream;             /* CUDA stream; presumably the one the scatter runs on -- TODO confirm */
106: };
/* _p_PetscCUSPIndices: an untyped pointer paired with a VecCUSPScatterType tag.
   NOTE(review): presumably scatter points to a _p_VecScatterCUSPIndices_StoS when
   scatterType is VEC_SCATTER_CUSP_STOS and to a _p_VecScatterCUSPIndices_PtoP when it
   is VEC_SCATTER_CUSP_PTOP -- confirm against the create/destroy routines. */
108: struct  _p_PetscCUSPIndices {
109:   void * scatter;
110:   VecCUSPScatterType scatterType;
111: };
113: #endif