GPUOcelot

NVIDIAGPUDevice.h

Go to the documentation of this file.
00001 
00007 #ifndef NVIDIA_GPU_DEVICE_H_INCLUDED
00008 #define NVIDIA_GPU_DEVICE_H_INCLUDED
00009 
00010 // ocelot includes
00011 #include <ocelot/executive/interface/Device.h>
00012 #include <ocelot/cuda/interface/cuda_internal.h>
00013 
00014 namespace executive
00015 {
        // Forward declaration only. Within this header the type is used solely
        // through pointers (Module::KernelMap values and the return type of
        // Module::getKernel), so its full definition is not needed here.
00016   class NVIDIAExecutableKernel;
00017 }
00018 
00019 namespace executive
00020 {
00022   class NVIDIAGPUDevice : public Device
00023   {
00024     public:
            // Allocation record used by this device. Derives from
            // Device::MemoryAllocation and stores a CUDA driver device pointer
            // (CUdeviceptr) together with a host pointer, a size and flags.
00026       class MemoryAllocation : public Device::MemoryAllocation
00027       {
00028         private:
                // Flags associated with the allocation; returned by flags().
                // NOTE(review): presumably the flags handed to the (size, flags)
                // and (pointer, size, flags) constructors - confirm in the .cpp.
00030           unsigned int _flags;
                // Size of the allocation; returned by size().
                // Presumably in bytes - TODO confirm.
00032           size_t _size;
                // CUDA driver device pointer for this allocation.
00034           CUdeviceptr _devicePointer;
                // Host-side pointer. NOTE(review): presumably only meaningful for
                // host-allocated / registered memory - confirm.
00036           void* _hostPointer;
                // NOTE(review): presumably true when the memory is owned elsewhere
                // (e.g. a module global or a caller-registered pointer) and must
                // not be released by the destructor - confirm in the .cpp.
00038           bool _external;
00039         
00040         public:
                // Default constructor.
00042           MemoryAllocation();
                // Constructs an allocation of the given size.
                // Presumably a plain device allocation - TODO confirm.
00044           MemoryAllocation(size_t size);
                // Constructs an allocation of the given size with flags; has the
                // same parameters as NVIDIAGPUDevice::allocateHost, so presumably
                // a host allocation - TODO confirm.
00046           MemoryAllocation(size_t size, unsigned int flags);
                // Constructs an allocation for an IR global within a CUDA driver
                // module. NOTE(review): presumably wraps storage owned by the
                // module rather than allocating new memory - confirm.
00048           MemoryAllocation(CUmodule module, const ir::Global& global);
                // Constructs from an existing pointer and size. Whether the pointer
                // is a host or device address is not visible in this header.
00050           MemoryAllocation(void* pointer, size_t size);
                // Constructs from an existing pointer, size and flags; has the same
                // parameters as NVIDIAGPUDevice::registerHost.
00052           MemoryAllocation(void* pointer, size_t size,
00053             unsigned int flags);
                // Destructor. NOTE(review): not declared virtual here; deleting
                // through a Device::MemoryAllocation* is only safe if the base
                // class declares a virtual destructor - confirm in Device.h.
00055           ~MemoryAllocation();
00056 
00057         public:
                // Copy and move construction/assignment are all user-declared.
                // NOTE(review): the move operations are not declared noexcept;
                // changing that would also require updating their definitions.
00059           MemoryAllocation(const MemoryAllocation& a);
00061           MemoryAllocation(MemoryAllocation&& a);
00062           
00064           MemoryAllocation& operator=(const MemoryAllocation& a);
00066           MemoryAllocation& operator=(MemoryAllocation&& a);
00067       
00068         public:
                // Const accessors. Which of _devicePointer / _hostPointer is
                // returned by mappedPointer() and pointer() is decided in the
                // implementation and cannot be determined from this header.
00070           unsigned int flags() const;
00072           void* mappedPointer() const;
00074           void* pointer() const;
00076           size_t size() const;
                // Non-const overload taking a read-only host buffer; presumably
                // copies `size` bytes from `host` into this allocation at `offset`.
                // Note the argument order differs from the const overload below
                // (offset first here, host first there).
00078           void copy(size_t offset, const void* host, size_t size );
                // Const overload taking a writable host buffer; presumably copies
                // `size` bytes from this allocation at `offset` out to `host`.
00080           void copy(void* host, size_t offset, size_t size) const;
                // Presumably fills `size` bytes starting at `offset` with `value`
                // - TODO confirm.
00082           void memset(size_t offset, int value, size_t size);
                // Const overload taking another allocation; presumably copies
                // `size` bytes from this allocation at `fromOffset` into
                // `allocation` at `toOffset` - TODO confirm direction.
00084           void copy(Device::MemoryAllocation* allocation, 
00085             size_t toOffset, size_t fromOffset, size_t size) const;
00086       };
00087 
00088     private:
            // Per-module bookkeeping private to NVIDIAGPUDevice: pairs a pointer
            // to an ir::Module with a CUDA driver module handle and name-keyed
            // maps of globals and kernels.
00090       class Module
00091       {
00092         private:
                // CUDA driver module handle.
00094           CUmodule _handle;
00095 
00096         public:
                // Name -> untyped pointer for each global.
00098           typedef std::unordered_map<std::string, void*> GlobalMap;
                // Name -> kernel object. Raw pointers: ownership is not expressed
                // by the type (presumably released in ~Module - confirm).
00100           typedef std::unordered_map<std::string, 
00101             NVIDIAExecutableKernel*> KernelMap;
                // Vector of raw allocation pointers, as returned by loadGlobals().
00103           typedef std::vector<MemoryAllocation*> AllocationVector;
00104       
00105         public:
                // IR module this entry refers to (pointer-to-const).
00107           const ir::Module* ir;
                // Globals keyed by name.
00109           GlobalMap globals;
                // Kernels keyed by name.
00111           KernelMap kernels;
00112           
00113         public:
                // Constructor from an IR module pointer. The default argument of 0
                // means this also serves as the default constructor; it is not
                // declared explicit, so a const ir::Module* converts implicitly.
00115           Module(const ir::Module* m = 0);
                // Copy constructor. NOTE(review): a copy constructor and a
                // destructor are user-declared but no copy assignment operator is,
                // so the compiler-generated one applies - confirm this is
                // intended given the raw _handle and the KernelMap pointers.
00117           Module(const Module& m);
00119           ~Module();
00120           
00121         public:
                // Presumably loads the module into the CUDA driver and sets
                // _handle; loaded() reports that state - TODO confirm.
00123           void load();
00125           bool loaded() const;
                // Presumably runs translation for the module; translated() reports
                // that state - TODO confirm what is translated.
00127           void translate();
00129           bool translated() const;
                // Returns a vector of allocation pointers, by value.
                // NOTE(review): presumably one new MemoryAllocation per module
                // global, with ownership passing to the caller - confirm.
00131           AllocationVector loadGlobals();
                // Looks up a kernel by name. Behavior when the name is unknown is
                // not visible in this header.
00133           NVIDIAExecutableKernel* getKernel(const std::string& name);
                // Looks up a texture by name and returns it as an untyped pointer.
                // Behavior when the name is unknown is not visible in this header.
00135           void* getTexture(const std::string& name);
00136       };
00137 
            // Container aliases for the device's private bookkeeping.
            // NOTE(review): std::string, std::map, std::unordered_map and
            // std::vector are used in this header, but the only includes are
            // the two ocelot headers at the top; presumably the standard
            // headers arrive transitively - confirm, or include them directly.

            // String key -> Module, stored by value.
00139       typedef std::unordered_map<std::string, Module> ModuleMap;
00140 
            // Pointer -> allocation. std::map keeps entries ordered by key.
00142       typedef std::map<void*, MemoryAllocation*> AllocationMap;
00143       
            // Integer handle -> CUDA driver stream.
00145       typedef std::unordered_map<unsigned int, CUstream> StreamMap;
00146       
            // Integer handle -> CUDA driver event.
00148       typedef std::unordered_map<unsigned int, CUevent> EventMap;
00149       
            // Integer handle -> CUDA driver graphics resource.
00151       typedef std::unordered_map<unsigned int, 
00152         CUgraphicsResource> GraphicsMap;
00153       
            // Groups a CUDA driver array handle with a device pointer, a 3D
            // size and an element width. All data members are public.
00155       class Array3D
00156       {
00157         public:
                // CUDA driver array handle.
00159           CUarray array;
                // CUDA driver device pointer associated with the array.
00161           CUdeviceptr ptr;
                // Dimensions of the array.
00163           ir::Dim3 size;
                // Width of one element; presumably derived from the channel
                // format descriptor passed to the constructor - TODO confirm.
00165           unsigned int bytesPerElement;
00166           
00167         public:
                // Constructs from a channel format descriptor, a size and a
                // device pointer.
00169           Array3D(const cudaChannelFormatDesc& desc, 
00170             const ir::Dim3& size, CUdeviceptr d);
                // Default constructor.
00172           Array3D();
00174           ~Array3D();
00175       
00176         public:
                // NOTE(review): presumably refreshes `array` from the memory at
                // `ptr` - confirm in the .cpp.
00178           void update();
00179       };
00180       
            // String key -> Array3D. Raw pointers: ownership is not expressed by
            // the type. NOTE(review): key is presumably a texture name - confirm.
00182       typedef std::unordered_map<std::string, Array3D*> ArrayMap;
00183       
00184     private:
            // Allocations keyed by pointer (ordered map). Presumably the
            // device-memory allocations - TODO confirm.
00186       AllocationMap _allocations;
00187       
            // Second pointer-keyed allocation map; presumably the host
            // allocations made via allocateHost / registerHost - TODO confirm.
00189       AllocationMap _hostAllocations;
00190       
            // Modules keyed by string (presumably the module name).
00192       ModuleMap _modules;
00193       
            // CUDA driver streams keyed by integer handle.
00195       StreamMap _streams;
00196       
            // CUDA driver events keyed by integer handle.
00198       EventMap _events;
00199       
            // CUDA driver graphics resources keyed by integer handle.
00201       GraphicsMap _graphics;
00202     
            // CUDA driver context held by this device.
00204       CUcontext _context;
00205       
            // Presumably toggled by select() / unselect() - TODO confirm.
00207       bool _selected;
00208       
            // NOTE(review): presumably the next integer handle to hand out for
            // streams / events / graphics resources - confirm.
00210       unsigned int _next;
00211     
            // Presumably the stream handle last passed to setStream() - confirm.
00213       unsigned int _selectedStream;
00214     
            // NOTE(review): presumably whether OpenGL interop is enabled for
            // the context - confirm.
00216       bool _opengl;
00217       
            // Array3D objects keyed by string.
00219       ArrayMap _arrays;
00220         
00221     private:
            // Static members: this state is shared by every NVIDIAGPUDevice
            // instance, not held per device.
00223       static bool _cudaDriverInitialized;
            // Most recent CUDA driver result code (shared; see above).
00225       static CUresult _lastError;
00226       
00227     public:
            // Static factory returning a DeviceVector. The meaning of `flags`
            // and how `computeCapability` filters devices are defined in the
            // implementation (presumably a minimum capability - confirm).
00229       static DeviceVector createDevices(unsigned int flags,
00230         int computeCapability);
            // Static query returning a device count for the given
            // compute-capability argument.
00232       static unsigned int deviceCount(int computeCapability);
00233     
00234     public:
            // Constructor. Both parameters have defaults, so this also serves
            // as the default constructor; it is not declared explicit, so an
            // int converts implicitly to an NVIDIAGPUDevice.
00236       NVIDIAGPUDevice(int id = 0, unsigned int flags = 0);
            // Destructor. Not declared virtual in this class; whether it is
            // virtual depends on the Device base class.
00238       ~NVIDIAGPUDevice();
00239       
00240     public:
            // Memory management. These presumably implement the corresponding
            // Device interface functions; no virtual/override specifier is
            // visible in this header.

            // Const lookup of an allocation by address and allocation type.
            // Behavior when nothing matches is not visible in this header.
00241       Device::MemoryAllocation* getMemoryAllocation(const void* address, 
00242         AllocationType type) const;
            // Looks up the allocation for a named global in a named module.
00244       Device::MemoryAllocation* getGlobalAllocation(
00245         const std::string& module, const std::string& name);
            // Allocates `size` (presumably bytes of device memory - confirm).
00247       Device::MemoryAllocation* allocate(size_t size);
            // Allocates host memory of `size` with the given flags.
00249       Device::MemoryAllocation* allocateHost(size_t size, 
00250         unsigned int flags);
            // Registers caller-provided host memory of `size` with flags.
00252       Device::MemoryAllocation* registerHost(void* pointer, size_t size, 
00253         unsigned int flags);
            // Frees the allocation identified by `pointer`. Behavior for an
            // unknown pointer is not visible in this header.
00255       void free(void* pointer);
            // Const query returning allocations related to `pointer`; what
            // counts as "nearby" is defined in the implementation. The return
            // type is spelled unqualified here and Device::-qualified on
            // getAllAllocations below.
00257       MemoryAllocationVector getNearbyAllocations(void* pointer) const;
            // Const query returning all allocations.
00259       Device::MemoryAllocationVector getAllAllocations() const;
            // NOTE(review): presumably releases every allocation tracked by
            // this device - confirm.
00261       void clearMemory();
00262     
00263     public:
00265       void* glRegisterBuffer(unsigned int buffer, 
00266         unsigned int flags);
00268       void* glRegisterImage(unsigned int image, 
00269         unsigned int target, unsigned int flags);
00271       void unRegisterGraphicsResource(void* resource);
00273       void mapGraphicsResource(void** resource, int count, 
00274         unsigned int stream);
00276       void* getPointerToMappedGraphicsResource(size_t& size, 
00277         void* resource);
00279       void setGraphicsResourceFlags(void* resource, 
00280         unsigned int flags);
00282       void unmapGraphicsResource(void** resource, int count, unsigned int streamID);
00283 
00284     public:
            // Module management.

            // Registers an IR module with this device (presumably stored in
            // _modules; whether it is loaded into the driver immediately or
            // lazily is not visible here).
00286       void load(const ir::Module* module);
            // Unloads the module identified by `name`.
00288       void unload(const std::string& name);
            // Looks up a kernel by module and kernel name. Behavior when
            // either name is unknown is not visible in this header.
00290       ExecutableKernel* getKernel(const std::string& module, 
00291         const std::string& kernel);
00292 
00293     public:
            // Event management. Events are identified by unsigned integer
            // handles (see EventMap).

            // Creates an event with `flags` and returns its handle.
00295       unsigned int createEvent(int flags);
            // Destroys the event with the given handle.
00297       void destroyEvent(unsigned int event);
            // Queries an event; presumably true once the event has completed
            // - TODO confirm.
00299       bool queryEvent(unsigned int event);
            // Records `event` on `stream`.
00301       void recordEvent(unsigned int event, unsigned int stream);
            // Presumably blocks until the event has completed - confirm.
00303       void synchronizeEvent(unsigned int event);
            // Returns a time between two events as a float. NOTE(review):
            // presumably elapsed milliseconds from `start` to `end` - confirm.
00305       float getEventTime(unsigned int start, unsigned int end);
00306     
00307     public:
            // Stream management. Streams are identified by unsigned integer
            // handles (see StreamMap).

            // Creates a stream and returns its handle.
00309       unsigned int createStream();
            // Destroys the stream with the given handle.
00311       void destroyStream(unsigned int stream);
            // Queries a stream; presumably true when all its work has finished
            // - TODO confirm.
00313       bool queryStream(unsigned int stream);
            // Presumably blocks until the stream's work has finished - confirm.
00315       void synchronizeStream(unsigned int stream);
            // Presumably selects the stream used by subsequent operations
            // (see _selectedStream) - confirm.
00317       void setStream(unsigned int stream);
00318       
00319     public:
            // Selection. NOTE(review): presumably makes this device's CUDA
            // context current on / detaches it from the calling thread and
            // updates _selected - confirm in the .cpp.
00322       void select();
00324       void unselect();
00325     
00326     public:
            // Texture management. Textures are addressed by module name plus
            // texture name.

            // Binds the memory at `pointer` to the named texture using the
            // given texture reference, channel format descriptor and size.
00328       void bindTexture(void* pointer, 
00329         const std::string& moduleName, const std::string& textureName, 
00330         const textureReference& ref, const cudaChannelFormatDesc& desc, 
00331         const ir::Dim3& size);
            // Unbinds the named texture.
00333       void unbindTexture(const std::string& moduleName, 
00334         const std::string& textureName);
            // Returns the named texture as an untyped pointer. Behavior when
            // the name is unknown is not visible in this header.
00336       void* getTextureReference(const std::string& moduleName,
00337         const std::string& textureName);
00338 
00339     public:
            // Launches `kernel` from `module`.
            //   grid, block       - launch dimensions.
            //   sharedMemory      - shared memory size (presumably bytes of
            //                       dynamic shared memory - confirm).
            //   argumentBlock     - read-only pointer to the packed kernel
            //                       arguments, argumentBlockSize long.
            //   traceGenerators   - defaults to an empty vector.
            //   externals         - optional external function set; defaults
            //                       to 0 (null).
00351       void launch(const std::string& module, 
00352         const std::string& kernel, const ir::Dim3& grid, 
00353         const ir::Dim3& block, size_t sharedMemory, 
00354         const void* argumentBlock, size_t argumentBlockSize, 
00355         const trace::TraceGeneratorVector& 
00356         traceGenerators = trace::TraceGeneratorVector(),
00357         const ir::ExternalFunctionSet* externals = 0);
            // Returns the function attributes of the named kernel, by value.
00359       cudaFuncAttributes getAttributes(const std::string& module, 
00360         const std::string& kernel);
            // Returns an error code as unsigned int. NOTE(review): presumably
            // derived from the static _lastError (a CUresult) - confirm.
00362       unsigned int getLastError();
            // Presumably blocks until all outstanding work on the device has
            // completed - confirm.
00364       void synchronize();
00365       
00366     public:
            // Tuning hooks. NOTE(review): whether either setting has any
            // effect on this device type is not visible in this header;
            // confirm in the .cpp.

            // Requests a limit on the number of worker threads.
00368       void limitWorkerThreads(unsigned int threads);      
            // Sets the translator optimization level.
00370       void setOptimizationLevel(translator::Translator::OptimizationLevel 
00371         level);
00372 
00373   };
00374 }
00375 
00376 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines