GPUOcelot
|
#include <NVIDIAExecutableKernel.h>
Public Member Functions | |
NVIDIAExecutableKernel (ir::IRKernel &kernel, const CUfunction &function, executive::Device *d=0) | |
NVIDIAExecutableKernel () | |
~NVIDIAExecutableKernel () | |
void | launchGrid (int width, int height, int depth) |
void | setKernelShape (int x, int y, int z) |
void | setDevice (const Device *device, unsigned int limit) |
void | setExternSharedMemorySize (unsigned int bytes) |
void | updateArgumentMemory () |
Indicate that the kernel's arguments have been updated. | |
void | updateMemory () |
Indicate that other memory has been updated. | |
TextureVector | textureReferences () const |
Get a vector of all textures referenced by the kernel. | |
void | updateGlobalMemory () |
void | updateConstantMemory () |
void | addTraceGenerator (trace::TraceGenerator *generator) |
void | removeTraceGenerator (trace::TraceGenerator *generator) |
void | setExternalFunctionSet (const ir::ExternalFunctionSet &s) |
void | clearExternalFunctionSet () |
void | setWorkerThreads (unsigned int limit) |
Sets the max number of pthreads this kernel can use. | |
Protected Member Functions | |
void | configureArguments () |
bool | initialize () |
Protected Attributes | |
CUfunction | cuFunction |
executive::NVIDIAExecutableKernel::NVIDIAExecutableKernel | ( | ir::IRKernel & | kernel, |
const CUfunction & | function, | ||
executive::Device * | d = 0 |
||
) |
Construct an NVIDIAExecutableKernel from an existing kernel
executive::NVIDIAExecutableKernel::NVIDIAExecutableKernel | ( | ) |
executive::NVIDIAExecutableKernel::~NVIDIAExecutableKernel | ( | ) |
void executive::NVIDIAExecutableKernel::addTraceGenerator | ( | trace::TraceGenerator * | generator | ) | [virtual] |
adds a trace generator to the EmulatedKernel
Reimplemented from executive::ExecutableKernel.
void executive::NVIDIAExecutableKernel::clearExternalFunctionSet | ( | ) | [virtual] |
clear the external function table for the emulated kernel
Implements executive::ExecutableKernel.
void executive::NVIDIAExecutableKernel::configureArguments | ( | ) | [protected] |
Configures the argument block for the CUDA driver API
bool executive::NVIDIAExecutableKernel::initialize | ( | ) | [protected] |
Construct
void executive::NVIDIAExecutableKernel::launchGrid | ( | int | width, |
int | height, | ||
int | depth | ||
) | [virtual] |
Launch a kernel on a grid of the given width, height, and depth
Implements executive::ExecutableKernel.
void executive::NVIDIAExecutableKernel::removeTraceGenerator | ( | trace::TraceGenerator * | generator | ) | [virtual] |
removes a trace generator from an EmulatedKernel
Reimplemented from executive::ExecutableKernel.
void executive::NVIDIAExecutableKernel::setDevice | ( | const Device * | device, |
unsigned int | limit | ||
) |
Sets the device used to execute the kernel
void executive::NVIDIAExecutableKernel::setExternalFunctionSet | ( | const ir::ExternalFunctionSet & | s | ) | [virtual] |
sets an external function table for the emulated kernel
Implements executive::ExecutableKernel.
void executive::NVIDIAExecutableKernel::setExternSharedMemorySize | ( | unsigned int | bytes | ) | [virtual] |
sets the size of shared memory in bytes
Implements executive::ExecutableKernel.
void executive::NVIDIAExecutableKernel::setKernelShape | ( | int | x, |
int | y, | ||
int | z | ||
) | [virtual] |
Sets the shape of a kernel
Implements executive::ExecutableKernel.
void executive::NVIDIAExecutableKernel::setWorkerThreads | ( | unsigned int | workerThreadLimit | ) | [virtual] |
Sets the max number of pthreads this kernel can use.
Implements executive::ExecutableKernel.
executive::ExecutableKernel::TextureVector executive::NVIDIAExecutableKernel::textureReferences | ( | ) | const [virtual] |
Get a vector of all textures referenced by the kernel.
Implements executive::ExecutableKernel.
void executive::NVIDIAExecutableKernel::updateArgumentMemory | ( | ) | [virtual] |
Indicate that the kernel's arguments have been updated.
Implements executive::ExecutableKernel.
void executive::NVIDIAExecutableKernel::updateConstantMemory | ( | ) |
void executive::NVIDIAExecutableKernel::updateGlobalMemory | ( | ) |
void executive::NVIDIAExecutableKernel::updateMemory | ( | ) | [virtual] |
Indicate that other memory has been updated.
Implements executive::ExecutableKernel.
CUDA function referring to this kernel