00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "controller.h"
00019 #include "initData.h"
00020
00021 #include <GL/glew.h>
00022
00023 #include <cuda.h>
00024 #include <cuda_runtime_api.h>
00025 #include <cuda_gl_interop.h>
00026
00027 #include <eq/base/log.h>
00028
00029 using namespace std;
00030
// Entry points with C linkage used by the Controller. They are only declared
// here; their definitions live outside this file (presumably in the project's
// CUDA translation unit -- TODO confirm).
extern "C"
{
    // Device-level setup and synchronization.
    void cudaInit(int argc, char **argv);
    void threadSync();
    void setDeviceSoftening(float softening);

    // Storage management. The array variants receive a pair of pointers;
    // this file passes its two-element position and velocity arrays.
    void allocateNBodyArrays(float* vel[2], int numBytes);
    void deleteNBodyArrays(float* vel[2]);
    void registerGLBufferObject(unsigned int pbo);
    void unregisterGLBufferObject(unsigned int pbo);

    // Host <-> device transfers.
    void copyArrayToDevice(float* device, const float* host, int numBytes);
    void copyArrayFromDevice(float* host, const float* device,
                             unsigned int pbo, int numBytes);

    // One integration step of the simulation.
    void integrateNbodySystem(float* newPos, float* newVel,
                              float* oldPos, float* oldVel,
                              unsigned int pboOldPos, unsigned int pboNewPos,
                              float deltaTime, float damping,
                              unsigned int numBodies, int offset, int length,
                              int p, int q,
                              int bUsePBO);
}
00049
00050 namespace eqNbody
00051 {
00052
00053 void checkCUDAError(const char *msg)
00054 {
00055 cudaError_t err = cudaGetLastError();
00056 if( cudaSuccess != err)
00057 {
00058 EQERROR << "CUDA error: " << msg << ": " << cudaGetErrorString( err) << std::endl;
00059 exit(EXIT_FAILURE);
00060 }
00061 }
00062
00063 Controller::Controller() : _numBodies(0), _p(0), _q(0), _usePBO(true)
00064 {
00065 _dPos[0] = _dPos[1] = 0;
00066 _dVel[0] = _dVel[1] = 0;
00067
00068 _currentRead = 0;
00069 _currentWrite = 1;
00070 }
00071
00072 bool Controller::init( int devID, const InitData& initData, float* hPos, bool usePBO, bool useGL )
00073 {
00074 _numBodies = initData.getNumBodies();
00075 _p = initData.getP();
00076 _q = initData.getQ();
00077 _damping = initData.getDamping();
00078
00079 _usePBO = usePBO;
00080 _pointSize = 1.0f;
00081
00082
00083 if(_q * _p > 256)
00084 {
00085 _p = 256 / _q;
00086 }
00087
00088 if (_q == 1 && _numBodies < _p)
00089 {
00090 _p = _numBodies;
00091 }
00092
00093
00094 if( devID == -1 ) {
00095 _devID = _getMaxGflopsDeviceId();
00096 EQWARN << "No CUDA device, using the fastest device: " << _devID << std::endl;
00097 }
00098 else {
00099 _devID = devID;
00100 }
00101
00102 if(useGL) {
00103 cudaGLSetGLDevice( _devID );
00104 }
00105 else {
00106 cudaSetDevice( _devID );
00107 }
00108
00109 cudaGetDevice(&_devID);
00110 cudaGetDeviceProperties(&_props, _devID);
00111
00112 EQINFO << "Using CUDA device: " << _devID << std::endl;
00113
00114 if (usePBO)
00115 {
00116
00117
00118 glGenBuffers(2, (GLuint*)_pbo);
00119 for (int i = 0; i < 2; ++i)
00120 {
00121 glBindBuffer(GL_ARRAY_BUFFER, _pbo[i]);
00122 glBufferData(GL_ARRAY_BUFFER, 4 * sizeof(float) * _numBodies, hPos, GL_DYNAMIC_DRAW);
00123
00124 int size = 0;
00125 glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint*)&size);
00126
00127 if ((unsigned)size != 4 * (sizeof(float) * _numBodies)) {
00128 EQERROR << "WARNING: Pixel Buffer Object allocation failed" << endl;
00129 }
00130
00131 glBindBuffer(GL_ARRAY_BUFFER, 0);
00132 registerGLBufferObject(_pbo[i]);
00133 }
00134 }
00135 else
00136 {
00137 allocateNBodyArrays(_dPos, _numBodies * sizeof( float) * 4);
00138 }
00139
00140 allocateNBodyArrays(_dVel, _numBodies * sizeof( float) * 4);
00141
00142 checkCUDAError("Controller::init");
00143
00144 setSoftening(0.00125f);
00145 _renderer.init();
00146
00147 return true;
00148 }
00149
00150 bool Controller::exit()
00151 {
00152 deleteNBodyArrays(_dVel);
00153
00154 if (_usePBO)
00155 {
00156 unregisterGLBufferObject(_pbo[0]);
00157 unregisterGLBufferObject(_pbo[1]);
00158 glDeleteBuffers(2, (const GLuint*)_pbo);
00159 }
00160 else
00161 {
00162 deleteNBodyArrays(_dPos);
00163 }
00164
00165 return true;
00166 }
00167
00168 void Controller::compute(const unsigned int frameID, const FrameData& fd, const eq::Range& range)
00169 {
00170 int offset = range.start * _numBodies;
00171 int length = ((range.end - range.start) * _numBodies) / _p;
00172
00173 integrateNbodySystem(_dPos[_currentWrite], _dVel[_currentWrite],
00174 _dPos[_currentRead], _dVel[_currentRead],
00175 _pbo[_currentWrite], _pbo[_currentRead],
00176 fd.getTimeStep(), _damping, _numBodies, offset, length, _p, _q,
00177 (_usePBO ? 1 : 0));
00178
00179 checkCUDAError("Controller::run");
00180 }
00181
00182 void Controller::draw(const FrameData& fd)
00183 {
00184 glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
00185
00186 if(_usePBO) {
00187 _renderer.setPBO(_pbo[_currentRead], _numBodies);
00188 }
00189 else {
00190 _renderer.setPositions(fd.getPos(), _numBodies);
00191 _renderer.setPBO(0, _numBodies);
00192 }
00193
00194 _renderer.setColors(fd.getCol(), _numBodies);
00195 _renderer.setSpriteSize(_pointSize);
00196 _renderer.draw(PARTICLE_SPRITES_COLOR);
00197
00198 std::swap(_currentRead, _currentWrite);
00199 }
00200
00201 void Controller::setSoftening(float softening)
00202 {
00203 setDeviceSoftening(softening);
00204 }
00205
00206 void Controller::getArray(BodyArray array, DataProxy& proxy)
00207 {
00208 float* ddata = 0;
00209 float* hdata = 0;
00210 unsigned int pbo = 0;
00211
00212 unsigned int offset = proxy.getOffset();
00213
00214 switch (array)
00215 {
00216 default:
00217 case BODYSYSTEM_POSITION:
00218 hdata = proxy.getPosition();
00219 ddata = _dPos[_currentRead];
00220 if (_usePBO) {
00221 pbo = _pbo[_currentRead];
00222 }
00223 break;
00224
00225 case BODYSYSTEM_VELOCITY:
00226 hdata = proxy.getVelocity();
00227 ddata = _dVel[_currentRead];
00228 break;
00229 }
00230
00231 copyArrayFromDevice(hdata+offset, ddata+offset, pbo, proxy.getNumBytes());
00232 proxy.markDirty();
00233 }
00234
00235 void Controller::setArray(BodyArray array, const FrameData& fd)
00236 {
00237 unsigned int numBytes = fd.getNumBytes();
00238
00239 switch (array)
00240 {
00241 default:
00242 case BODYSYSTEM_POSITION:
00243 {
00244 if (_usePBO)
00245 {
00246 unregisterGLBufferObject(_pbo[_currentRead]);
00247 glBindBuffer(GL_ARRAY_BUFFER, _pbo[_currentRead]);
00248 glBufferData(GL_ARRAY_BUFFER, numBytes, fd.getPos(), GL_DYNAMIC_DRAW);
00249
00250 int size = 0;
00251 glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint*)&size);
00252 if ((unsigned)size != numBytes) {
00253 EQERROR << "WARNING: Pixel Buffer Object download failed. Size " << size << " does not match numBytes " << numBytes << std::endl;
00254 }
00255 glBindBuffer(GL_ARRAY_BUFFER, 0);
00256 registerGLBufferObject(_pbo[_currentRead]);
00257 }
00258 else
00259 {
00260 copyArrayToDevice(_dPos[_currentRead], fd.getPos(), numBytes);
00261 }
00262 }
00263 break;
00264
00265 case BODYSYSTEM_VELOCITY:
00266 copyArrayToDevice(_dVel[_currentRead], fd.getVel(), numBytes);
00267 break;
00268 }
00269 }
00270
00271 void Controller::synchronizeGPUThreads() const
00272 {
00273 threadSync();
00274 }
00275
00276 int Controller::_getMaxGflopsDeviceId()
00277 {
00278 #if __DEVICE_EMULATION__
00279 return 0;
00280 #else
00281 int device_count = 0;
00282 cudaGetDeviceCount( &device_count );
00283
00284 cudaDeviceProp device_properties;
00285 int max_gflops_device = 0;
00286 int max_gflops = 0;
00287
00288 int current_device = 0;
00289 cudaGetDeviceProperties( &device_properties, current_device );
00290 max_gflops = device_properties.multiProcessorCount * device_properties.clockRate;
00291 ++current_device;
00292
00293 while( current_device < device_count )
00294 {
00295 cudaGetDeviceProperties( &device_properties, current_device );
00296 int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
00297 if( gflops > max_gflops )
00298 {
00299 max_gflops = gflops;
00300 max_gflops_device = current_device;
00301 }
00302 ++current_device;
00303 }
00304 return max_gflops_device;
00305 #endif
00306 }
00307
00308 }