controller.cpp

00001 /*
00002  * Copyright (c) 2009, Philippe Robert <probert@eyescale.ch> 
00003  *
00004  * This library is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU Lesser General Public License version 2.1 as published
00006  * by the Free Software Foundation.
00007  *  
00008  * This library is distributed in the hope that it will be useful, but WITHOUT
00009  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00010  * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
00011  * details.
00012  * 
00013  * You should have received a copy of the GNU Lesser General Public License
00014  * along with this library; if not, write to the Free Software Foundation, Inc.,
00015  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00016  */
00017 
00018 #include "controller.h"
00019 #include "initData.h"
00020 
00021 #include <GL/glew.h>
00022 
00023 #include <cuda.h>
00024 #include <cuda_runtime_api.h>
00025 #include <cuda_gl_interop.h>
00026 
00027 #include <eq/base/log.h>
00028 
00029 using namespace std;
00030 
// Entry points declared with C linkage and defined outside this file.
// NOTE(review): presumably implemented in the CUDA (.cu) part of the project;
// the grouping below is inferred from the names and from the call sites in
// this file -- confirm against the implementation.
extern "C"
{
    // --- device setup -----------------------------------------------------
    void cudaInit(int argc, char **argv);
    void setDeviceSoftening(float softening);

    // --- device array management ------------------------------------------
    void allocateNBodyArrays(float* vel[2], int numBytes);
    void deleteNBodyArrays(float* vel[2]);

    // --- host <-> device transfers ----------------------------------------
    void copyArrayFromDevice(float* host, const float* device, unsigned int pbo, int numBytes);
    void copyArrayToDevice(float* device, const float* host, int numBytes);

    // --- OpenGL buffer object registration --------------------------------
    void registerGLBufferObject(unsigned int pbo);
    void unregisterGLBufferObject(unsigned int pbo);

    // --- simulation step --------------------------------------------------
    void integrateNbodySystem(float* newPos, float* newVel,
                              float* oldPos, float* oldVel,
                              unsigned int pboOldPos, unsigned int pboNewPos,
                              float deltaTime, float damping,
                              unsigned int numBodies, int offset, int length,
                              int p, int q,
                              int bUsePBO);

    // --- synchronisation --------------------------------------------------
    void threadSync();
}
00049 
00050 namespace eqNbody
00051 {
00052     
00053     void checkCUDAError(const char *msg)
00054     {
00055         cudaError_t err = cudaGetLastError();
00056         if( cudaSuccess != err) 
00057         {
00058             EQERROR << "CUDA error: " << msg << ": " << cudaGetErrorString( err) << std::endl;
00059             exit(EXIT_FAILURE);
00060         }                         
00061     }
00062     
00063     Controller::Controller() : _numBodies(0), _p(0), _q(0), _usePBO(true)
00064     {
00065         _dPos[0] = _dPos[1] = 0;
00066         _dVel[0] = _dVel[1] = 0;    
00067         
00068         _currentRead = 0;
00069         _currentWrite = 1;
00070     }
00071         
00072     bool Controller::init( int devID, const InitData& initData, float* hPos, bool usePBO, bool useGL )
00073     {       
00074         _numBodies  = initData.getNumBodies();
00075         _p          = initData.getP();
00076         _q          = initData.getQ();
00077         _damping    = initData.getDamping();
00078         
00079         _usePBO     = usePBO;
00080         _pointSize  = 1.0f;
00081 
00082         // Setup p and q properly
00083         if(_q * _p > 256)
00084         {
00085             _p = 256 / _q;
00086         }
00087         
00088         if (_q == 1 && _numBodies < _p)
00089         {
00090             _p = _numBodies;
00091         }
00092                 
00093         // Setup the CUDA device
00094         if( devID == -1 ) {
00095             _devID = _getMaxGflopsDeviceId();
00096             EQWARN << "No CUDA device, using the fastest device: " << _devID << std::endl;
00097         }
00098         else {
00099             _devID = devID;
00100         }
00101         
00102         if(useGL) {
00103             cudaGLSetGLDevice( _devID );
00104         }
00105         else {
00106             cudaSetDevice( _devID );
00107         }
00108         
00109         cudaGetDevice(&_devID);
00110         cudaGetDeviceProperties(&_props, _devID);
00111         
00112         EQINFO << "Using CUDA device: " << _devID << std::endl;
00113 
00114         if (usePBO)
00115         {
00116             // create the position pixel buffer objects for rendering
00117             // we will actually compute directly from this memory in CUDA too
00118             glGenBuffers(2, (GLuint*)_pbo);   
00119             for (int i = 0; i < 2; ++i)
00120             {
00121                 glBindBuffer(GL_ARRAY_BUFFER, _pbo[i]);
00122                 glBufferData(GL_ARRAY_BUFFER, 4 * sizeof(float) * _numBodies, hPos, GL_DYNAMIC_DRAW);
00123                 
00124                 int size = 0;
00125                 glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint*)&size); 
00126                 
00127                 if ((unsigned)size != 4 * (sizeof(float) * _numBodies)) {
00128                     EQERROR << "WARNING: Pixel Buffer Object allocation failed" << endl;
00129                 }
00130             
00131                 glBindBuffer(GL_ARRAY_BUFFER, 0);
00132                 registerGLBufferObject(_pbo[i]);
00133             }
00134         }
00135         else
00136         {
00137             allocateNBodyArrays(_dPos, _numBodies * sizeof( float) * 4);
00138         }
00139         
00140         allocateNBodyArrays(_dVel, _numBodies * sizeof( float) * 4);
00141 
00142         checkCUDAError("Controller::init");
00143         
00144         setSoftening(0.00125f);
00145         _renderer.init();
00146                 
00147         return true;
00148     }
00149     
00150     bool Controller::exit()
00151     {
00152         deleteNBodyArrays(_dVel);
00153         
00154         if (_usePBO)
00155         {
00156             unregisterGLBufferObject(_pbo[0]);
00157             unregisterGLBufferObject(_pbo[1]);
00158             glDeleteBuffers(2, (const GLuint*)_pbo);
00159         }
00160         else
00161         {
00162             deleteNBodyArrays(_dPos);
00163         }
00164         
00165         return true;
00166     }
00167                         
00168     void Controller::compute(const unsigned int frameID, const FrameData& fd, const eq::Range& range)
00169     {
00170         int offset  = range.start * _numBodies;
00171         int length  = ((range.end - range.start) * _numBodies) / _p;
00172             
00173         integrateNbodySystem(_dPos[_currentWrite], _dVel[_currentWrite], 
00174                              _dPos[_currentRead], _dVel[_currentRead],
00175                              _pbo[_currentWrite], _pbo[_currentRead],
00176                              fd.getTimeStep(), _damping, _numBodies, offset, length, _p, _q, 
00177                              (_usePBO ? 1 : 0));        
00178         
00179         checkCUDAError("Controller::run");
00180     }
00181     
00182     void Controller::draw(const FrameData& fd)
00183     {
00184         glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);  
00185 
00186         if(_usePBO) {
00187             _renderer.setPBO(_pbo[_currentRead], _numBodies);
00188         }
00189         else {
00190             _renderer.setPositions(fd.getPos(), _numBodies);
00191             _renderer.setPBO(0, _numBodies);
00192         }
00193         
00194         _renderer.setColors(fd.getCol(), _numBodies); // do this only on init
00195         _renderer.setSpriteSize(_pointSize);
00196         _renderer.draw(PARTICLE_SPRITES_COLOR);
00197 
00198         std::swap(_currentRead, _currentWrite);
00199     }
00200     
00201     void Controller::setSoftening(float softening)
00202     {
00203         setDeviceSoftening(softening);
00204     }
00205         
00206     void Controller::getArray(BodyArray array, DataProxy& proxy)
00207     {
00208         float* ddata = 0;       
00209         float* hdata = 0;
00210         unsigned int pbo = 0;
00211         
00212         unsigned int offset = proxy.getOffset();
00213                         
00214         switch (array)
00215         {
00216             default:
00217             case BODYSYSTEM_POSITION:
00218                 hdata = proxy.getPosition();
00219                 ddata = _dPos[_currentRead];
00220                 if (_usePBO) {
00221                     pbo = _pbo[_currentRead];
00222                 }
00223                 break;
00224                 
00225             case BODYSYSTEM_VELOCITY:
00226                 hdata = proxy.getVelocity();
00227                 ddata = _dVel[_currentRead];
00228                 break;
00229         }
00230 
00231         copyArrayFromDevice(hdata+offset, ddata+offset, pbo, proxy.getNumBytes());
00232         proxy.markDirty();
00233     }
00234     
00235     void Controller::setArray(BodyArray array, const FrameData& fd)
00236     {
00237         unsigned int numBytes = fd.getNumBytes();
00238         
00239         switch (array)
00240         {
00241             default:
00242             case BODYSYSTEM_POSITION:
00243             {
00244                 if (_usePBO)
00245                 {
00246                     unregisterGLBufferObject(_pbo[_currentRead]);
00247                     glBindBuffer(GL_ARRAY_BUFFER, _pbo[_currentRead]);
00248                     glBufferData(GL_ARRAY_BUFFER, numBytes, fd.getPos(), GL_DYNAMIC_DRAW);
00249                     
00250                     int size = 0;
00251                     glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint*)&size); 
00252                     if ((unsigned)size != numBytes) {
00253                         EQERROR << "WARNING: Pixel Buffer Object download failed. Size " << size << " does not match numBytes " << numBytes << std::endl;
00254                     }
00255                     glBindBuffer(GL_ARRAY_BUFFER, 0);
00256                     registerGLBufferObject(_pbo[_currentRead]);
00257                 }
00258                 else
00259                 {
00260                     copyArrayToDevice(_dPos[_currentRead], fd.getPos(), numBytes);
00261                 }
00262             }
00263                 break;
00264 
00265             case BODYSYSTEM_VELOCITY:
00266                 copyArrayToDevice(_dVel[_currentRead], fd.getVel(), numBytes);
00267                 break;
00268         }       
00269     }
00270         
00271     void Controller::synchronizeGPUThreads() const
00272     {
00273         threadSync();
00274     }
00275     
00276     int Controller::_getMaxGflopsDeviceId()
00277     {       
00278 #if __DEVICE_EMULATION__
00279         return 0;
00280 #else       
00281         int device_count = 0;
00282         cudaGetDeviceCount( &device_count );
00283         
00284         cudaDeviceProp device_properties;
00285         int max_gflops_device = 0;
00286         int max_gflops = 0;
00287         
00288         int current_device = 0;
00289         cudaGetDeviceProperties( &device_properties, current_device );
00290         max_gflops = device_properties.multiProcessorCount * device_properties.clockRate;
00291         ++current_device;
00292         
00293         while( current_device < device_count )
00294         {
00295             cudaGetDeviceProperties( &device_properties, current_device );
00296             int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
00297             if( gflops > max_gflops )
00298             {
00299                 max_gflops        = gflops;
00300                 max_gflops_device = current_device;
00301             }
00302             ++current_device;
00303         }
00304         return max_gflops_device;
00305 #endif
00306     }
00307     
00308 }
Generated on Mon Aug 10 18:58:32 2009 for Equalizer 0.9 by  doxygen 1.5.8