Table Of Contents

Previous topic

OptiX 400 Shakedown

Next topic

OptiX 400 : Interop Weirdness

This Page

optix 400 : seedPhotonsFromGenstepsImp FATAL : mismatch between CPU and GPU photon counts

Focussed workaround on gensteps using OpticksBufferControl

OpticksBufferControl sets tags associated with a buffer. The tags are stored in the NPY and are acted upon wherever buffer operations happen.

537 void OpticksEvent::createSpec()
538 {
...
547     m_genstep_spec = new NPYSpec(genstep_   ,  0,6,4,0,      NPYBase::FLOAT     , "OPTIX_SETSIZE,OPTIX_INPUT_ONLY,UPLOAD_WITH_CUDA,BUFFER_COPY_ON_DIRTY")  ;
548
282 template <typename T>
283 void OContext::upload(optix::Buffer& buffer, NPY<T>* npy)
284 {
285     unsigned int numBytes = npy->getNumBytes(0) ;
286
287     OpticksBufferControl ctrl(npy->getBufferControl());
288
289     LOG(info)<<"OContext::upload"
290              << " numBytes " << numBytes
291              << npy->description("upload")
292              ;
293
294     if(ctrl.isSet("UPLOAD_WITH_CUDA"))
295     {
296         void* d_ptr = NULL;
297         rtBufferGetDevicePointer(buffer->get(), 0, &d_ptr);
298         cudaMemcpy(d_ptr, npy->getBytes(), numBytes, cudaMemcpyHostToDevice);
299         buffer->markDirty();
300     }
301     else
302     {
303         memcpy( buffer->map(), npy->getBytes(), numBytes );
304         buffer->unmap();
305     }
306 }

Issue

Tao encountered crazy photon counts from the Thrust reduction of the gensteps:

OpSeeder::seedPhotonsFromGenstepsViaOptiX (CBufSpec)s_ox : dev_ptr 0x700ca0000 size 400000 num_bytes 6400000 hexdump 0
OpSeeder::seedPhotonsFromGenstepsImp (CBufSpec)s_gs : dev_ptr 0x700ba0000 size 6 num_bytes 96 hexdump 0
OpSeeder::seedPhotonsFromGenstepsImp (CBufSpec)s_ox : dev_ptr 0x700ca0000 size 400000 num_bytes 6400000 hexdump 0
2016-08-16 14:19:34.869 INFO  [3271157] [OpSeeder::seedPhotonsFromGenstepsImp@146] OpSeeder::seedPhotonsFromGenstepsImp gensteps 1,6,4 num_genstep_values 24
2016-08-16 14:19:34.875 FATAL [3271157] [OpSeeder::seedPhotonsFromGenstepsImp@156] OpSeeder::seedPhotonsFromGenstepsImp num_photons 4294967295 x_num_photons 100000
Assertion failed: (num_photons == x_num_photons && "FATAL : mismatch between CPU and GPU photon counts from the gensteps"), function seedPhotonsFromGenstepsImp, file /Users/blyth/opticks/opticksop/OpSeeder.cc, line 162.
Abort trap: 6

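The reduced num_photons value of 4294967295 (0xFFFFFFFF, the maximum 32-bit unsigned value) instead of the expected 100000 suggests that the gensteps failed to be properly uploaded to the GPU: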

132 void OpSeeder::seedPhotonsFromGenstepsImp(const CBufSpec& s_gs, const CBufSpec& s_ox)
133 {
134     s_gs.Summary("OpSeeder::seedPhotonsFromGenstepsImp (CBufSpec)s_gs");
135     s_ox.Summary("OpSeeder::seedPhotonsFromGenstepsImp (CBufSpec)s_ox");
136
137     TBuf tgs("tgs", s_gs );
138     TBuf tox("tox", s_ox );
139
140     //tgs.dump<unsigned int>("App::seedPhotonsFromGenstepsImp tgs", 6*4, 3, nv0 ); // stride, begin, end
141
142     NPY<float>* gensteps =  m_evt->getGenstepData() ;
143
144     unsigned int num_genstep_values = gensteps->getNumValues(0) ;
145
146     LOG(info) << "OpSeeder::seedPhotonsFromGenstepsImp"
147                << " gensteps " << gensteps->getShapeString()
148                << " num_genstep_values " << num_genstep_values
149                ;
150
151     unsigned int num_photons = tgs.reduce<unsigned int>(6*4, 3, num_genstep_values );  // adding photon counts for each genstep
152
153     unsigned int x_num_photons = m_evt->getNumPhotons() ;
154
155     if(num_photons != x_num_photons)
156           LOG(fatal)
157           << "OpSeeder::seedPhotonsFromGenstepsImp"
158           << " num_photons " << num_photons
159           << " x_num_photons " << x_num_photons
160           ;
161
162     assert(num_photons == x_num_photons && "FATAL : mismatch between CPU and GPU photon counts from the gensteps") ;

Tao's fix for the compute mode issue

+    // memcpy( buffer->map(), npy->getBytes(), numBytes );
+    // buffer->unmap();
+    void* d_ptr = NULL;
+    rtBufferGetDevicePointer(buffer->get(), 0, &d_ptr);
+    cudaMemcpy(d_ptr, npy->getBytes(), numBytes, cudaMemcpyHostToDevice);
+    buffer->markDirty();

buffer = m_context->createBuffer(RT_BUFFER_INPUT_OUTPUT | RT_BUFFER_COPY_ON_DIRTY);