Opticks replaces Geant4 optical photon simulation with an equivalent implementation that benefits from state-of-the-art GPU ray tracing from NVIDIA OptiX. All optically relevant aspects of Geant4 context must be translated+copied to GPU:
- geometry : solids, structure, material+surface properties
- generation : Cerenkov + Scintillation (using Gensteps from Geant4)
- propagation : Rayleigh scattering, absorption, reemission, boundary
Achieving+maintaining equivalence is time consuming, BUT:
- transformative performance benefits >1000x Geant4
light produced in certain special scintillator materials, such as JUNO LS (liquid scintillator)
light produced in many materials (eg Water[refractive index 1.333]) when particle travels through it faster than the speed of light in the medium (for water when velocity v/c > 1/1.3333 = 0.75 )
34 template <typename T> struct qctx 35 { 36 curandStateXORWOW* r ; 37 38 cudaTextureObject_t scint_tex ; .. 52 qprop<T>* prop ; .. 64 QCTX_METHOD float4 boundary_lookup( unsigned ix, unsigned iy ); 65 QCTX_METHOD float4 boundary_lookup( float nm, unsigned line, unsigned k ); 66 67 QCTX_METHOD float scint_wavelength_hd0(curandStateXORWOW& rng); 68 QCTX_METHOD float scint_wavelength_hd10(curandStateXORWOW& rng); 69 QCTX_METHOD float scint_wavelength_hd20(curandStateXORWOW& rng); 70 QCTX_METHOD void scint_dirpol(quad4& p, curandStateXORWOW& rng); 71 QCTX_METHOD void reemit_photon(quad4& p, float scintillationTime, curandStateXORWOW& rng); 72 QCTX_METHOD void scint_photon( quad4& p, GS& g, curandStateXORWOW& rng); 73 QCTX_METHOD void scint_photon( quad4& p, curandStateXORWOW& rng); 74 75 QCTX_METHOD void cerenkov_fabricate_genstep(GS& g, bool energy_range ); .. 80 QCTX_METHOD void cerenkov_photon(quad4& p, unsigned id, curandStateXORWOW& rng, const GS& g, int print_id = -1 ) ; 81 QCTX_METHOD void cerenkov_photon(quad4& p, unsigned id, curandStateXORWOW& rng, int print_id = -1 ) ; 82 83 QCTX_METHOD void cerenkov_photon_enprop(quad4& p, unsigned id, curandStateXORWOW& rng, const GS& g, int print_id = -1 ) ; 84 QCTX_METHOD void cerenkov_photon_enprop(quad4& p, unsigned id, curandStateXORWOW& rng, int print_id = -1 ) ; 85 86 QCTX_METHOD void cerenkov_photon_expt( quad4& p, unsigned id, curandStateXORWOW& rng, int print_id = -1 ); 87
Opticks GPU equivalent
// optixrap/cu/scintillationstep.h 185 __device__ void 186 generate_scintillation_photon(Photon& p, ScintillationStep& ss, curandState& rng) 187 { ... 199 p.wavelength = reemission_lookup(curand_uniform(&rng)); 200 // optixrap/cu/reemission_lookup.h 025 static __device__ __inline__ float reemission_lookup(float u) 26 { 27 float ui = u/reemission_domain.z + 0.5f ; 28 return tex2D(reemission_texture, ui, 0.5f ); 29 }
200 G4VParticleChange* 201 DsG4Scintillation::PostStepDoIt( const G4Track& aTrack, const G4Step& aStep) 208 { ... 540 G4PhysicsOrderedFreeVector* ScintillationIntegral = NULL; 543 ScintillationIntegral = 544 (G4PhysicsOrderedFreeVector*) ((*theFastIntegralTable)(materialIndex)); ... 579 for(G4int i = 0 ; i < NumPhoton ; i++) { ... 583 G4double sampledEnergy; 586 G4double CIIvalue = G4UniformRand()* 587 ScintillationIntegral->GetMaxValue(); 588 sampledEnergy= 589 ScintillationIntegral->GetEnergy(CIIvalue); 096 G4double G4PhysicsOrderedFreeVector::GetEnergy(G4double aValue) 97 { 98 G4double e; ... 104 size_t closestBin = FindValueBinLocation(aValue); 105 e = LinearInterpolationOfEnergy(aValue, closestBin); 107 return e; 108 } 110 size_t G4PhysicsOrderedFreeVector::FindValueBinLocation(G4double aValue) 111 { 112 size_t bin = std::lower_bound(dataVector.begin(), dataVector.end(), aValue) 113 - dataVector.begin() - 1; 114 bin = std::min(bin, numberOfNodes-2); 115 return bin; 116 }
DsG4Scintillator compared with | chi2 | note |
---|---|---|
hd0_cudaFilterModePoint | 13.79 | "artifact" issue in tails, terrible chi2 |
hd0 | 1.00 | interpolation enabled, fixes chi2 |
hd20 | 0.88 | effectively x20 bins, improves chi2 |
hd20_cudaFilterModePoint | 1.29 | XHD "HD20" even OK without interpolation |
GPU texture lookup
float scint_wavelength_hd20(curandStateXORWOW& rng) { float u0 = curand_uniform(&rng); float wl ; constexpr float y0 = 0.5f/3.f ; constexpr float y1 = 1.5f/3.f ; constexpr float y2 = 2.5f/3.f ; if( u0 < 0.05f ) { wl = tex2D(scint_tex, u0*20.f , y1 ); } else if ( u0 > 0.95f ) { wl = tex2D (scint_tex, (u0 - 0.95f)*20.f , y2 ); } else { wl = tex2D (scint_tex, u0, y0 ); } return wl ; }
156 NPY<double>* X4Scintillation::CreateGeant4InterpolatedInverseCDF( 157 const G4PhysicsOrderedFreeVector* ScintillatorIntegral, 158 unsigned num_bins, unsigned hd_factor, .. 161 ) 164 double mx = ScintillatorIntegral->GetMaxValue() ; 166 NPY<double>* icdf = NPY<double>::make(3, num_bins, 1 ); 170 int nj = icdf->getShape(1); 180 double edge = 1./double(hd_factor) ; ... 202 for(int j=0 ; j < nj ; j++) 203 { 204 double u_all = double(j)/double(nj) ; 205 double u_lhs = double(j)/double(hd_factor*nj) ; 206 double u_rhs = 1. - edge + double(j)/double(hd_factor*nj) ; 207 208 double energy_all = ScintillatorIntegral->GetEnergy( u_all*mx ); 209 double energy_lhs = ScintillatorIntegral->GetEnergy( u_lhs*mx ); 210 double energy_rhs = ScintillatorIntegral->GetEnergy( u_rhs*mx ); 211 212 double wavelength_all = h_Planck*c_light/energy_all/nm ; 213 double wavelength_lhs = h_Planck*c_light/energy_lhs/nm ; 214 double wavelength_rhs = h_Planck*c_light/energy_rhs/nm ; 215 217 icdf->setValue(0, j, 0, 0, wavelength_all ); 218 icdf->setValue(1, j, 0, 0, wavelength_lhs ); 219 icdf->setValue(2, j, 0, 0, wavelength_rhs ); 220 } 221 return icdf ; 222 }
https://bitbucket.org/simoncblyth/opticks/src/master/extg4/X4Scintillation.cc
https://bitbucket.org/simoncblyth/opticks/src/master/qudarap/qctx.h
\ B / \ . | . / AC ct / n 1 i BetaInverse \ C | C / cos th = ---- = -------- = ------ = --- = ------------- \ . \ | / . / AB bct b n n sampledRI . \ bct / . \ | / BetaInverse \ | / ct maxCos = ----------------- \ |th/ ---- nMax \ | / n \|/ A Particle travels AB, light travels AC, ACB is right angle Only get Cerenkov radiation when cos th <= 1 , beta >= beta_min = 1/n BetaInverse <= BetaInverse_max = n At the small beta threshold AB = AC, beta = beta_min = 1/n eg for n = 1.333, beta_min = 0.75 cos th = 1, th = 0 light is in direction of the particle For ultra relativistic particle beta = 1, there is a maximum angle th = arccos( 1/n )
Rejection Sampling
Start from flat energy distrib
sampleRI < BetaInverse :
168 G4VParticleChange* 169 G4Cerenkov_modified::PostStepDoIt( const G4Track& aTrack, const G4Step& aStep) ... 252 G4double Pmin = Rindex->GetMinLowEdgeEnergy(); 253 G4double Pmax = Rindex->GetMaxLowEdgeEnergy(); 254 G4double dp = Pmax - Pmin; ... 268 G4double maxCos = BetaInverse / nMax; 270 G4double maxSin2 = (1.0 - maxCos) * (1.0 + maxCos); ... 315 for (G4int i = 0; i < fNumPhotons; i++) { 317 318 G4double rand; 319 G4double sampledEnergy, sampledRI; 320 G4double cosTheta, sin2Theta; ... 324 do { 325 rand = G4UniformRand(); 326 sampledEnergy = Pmin + rand * dp; 327 sampledRI = Rindex->Value(sampledEnergy); 334 cosTheta = BetaInverse / sampledRI; 342 sin2Theta = (1.0 - cosTheta)*(1.0 + cosTheta); 343 rand = G4UniformRand(); 344 346 } while (rand*maxSin2 > sin2Theta);
1.5/1.8 = .83, 1.5/1.62 = 0.92
Poor chi2 match investigations
random aligned comparison
256M curand_uniform floats -> Geant4
BUT statistical comparison still grotty chi2/ndf
Resorting to double precision rejection sampling
QUESTIONS
574 template <typename T> 575 inline QCTX_METHOD void qctx<T>::cerenkov_photon_expt( quad4& p, unsigned id, curandStateXORWOW& rng, int print_id ) 576 { 577 double BetaInverse = 1.5 ; 578 double Pmin = 1.55 ; 579 double Pmax = 15.5 ; 580 double nMax = 1.793 ; 581 double maxCos = BetaInverse / nMax; 582 double maxSin2 = ( 1. - maxCos )*( 1. + maxCos ); 583 584 double u0 ; 585 double u1 ; 586 double energy ; 587 double sampledRI ; 588 double cosTheta ; 589 double sin2Theta ; 590 double u_mxs2_s2 ; 592 unsigned loop = 0u ; 593 594 do { 596 u0 = curand_uniform_double(&rng) ; 598 energy = Pmin + u0*(Pmax - Pmin) ; 600 sampledRI = prop->interpolate( 0u, energy ); 602 cosTheta = BetaInverse / sampledRI ; 604 sin2Theta = (1. - cosTheta)*(1. + cosTheta); 606 u1 = curand_uniform_double(&rng) ; 608 u_mxs2_s2 = u1*maxSin2 - sin2Theta ; 610 loop += 1 ; 612 } while ( u_mxs2_s2 > 0. );
590 "uni_acrylic3" Fasteners : cost 65x more than all 45.6k PMTs
uni_acrylic3
LS absorption length (red)
GPU Ray Tracing APIs Converged
NVIDIA OptiX 6->7 : drastically slimmed down
More control/flexibility over everything.
Demands much more developer effort than OptiX 6
IAS < Inst < Solid < Prim < Node
struct CSGFoundry { void upload(); // to GPU ... std::vector<CSGSolid> solid ; // compounds (eg PMT) std::vector<CSGPrim> prim ; std::vector<CSGNode> node ; // shapes, operators std::vector<float4> plan ; // planes std::vector<qat4> tran ; // CSG transforms std::vector<qat4> itra ; // inverse CSG transforms std::vector<qat4> inst ; // instance transforms // entire geometry in four GPU allocations CSGPrim* d_prim ; CSGNode* d_node ; float4* d_plan ; qat4* d_itra ; };
referencing by offset, count
Simple intersect headers, common CPU/GPU types
GAS : Geometry Acceleration Structure
IAS : Instance Acceleration Structure
CSG : Constructive Solid Geometry
1st JUNO Opticks OptiX 7 Ray-trace
Very New CSG "Foundry" CPU/GPU Geometry
Factorize ~300,000 vol -> 10 comp
"progeny digest" characterizes subtree of every volume-node
ridx | plc | prim | component | note |
---|---|---|---|---|
0 | 1 | 3084 | 3084:sWorld | non-repeated remainder |
1 | 25600 | 5 | 5:PMT_3inch_pmt_solid | 4 types of PMT |
2 | 12612 | 5 | 5:NNVTMCPPMTsMask | |
3 | 5000 | 5 | 5:HamamatsuR12860sMask | |
4 | 2400 | 5 | 5:mask_PMT_20inch_vetosMask | |
5 | 590 | 1 | 1:sStrutBallhead | 4 parts of same assembly, BUT not grouped as siblings (not parent-child) |
6 | 590 | 1 | 1:uni1 | |
7 | 590 | 1 | 1:base_steel | |
8 | 590 | 1 | 1:uni_acrylic3 | |
9 | 504 | 130 | 130:sPanel | repeated parts of TT |
Increasing instancing : reduces memory for geometry -> improved performance
Vary Geom. Compare Render Times
Fast render -> Fast simulation
Same viewpoint, vary GPU geometry
Very large range of times 1:600
Table identifies slow geometry to fix :
Good performance for ONLY PMTs :
idx | -e | time(s) | relative | enabled geometry description |
---|---|---|---|---|
0 | 9, | 0.0017 | 0.1702 | ONLY: 130:sPanel |
1 | 7, | 0.0017 | 0.1714 | ONLY: 1:base_steel |
2 | 6, | 0.0019 | 0.1923 | ONLY: 1:uni1 |
3 | 5, | 0.0027 | 0.2780 | ONLY: 1:sStrutBallhead |
4 | 4, | 0.0032 | 0.3268 | ONLY: 5:mask_PMT_20inch_vetosMask |
5 | 1, | 0.0032 | 0.3287 | ONLY: 5:PMT_3inch_pmt_solid |
6 | 2, | 0.0055 | 0.5669 | ONLY: 5:NNVTMCPPMTsMask |
7 | 3, | 0.0074 | 0.7582 | ONLY: 5:HamamatsuR12860sMask |
8 | 1,2,3,4 | 0.0097 | 1.0000 | ONLY PMT |
9 | t8,0 | 0.0099 | 1.0179 | EXCL: 1:uni_acrylic3 3084:sWorld |
10 | 0, | 0.1171 | 12.0293 | ONLY: 3084:sWorld |
11 | t8, | 0.1186 | 12.1769 | EXCL: 1:uni_acrylic3 |
12 | t0, | 0.5278 | 54.2066 | EXCL: 3084:sWorld |
13 | 8, | 0.5310 | 54.5298 | ONLY: 1:uni_acrylic3 |
14 | t3, | 0.6017 | 61.7954 | EXCL: 5:HamamatsuR12860sMask |
15 | t2, | 0.6043 | 62.0620 | EXCL: 5:NNVTMCPPMTsMask |
16 | t5, | 0.6171 | 63.3787 | EXCL: 1:sStrutBallhead |
17 | t6, | 0.6196 | 63.6301 | EXCL: 1:uni1 |
18 | t7, | 0.6226 | 63.9458 | EXCL: 1:base_steel |
19 | t0 | 0.6240 | 64.0879 | 3084:sWorld |
20 | t4, | 0.6243 | 64.1169 | EXCL: 5:mask_PMT_20inch_vetosMask |
21 | t9, | 0.6335 | 65.0636 | EXCL: 130:sPanel |
22 | t1, | 0.6391 | 65.6384 | EXCL: 5:PMT_3inch_pmt_solid |