LZ-Opticks-NVIDIA OptiX 6->7 : Notes

Progress

NEXT STEPS:

Simon C Blyth, April 4, 2021

"Foundry" based CSG geometry model : Solid/Prim/Node

All solids+constituents created via Foundry (ref by index)

Node Examples:

Foundry::upload

all solids in geometry -> only four GPU allocations

  • d_prim, d_node, d_plan, d_itra

https://github.com/simoncblyth/OptiXTest/blob/main/Foundry.h https://github.com/simoncblyth/OptiXTest/blob/main/qat4.h

OptiX 7 Intersect Prim (numNode, nodeOffset) ~ HitGroupData


150 extern "C" __global__ void __intersection__is()
151 {
152     HitGroupData* hg = (HitGroupData*)optixGetSbtDataPointer();
153     int numNode = hg->numNode ;
154     int nodeOffset = hg->nodeOffset ;
155
156     const Node* node = params.node + nodeOffset ;
157     const float4* plan = params.plan ;
158     const qat4*   itra = params.itra ;
159
160     const float  t_min = optixGetRayTmin() ;
161     const float3 ray_origin = optixGetObjectRayOrigin();
162     const float3 ray_direction = optixGetObjectRayDirection();
163
164     float4 isect ;
165     if(intersect_prim(isect, numNode, node, plan, itra,
             t_min , ray_origin, ray_direction ))
166     {
167         const unsigned hitKind = 0u ;
168         unsigned a0, a1, a2, a3;
169
170         a0 = float_as_uint( isect.x );
171         a1 = float_as_uint( isect.y );
172         a2 = float_as_uint( isect.z );
173         a3 = float_as_uint( isect.w ) ;
174
175         optixReportIntersection( isect.w, hitKind, a0, a1, a2, a3 );
176     }
177 }

Prim : 1 or more Node : Holds AABB

q x y z w
q0 numNode nodeOffset tranOffset planOffset
q1 sbtIndexOffset      
q2 BBMin_x BBMin_y BBMin_z BBMax_x
q3 BBMax_y BBMax_z    
 36 PrimSpec Prim::MakeSpec( const Prim* prim0,
                             unsigned primIdx,
                             unsigned numPrim ) // static
 37 {
 38     const Prim* prim = prim0 + primIdx ;
 40     PrimSpec ps ;
 41     ps.aabb = prim->AABB() ;
 42     ps.sbtIndexOffset = prim->sbtIndexOffsetPtr() ;
 43     ps.num_prim = numPrim ;
 44     ps.stride_in_bytes = sizeof(Prim);
 45     return ps ;
 46 }  // used on CPU to give device side pointers offset from d_prim 

167 PrimSpec Foundry::getPrimSpecDevice(unsigned solidIdx) const
168 {
170     const Solid* so = solid.data() + solidIdx ;
171     return Prim::MakeSpec( d_prim,  so->primOffset, so->numPrim ) ;;
174 }

Node : 4x4x32bit : 6 float params, 6 float AABB, 4 ints : typecode/gtranformIdx/boundary/index

sp:sphere zs:zsphere cy:cylinder ds:disc cn:cone hy:hyperboloid pl:plane sl:slab cx:convexpolyhedron b3:box3

q x y z w notes
q0

sp/zs/cy:cen_x cn:r1 hy:r0 z=0 waist b3:fx pl/sl:nx

cx:planeIdx

sp/zs/cy:cen_y cn:z1 hy:zf b3:fy pl/sl:ny

cx:planeNum

sp/zs/cy:cen_z cn:r2 hy:z1 b3:fz pl/sl:nz ds:inner_r

sp/zs/cy:radius cn:z2 hy:z2

pl:d ds:radius

cn:z2 > z1 hy:z2 > z1 b3: fullside dimensions, center always origin
q1 zs:zdelta_0 sl:a cy:z1 ds:z1 zs:zdelta_1 sl:b cy:z2 ds:z2 boundary index sl:a,b offsets from origin cy:z2 > z1
q2 BBMin_x BBMin_y BBMin_z BBMax_x q2.w was previously typecode
q3 BBMax_y BBMax_z typecode gtransformIdx complement  

intersect_prim -> intersect_node/intersect_tree


1089 INTERSECT_FUNC
1090 bool intersect_prim( float4& isect, int numNode, const Node* node, const float4* plan, const qat4* itra,
                          const float t_min , const float3& ray_origin, const float3& ray_direction )
1091 {
1092     return numNode == 1
1093                ?
1094                   intersect_node(isect,          node, plan, itra, t_min, ray_origin, ray_direction )
1095                :
1096                   intersect_tree(isect, numNode, node, plan, itra, t_min, ray_origin, ray_direction )
1097                ;
1098 }

Intersection maths : intersect_node.h intersect_tree.h -> allows testing on CPU with tests/ScanTest.cc

https://github.com/simoncblyth/OptiXTest/blob/main/intersect_node.h

https://github.com/simoncblyth/OptiXTest/blob/main/intersect_tree.h

intersect_node : handling scale-rotate-translate transforms and complemented "inside-out" solids

1028 bool intersect_node( float4& isect, const Node* node, const float4* plan, const qat4* itra, const float t_min ,
                          const float3& ray_origin , const float3& ray_direction )
1029 {
1030     const unsigned typecode = node->typecode() ;
1031     const unsigned gtransformIdx = node->gtransformIdx() ;
1032     const bool complement = node->complement();

1034     const qat4* q = gtransformIdx > 0 ? itra + gtransformIdx - 1 : nullptr ;  // gtransformIdx is 1-based, 0 meaning None
1036     float3 origin    = q ? q->right_multiply(ray_origin,    1.f) : ray_origin ;
1037     float3 direction = q ? q->right_multiply(ray_direction, 0.f) : ray_direction ;
....
1055     bool valid = false ;
1056     switch(typecode)
1057     {
1058     case CSG_SPHERE:   valid = intersect_node_sphere(  isect, node->q0,           t_min, origin, direction ) ; break ;
1059     case CSG_ZSPHERE:  valid = intersect_node_zsphere( isect, node->q0, node->q1, t_min, origin, direction ) ; break ;
1060     case CSG_CONVEXP:  valid = intersect_node_convexp( isect, node, plan,         t_min, origin, direction ) ; break ;
1061     case CSG_CONE:     valid = intersect_node_cone(    isect, node->q0,           t_min, origin, direction ) ; break ;
1062     case CSG_HYPERB:   valid = intersect_node_hyperb(  isect, node->q0,           t_min, origin, direction ) ; break ;
1063     case CSG_BOX3:     valid = intersect_node_box3(    isect, node->q0,           t_min, origin, direction ) ; break ;
1064     case CSG_PLANE:    valid = intersect_node_plane(   isect, node->q0,           t_min, origin, direction ) ; break ;
1065     case CSG_SLAB:     valid = intersect_node_slab(    isect, node->q0, node->q1, t_min, origin, direction ) ; break ;
1066     case CSG_CYLINDER: valid = intersect_node_cylinder(isect, node->q0, node->q1, t_min, origin, direction ) ; break ;
1067     case CSG_DISC:     valid = intersect_node_disc(    isect, node->q0, node->q1, t_min, origin, direction ) ; break ;
1068     }
1069     if(valid && q ) q->left_multiply_inplace( isect, 0.f ) ;
         // normals transform with inverse-transform-transposed -> left_multiply 
1076     if(complement){ isect.x = -isect.x ; isect.y = -isect.y ; isect.z = -isect.z ; }
         // flip complement normal, even for miss need to signal the complement with a -0.f  
1082     return valid ;
1083 }

intersect_tree : Boolean CSG implementation : tranche slices of postorder sequence...

https://github.com/simoncblyth/OptiXTest/blob/main/intersect_tree.h

 10 #include "error.h"
 11 #include "tranche.h"
 12 #include "csg.h"
 13 #include "postorder.h"
 14 #include "pack.h"
 15 #include "csg_classify.h"
 16
 19 bool intersect_tree( float4& isect,
                         int numNode,
                         const Node* node,
                         const float4* plan0,
                         const qat4* itra0,
                         const float t_min ,
                         const float3& ray_origin,
                         const float3& ray_direction
                       )
 20 {
 21     unsigned height = TREE_HEIGHT(numNode) ; // 1->0, 3->1, 7->2, 15->3, 31->4
 22     float propagate_epsilon = 0.0001f ;  // ?
 23     int ierr = 0 ;
 24
 25     LUT lut ;
 26     Tranche tr ;
 27     tr.curr = -1 ;
 29     unsigned fullTree = PACK4(0,0, 1 << height, 0 ) ;  // leftmost: 1<<height,  root:1>>1 = 0 ("parent" of root)
 30
 35     tranche_push( tr, fullTree, t_min );
 37     CSG_Stack csg ;
 38     csg.curr = -1 ;
 39     int tloop = -1 ;
 40
 41     while (tr.curr > -1)
 42     {
 ..

CSG Parade Grid 1

CSG Parade Grid 2

CSG Working

CSG Boolean Parade Grid

Clustered Sphere Issue : Testing split shapes in single GAS

/env/presentation/OptiXTest/clustered_sphere_one_visible_only_half.png /env/presentation/OptiXTest/pyvista_clustered_sphere_half.png

Expecting 9 spheres (CPU ScanTest "PyVista" view on right)

"Extra" Background Slides Follow

Two-Level Hierarchy : Instance transforms (TLAS) over Geometry (BLAS)

OptiX supports multiple instance levels : IAS->IAS->GAS BUT: Simple two-level is faster : works in hardware RT Cores

AS
Acceleration Structure
TLAS (IAS)
4x4 transforms, refs to BLAS
BLAS (GAS)
triangles : vertices, indices
custom primitives : AABB
AABB
axis-aligned bounding box

SBT : Shader Binding Table

Flexibly binds together:

  1. geometry objects
  2. shader programs
  3. data for shader programs

Hidden in OptiX 1-6 APIs


Optimizing Geometry : Split BLAS to avoid overlapping bbox

/env/presentation/nvidia/optimize/split_blas_half.png

Optimization : deciding where to draw lines between:

  1. structure and solid (IAS and GAS)
  2. solids within GAS (bbox choice to minimize traversal intersection tests)

Where those lines are drawn defines the AS

https://developer.nvidia.com/blog/best-practices-using-nvidia-rtx-ray-tracing/


Optimizing Geometry : Merge BLAS when lots of overlaps

/env/presentation/nvidia/optimize/merge_blas_half.png

https://developer.nvidia.com/blog/best-practices-using-nvidia-rtx-ray-tracing/


Ray Intersection with Transformed Object -> Geometry Instancing

/env/presentation/instancing/ray_intersection_in_two_spaces_p308_shirley_ch13_half.png

Fig 13.5 "Realistic Ray Tracing", Peter Shirley

Advantages apply equally to acceleration structures

Equivalent Intersects -> same t

  1. ray with ellipsoid : M*p
  2. M-1 ray with sphere : p

Local Frame Advantages

  1. simpler intersect (sphere vs ellipsoid)
  2. closer to origin -> better precision

Geometry Instancing Advantages

Requirements


G4Boolean -> CUDA/OptiX Intersection Program Implementing CSG

Complete Binary Tree, pick between pairs of nearest intersects:

UNION tA < tB Enter B Exit B Miss B
Enter A ReturnA LoopA ReturnA
Exit A ReturnA ReturnB ReturnA
Miss A ReturnB ReturnB ReturnMiss
[1] Ray Tracing CSG Objects Using Single Hit Intersections, Andrew Kensler (2006)
with corrections by author of XRT Raytracer http://xrt.wikidot.com/doc:csg
[2] https://bitbucket.org/simoncblyth/opticks/src/tip/optixrap/cu/csg_intersect_boolean.h
Similar to binary expression tree evaluation using postorder traverse.

Constructive Solid Geometry (CSG) : Shapes defined "by construction"

Simple by construction definition, implicit geometry.

CSG expressions

3D Parametric Ray : ray(t) = r0 + t rDir

Ray Geometry Intersection

How to pick exactly ?


CSG : Which primitive intersect to pick ?

Classical Roth diagram approach

Computational requirements:

BUT : High performance on GPU requires:

Classical approach not appropriate on GPU


CSG Complete Binary Tree Serialization -> simplifies GPU side

Geant4 solid -> CSG binary tree (leaf primitives, non-leaf operators, 4x4 transforms on any node)

Serialize to complete binary tree buffer:

Height 3 complete binary tree with level order indices:

                                                   depth     elevation

                     1                               0           3

          10                   11                    1           2

     100       101        110        111             2           1

 1000 1001  1010 1011  1100 1101  1110  1111         3           0

postorder_next(i,elevation) = i & 1 ? i >> 1 : (i << elevation) + (1 << elevation) ; // from pattern of bits

Postorder tree traverse visits all nodes, starting from leftmost, such that children are visited prior to their parents.


Evaluative CSG intersection Pseudocode : recursion emulated

fullTree = PACK( 1 << height, 1 >> 1 )  // leftmost, parent_of_root(=0)
tranche.push(fullTree, ray.tmin)

while (!tranche.empty)         // stack of begin/end indices 
{
    begin, end, tmin <- tranche.pop  ; node <- begin ;
    while( node != end )                   // over tranche of postorder traversal 
    {
        elevation = height - TREE_DEPTH(node) ;
        if(is_primitive(node)){ isect <- intersect_primitive(node, tmin) ;  csg.push(isect) }
        else{
            i_left, i_right = csg.pop, csg.pop            // csg stack of intersect normals, t 
            l_state = CLASSIFY(i_left, ray.direction, tmin)
            r_state = CLASSIFY(i_right, ray.direction, tmin)
            action = LUT(operator(node), leftIsCloser)(l_state, r_state)

            if(      action is ReturnLeft/Right)     csg.push(i_left or i_right)
            else if( action is LoopLeft/Right)
            {
                left = 2*node ; right = 2*node + 1 ;
                endTranche = PACK( node,  end );
                leftTranche = PACK(  left << (elevation-1), right << (elevation-1) )
                rightTranche = PACK(  right << (elevation-1),  node  )
                loopTranche = action ? leftTranche : rightTranche

                tranche.push(endTranche, tmin)
                tranche.push(loopTranche, tminAdvanced )  // subtree re-traversal with changed tmin 
                break ; // to next tranche
            }
        }
        node <- postorder_next(node, elevation)         // bit twiddling postorder 
    }
}
isect = csg.pop();         // winning intersect  

https://bitbucket.org/simoncblyth/opticks/src/tip/optixrap/cu/csg_intersect_boolean.h


CSG Deep Tree : Positivize tree using De Morgan's laws

1st step to allow balancing : Positivize : remove CSG difference di operators

                                                     ...    ...

                                               un          cy

                                       un          cy

                               un          cy

                       un          cy

               un          cy

       di          cy

   cy      cy

                                                     ...    ...

                                               un          cy

                                       un          cy

                               un          cy

                       un          cy

               un          cy

       in          cy

   cy      !cy


CSG Examples