9
9
#include < ATen/ATen.h>
10
10
#include < ATen/cuda/CUDAContext.h>
11
11
#include < c10/cuda/CUDAGuard.h>
12
- #include < thrust/device_vector.h>
13
- #include < thrust/scan.h>
14
12
#include < cstdio>
15
13
#include " marching_cubes/tables.h"
16
14
@@ -40,20 +38,6 @@ through" each cube in the grid.
40
38
// EPS: Used to indicate if two float values are close
41
39
__constant__ const float EPSILON = 1e-5 ;
42
40
43
- // Thrust wrapper for exclusive scan
44
- //
45
- // Args:
46
- // output: pointer to on-device output array
47
- // input: pointer to on-device input array, where scan is performed
48
- // numElements: number of elements for the input array
49
- //
50
- void ThrustScanWrapper (int * output, int * input, int numElements) {
51
- thrust::exclusive_scan (
52
- thrust::device_ptr<int >(input),
53
- thrust::device_ptr<int >(input + numElements),
54
- thrust::device_ptr<int >(output));
55
- }
56
-
57
41
// Linearly interpolate the position where an isosurface cuts an edge
58
42
// between two vertices, based on their scalar values
59
43
//
@@ -455,19 +439,24 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubesCuda(
455
439
grid.x = 65535 ;
456
440
}
457
441
442
+ using at::indexing::None;
443
+ using at::indexing::Slice;
444
+
458
445
auto d_voxelVerts =
459
- at::zeros ({numVoxels}, at::TensorOptions ().dtype (at::kInt ))
446
+ at::zeros ({numVoxels + 1 }, at::TensorOptions ().dtype (at::kInt ))
460
447
.to (vol.device ());
448
+ auto d_voxelVerts_ = d_voxelVerts.index ({Slice (1 , None)});
461
449
auto d_voxelOccupied =
462
- at::zeros ({numVoxels}, at::TensorOptions ().dtype (at::kInt ))
450
+ at::zeros ({numVoxels + 1 }, at::TensorOptions ().dtype (at::kInt ))
463
451
.to (vol.device ());
452
+ auto d_voxelOccupied_ = d_voxelOccupied.index ({Slice (1 , None)});
464
453
465
454
// Execute "ClassifyVoxelKernel" kernel to precompute
466
455
// two arrays - d_voxelOccupied and d_voxelVertices to global memory,
467
456
// which stores the occupancy state and number of voxel vertices per voxel.
468
457
ClassifyVoxelKernel<<<grid, threads, 0 , stream>>> (
469
- d_voxelVerts .packed_accessor32 <int , 1 , at::RestrictPtrTraits>(),
470
- d_voxelOccupied .packed_accessor32 <int , 1 , at::RestrictPtrTraits>(),
458
+ d_voxelVerts_ .packed_accessor32 <int , 1 , at::RestrictPtrTraits>(),
459
+ d_voxelOccupied_ .packed_accessor32 <int , 1 , at::RestrictPtrTraits>(),
471
460
vol.packed_accessor32 <float , 3 , at::RestrictPtrTraits>(),
472
461
isolevel);
473
462
AT_CUDA_CHECK (cudaGetLastError ());
@@ -477,18 +466,12 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubesCuda(
477
466
// count for voxels in the grid and compute the number of active voxels.
478
467
// If the number of active voxels is 0, return zero tensor for verts and
479
468
// faces.
480
- auto d_voxelOccupiedScan =
481
- at::zeros ({numVoxels}, at::TensorOptions ().dtype (at::kInt ))
482
- .to (vol.device ());
483
- ThrustScanWrapper (
484
- d_voxelOccupiedScan.data_ptr <int >(),
485
- d_voxelOccupied.data_ptr <int >(),
486
- numVoxels);
469
+
470
+ auto d_voxelOccupiedScan = at::cumsum (d_voxelOccupied, 0 );
471
+ auto d_voxelOccupiedScan_ = d_voxelOccupiedScan.index ({Slice (1 , None)});
487
472
488
473
// number of active voxels
489
- int lastElement = d_voxelVerts[numVoxels - 1 ].cpu ().item <int >();
490
- int lastScan = d_voxelOccupiedScan[numVoxels - 1 ].cpu ().item <int >();
491
- int activeVoxels = lastElement + lastScan;
474
+ int activeVoxels = d_voxelOccupiedScan[numVoxels].cpu ().item <int >();
492
475
493
476
const int device_id = vol.device ().index ();
494
477
auto opt = at::TensorOptions ().dtype (at::kInt ).device (at::kCUDA , device_id);
@@ -509,22 +492,17 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubesCuda(
509
492
CompactVoxelsKernel<<<grid, threads, 0 , stream>>> (
510
493
d_compVoxelArray.packed_accessor32 <int , 1 , at::RestrictPtrTraits>(),
511
494
d_voxelOccupied.packed_accessor32 <int , 1 , at::RestrictPtrTraits>(),
512
- d_voxelOccupiedScan .packed_accessor32 <int , 1 , at::RestrictPtrTraits>(),
495
+ d_voxelOccupiedScan_ .packed_accessor32 <int , 1 , at::RestrictPtrTraits>(),
513
496
numVoxels);
514
497
AT_CUDA_CHECK (cudaGetLastError ());
515
498
cudaDeviceSynchronize ();
516
499
517
500
// Scan d_voxelVerts array to generate offsets of vertices for each voxel
518
- auto d_voxelVertsScan = at::zeros ({numVoxels}, opt);
519
- ThrustScanWrapper (
520
- d_voxelVertsScan.data_ptr <int >(),
521
- d_voxelVerts.data_ptr <int >(),
522
- numVoxels);
501
+ auto d_voxelVertsScan = at::cumsum (d_voxelVerts, 0 );
502
+ auto d_voxelVertsScan_ = d_voxelVertsScan.index ({Slice (1 , None)});
523
503
524
504
// total number of vertices
525
- lastElement = d_voxelVerts[numVoxels - 1 ].cpu ().item <int >();
526
- lastScan = d_voxelVertsScan[numVoxels - 1 ].cpu ().item <int >();
527
- int totalVerts = lastElement + lastScan;
505
+ int totalVerts = d_voxelVertsScan[numVoxels].cpu ().item <int >();
528
506
529
507
// Execute "GenerateFacesKernel" kernel
530
508
// This runs only on the occupied voxels.
@@ -544,7 +522,7 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubesCuda(
544
522
faces.packed_accessor <int64_t , 2 , at::RestrictPtrTraits>(),
545
523
ids.packed_accessor <int64_t , 1 , at::RestrictPtrTraits>(),
546
524
d_compVoxelArray.packed_accessor32 <int , 1 , at::RestrictPtrTraits>(),
547
- d_voxelVertsScan .packed_accessor32 <int , 1 , at::RestrictPtrTraits>(),
525
+ d_voxelVertsScan_ .packed_accessor32 <int , 1 , at::RestrictPtrTraits>(),
548
526
activeVoxels,
549
527
vol.packed_accessor32 <float , 3 , at::RestrictPtrTraits>(),
550
528
faceTable.packed_accessor32 <int , 2 , at::RestrictPtrTraits>(),
0 commit comments