Point Cloud Library (PCL) 1.12.0
Loading...
Searching...
No Matches
scan_block.hpp
1/*
2* Software License Agreement (BSD License)
3*
4* Copyright (c) 2011, Willow Garage, Inc.
5* All rights reserved.
6*
7* Redistribution and use in source and binary forms, with or without
8* modification, are permitted provided that the following conditions
9* are met:
10*
11* * Redistributions of source code must retain the above copyright
12* notice, this list of conditions and the following disclaimer.
13* * Redistributions in binary form must reproduce the above
14* copyright notice, this list of conditions and the following
15* disclaimer in the documentation and/or other materials provided
16* with the distribution.
17* * Neither the name of Willow Garage, Inc. nor the names of its
18* contributors may be used to endorse or promote products derived
19* from this software without specific prior written permission.
20*
21* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32* POSSIBILITY OF SUCH DAMAGE.
33*
34* Author: Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com)
35*/
36
37#ifndef PCL_GPU_OCTREE_SCAN_BLOCK_HPP
38#define PCL_GPU_OCTREE_SCAN_BLOCK_HPP
39
40
41namespace pcl
42{
43 namespace device
44 {
46
47 template <ScanKind Kind , class T>
48 __device__ __forceinline__ T scan_warp ( volatile T *ptr , const unsigned int idx = threadIdx.x )
49 {
50 const unsigned int lane = idx & 31; // index of thread in warp (0..31)
51
52 if ( lane >= 1) ptr [idx ] = ptr [idx - 1] + ptr [idx];
53 if ( lane >= 2) ptr [idx ] = ptr [idx - 2] + ptr [idx];
54 if ( lane >= 4) ptr [idx ] = ptr [idx - 4] + ptr [idx];
55 if ( lane >= 8) ptr [idx ] = ptr [idx - 8] + ptr [idx];
56 if ( lane >= 16) ptr [idx ] = ptr [idx - 16] + ptr [idx];
57
58 if( Kind == inclusive )
59 return ptr [idx ];
60 else
61 return (lane > 0) ? ptr [idx - 1] : 0;
62 }
63
64 template <ScanKind Kind , class T>
65 __device__ __forceinline__ T scan_block( volatile T *ptr , const unsigned int idx = threadIdx.x )
66 {
67 const unsigned int lane = idx & 31;
68 const unsigned int warpid = idx >> 5;
69
70 // Step 1: Intra - warp scan in each warp
71 T val = scan_warp <Kind>( ptr , idx );
72
73 __syncthreads ();
74
75 // Step 2: Collect per - warp partial results
76
77 /* if( warpid == 0 )
78 if( lane == 31 )
79 ptr [ warpid ] = ptr [idx ];
80
81 __syncthreads ();
82
83 if( warpid > 0 ) */
84 if( lane == 31 )
85 ptr [ warpid ] = ptr [idx ];
86
87 __syncthreads ();
88
89 // Step 3: Use 1st warp to scan per - warp results
90 if( warpid == 0 )
91 scan_warp<inclusive>( ptr , idx );
92
93 __syncthreads ();
94
95 // Step 4: Accumulate results from Steps 1 and 3
96 if ( warpid > 0)
97 val = ptr [warpid -1] + val;
98
99 __syncthreads ();
100
101 // Step 5: Write and return the final result
102 ptr[idx] = val;
103
104 __syncthreads ();
105
106 return val ;
107 }
108 }
109}
110
111#endif /* PCL_GPU_OCTREE_SCAN_BLOCK_HPP */
__device__ __forceinline__ T scan_warp(volatile T *ptr, const unsigned int idx=threadIdx.x)
Definition device.hpp:87
__device__ __forceinline__ T scan_block(volatile T *ptr, const unsigned int idx=threadIdx.x)