CUDA似乎达到了极限，但那是什么极限？

/**
 * Marks each segment that properly crosses a later segment belonging to a
 * different polyline.
 *
 * Data layout: segment k runs from points[k] to points[k + 1], so the kernel
 * reads points[0 .. segmentCount] and the array must hold segmentCount + 1
 * elements.
 *
 * Launch layout: the global thread index is built from the x dimension only,
 * one thread per segment; threads with an index >= segmentCount exit at once.
 *
 * Thread k only examines segments k + 2 .. segmentCount - 1, so a crossing is
 * recorded on the lower-indexed segment of the pair. Each thread scans all
 * later segments, which makes the total work grow quadratically with
 * segmentCount.
 *
 * Crossing test: with a = p2 - p1 and b = p4 - p3, solve
 *     p1 + t * a == p3 + s * b
 * and report a crossing when both t and s lie strictly inside (0, 1).
 * Touching at an endpoint is therefore not a crossing. Testing the parameters
 * instead of comparing the intersection point against each segment's bounding
 * box keeps vertical and horizontal segments working: their bounding box has
 * zero width on one axis, so a strict box test can never succeed for them.
 *
 * @param points                    Vertices; segmentCount + 1 entries.
 * @param segmentToPolylineIndexMap Polyline id of every segment; a negative id
 *                                  marks a segment that is ignored.
 * @param segmentCount              Number of segments.
 * @param output                    output[k] is set to 1 when segment k crosses
 *                                  a later segment of another polyline, else 0.
 */
__global__ void DoCheck(float2* points, int* segmentToPolylineIndexMap, int segmentCount, int* output)
{
    const int segmentIndex = threadIdx.x + blockIdx.x * blockDim.x;
    if (segmentIndex >= segmentCount)
        return;

    const int polylineIndex = segmentToPolylineIndexMap[segmentIndex];
    int result = 0;

    if (polylineIndex >= 0)
    {
        const float2 p1 = points[segmentIndex];
        const float2 p2 = points[segmentIndex + 1];

        // Direction of this thread's segment.
        float2 a;
        a.x = p2.x - p1.x;
        a.y = p2.y - p1.y;

        // Stop at the first hit: result is only ever OR-ed, so further
        // iterations cannot change the value that is written out.
        for (int i = segmentIndex + 2; i < segmentCount && result == 0; i++)
        {
            const int currentPolylineIndex = segmentToPolylineIndexMap[i];

            // Only segments of a different polyline count, and negative map
            // entries (fake segments) are skipped.
            const bool isLegit = (currentPolylineIndex != polylineIndex && currentPolylineIndex >= 0);

            const float2 p3 = points[i];
            const float2 p4 = points[i + 1];

            // Direction of the candidate segment.
            float2 b;
            b.x = p4.x - p3.x;
            b.y = p4.y - p3.y;

            // Offset between the two segment origins.
            float2 c;
            c.x = p3.x - p1.x;
            c.y = p3.y - p1.y;

            // 2D cross product a x b; zero means the segments are parallel.
            const float denominator = a.x * b.y - a.y * b.x;
            const bool isParallel = (denominator == 0.0f);

            // Line parameters of the intersection point. For parallel
            // segments these are inf/NaN and are masked by isParallel below.
            const float t = (c.x * b.y - c.y * b.x) / denominator;
            const float s = (c.x * a.y - c.y * a.x) / denominator;

            result |= (isLegit && !isParallel &&
                       t > 0.0f && t < 1.0f &&
                       s > 0.0f && s < 1.0f);
        }
    }

    output[segmentIndex] = result;
}

1条回答

网友
1楼 · 发布于 2024-10-01 13:36:43

正在被耗尽的资源是时间。在所有当前的CUDA平台上，显示驱动程序都包含一个看门狗定时器，它会终止任何执行时间超过几秒钟的内核。在同时负责显示输出的显卡上运行代码就会受到此限制。
在您使用的WDDM Windows平台上，有三种可能的解决方案/变通方法：
买一张Tesla卡并使用TCC驱动程序，这就彻底解决了这个问题
尝试修改注册表设置以增加计时器限制（有关详细信息，请在google上搜索TdrDelay注册表项，但我不是Windows用户，不能比这更具体）
将内核代码修改为“可重入”，并在多次内核启动（而不是一次）中处理数据并行工作负载。内核启动开销并不是那么大，在多次内核运行中处理工作负载通常很容易实现，具体取决于您使用的算法。

相关问题更多 >

编程相关推荐

热门问题

热门文章