When I launch more than 160 blocks at once, the device function sometimes seems to execute twice for one element, when it should execute exactly once per block. I'm using a GTX 1050, which I thought has several hundred CUDA cores, so I didn't expect the block count to be an issue.
The kernel should turn every element of the array into 118 (13 * 9 + 1), but instead it returns a single 1063 and leaves the rest unchanged at 13.
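For reference, 1063 is exactly what you get by running 118 through the function a second time, which is why it looks like a double execution. A quick host-only sketch of that arithmetic (the step helper here is just for illustration, not part of my real code):

#include <iostream>

// Illustration only: the same update rule as the device function (seed = seed * 9 + 1).
unsigned int step(unsigned int seed)
{
    return seed * 9 + 1;
}

int main()
{
    unsigned int once  = step(13);    // 13  * 9 + 1 = 118
    unsigned int twice = step(once);  // 118 * 9 + 1 = 1063
    std::cout << once << ' ' << twice << '\n';  // prints: 118 1063
    return 0;
}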
I am new to CUDA (and to C++ in general), and I am still fuzzy on pointers.
#include "cuda_runtime.h" #include "device_launch_parameters.h" #include <iostream> #include <stdio.h> __host__ __device__ unsigned int random(unsigned int &seed) { seed = seed * 9 + 1; return seed; } __global__ void rndKernel(unsigned int *seeds) { random(seeds[blockIdx.x]); } int main() { const unsigned int size = 161; unsigned int* dev_seeds; unsigned int* seeds; cudaMalloc((void**)&dev_seeds, sizeof(unsigned int) * size); seeds = new unsigned int[size]; for (int i = 0; i < size; i++) { seeds[i] = 13; } cudaMemcpy(dev_seeds, seeds, sizeof(unsigned int) * size, cudaMemcpyHostToDevice); rndKernel <<< size, 1 >>> (dev_seeds); cudaMemcpy(seeds, dev_seeds, sizeof(unsigned int) * size, cudaMemcpyDeviceToHost); cudaFree(dev_seeds); for (int i = 0; i < size; i++) { std::cout << seeds[i] << '\n'; } free(seeds); return 0; } https://stackoverflow.com/questions/65588992/unexpected-behavior-with-more-than-160-cuda-blocks January 06, 2021 at 09:34AM
https://stackoverflow.com/questions/65588992/unexpected-behavior-with-more-than-160-cuda-blocks
January 06, 2021 at 09:34AM