2, జులై 2026, గురువారం

f&o stocks.....................











 

Sample CUDA program (GPU, HPC, NVIDIA)

#include <stdio.h>
#include <stdlib.h>


/*
 * Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, n).
 *
 * Parameters:
 *   a, b  input arrays, each holding at least n floats (read only).
 *   c     output array holding at least n floats.
 *   n     number of elements to process; values <= 0 result in no work.
 *
 * Launch layout: 1D grid of 1D blocks. Each thread starts at its flat global
 * index and advances by the total number of launched threads, so the result
 * is correct for any grid/block size, including grids that provide fewer
 * than n threads. No shared memory is used.
 */
__global__ void vecAdd(const float *a, const float *b, float *c, int n) {
    // Index in size_t so that blockIdx.x * blockDim.x cannot wrap or turn
    // negative when narrowed, which would defeat the bounds check.
    const size_t count = (n > 0) ? (size_t)n : 0;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         i < count;
         i += stride) {
        c[i] = a[i] + b[i];
    }
}


/*
 * Evaluates a CUDA runtime call; on failure prints the source location and
 * the runtime's error string to stderr and terminates the process.
 */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/*
 * Host driver for the vector-add sample.
 *
 * Fills two host arrays of n floats (a[i] = i, b[i] = 2*i), copies them to
 * the device, launches vecAdd, copies the sum back, and prints the first and
 * last elements of the result.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if a host allocation or any
 * CUDA runtime call fails.
 */
int main() {
    const int n = 1024;
    const size_t bytes = (size_t)n * sizeof(float);

    // Host buffers.
    float *h_a = (float*)malloc(bytes);
    float *h_b = (float*)malloc(bytes);
    float *h_c = (float*)malloc(bytes);
    if (h_a == NULL || h_b == NULL || h_c == NULL) {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
        free(h_a); free(h_b); free(h_c);   // free(NULL) is a no-op
        return EXIT_FAILURE;
    }

    for (int i = 0; i < n; i++) {
        h_a[i] = i * 1.0f;
        h_b[i] = i * 2.0f;
    }

    // Device buffers.
    float *d_a = NULL, *d_b = NULL, *d_c = NULL;
    CUDA_CHECK(cudaMalloc(&d_a, bytes));
    CUDA_CHECK(cudaMalloc(&d_b, bytes));
    CUDA_CHECK(cudaMalloc(&d_c, bytes));

    CUDA_CHECK(cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, h_b, bytes, cudaMemcpyHostToDevice));

    // Round the block count up so every element is covered by a thread.
    const int threadsPerBlock = 256;
    const int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock;
    vecAdd<<<blocksPerGrid, threadsPerBlock>>>(d_a, d_b, d_c, n);
    // A kernel launch returns no status; launch-configuration errors are
    // only visible through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());

    // Blocking copy: also surfaces any error raised while the kernel ran.
    CUDA_CHECK(cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost));

    printf("c[0] = %f\n", h_c[0]);
    // Index derived from n so the report stays valid if n changes.
    printf("c[%d] = %f\n", n - 1, h_c[n - 1]);

    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_c));

    free(h_a);
    free(h_b);
    free(h_c);

    return 0;
}

OUTPUT:
c[0] = 0.000000
c[1023] = 3069.000000

CUDA = Compute Unified Device Architecture




Mr. Dr.................