The following commands have been tested on Ubuntu 20.04.6 with gcc 7.5.0, g++ 7.5.0, and NVIDIA CUDA 11.1.0.
Step-1: (Installing required packages)
sudo apt install gcc-7
sudo apt install g++-7
sudo apt install git
sudo apt install python3-pip
sudo apt-get install build-essential xutils-dev bison zlib1g-dev flex libglu1-mesa-dev
sudo apt-get install doxygen graphviz
sudo apt-get install python-pmw python3-ply python3-numpy libpng-dev python3-matplotlib
sudo apt-get install libxi-dev libxmu-dev freeglut3-dev
git clone https://github.com/gpgpu-sim/gpgpu-sim_distribution
wget https://developer.download.nvidia.com/compute/cuda/11.1.0/local_installers/cuda_11.1.0_455.23.05_linux.run
sudo sh cuda_11.1.0_455.23.05_linux.run (this launches an interactive installer; type accept, then deselect the Driver component, keep everything else selected, and proceed with the installation)
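To confirm the toolkit is in place (an optional sanity check; the runfile installer normally creates a /usr/local/cuda symlink pointing at /usr/local/cuda-11.1):
gcc --version
/usr/local/cuda/bin/nvcc --version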
Step-2: (Building the files)
cd gpgpu-sim_distribution
export CUDA_INSTALL_PATH=/usr/local/cuda
bash (start a bash shell; setup_environment is written for bash)
source setup_environment
make
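If the build succeeds, it produces the simulator's own copy of libcudart.so somewhere under the lib directory of the source tree (the exact subdirectory depends on the build configuration). A quick, optional way to check:
find lib -name "libcudart*"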
Step-3: (Running the simulator)
cd
nano .bashrc (opens the .bashrc file in the nano editor)
Go to the end of the file and add the following lines:
export PATH="/usr/local/cuda-11.1/bin:$PATH"
export LD_LIBRARY_PATH="/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH"
Then press Ctrl+S to save the file and Ctrl+X to exit nano.
source .bashrc
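To verify that the new paths were picked up (optional check; which and echo are standard shell tools):
which nvcc (should print /usr/local/cuda-11.1/bin/nvcc)
echo $LD_LIBRARY_PATH (should contain /usr/local/cuda-11.1/lib64)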
mkdir test
cd test
touch test.cu
After this, open test.cu in the ~/test folder and paste the code below:
#include <stdio.h>
#include <cuda.h>
#include <stdlib.h>
#include <time.h>

#define N 4096 // size of array

// Kernel: each thread adds one pair of elements.
__global__ void add(int *a, int *b, int *c) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid < N) {
        c[tid] = a[tid] + b[tid];
    }
}

int main(int argc, char *argv[]) {
    int T = 10, B = 1; // threads per block and blocks per grid
    int a[N], b[N], c[N];
    int *dev_a, *dev_b, *dev_c;

    printf("Size of array = %d\n", N);
    do {
        printf("Enter number of threads per block: ");
        scanf("%d", &T);
        printf("\nEnter number of blocks per grid: ");
        scanf("%d", &B);
        if (T * B != N) printf("Error T x B != N, try again\n");
    } while (T * B != N);

    cudaEvent_t start, stop; // using CUDA events to measure time,
    float elapsed_time_ms;   // which works for asynchronous code too

    cudaMalloc((void**)&dev_a, N * sizeof(int));
    cudaMalloc((void**)&dev_b, N * sizeof(int));
    cudaMalloc((void**)&dev_c, N * sizeof(int));

    for (int i = 0; i < N; i++) { // load arrays with some numbers
        a[i] = i;
        b[i] = i * 1;
    }

    cudaMemcpy(dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(dev_b, b, N * sizeof(int), cudaMemcpyHostToDevice);

    cudaEventCreate(&start); // instrument code to measure start time
    cudaEventCreate(&stop);
    cudaEventRecord(start, 0);

    add<<<B, T>>>(dev_a, dev_b, dev_c);

    cudaMemcpy(c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost);

    cudaEventRecord(stop, 0); // instrument code to measure end time
    cudaEventSynchronize(stop);
    cudaEventElapsedTime(&elapsed_time_ms, start, stop);

    for (int i = 0; i < N; i++) {
        printf("%d+%d=%d\n", a[i], b[i], c[i]);
    }
    printf("Time to calculate results: %f ms.\n", elapsed_time_ms); // print out execution time

    // clean up
    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    return 0;
}
save test.cu and quit the file.
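Note: the program only accepts inputs with T x B = N. Since N is 4096, any factor pair works, e.g. T = 256 and B = 16 (256 x 16 = 4096), or T = 64 and B = 64.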
source ~/gpgpu-sim_distribution/setup_environment
nvcc test.cu -lcudart
./a.out
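GPGPU-Sim works by having the binary load the simulator's libcudart.so (found via LD_LIBRARY_PATH) in place of NVIDIA's runtime, so it is worth confirming the linkage before trusting the results. An optional check:
ldd a.out | grep libcudart (should resolve to the lib directory inside gpgpu-sim_distribution)
If it resolves to the CUDA toolkit's libcudart instead, recompiling with nvcc --cudart shared test.cu forces dynamic linking of the runtime.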
Sample code => http://web.mit.edu/pocky/www/cudaworkshop/WorkshopFiles.html