Actual source code: bench_spmv.c
1: static char help[] = "Driver for benchmarking SpMV.";
3: #include <petscmat.h>
4: #include "cJSON.h"
5: #include "mmloader.h"
/*
  read_file - Read an entire file into a newly allocated, NUL-terminated buffer.

  Input:
    filename - path of the file to read

  Returns a malloc()'d buffer holding the bytes that were read followed by a
  terminating '\0', or NULL if filename is NULL, the file cannot be opened, its
  size cannot be determined, or the buffer cannot be allocated.
  The caller owns the returned buffer and must release it with free().
*/
char *read_file(const char *filename)
{
  FILE  *file       = NULL;
  long   length     = 0;
  char  *content    = NULL;
  size_t read_chars = 0;

  if (!filename) return NULL;

  /* open in read binary mode so that the length reported by ftell() matches the bytes fread() delivers */
  file = fopen(filename, "rb");
  if (!file) return NULL;

  /* get the length; any failure here means we cannot size the buffer safely */
  if (fseek(file, 0, SEEK_END) != 0) goto done;
  length = ftell(file);
  if (length < 0) goto done;
  if (fseek(file, 0, SEEK_SET) != 0) goto done;

  /* allocate content buffer (+1 for the terminating NUL) */
  content = (char *)malloc((size_t)length + 1);
  if (!content) goto done;

  /* read the file into memory; terminate after whatever was actually read */
  read_chars          = fread(content, sizeof(char), (size_t)length, file);
  content[read_chars] = '\0';

done:
  fclose(file);
  return content;
}
/*
  write_file - Overwrite a file with the given NUL-terminated text.

  Input:
    filename - path of the file to create or truncate
    content  - text to write

  This is a best-effort helper: if either argument is NULL or the file cannot
  be opened, nothing is written and no error is reported.
*/
void write_file(const char *filename, const char *content)
{
  FILE *file = NULL;

  if (!filename || !content) return;
  file = fopen(filename, "w");
  /* fclose() must only be called on a stream that was actually opened */
  if (!file) return;
  fputs(content, file);
  fclose(file);
}
39: int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40: {
41: char *content = read_file(inputjsonfile);
42: cJSON *matrix_json = NULL;
43: const cJSON *problem = NULL, *elem = NULL;
44: const cJSON *item = NULL;
45: char **filenames, **groupnames, **matnames;
46: int i, n;
47: if (!content) return 0;
48: matrix_json = cJSON_Parse(content);
49: if (!matrix_json) return 0;
50: n = cJSON_GetArraySize(matrix_json);
51: *nmat = n;
52: filenames = (char **)malloc(sizeof(char *) * n);
53: groupnames = (char **)malloc(sizeof(char *) * n);
54: matnames = (char **)malloc(sizeof(char *) * n);
55: for (i = 0; i < n; i++) {
56: elem = cJSON_GetArrayItem(matrix_json, i);
57: item = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58: filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59: strcpy(filenames[i], item->valuestring);
60: problem = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61: item = cJSON_GetObjectItemCaseSensitive(problem, "group");
62: groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63: strcpy(groupnames[i], item->valuestring);
64: item = cJSON_GetObjectItemCaseSensitive(problem, "name");
65: matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66: strcpy(matnames[i], item->valuestring);
67: }
68: cJSON_Delete(matrix_json);
69: free(content);
70: *outputfilenames = filenames;
71: *outputgroupnames = groupnames;
72: *outputmatnames = matnames;
73: return 0;
74: }
76: int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
77: {
78: char *content = read_file(inputjsonfile);
79: cJSON *matrix_json = NULL;
80: cJSON *elem = NULL;
81: int i, n;
82: if (!content) return 0;
83: matrix_json = cJSON_Parse(content);
84: if (!matrix_json) return 0;
85: n = cJSON_GetArraySize(matrix_json);
86: for (i = 0; i < n; i++) {
87: cJSON *spmv = NULL;
88: cJSON *format = NULL;
89: elem = cJSON_GetArrayItem(matrix_json, i);
90: spmv = cJSON_GetObjectItem(elem, "spmv");
91: if (spmv) {
92: format = cJSON_GetObjectItem(spmv, matformat);
93: if (format) {
94: cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
95: cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
96: } else {
97: format = cJSON_CreateObject();
98: cJSON_AddItemToObject(spmv, matformat, format);
99: cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100: cJSON_AddNumberToObject(format, "repetitions", repetitions);
101: }
102: } else {
103: spmv = cJSON_CreateObject();
104: cJSON_AddItemToObject(elem, "spmv", spmv);
105: format = cJSON_CreateObject();
106: cJSON_AddItemToObject(spmv, matformat, format);
107: cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108: cJSON_AddNumberToObject(format, "repetitions", repetitions);
109: }
110: }
111: free(content);
112: content = cJSON_Print(matrix_json);
113: write_file(inputjsonfile, content);
114: cJSON_Delete(matrix_json);
115: free(content);
116: return 0;
117: }
/*
  For GPU formats, we keep two copies of the matrix on the CPU and one copy on the GPU.
  The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
  in each repetition. As a result, each MatMult call is fresh: we capture the
  first-time overhead (e.g. of cuSPARSE SpMV) and avoid cache effects between
  consecutive calls.
*/
126: PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
127: {
128: Mat A2 = NULL;
129: PetscInt i;
130: Vec u;
131: PetscLogDouble vstart = 0, vend = 0;
132: PetscBool isaijcusparse, isaijkokkos, issellcuda;
134: PetscFunctionBeginUser;
135: PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
136: PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
137: PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
138: if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
139: if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
140: PetscCall(VecDuplicate(b, &u));
141: if (time) *time = 0.0;
142: for (i = 0; i < repetitions; i++) {
143: if (use_gpu) {
144: PetscCall(MatDestroy(&A2));
145: PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
146: if (issellcuda) {
147: PetscCall(MatConvert(A2, MATSELL, MAT_INPLACE_MATRIX, &A2));
148: PetscCall(MatConvert(A2, MATSELLCUDA, MAT_INPLACE_MATRIX, &A2));
149: } else {
150: PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2));
151: }
152: } else A2 = A;
153: /* Timing MatMult */
154: if (time) PetscCall(PetscTime(&vstart));
156: PetscCall(MatMult(A2, b, u));
158: if (time) {
159: PetscCall(PetscTime(&vend));
160: *time += (PetscReal)(vend - vstart);
161: }
162: }
163: PetscCall(VecDestroy(&u));
164: if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
165: PetscFunctionReturn(PETSC_SUCCESS);
166: }
168: PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
169: {
170: PetscLogEvent event;
171: PetscEventPerfInfo eventInfo;
172: //PetscReal gpuflopRate;
174: // if (matformat) {
175: // PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
176: // } else {
177: // PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
178: // }
179: // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
180: // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
182: PetscFunctionBeginUser;
183: PetscCall(PetscLogEventGetId("MatMult", &event));
184: PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
185: //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
186: // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
187: if (cputime) *cputime = eventInfo.time;
188: #if defined(PETSC_HAVE_DEVICE)
189: if (gputime) *gputime = eventInfo.GpuTime;
190: if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
191: #endif
192: PetscFunctionReturn(PETSC_SUCCESS);
193: }
195: PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
196: {
197: PetscBool iscsr, issell, iscsrkokkos;
199: PetscFunctionBeginUser;
200: PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
201: if (iscsr) {
202: if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
203: else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
204: } else {
205: PetscCall(PetscStrcmp(matformat, "sell", &issell));
206: if (issell) {
207: if (use_gpu) PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat));
208: else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
209: } else {
210: PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
211: if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
212: }
213: }
214: PetscFunctionReturn(PETSC_SUCCESS);
215: }
217: int main(int argc, char **args)
218: {
219: PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1;
220: Mat A;
221: Vec b;
222: char jfilename[PETSC_MAX_PATH_LEN];
223: char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
224: char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
225: char *matformats[5];
226: char **filenames = NULL, **groupnames = NULL, **matnames = NULL;
227: char ordering[256] = MATORDERINGRCM;
228: PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
229: IS rowperm = NULL, colperm = NULL;
230: PetscViewer fd;
231: PetscReal starting_spmv_time = 0, *spmv_times;
233: PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
234: PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
235: PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
236: if (!flg1) {
237: nformats = 1;
238: PetscCall(PetscStrallocpy("csr", &matformats[0]));
239: }
240: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
241: PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
242: /* Read matrix and RHS */
243: PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
244: PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
245: PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
246: PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
247: PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
248: PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
249: PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
250: PetscOptionsEnd();
251: #if !defined(PETSC_HAVE_DEVICE)
252: PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
253: #endif
254: PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
255: if (flg3) {
256: ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
257: PetscCall(PetscCalloc1(nmat, &spmv_times));
258: } else if (flg2) {
259: PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
260: } else if (flg1) {
261: PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
262: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
263: PetscCall(MatSetType(A, MATAIJ));
264: PetscCall(MatSetFromOptions(A));
265: PetscCall(MatLoad(A, fd));
266: PetscCall(PetscViewerDestroy(&fd));
267: }
268: if (permute) {
269: Mat Aperm;
270: PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
271: PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
272: PetscCall(MatDestroy(&A));
273: A = Aperm; /* Replace original operator with permuted version */
274: }
275: /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
276: PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
277: if (bflg) {
278: PetscViewer fb;
279: PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
280: PetscCall(VecSetFromOptions(b));
281: PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
282: PetscCall(VecLoad(b, fb));
283: PetscCall(PetscViewerDestroy(&fb));
284: }
286: for (j = 0; j < nformats; j++) {
287: char *petscmatformat = NULL;
288: PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
289: PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
290: if (flg3) { // mat names specified in a JSON file
291: for (i = 0; i < nmat; i++) {
292: PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
293: if (!bflg) {
294: PetscCall(MatCreateVecs(A, &b, NULL));
295: PetscCall(VecSet(b, 1.0));
296: }
297: PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
298: if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
299: else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
300: PetscCall(MatDestroy(&A));
301: if (!bflg) PetscCall(VecDestroy(&b));
302: }
303: UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
304: starting_spmv_time = spmv_times[nmat - 1];
305: } else {
306: PetscReal spmv_time;
307: if (!bflg) {
308: PetscCall(MatCreateVecs(A, &b, NULL));
309: PetscCall(VecSet(b, 1.0));
310: }
311: PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
312: if (!bflg) PetscCall(VecDestroy(&b));
313: }
314: PetscCall(PetscFree(petscmatformat));
315: }
316: if (flg3) {
317: for (i = 0; i < nmat; i++) {
318: free(filenames[i]);
319: free(groupnames[i]);
320: free(matnames[i]);
321: }
322: free(filenames);
323: free(groupnames);
324: free(matnames);
325: PetscCall(PetscFree(spmv_times));
326: }
327: for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
328: if (flg1 || flg2) PetscCall(MatDestroy(&A));
329: if (bflg) PetscCall(VecDestroy(&b));
330: PetscCall(ISDestroy(&rowperm));
331: PetscCall(ISDestroy(&colperm));
332: PetscCall(PetscFinalize());
333: return 0;
334: }
335: /*TEST
337: build:
338: requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
339: depends: mmloader.c mmio.c cJSON.c
341: test:
342: suffix: 1
343: args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
345: test:
346: suffix: 2
347: args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
348: output_file: output/bench_spmv_1.out
349: requires: cuda
351: TEST*/