Yet Another eXchange Tool 0.11.3
Loading...
Searching...
No Matches
xt_ddt.c
Go to the documentation of this file.
1
12/*
13 * Keywords:
14 * Maintainer: Jörg Behrens <behrens@dkrz.de>
15 * Moritz Hanke <hanke@dkrz.de>
16 * Thomas Jahns <jahns@dkrz.de>
17 * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met:
22 *
23 * Redistributions of source code must retain the above copyright notice,
24 * this list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
29 *
30 * Neither the name of the DKRZ GmbH nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 */
46#ifdef HAVE_CONFIG_H
47#include "config.h"
48#endif
49
50#include <stdbool.h>
51#include <string.h>
52#include <mpi.h>
53
/* XtPragmaACC(args) turns its argument list into an OpenACC directive: when
 * the compiler defines _OPENACC, "acc args" is stringified and handed to
 * _Pragma; otherwise the macro expands to nothing and the annotated loops are
 * compiled as plain C. */
54#ifdef _OPENACC
55#define STR(s) #s
56#define xt_Pragma(args) _Pragma(args)
57#define XtPragmaACC(args) xt_Pragma(STR(acc args))
58#else
59#define XtPragmaACC(args)
60#endif
61
62#include "core/core.h"
63#include "core/ppm_xfuncs.h"
64#include "xt/xt_mpi.h"
65#include "xt_ddt.h"
66#include "xt_ddt_internal.h"
67
68//static const char filename[] = "xt_ddt.c";
69
70
71static void xt_ddt_pack_8(
72 size_t count, ssize_t *restrict displs, const uint8_t *restrict src,
73 uint8_t *restrict dst, enum xt_memtype memtype);
74static void xt_ddt_pack_16(
75 size_t count, ssize_t *restrict displs, const uint16_t *restrict src,
76 uint16_t *restrict dst, enum xt_memtype memtype);
77static void xt_ddt_pack_32(
78 size_t count, ssize_t *restrict displs, const uint32_t *restrict src,
79 uint32_t *restrict dst, enum xt_memtype memtype);
80static void xt_ddt_pack_32_2(
81 size_t count, ssize_t *restrict displs, const uint32_t *restrict src,
82 uint32_t *restrict dst, enum xt_memtype memtype);
83static void xt_ddt_pack_96(
84 size_t count, ssize_t *restrict displs, const uint32_t *restrict src,
85 uint32_t *restrict dst, enum xt_memtype memtype);
86static void xt_ddt_pack_64(
87 size_t count, ssize_t *restrict displs, const uint64_t *restrict src,
88 uint64_t *restrict dst, enum xt_memtype memtype);
89static void xt_ddt_pack_128(
90 size_t count, ssize_t *restrict displs, const uint64_t *restrict src,
91 uint64_t *restrict dst, enum xt_memtype memtype);
92static void xt_ddt_pack_160(
93 size_t count, ssize_t *restrict displs, const uint32_t *restrict src,
94 uint32_t *restrict dst, enum xt_memtype memtype);
95static void xt_ddt_pack_256(
96 size_t count, ssize_t *restrict displs, const uint64_t *restrict src,
97 uint64_t (*restrict dst)[4], enum xt_memtype memtype);
98
99static void xt_ddt_unpack_8(
100 size_t count, ssize_t *restrict displs, const uint8_t *restrict src,
101 uint8_t *restrict dst, enum xt_memtype memtype);
102static void xt_ddt_unpack_16(
103 size_t count, ssize_t *restrict displs, const uint16_t *restrict src,
104 uint16_t *restrict dst, enum xt_memtype memtype);
105static void xt_ddt_unpack_32(
106 size_t count, ssize_t *restrict displs, const uint32_t *restrict src,
107 uint32_t *restrict dst, enum xt_memtype memtype);
108static void xt_ddt_unpack_32_2(
109 size_t count, ssize_t *restrict displs, const uint32_t (*restrict src)[2],
110 uint32_t *restrict dst, enum xt_memtype memtype);
111static void xt_ddt_unpack_96(
112 size_t count, ssize_t *restrict displs, const uint32_t (*restrict src)[3],
113 uint32_t *restrict dst, enum xt_memtype memtype);
114static void xt_ddt_unpack_64(
115 size_t count, ssize_t *restrict displs, const uint64_t *restrict src,
116 uint64_t *restrict dst, enum xt_memtype memtype);
117static void xt_ddt_unpack_128(
118 size_t count, ssize_t *restrict displs, const uint64_t (*restrict src)[2],
119 uint64_t *restrict dst, enum xt_memtype memtype);
120static void xt_ddt_unpack_160(
121 size_t count, ssize_t *restrict displs, const uint32_t (*restrict src)[5],
122 uint32_t *restrict dst, enum xt_memtype memtype);
123static void xt_ddt_unpack_256(
124 size_t count, ssize_t *restrict displs, const uint64_t (*restrict src)[4],
125 uint64_t *restrict dst, enum xt_memtype memtype);
126
128 {.base_pack_size = 1,
129 .element_size = 1,
132 {.base_pack_size = 2,
133 .element_size = 2,
136 {.base_pack_size = 4,
137 .element_size = 4,
140 {.base_pack_size = 4,
141 .element_size = 8,
144 {.base_pack_size = 8,
145 .element_size = 8,
148 {.base_pack_size = 4,
149 .element_size = 12,
152 {.base_pack_size = 8,
153 .element_size = 16,
156 {.base_pack_size = 4,
157 .element_size = 20,
160 {.base_pack_size = 8,
161 .element_size = 32,
164};
165
167
168 return (ddt == NULL)?0:(ddt->pack_size);
169}
170
171size_t xt_ddt_get_pack_size(MPI_Datatype mpi_ddt) {
172
174}
175
176static void xt_ddt_copy_displs(Xt_ddt ddt, enum xt_memtype memtype) {
177
178 // count total number of displacements
179 size_t total_displs_size = 0, count = ddt->count;
180 for (size_t i = 0; i < count; ++i)
181 total_displs_size += ddt->data[i].displ_count;
182
183 // allocate displacements in specified memory type
184 ssize_t *displs;
185 size_t buffer_size = total_displs_size * sizeof(*displs);
186 displs = xt_gpu_malloc(buffer_size, memtype);
187
188 // copy displacements from host to specified memory type
190 displs, ddt->data[0].displs[XT_MEMTYPE_HOST],
191 buffer_size, memtype, XT_MEMTYPE_HOST);
192
193 // set displacements for all data entries
194 for (size_t i = 0, offset = 0; i < count; ++i) {
195 ddt->data[i].displs[memtype] = displs + offset;
196 offset += ddt->data[i].displ_count;
197 }
198
199 ddt->displs_available[memtype] = 1;
200}
201
/* Yields a `const rtype *` that points `disp` bytes past `ptr`. The offset is
 * added on an `unsigned char` pointer, so `disp` counts bytes rather than
 * elements of `rtype`; the result is read-only (right-hand-side use). */
202#define add_rhs_byte_displ(rtype,ptr,disp) \
203 ((const rtype *)(const void *)((const unsigned char *)(ptr) + (disp)))
204
205static void xt_ddt_pack_8(
206 size_t count, ssize_t *restrict displs, const uint8_t *restrict src,
207 uint8_t *restrict dst, enum xt_memtype memtype) {
208#ifndef _OPENACC
209 (void)memtype;
210#endif
212 parallel loop independent deviceptr(src, dst, displs)
213 if (memtype != XT_MEMTYPE_HOST))
214 for (size_t i = 0; i < count; ++i)
215 dst[i] = *add_rhs_byte_displ(uint8_t, src, displs[i]);
216}
217
218static void xt_ddt_pack_16(
219 size_t count, ssize_t *restrict displs, const uint16_t *restrict src,
220 uint16_t *restrict dst, enum xt_memtype memtype) {
221#ifndef _OPENACC
222 (void)memtype;
223#endif
225 parallel loop independent deviceptr(src, dst, displs)
226 if (memtype != XT_MEMTYPE_HOST))
227 for (size_t i = 0; i < count; ++i)
228 dst[i] = *add_rhs_byte_displ(uint16_t, src, + displs[i]);
229}
230
231static void xt_ddt_pack_32(
232 size_t count, ssize_t *restrict displs, const uint32_t *restrict src,
233 uint32_t *restrict dst, enum xt_memtype memtype) {
234#ifndef _OPENACC
235 (void)memtype;
236#endif
238 parallel loop independent deviceptr(src, dst, displs)
239 if (memtype != XT_MEMTYPE_HOST))
240 for (size_t i = 0; i < count; ++i)
241 dst[i] = *add_rhs_byte_displ(uint32_t, src, + displs[i]);
242}
243
245 size_t count, ssize_t *restrict displs, const uint32_t *restrict src,
246 uint32_t *restrict dst, enum xt_memtype memtype) {
247 uint32_t (*restrict dst_)[2] = (uint32_t(*)[2])dst;
248#ifndef _OPENACC
249 (void)memtype;
250#endif
252 parallel loop independent deviceptr(src, dst_, displs)
253 if (memtype != XT_MEMTYPE_HOST))
254 for (size_t i = 0; i < count; ++i) {
255 const uint32_t *src_32 = add_rhs_byte_displ(uint32_t, src, displs[i]);
256XtPragmaACC(loop independent)
257 for (int j = 0; j < 2; ++j) dst_[i][j] = src_32[j];
258 }
259}
260
261static void xt_ddt_pack_96(
262 size_t count, ssize_t *restrict displs, const uint32_t *restrict src,
263 uint32_t *restrict dst, enum xt_memtype memtype) {
264 uint32_t (*restrict dst_)[3] = (uint32_t(*)[3])dst;
265#ifndef _OPENACC
266 (void)memtype;
267#endif
269 parallel loop independent deviceptr(src, dst_, displs)
270 if (memtype != XT_MEMTYPE_HOST))
271 for (size_t i = 0; i < count; ++i) {
272 const uint32_t *src_32 = add_rhs_byte_displ(uint32_t, src, displs[i]);
273XtPragmaACC(loop independent)
274 for (int j = 0; j < 3; ++j) dst_[i][j] = src_32[j];
275 }
276}
277
278static void xt_ddt_pack_64(
279 size_t count, ssize_t *restrict displs, const uint64_t *restrict src,
280 uint64_t *restrict dst, enum xt_memtype memtype) {
281#ifndef _OPENACC
282 (void)memtype;
283#endif
285 parallel loop independent deviceptr(src, dst, displs)
286 if (memtype != XT_MEMTYPE_HOST))
287 for (size_t i = 0; i < count; ++i)
288 dst[i] = *add_rhs_byte_displ(uint64_t, src, displs[i]);
289}
290
291static void xt_ddt_pack_128(
292 size_t count, ssize_t *restrict displs, const uint64_t *restrict src,
293 uint64_t *restrict dst, enum xt_memtype memtype) {
294 uint64_t (*restrict dst_)[2] = (uint64_t(*)[2])dst;
295#ifndef _OPENACC
296 (void)memtype;
297#endif
299 parallel loop independent deviceptr(src, dst_, displs)
300 if (memtype != XT_MEMTYPE_HOST))
301 for (size_t i = 0; i < count; ++i) {
302 const uint64_t *src_64 = add_rhs_byte_displ(uint64_t, src, displs[i]);
303XtPragmaACC(loop independent)
304 for (int j = 0; j < 2; ++j) dst_[i][j] = src_64[j];
305 }
306}
307
308static void xt_ddt_pack_160(
309 size_t count, ssize_t *restrict displs, const uint32_t *restrict src,
310 uint32_t *restrict dst, enum xt_memtype memtype) {
311 uint32_t (*restrict dst_)[5] = (uint32_t(*)[5])dst;
312#ifndef _OPENACC
313 (void)memtype;
314#endif
316 parallel loop independent deviceptr(src, dst_, displs)
317 if (memtype != XT_MEMTYPE_HOST))
318 for (size_t i = 0; i < count; ++i) {
319 const uint32_t *src_32 = add_rhs_byte_displ(uint32_t, src, displs[i]);
320XtPragmaACC(loop independent)
321 for (int j = 0; j < 5; ++j) dst_[i][j] = src_32[j];
322 }
323}
324
325static void xt_ddt_pack_256(
326 size_t count, ssize_t *restrict displs, const uint64_t *restrict src,
327 uint64_t (*restrict dst)[4], enum xt_memtype memtype) {
328#ifndef _OPENACC
329 (void)memtype;
330#endif
332 parallel loop independent deviceptr(src, dst, displs)
333 if (memtype != XT_MEMTYPE_HOST))
334 for (size_t i = 0; i < count; ++i) {
335 const uint64_t *src_64 = add_rhs_byte_displ(uint64_t, src, displs[i]);
336XtPragmaACC(loop independent)
337 for (int j = 0; j < 4; ++j) dst[i][j] = src_64[j];
338 }
339}
340
342 Xt_ddt ddt, const void *src, void *dst, enum xt_memtype memtype) {
343
345
346 size_t dst_offset = 0;
347
348 // if the displacements are not available in the required memory type
349 if (!ddt->displs_available[memtype]) xt_ddt_copy_displs(ddt, memtype);
350
351 size_t count = ddt->count;
352
353 // for all sections with the same elemental datatype extent
354 for (size_t i = 0; i < count; ++i) {
355
356 struct xt_ddt_kernels * kernel =
358 size_t displ_count = ddt->data[i].displ_count;
359 kernel->pack(
360 displ_count, ddt->data[i].displs[memtype], src,
361 (unsigned char *)dst + dst_offset, memtype);
362
363 dst_offset += displ_count * kernel->element_size;
364 }
366}
367
368void xt_ddt_pack(MPI_Datatype mpi_ddt, const void *src, void *dst) {
369
371 XT_GPU_INSTR_PUSH(xt_ddt_pack:initialise);
372
373 enum xt_memtype src_memtype = xt_gpu_get_memtype(src);
374 enum xt_memtype dst_memtype = xt_gpu_get_memtype(dst);
375
376 size_t pack_size;
377 void *orig_dst;
378 /* pacify buggy -Wmaybe-uninitialized */
379#if defined __GNUC__ && __GNUC__ <= 11
380 pack_size = 0;
381 orig_dst = NULL;
382#endif
383
384 // if the source and destination are in different memory types
385 if (src_memtype != dst_memtype) {
386 pack_size = xt_ddt_get_pack_size(mpi_ddt);
387 orig_dst = dst;
388 dst = xt_gpu_malloc(pack_size, src_memtype);
389 }
390
391 XT_GPU_INSTR_POP; //xt_ddt_pack:initialise
392
394 xt_ddt_from_mpi_ddt(mpi_ddt), src, dst, src_memtype);
395
397
398 // if the source and destination are in different memory types
399 if (src_memtype != dst_memtype) {
400 xt_gpu_memcpy(orig_dst, dst, pack_size, dst_memtype, src_memtype);
401 xt_gpu_free(dst, src_memtype);
402 }
403
404 XT_GPU_INSTR_POP; // xt_ddt_pack:finalise
405 XT_GPU_INSTR_POP; // xt_ddt_pack
406}
407
408static void xt_ddt_unpack_8(
409 size_t count, ssize_t *restrict displs, const uint8_t *restrict src,
410 uint8_t *restrict dst, enum xt_memtype memtype) {
411#ifndef _OPENACC
412 (void)memtype;
413#endif
415 parallel loop independent deviceptr(src, dst, displs)
416 if (memtype != XT_MEMTYPE_HOST))
417 for (size_t i = 0; i < count; ++i)
418 dst[displs[i]] = src[i];
419}
420
421
423 size_t count, ssize_t *restrict displs, const uint16_t *restrict src,
424 uint16_t *restrict dst, enum xt_memtype memtype) {
425#ifndef _OPENACC
426 (void)memtype;
427#endif
429 parallel loop independent deviceptr(src, dst, displs)
430 if (memtype != XT_MEMTYPE_HOST))
431 for (size_t i = 0; i < count; ++i) {
432 uint16_t *dst_ = (void *)((unsigned char *)dst + displs[i]);
433 dst_[0] = src[i];
434 }
435}
436
438 size_t count, ssize_t *restrict displs, const uint32_t *restrict src,
439 uint32_t *restrict dst, enum xt_memtype memtype) {
440#ifndef _OPENACC
441 (void)memtype;
442#endif
444 parallel loop independent deviceptr(src, dst, displs)
445 if (memtype != XT_MEMTYPE_HOST))
446 for (size_t i = 0; i < count; ++i) {
447 uint32_t *dst_ = (void *)((unsigned char *)dst + displs[i]);
448 dst_[0] = src[i];
449 }
450}
451
453 size_t count, ssize_t *restrict displs, const uint32_t (*restrict src)[2],
454 uint32_t *restrict dst, enum xt_memtype memtype) {
455#ifndef _OPENACC
456 (void)memtype;
457#endif
459 parallel loop independent deviceptr(src, dst, displs)
460 if (memtype != XT_MEMTYPE_HOST))
461 for (size_t i = 0; i < count; ++i) {
462 uint32_t *dst_32 = (void *)((unsigned char *)dst + displs[i]);
463 dst_32[0] = src[i][0];
464 dst_32[1] = src[i][1];
465 }
466}
467
469 size_t count, ssize_t *restrict displs, const uint32_t (*restrict src)[3],
470 uint32_t *restrict dst, enum xt_memtype memtype) {
471#ifndef _OPENACC
472 (void)memtype;
473#endif
475 parallel loop independent deviceptr(src, dst, displs)
476 if (memtype != XT_MEMTYPE_HOST))
477 for (size_t i = 0; i < count; ++i) {
478 uint32_t *dst_32 = (void *)((unsigned char *)dst + displs[i]);
479 dst_32[0] = src[i][0];
480 dst_32[1] = src[i][1];
481 dst_32[2] = src[i][2];
482 }
483}
484
486 size_t count, ssize_t *restrict displs, const uint64_t *restrict src,
487 uint64_t *restrict dst, enum xt_memtype memtype) {
488#ifndef _OPENACC
489 (void)memtype;
490#endif
492 parallel loop independent deviceptr(src, dst, displs)
493 if (memtype != XT_MEMTYPE_HOST))
494 for (size_t i = 0; i < count; ++i) {
495 uint64_t *dst_ = (void *)((unsigned char *)dst + displs[i]);
496 dst_[0] = src[i];
497 }
498}
499
501 size_t count, ssize_t *restrict displs, const uint64_t (*restrict src)[2],
502 uint64_t *restrict dst, enum xt_memtype memtype) {
503#ifndef _OPENACC
504 (void)memtype;
505#endif
507 parallel loop independent deviceptr(src, dst, displs)
508 if (memtype != XT_MEMTYPE_HOST))
509 for (size_t i = 0; i < count; ++i) {
510 uint64_t *dst_64 = (void *)((unsigned char *)dst + displs[i]);
511 dst_64[0] = src[i][0];
512 dst_64[1] = src[i][1];
513 }
514}
515
517 size_t count, ssize_t *restrict displs, const uint32_t (*restrict src)[5],
518 uint32_t *restrict dst, enum xt_memtype memtype) {
519#ifndef _OPENACC
520 (void)memtype;
521#endif
523 parallel loop independent deviceptr(src, dst, displs)
524 if (memtype != XT_MEMTYPE_HOST))
525 for (size_t i = 0; i < count; ++i) {
526 uint32_t *dst_32 = (void *)((unsigned char *)dst + displs[i]);
527 dst_32[0] = src[i][0];
528 dst_32[1] = src[i][1];
529 dst_32[2] = src[i][2];
530 dst_32[3] = src[i][3];
531 dst_32[4] = src[i][4];
532 }
533}
534
536 size_t count, ssize_t *restrict displs, const uint64_t (*restrict src)[4],
537 uint64_t *restrict dst, enum xt_memtype memtype) {
538#ifndef _OPENACC
539 (void)memtype;
540#endif
542 parallel loop independent deviceptr(src, dst, displs)
543 if (memtype != XT_MEMTYPE_HOST))
544 for (size_t i = 0; i < count; ++i) {
545 uint64_t *dst_64 = (void *)((unsigned char *)dst + displs[i]);
546 dst_64[0] = src[i][0];
547 dst_64[1] = src[i][1];
548 dst_64[2] = src[i][2];
549 dst_64[3] = src[i][3];
550 }
551}
552
554 Xt_ddt ddt, const void *src, void *dst, enum xt_memtype memtype) {
555
557
558 size_t src_offset = 0;
559
560 // if the displacements are not available in the required memory type
561 if (!ddt->displs_available[memtype]) xt_ddt_copy_displs(ddt, memtype);
562
563 size_t count = ddt->count;
564
565 // for all sections with the same elemental datatype extent
566 for (size_t i = 0; i < count; ++i) {
567
568 struct xt_ddt_kernels * kernel =
570 size_t displ_count = ddt->data[i].displ_count;
571 kernel->unpack(
572 displ_count, ddt->data[i].displs[memtype],
573 add_rhs_byte_displ(void, src, src_offset), dst, memtype);
574
575 src_offset += displ_count * kernel->element_size;
576 }
578}
579
580void xt_ddt_unpack(MPI_Datatype mpi_ddt, const void *src, void *dst) {
581
584
585 enum xt_memtype src_memtype = xt_gpu_get_memtype(src);
586 enum xt_memtype dst_memtype = xt_gpu_get_memtype(dst);
587
588 void *src__ = NULL;
589 const void *src_;
590 // if the source and destination are in different memory types
591 if (src_memtype != dst_memtype) {
592 size_t pack_size = xt_ddt_get_pack_size(mpi_ddt);
593 src__ = xt_gpu_malloc(pack_size, dst_memtype);
594 xt_gpu_memcpy(src__, src, pack_size, dst_memtype, src_memtype);
595 src_ = src__;
596 } else
597 src_ = src;
598
599 XT_GPU_INSTR_POP; // xt_ddt_unpack:initialise
600
602 xt_ddt_from_mpi_ddt(mpi_ddt), src_, dst, dst_memtype);
603
605
606 // if the source and destination are in different memory types
607 if (src_memtype != dst_memtype) xt_gpu_free(src__, dst_memtype);
608
609 XT_GPU_INSTR_POP; // xt_ddt_unpack:finalise
610 XT_GPU_INSTR_POP; // xt_ddt_unpack
611}
612
613/*
614 * Local Variables:
615 * c-basic-offset: 2
616 * coding: utf-8
617 * indent-tabs-mode: nil
618 * show-trailing-whitespace: t
619 * require-trailing-newline: t
620 * End:
621 */
add versions of standard API functions not returning on error
size_t count
size_t pack_size
int displs_available[XT_MEMTYPE_COUNT]
struct xt_ddt_data data[]
ssize_t * displs[XT_MEMTYPE_COUNT]
xt_ddt_kernel_func pack
xt_ddt_kernel_func unpack
static void xt_ddt_unpack_96(size_t count, ssize_t *restrict displs, const uint32_t(*restrict src)[3], uint32_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:468
static void xt_ddt_unpack_64(size_t count, ssize_t *restrict displs, const uint64_t *restrict src, uint64_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:485
static void xt_ddt_pack_16(size_t count, ssize_t *restrict displs, const uint16_t *restrict src, uint16_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:218
static void xt_ddt_pack_256(size_t count, ssize_t *restrict displs, const uint64_t *restrict src, uint64_t(*restrict dst)[4], enum xt_memtype memtype)
Definition xt_ddt.c:325
static void xt_ddt_pack_32_2(size_t count, ssize_t *restrict displs, const uint32_t *restrict src, uint32_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:244
static void xt_ddt_pack_160(size_t count, ssize_t *restrict displs, const uint32_t *restrict src, uint32_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:308
static void xt_ddt_pack_96(size_t count, ssize_t *restrict displs, const uint32_t *restrict src, uint32_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:261
void xt_ddt_unpack_internal(Xt_ddt ddt, const void *src, void *dst, enum xt_memtype memtype)
Definition xt_ddt.c:553
void xt_ddt_unpack(MPI_Datatype mpi_ddt, const void *src, void *dst)
Definition xt_ddt.c:580
static void xt_ddt_pack_128(size_t count, ssize_t *restrict displs, const uint64_t *restrict src, uint64_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:291
size_t xt_ddt_get_pack_size_internal(Xt_ddt ddt)
Definition xt_ddt.c:166
void xt_ddt_pack_internal(Xt_ddt ddt, const void *src, void *dst, enum xt_memtype memtype)
Definition xt_ddt.c:341
static void xt_ddt_unpack_32(size_t count, ssize_t *restrict displs, const uint32_t *restrict src, uint32_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:437
#define XtPragmaACC(args)
Definition xt_ddt.c:59
size_t xt_ddt_get_pack_size(MPI_Datatype mpi_ddt)
Definition xt_ddt.c:171
#define add_rhs_byte_displ(rtype, ptr, disp)
Definition xt_ddt.c:202
static void xt_ddt_unpack_256(size_t count, ssize_t *restrict displs, const uint64_t(*restrict src)[4], uint64_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:535
static void xt_ddt_pack_32(size_t count, ssize_t *restrict displs, const uint32_t *restrict src, uint32_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:231
struct xt_ddt_kernels xt_ddt_valid_kernels[]
Definition xt_ddt.c:127
static void xt_ddt_unpack_32_2(size_t count, ssize_t *restrict displs, const uint32_t(*restrict src)[2], uint32_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:452
void xt_ddt_pack(MPI_Datatype mpi_ddt, const void *src, void *dst)
Definition xt_ddt.c:368
static void xt_ddt_pack_64(size_t count, ssize_t *restrict displs, const uint64_t *restrict src, uint64_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:278
static void xt_ddt_copy_displs(Xt_ddt ddt, enum xt_memtype memtype)
Definition xt_ddt.c:176
static void xt_ddt_unpack_160(size_t count, ssize_t *restrict displs, const uint32_t(*restrict src)[5], uint32_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:516
static void xt_ddt_pack_8(size_t count, ssize_t *restrict displs, const uint8_t *restrict src, uint8_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:205
static void xt_ddt_unpack_8(size_t count, ssize_t *restrict displs, const uint8_t *restrict src, uint8_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:408
static void xt_ddt_unpack_16(size_t count, ssize_t *restrict displs, const uint16_t *restrict src, uint16_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:422
static void xt_ddt_unpack_128(size_t count, ssize_t *restrict displs, const uint64_t(*restrict src)[2], uint64_t *restrict dst, enum xt_memtype memtype)
Definition xt_ddt.c:500
utility routines for manual handling of MPI DDT's
struct Xt_ddt_ * Xt_ddt
Definition xt_ddt.h:62
Xt_ddt xt_ddt_from_mpi_ddt(MPI_Datatype mpi_ddt)
internal utility routines for manual handling of MPI DDT's
void(* xt_ddt_kernel_func)(size_t, ssize_t *, const void *, void *, enum xt_memtype)
void * xt_gpu_malloc(size_t alloc_size, enum xt_memtype memtype)
Definition xt_gpu.c:183
enum xt_memtype xt_gpu_get_memtype(const void *ptr)
Definition xt_gpu.c:197
void xt_gpu_memcpy(void *dst, void const *src, size_t buffer_size, enum xt_memtype dst_memtype, enum xt_memtype src_memtype)
Definition xt_gpu.c:191
void xt_gpu_free(void *ptr, enum xt_memtype memtype)
Definition xt_gpu.c:187
#define XT_GPU_INSTR_POP
Definition xt_gpu.h:60
xt_memtype
Definition xt_gpu.h:68
@ XT_MEMTYPE_HOST
Definition xt_gpu.h:69
#define XT_GPU_INSTR_PUSH(arg)
Definition xt_gpu.h:59
static const idxlist_unpack unpack[]
utility routines for MPI