64#if ! HAVE_DECL___BUILTIN_CLZL \
65 && (HAVE_DECL___LZCNT && SIZEOF_LONG == SIZEOF_INT \
66 || HAVE_DECL___LZCNT64 && SIZEOF_LONG == 8 && CHAR_BIT == 8)
121 ddt_list->
size_entries = size_entries = size_entries ? size_entries * 2 : 8;
126#define GROW_DDT_LIST(ddt_list) \
128 if (ddt_list->num_entries == ddt_list->size_entries) \
129 entries = grow_ddt_list(ddt_list); \
135 int num_integers, num_addresses, num_datatypes,
combiner;
136 xt_mpi_call(MPI_Type_get_envelope(*dt, &num_integers,
137 &num_addresses, &num_datatypes, &
combiner), comm);
146 int count, MPI_Datatype oldtype,
153 for (
size_t i = 0; i < num_entries; ++i)
154 if (entries[i].
combiner == MPI_COMBINER_CONTIGUOUS) {
156 = &entries[i].args.contiguous;
158 entries[i].use_count += 1;
159 dt = entries[i].cached_dt;
167 .count = count, .oldtype = oldtype,
171 .combiner = MPI_COMBINER_CONTIGUOUS,
184 int count,
int blocklength,
int stride, MPI_Datatype oldtype,
191 for (
size_t i = 0; i < num_entries; ++i)
192 if (entries[i].
combiner == MPI_COMBINER_VECTOR) {
196 entries[i].use_count += 1;
197 dt = entries[i].cached_dt;
206 .count = count, .blocklength = blocklength,
207 .stride = stride, .oldtype = oldtype,
211 .combiner = MPI_COMBINER_VECTOR,
225 int count,
int blocklength, MPI_Aint stride, MPI_Datatype oldtype,
232 for (
size_t i = 0; i < num_entries; ++i)
233 if (entries[i].
combiner == MPI_COMBINER_HVECTOR) {
237 entries[i].use_count += 1;
238 dt = entries[i].cached_dt;
247 .count = count, .blocklength = blocklength,
248 .stride = stride, .oldtype = oldtype,
252 .combiner = MPI_COMBINER_HVECTOR,
257 oldtype, &dt), comm);
266 int count,
int blocklength,
const int disp[count], MPI_Datatype oldtype,
271 size_t disp_size = (count > 0 ? (size_t)count : (size_t)0) *
sizeof (*disp);
272 uint32_t disp_hash = Xt_memcrc((
const void *)disp,
disp_size);
275 int *disp_cmp = NULL;
276 for (
size_t i = 0; i < num_entries; ++i)
277 if (entries[i].
combiner == MPI_COMBINER_INDEXED_BLOCK) {
279 = &entries[i].args.indexed_block;
284 MPI_Datatype cached_dt = entries[i].cached_dt, oldtype_;
286 disp_cmp, NULL, &oldtype_), comm);
290 entries[i].use_count += 1;
300 .count = count, .blocklength = blocklength,
301 .disp_hash = disp_hash, .oldtype = oldtype,
305 .combiner = MPI_COMBINER_INDEXED_BLOCK,
312 disp, oldtype, &dt, comm);
320 int count,
int blocklength,
const MPI_Aint disp[count], MPI_Datatype oldtype,
325 size_t count_ = count > 0 ? (size_t)count : (size_t)0,
327 uint32_t disp_hash = Xt_memcrc((
const void *)disp,
disp_size);
330 MPI_Aint *disp_cmp = NULL;
331 for (
size_t i = 0; i < num_entries; ++i)
334 = &entries[i].args.indexed_block;
338#define disp_size (disp_size + (count_ + 2) * sizeof (int))
342 MPI_Datatype cached_dt = entries[i].cached_dt, oldtype_;
347 int *icmp = (
void *)(disp_cmp + count_);
350 icmp, disp_cmp, &oldtype_), comm);
354 entries[i].use_count += 1;
364 .count = count, .blocklength = blocklength,
365 .disp_hash = disp_hash, .oldtype = oldtype,
376 disp, oldtype, &dt, comm);
385 int count,
const int blocklength[count],
const int disp[count],
386 MPI_Datatype oldtype,
MPI_Comm comm)
390 size_t asize = (count > 0 ? (size_t)count : (size_t)0) *
sizeof (int);
391 uint32_t disp_hash = Xt_memcrc((
const void *)disp, asize),
392 blocklength_hash = Xt_memcrc((
const void *)blocklength, asize);
396 for (
size_t i = 0; i < num_entries; ++i)
397 if (entries[i].
combiner == MPI_COMBINER_INDEXED) {
399 = &entries[i].args.indexed;
403 acmp =
xmalloc(2 * asize +
sizeof (
int));
404 MPI_Datatype cached_dt = entries[i].cached_dt, oldtype_;
406 acmp, NULL, &oldtype_), comm);
408 if (memcmp(blocklength, acmp+1, asize)
409 || memcmp(disp, acmp+
count+1, asize))
411 entries[i].use_count += 1;
421 .count = count, .blocklength_hash = blocklength_hash,
422 .disp_hash = disp_hash, .oldtype = oldtype,
426 .combiner = MPI_COMBINER_INDEXED,
440 int count,
const int blocklength[count],
const MPI_Aint disp[count],
441 MPI_Datatype oldtype,
MPI_Comm comm)
445 size_t count_ = count > 0 ? (size_t)count : (size_t)0,
447 blocklength_size = count_ *
sizeof (*blocklength);
448 uint32_t disp_hash = Xt_memcrc((
const void *)disp,
disp_size),
449 blocklength_hash = Xt_memcrc((
const void *)blocklength, blocklength_size);
452 MPI_Aint *disp_cmp = NULL;
453 for (
size_t i = 0; i < num_entries; ++i)
454 if (entries[i].
combiner == MPI_COMBINER_HINDEXED) {
456 = &entries[i].args.indexed;
461 MPI_Datatype cached_dt = entries[i].cached_dt, oldtype_;
464 (
void *)(disp_cmp+count_), disp_cmp, &oldtype_), comm);
466 if (memcmp(blocklength, disp_cmp+count_+1, blocklength_size)
469 entries[i].use_count += 1;
479 .count = count, .blocklength_hash = blocklength_hash,
480 .disp_hash = disp_hash, .oldtype = oldtype,
484 .combiner = MPI_COMBINER_HINDEXED,
491 disp, oldtype, &dt, comm);
500 int count,
const int blocklength[count],
501 const MPI_Aint disp[count],
502 const MPI_Datatype oldtype[count],
MPI_Comm comm)
506 size_t count_ = (count > 0 ? (size_t)count : (size_t)0),
508 blocklength_size = count_ *
sizeof (blocklength[0]),
509 oldtype_size = count_ *
sizeof (oldtype[0]);
510 uint32_t disp_hash = Xt_memcrc((
const void *)disp,
disp_size),
511 blocklength_hash = Xt_memcrc((
const void *)blocklength, blocklength_size),
512 oldtype_hash = Xt_memcrc((
const void *)oldtype, oldtype_size);
515 MPI_Aint *disp_cmp = NULL;
516 MPI_Datatype *oldtype_contents = NULL,
519 for (
size_t i = 0; i < num_entries; ++i)
520 if (entries[i].
combiner == MPI_COMBINER_STRUCT) {
522 = &entries[i].args.struct_dt;
528 +
sizeof (
int) + blocklength_size);
529 oldtype_contents = (
void *)(disp_cmp + count_);
530 icmp = (
void *)(oldtype_contents + count_);
532 MPI_Datatype cached_dt = entries[i].cached_dt;
534 icmp, disp_cmp, oldtype_contents), comm);
535 int oldtypes_mismatch = memcmp(oldtype, oldtype_cmp, oldtype_size);
536 for (
size_t j = 0; j < count_; ++j)
538 assert(icmp[0] ==
count);
539 if (!oldtypes_mismatch && !memcmp(blocklength, icmp+1, blocklength_size)
541 entries[i].use_count += 1;
546 oldtype_cmp += args->
count;
549 size_t struct_dt_size = (size_t)(oldtype_cmp - ddt_list->
struct_dt),
550 struct_dt_size_p2 =
next_2_pow(struct_dt_size),
551 struct_dt_needed = struct_dt_size + count_ + (oldtype_cmp == ddt_list->
struct_dt);
552 if (struct_dt_needed > struct_dt_size_p2) {
555 next_2_pow(struct_dt_needed) *
sizeof (*oldtype_cmp));
556 oldtype_cmp = ddt_list->
struct_dt + struct_dt_size;
558 memcpy(oldtype_cmp, oldtype, oldtype_size);
562 .count = count, .blocklength_hash = blocklength_hash,
563 .disp_hash = disp_hash, .oldtype_hash = oldtype_hash,
567 .combiner = MPI_COMBINER_STRUCT,
585 MPI_Datatype dt_ = *dt;
586 for (
size_t i = 0; i < num_entries; ++i)
591 --entries[i].use_count;
592 assert(new_use_count >= 0);
597 *dt = MPI_DATATYPE_NULL;
608 if (ddt_list) ;
else return;
611 for (
size_t i = 0; i < num_entries; ++i)
629 for (
size_t i = 0, n = ddt_list->
num_entries; i < n; ++i)
632 for (
size_t j = 0; j < nmsg; ++j)
634 goto use_count_is_fine;
636 sprintf(buf,
"%d: cache inconsistency: In-use marked datatype "
637 "encountered that is not in any mesage!\n", world_rank);
638 Xt_abort(Xt_default_comm, buf,
"xt_mpi_ddt_cache.c", __LINE__);
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
uint32_t blocklength_hash
uint32_t blocklength_hash
struct Xt_mpi_contiguous_arg_desc contiguous
struct Xt_mpi_struct_arg_desc struct_dt
struct Xt_mpi_vector_arg_desc vector
struct Xt_mpi_indexed_block_arg_desc indexed_block
union Xt_mpiddt_list_entry::@17 args
struct Xt_mpi_indexed_arg_desc indexed
struct Xt_mpi_hvector_arg_desc hvector
struct Xt_mpiddt_list_entry * entries
int MPI_Type_create_hvector(int count, int blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype *newtype)
int MPI_Type_contiguous(int count, MPI_Datatype oldtype, MPI_Datatype *newtype)
int MPI_Type_free(MPI_Datatype *datatype)
int MPI_Type_vector(int count, int blocklength, int stride, MPI_Datatype oldtype, MPI_Datatype *newtype)
static size_t next_2_pow(size_t v)
#define xt_mpi_call(call, comm)
MPI_Datatype Xt_mpi_ddt_cache_acquire_hindexed(struct Xt_mpiddt_list *ddt_list, int count, const int blocklength[count], const MPI_Aint disp[count], MPI_Datatype oldtype, MPI_Comm comm)
void Xt_mpi_ddt_cache_free(struct Xt_mpiddt_list *ddt_list, MPI_Comm comm)
MPI_Datatype Xt_mpi_ddt_cache_acquire_indexed(struct Xt_mpiddt_list *ddt_list, int count, const int blocklength[count], const int disp[count], MPI_Datatype oldtype, MPI_Comm comm)
#define GROW_DDT_LIST(ddt_list)
void Xt_mpi_ddt_cache_check_retention(struct Xt_mpiddt_list *ddt_list, size_t nmsg, struct Xt_redist_msg msgs[nmsg])
MPI_Datatype Xt_mpi_ddt_cache_acquire_hvector(struct Xt_mpiddt_list *ddt_list, int count, int blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Comm comm)
static struct Xt_mpiddt_list_entry * grow_ddt_list(struct Xt_mpiddt_list *ddt_list)
MPI_Datatype Xt_mpi_ddt_cache_acquire_indexed_block(struct Xt_mpiddt_list *ddt_list, int count, int blocklength, const int disp[count], MPI_Datatype oldtype, MPI_Comm comm)
MPI_Datatype Xt_mpi_ddt_cache_acquire_vector(struct Xt_mpiddt_list *ddt_list, int count, int blocklength, int stride, MPI_Datatype oldtype, MPI_Comm comm)
MPI_Datatype Xt_mpi_ddt_cache_acquire_hindexed_block(struct Xt_mpiddt_list *ddt_list, int count, int blocklength, const MPI_Aint disp[count], MPI_Datatype oldtype, MPI_Comm comm)
MPI_Datatype Xt_mpi_ddt_cache_acquire_contiguous(struct Xt_mpiddt_list *ddt_list, int count, MPI_Datatype oldtype, MPI_Comm comm)
MPI_Datatype Xt_mpi_ddt_cache_acquire_struct(struct Xt_mpiddt_list *ddt_list, int count, const int blocklength[count], const MPI_Aint disp[count], const MPI_Datatype oldtype[count], MPI_Comm comm)
void Xt_mpi_ddt_cache_entry_release(struct Xt_mpiddt_list *ddt_list, MPI_Datatype *dt, MPI_Comm comm)
static void free_dt_unless_named(MPI_Datatype *dt, MPI_Comm comm)
#define Xt_Type_create_struct(count, array_of_blocklengths, array_of_displacements, array_of_types, newtype, comm)
@ MPI_COMBINER_HINDEXED_BLOCK
#define Xt_Type_create_indexed_block(count, blocklength, disp, oldtype, newtype, comm)
#define Xt_Type_create_hindexed_block(count, blocklength, array_of_displacements, oldtype, newtype, comm)
#define Xt_Type_indexed(count, blocklength, disp, oldtype, newtype, comm)
#define Xt_Type_create_hindexed(count, blocklength, disp, oldtype, newtype, comm)