67#if __GNUC__ >= 11 && __GNUC__ <= 13 && defined MPICH
68#pragma GCC diagnostic push
69#pragma GCC diagnostic ignored "-Wstringop-overread"
70#pragma GCC diagnostic ignored "-Wstringop-overflow"
82 for (
int i = 0; i < nrecv; ++i) {
84 xt_mpi_call(MPI_Pack_size(1, recv_msgs[i].datatype, comm, &buf_size),
86 accum += (size_t)buf_size;
88 for (
int i = 0; i < nsend; ++i) {
89 buf_ofs[nrecv+i] = accum;
90 xt_mpi_call(MPI_Pack_size(1, send_msgs[i].datatype, comm,
92 accum += (size_t)buf_size;
94 buf_ofs[nsend+nrecv] = accum;
106 for (
int i = 0; i < nrecv; ++i) {
107 xt_mpi_call(MPI_Pack_size(1, recv_msgs[i].datatype, comm, &buf_size),
109 accum += (size_t)buf_size;
111 for (
int i = 0; i < nsend; ++i) {
112 xt_mpi_call(MPI_Pack_size(1, send_msgs[i].datatype, comm,
114 accum += (size_t)buf_size;
123 const size_t *buf_ofs,
124 const void *src_data,
int nsend,
int nrecv,
128 MPI_Request *requests)
130 for (
int i = 0; i < nrecv; ++i) {
131 int recv_size = (int)(buf_ofs[i+1] - buf_ofs[i]);
132 xt_mpi_call(MPI_Irecv(buffer + buf_ofs[i], recv_size, MPI_PACKED,
138 for (
int i = 0; i < nsend; ++i) {
140 int buf_size = (int)(buf_ofs[send_start+i+1] - buf_ofs[send_start+i]);
141 xt_mpi_call(MPI_Pack(CAST_MPI_SEND_BUF(src_data), 1, send_msgs[i].datatype,
142 buffer + buf_ofs[send_start+i], buf_size, &position,
144 xt_mpi_call(MPI_Isend(buffer + buf_ofs[send_start+i], position, MPI_PACKED,
147 requests+nrecv+i), comm);
155 const void *src_data,
void *dst_data,
156 int nsend,
int nrecv,
163 size_t num_tx = (size_t)nrecv + (
size_t)nsend;
165 requests = requests_auto;
166 buf_ofs = buf_ofs_auto;
168 requests =
xmalloc((num_tx+(num_tx&1)) *
sizeof (*requests)
169 + (num_tx+1) *
sizeof (*buf_ofs));
170 buf_ofs = (
void *)(requests + (num_tx+(num_tx&1)));
174 send_msgs, recv_msgs, comm);
175 unsigned char *buffer =
xmalloc(buffer_size);
178 src_data, nsend, nrecv,
179 send_msgs, recv_msgs,
183 xt_mpi_call(MPI_Waitall(nrecv + nsend, requests, MPI_STATUSES_IGNORE), comm);
185 for (
int i = 0; i < nrecv; ++i) {
186 int position = 0, recv_size = (int)(buf_ofs[i+1]-buf_ofs[i]);
187 xt_mpi_call(MPI_Unpack(buffer + buf_ofs[i], recv_size, &position, dst_data,
188 1, recv_msgs[i].datatype, comm), comm);
198xt_exchanger_irecv_isend_packed_s_exchange_omp(
199 const void *src_data,
void *dst_data,
200 int nsend,
int nrecv,
207 size_t num_tx = (size_t)nrecv + (
size_t)nsend;
209 requests = requests_auto;
210 buf_ofs = buf_ofs_auto;
212 requests =
xmalloc((num_tx+(num_tx&1)) *
sizeof (*requests) + (num_tx+1) *
sizeof (*buf_ofs));
213 buf_ofs = (
size_t *)(requests + (num_tx+(num_tx&1)));
217 send_msgs, recv_msgs, comm);
218 unsigned char *buffer =
xmalloc(buffer_size);
222 int num_threads = omp_get_num_threads(),
223 tid = omp_get_thread_num();
224 int start_send = (nsend * tid) / num_threads,
225 nsend_ = (nsend * (tid+1)) / num_threads - start_send,
226 start_recv = (nrecv * tid) / num_threads,
227 end_recv = (nrecv * (tid+1)) / num_threads,
228 nrecv_ = end_recv - start_recv,
229 nreq = nrecv_+nsend_,
230 start_req = start_send+start_recv;
233 src_data, nsend_, nrecv_,
234 send_msgs+start_send, recv_msgs+start_recv,
238 xt_mpi_call(MPI_Waitall(nreq, requests+start_req, MPI_STATUSES_IGNORE),
240 for (
int i = start_recv; i < end_recv; ++i) {
241 int position = 0, recv_size = (int)(buf_ofs[i+1]-buf_ofs[i]);
242 xt_mpi_call(MPI_Unpack(buffer + buf_ofs[i], recv_size, &position,
243 dst_data, 1, recv_msgs[i].datatype, comm), comm);
276 MPI_Datatype *datatypes = (MPI_Datatype *)
277 (
void *)((
unsigned char *)(header+1) +
sizeof (
size_t) * ((
size_t)
nrecv+1));
279 size_t *buf_ofs = (
void *)(header+1);
280 for (
int i = 0; i <
nrecv; ++i) {
281 int position = 0, buffer_size = (int)(buf_ofs[i+1]-buf_ofs[i]);
282 xt_mpi_call(MPI_Unpack((
unsigned char *)buf + buf_ofs[i], buffer_size,
284 1, datatypes[i], comm), comm);
286 for (
int i = 0; i <
nrecv; ++i)
300 size_t num_tx = (size_t)
nrecv + (
size_t)
nsend;
302 buf_ofs = buf_ofs_auto;
304 buf_ofs =
xmalloc((num_tx+1) *
sizeof (*buf_ofs));
307 send_msgs, recv_msgs, comm);
308 size_t inventory_size
310 + sizeof (size_t) * ((size_t)
nrecv+1)
311 +
sizeof (MPI_Datatype) * (size_t)
nrecv;
316 inventory_size + buffer_size, &conf);
319 MPI_Request *tmp_requests
322 unsigned char *buffer
326 src_data, nsend, nrecv,
327 send_msgs, recv_msgs,
332 for (
int i = 0; i <= nrecv; ++i) {
333 buf_ofs_[i] = buf_ofs[i]+inventory_size;
341 MPI_Datatype *datatypes = (
void *)(buffer +
sizeof (
struct inventory_header)
342 +
sizeof (
size_t) * ((
size_t)
nrecv+1));
343 for (
int i = 0; i <
nrecv; ++i)
353finalize_packed_a_exchange_mt(
Xt_request request,
void *buf)
360 MPI_Datatype *datatypes = (MPI_Datatype *)
361 ((
unsigned char *)(header+1) +
sizeof (
size_t) * ((
size_t)
nrecv+1));
363 size_t *buf_ofs = (
void *)(header+1);
364 int num_threads = omp_get_num_threads(),
365 tid = omp_get_thread_num();
366 int start_recv = (
nrecv * tid) / num_threads,
367 end_recv = (
nrecv * (tid+1)) / num_threads;
368 for (
int i = start_recv; i < end_recv; ++i) {
369 int position = 0, buffer_size = (int)(buf_ofs[i+1]-buf_ofs[i]);
370 xt_mpi_call(MPI_Unpack((
unsigned char *)buf + buf_ofs[i], buffer_size,
372 1, datatypes[i], comm), comm);
374 for (
int i = start_recv; i < end_recv; ++i)
379xt_exchanger_irecv_isend_packed_a_exchange_omp(
const void *src_data,
void *
dst_data,
388 size_t num_tx = (size_t)
nrecv + (
size_t)
nsend;
390 buf_ofs = buf_ofs_auto;
392 buf_ofs =
xmalloc((num_tx+1) *
sizeof (*buf_ofs));
395 send_msgs, recv_msgs, comm);
396 size_t inventory_size
398 + sizeof (size_t) * ((size_t)
nrecv+1)
399 +
sizeof (MPI_Datatype) * (size_t)
nrecv;
404 inventory_size + buffer_size, &conf);
406 requests, finalize_packed_a_exchange_mt);
407#pragma omp parallel firstprivate(requests, inventory_size)
409 MPI_Request *prequests
412 unsigned char *buffer
415 int num_threads = omp_get_num_threads(),
416 tid = omp_get_thread_num();
417 int start_send = (nsend * tid) / num_threads,
418 nsend_ = (nsend * (tid+1)) / num_threads - start_send,
419 start_recv = (nrecv * tid) / num_threads,
420 end_recv = (nrecv * (tid+1)) / num_threads,
421 nrecv_ = end_recv - start_recv,
422 start_req = start_send+start_recv;
425 start_send+nrecv-start_recv,
427 src_data, nsend_, nrecv_,
428 send_msgs+start_send, recv_msgs+start_recv,
430 prequests+start_req);
433 for (
int i = start_recv; i < end_recv+(end_recv==nrecv); ++i) {
434 buf_ofs_[i] = buf_ofs[i]+inventory_size;
443 MPI_Datatype *datatypes = (
void *)(buffer +
sizeof (
struct inventory_header)
444 +
sizeof (
size_t) * ((
size_t)
nrecv+1));
445 for (
int i = start_recv; i < end_recv; ++i)
463 size_t inventory_size
465 + sizeof (size_t) * ((size_t)
nrecv+1)
466 +
sizeof (MPI_Datatype) * (size_t)
nrecv;
471 inventory_size + buf_size, &conf);
488 s_exch_by_mthread_mode[] = {
491 xt_exchanger_irecv_isend_packed_s_exchange_omp,
497 a_exch_by_mthread_mode[] = {
500 xt_exchanger_irecv_isend_packed_a_exchange_omp,
507 nsend, nrecv, send_msgs, recv_msgs,
509 s_exch_by_mthread_mode[mthread_mode],
510 a_exch_by_mthread_mode[mthread_mode],
add versions of standard API functions not returning on error
int MPI_Type_free(MPI_Datatype *datatype)
int MPI_Type_dup(MPI_Datatype oldtype, MPI_Datatype *newtype)
struct Xt_config_ xt_default_config
void xt_config_set_redist_mthread_mode(Xt_config config, int mode)
struct Xt_config_ * Xt_config
int xt_config_get_redist_mthread_mode(Xt_config config)
implementation of configuration object
struct Xt_exchanger_omp_share_ * Xt_exchanger_omp_share
struct Xt_exchanger_ * Xt_exchanger
static void xt_exchanger_irecv_isend_packed_a_exchange(const void *src_data, void *dst_data, int nsend, int nrecv, const struct Xt_redist_msg *send_msgs, const struct Xt_redist_msg *recv_msgs, int tag_offset, MPI_Comm comm, Xt_request *request)
static size_t get_buffer_offsets(size_t *restrict buf_ofs, int nsend, int nrecv, const struct Xt_redist_msg *send_msgs, const struct Xt_redist_msg *recv_msgs, MPI_Comm comm)
static size_t get_buffer_size(int nsend, int nrecv, const struct Xt_redist_msg *send_msgs, const struct Xt_redist_msg *recv_msgs, MPI_Comm comm)
static void start_packed_transfer(unsigned char *buffer, int send_start, const size_t *buf_ofs, const void *src_data, int nsend, int nrecv, const struct Xt_redist_msg *send_msgs, const struct Xt_redist_msg *recv_msgs, MPI_Comm comm, int tag_offset, MPI_Request *requests)
static Xt_exchanger_omp_share xt_exchanger_irecv_isend_packed_create_omp_share(int nsend, int nrecv, const struct Xt_redist_msg *send_msgs, const struct Xt_redist_msg *recv_msgs, MPI_Comm comm)
static void finalize_packed_a_exchange(Xt_request request, void *buf)
static void xt_exchanger_irecv_isend_packed_s_exchange(const void *src_data, void *dst_data, int nsend, int nrecv, const struct Xt_redist_msg *send_msgs, const struct Xt_redist_msg *recv_msgs, int tag_offset, MPI_Comm comm)
Xt_exchanger xt_exchanger_irecv_isend_packed_new(int nsend, int nrecv, const struct Xt_redist_msg *send_msgs, const struct Xt_redist_msg *recv_msgs, MPI_Comm comm, int tag_offset, Xt_config config)
Xt_exchanger xt_exchanger_simple_base_new(int nsend, int nrecv, const struct Xt_redist_msg *send_msgs, const struct Xt_redist_msg *recv_msgs, MPI_Comm comm, int tag_offset, xt_simple_s_exchange_func s_func, xt_simple_a_exchange_func a_func, xt_simple_create_omp_share_func create_omp_share_func, Xt_config config)
void(* xt_simple_s_exchange_func)(const void *src_data, void *dst_data, int nsend, int nrecv, const struct Xt_redist_msg *send_msgs, const struct Xt_redist_msg *recv_msgs, int tag_offset, MPI_Comm comm)
void(* xt_simple_a_exchange_func)(const void *src_data, void *dst_data, int nsend, int nrecv, const struct Xt_redist_msg *send_msgs, const struct Xt_redist_msg *recv_msgs, int tag_offset, MPI_Comm comm, Xt_request *request)
#define xt_mpi_call(call, comm)
@ xt_mpi_tag_exchange_msg
redistribution of data, non-public declarations
struct Xt_request_ * Xt_request
void xt_request_msgs_ebuf_set_finalizer(Xt_request request, Xt_request_msgs_ebuf_finalizer finalizer)
MPI_Comm xt_request_msgs_ebuf_get_comm(Xt_request request)
Xt_request xt_request_msgs_ebuf_alloc(int n_requests, MPI_Comm comm, size_t extra_buf_size, Xt_config config)
MPI_Request * xt_request_msgs_ebuf_get_req_ptr(Xt_request request)
void * xt_request_msgs_ebuf_get_extra_buf(Xt_request request)
functions to create collection of request handles augmented with user-defined buffer
internal interfaces for xt_request_msgs_ebuf