M4RI 20250128
ple_russian_template.h
1#include <m4ri/misc.h>
2
3void __M4RI_TEMPLATE_NAME(_mzd_process_rows_ple)(mzd_t *M, rci_t startrow, rci_t stoprow,
4 rci_t startcol, int const k[N],
5 const ple_table_t *table[N]) {
6 assert(1 <= N && N <= 8);
7
8 const mzd_t *T[N];
9 const rci_t *E[N];
10 const word *B[N];
11 word bm[N];
12 int sh[N];
13 int x[N];
14 const word *t[N];
15
16 switch (N) { /* we rely on the compiler to optimise this switch away, it reads nicer than #if */
17 case 8:
18 T[7] = table[7]->T;
19 E[7] = table[7]->E;
20 B[7] = table[7]->B;
21 bm[7] = __M4RI_LEFT_BITMASK(k[7]);
22 sh[7] = k[0] + k[1] + k[2] + k[3] + k[4] + k[5] + k[6];
23 case 7:
24 T[6] = table[6]->T;
25 E[6] = table[6]->E;
26 B[6] = table[6]->B;
27 bm[6] = __M4RI_LEFT_BITMASK(k[6]);
28 sh[6] = k[0] + k[1] + k[2] + k[3] + k[4] + k[5];
29 case 6:
30 T[5] = table[5]->T;
31 E[5] = table[5]->E;
32 B[5] = table[5]->B;
33 bm[5] = __M4RI_LEFT_BITMASK(k[5]);
34 sh[5] = k[0] + k[1] + k[2] + k[3] + k[4];
35 case 5:
36 T[4] = table[4]->T;
37 E[4] = table[4]->E;
38 B[4] = table[4]->B;
39 bm[4] = __M4RI_LEFT_BITMASK(k[4]);
40 sh[4] = k[0] + k[1] + k[2] + k[3];
41 case 4:
42 T[3] = table[3]->T;
43 E[3] = table[3]->E;
44 B[3] = table[3]->B;
45 bm[3] = __M4RI_LEFT_BITMASK(k[3]);
46 sh[3] = k[0] + k[1] + k[2];
47 case 3:
48 T[2] = table[2]->T;
49 E[2] = table[2]->E;
50 B[2] = table[2]->B;
51 bm[2] = __M4RI_LEFT_BITMASK(k[2]);
52 sh[2] = k[0] + k[1];
53 case 2:
54 T[1] = table[1]->T;
55 E[1] = table[1]->E;
56 B[1] = table[1]->B;
57 bm[1] = __M4RI_LEFT_BITMASK(k[1]);
58 sh[1] = k[0];
59 case 1:
60 T[0] = table[0]->T;
61 E[0] = table[0]->E;
62 B[0] = table[0]->B;
63 bm[0] = __M4RI_LEFT_BITMASK(k[0]);
64 sh[0] = 0;
65 }
66
67 wi_t const block = startcol / m4ri_radix;
68 wi_t const wide = M->width - block;
69
70 for (rci_t r = startrow; r < stoprow; ++r) {
71 word bits = mzd_read_bits(M, r, startcol, sh[N - 1] + k[N - 1]);
72 word *m = mzd_row(M, r) + block;
73
74 switch (N) { /* we rely on the compiler to optimise this switch away, it reads nicer than #if */
75 case 8:
76 x[N - 8] = E[N - 8][(bits >> sh[N - 8]) & bm[N - 8]];
77 bits ^= B[N - 8][x[N - 8]];
78 t[N - 8] = mzd_row_const(T[N - 8], x[N - 8]) + block;
79 case 7:
80 x[N - 7] = E[N - 7][(bits >> sh[N - 7]) & bm[N - 7]];
81 bits ^= B[N - 7][x[N - 7]];
82 t[N - 7] = mzd_row_const(T[N - 7], x[N - 7]) + block;
83 case 6:
84 x[N - 6] = E[N - 6][(bits >> sh[N - 6]) & bm[N - 6]];
85 bits ^= B[N - 6][x[N - 6]];
86 t[N - 6] = mzd_row_const(T[N - 6], x[N - 6]) + block;
87 case 5:
88 x[N - 5] = E[N - 5][(bits >> sh[N - 5]) & bm[N - 5]];
89 bits ^= B[N - 5][x[N - 5]];
90 t[N - 5] = mzd_row_const(T[N - 5], x[N - 5]) + block;
91 case 4:
92 x[N - 4] = E[N - 4][(bits >> sh[N - 4]) & bm[N - 4]];
93 bits ^= B[N - 4][x[N - 4]];
94 t[N - 4] = mzd_row_const(T[N - 4], x[N - 4]) + block;
95 case 3:
96 x[N - 3] = E[N - 3][(bits >> sh[N - 3]) & bm[N - 3]];
97 bits ^= B[N - 3][x[N - 3]];
98 t[N - 3] = mzd_row_const(T[N - 3], x[N - 3]) + block;
99 case 2:
100 x[N - 2] = E[N - 2][(bits >> sh[N - 2]) & bm[N - 2]];
101 bits ^= B[N - 2][x[N - 2]];
102 t[N - 2] = mzd_row_const(T[N - 2], x[N - 2]) + block;
103 case 1:
104 x[N - 1] = E[N - 1][(bits >> sh[N - 1]) & bm[N - 1]];
105 bits ^= B[N - 1][x[N - 1]];
106 t[N - 1] = mzd_row_const(T[N - 1], x[N - 1]) + block;
107 }
108
109 __M4RI_TEMPLATE_NAME(_mzd_combine)(m, t, wide);
110 }
111
112 __M4RI_DD_MZD(M);
113}
114
115void __M4RI_TEMPLATE_NAME(_mzd_ple_a11)(mzd_t *A, rci_t const start_row, rci_t const stop_row,
116 rci_t const start_col, wi_t const block, int const k[N],
117 ple_table_t const *table[N]) {
118
119 wi_t const wide = A->width - block;
120
121 if (wide <= 0) return;
122
123 const mzd_t *T[N];
124 const rci_t *M[N];
125 word bm[N];
126 int sh[N];
127 int x[N];
128 const word *t[N];
129
130 switch (N) { /* we rely on the compiler to optimise this switch away, it reads nicer than #if */
131 case 8:
132 T[7] = table[7]->T;
133 M[7] = table[7]->M;
134 bm[7] = __M4RI_LEFT_BITMASK(k[7]);
135 sh[7] = k[0] + k[1] + k[2] + k[3] + k[4] + k[5] + k[6];
136 case 7:
137 T[6] = table[6]->T;
138 M[6] = table[6]->M;
139 bm[6] = __M4RI_LEFT_BITMASK(k[6]);
140 sh[6] = k[0] + k[1] + k[2] + k[3] + k[4] + k[5];
141 case 6:
142 T[5] = table[5]->T;
143 M[5] = table[5]->M;
144 bm[5] = __M4RI_LEFT_BITMASK(k[5]);
145 sh[5] = k[0] + k[1] + k[2] + k[3] + k[4];
146 case 5:
147 T[4] = table[4]->T;
148 M[4] = table[4]->M;
149 bm[4] = __M4RI_LEFT_BITMASK(k[4]);
150 sh[4] = k[0] + k[1] + k[2] + k[3];
151 case 4:
152 T[3] = table[3]->T;
153 M[3] = table[3]->M;
154 bm[3] = __M4RI_LEFT_BITMASK(k[3]);
155 sh[3] = k[0] + k[1] + k[2];
156 case 3:
157 T[2] = table[2]->T;
158 M[2] = table[2]->M;
159 bm[2] = __M4RI_LEFT_BITMASK(k[2]);
160 sh[2] = k[0] + k[1];
161 case 2:
162 T[1] = table[1]->T;
163 M[1] = table[1]->M;
164 bm[1] = __M4RI_LEFT_BITMASK(k[1]);
165 sh[1] = k[0];
166 case 1:
167 T[0] = table[0]->T;
168 M[0] = table[0]->M;
169 bm[0] = __M4RI_LEFT_BITMASK(k[0]);
170 sh[0] = 0;
171 };
172
173 const rci_t bits_to_read = sh[N - 1] + k[N - 1];
174
175 for (rci_t i = start_row; i < stop_row; ++i) {
176 const word bits = mzd_read_bits(A, i, start_col, bits_to_read);
177 word *m = mzd_row(A, i) + block;
178
179 switch (N) { /* we rely on the compiler to optimise this switch away, it reads nicer than #if */
180 case 8:
181 x[N - 8] = M[N - 8][(bits >> sh[N - 8]) & bm[N - 8]];
182 t[N - 8] = mzd_row_const(T[N - 8], x[N - 8]) + block;
183 case 7:
184 x[N - 7] = M[N - 7][(bits >> sh[N - 7]) & bm[N - 7]];
185 t[N - 7] = mzd_row_const(T[N - 7], x[N - 7]) + block;
186 case 6:
187 x[N - 6] = M[N - 6][(bits >> sh[N - 6]) & bm[N - 6]];
188 t[N - 6] = mzd_row_const(T[N - 6], x[N - 6]) + block;
189 case 5:
190 x[N - 5] = M[N - 5][(bits >> sh[N - 5]) & bm[N - 5]];
191 t[N - 5] = mzd_row_const(T[N - 5], x[N - 5]) + block;
192 case 4:
193 x[N - 4] = M[N - 4][(bits >> sh[N - 4]) & bm[N - 4]];
194 t[N - 4] = mzd_row_const(T[N - 4], x[N - 4]) + block;
195 case 3:
196 x[N - 3] = M[N - 3][(bits >> sh[N - 3]) & bm[N - 3]];
197 t[N - 3] = mzd_row_const(T[N - 3], x[N - 3]) + block;
198 case 2:
199 x[N - 2] = M[N - 2][(bits >> sh[N - 2]) & bm[N - 2]];
200 t[N - 2] = mzd_row_const(T[N - 2], x[N - 2]) + block;
201 case 1:
202 x[N - 1] = M[N - 1][(bits >> sh[N - 1]) & bm[N - 1]];
203 t[N - 1] = mzd_row_const(T[N - 1], x[N - 1]) + block;
204 }
205 __M4RI_TEMPLATE_NAME(_mzd_combine)(m, t, wide);
206 }
207 __M4RI_DD_MZD(A);
208}
Helper functions.
int rci_t
Type of row and column indexes.
Definition misc.h:72
int64_t wi_t
Type of word indexes.
Definition misc.h:81
uint64_t word
A word is the typical packed data structure to represent packed bits.
Definition misc.h:87
static int const m4ri_radix
The number of bits in a word.
Definition misc.h:141
#define __M4RI_LEFT_BITMASK(n)
create a bit mask to zero out all but the (n - 1) % m4ri_radix + 1 leftmost bits.
Definition misc.h:272
static word * mzd_row(mzd_t *M, rci_t row)
Get pointer to first word of row.
Definition mzd.h:185
static word mzd_read_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n)
Definition mzd.h:892
mzd_t
Dense matrices over GF(2).
Definition mzd.h:68
ple_table_t
PLE Elimination Tables.
Definition ple_russian.h:39