1/*
2 This file is part of Mtproto-proxy Library.
3
4 Mtproto-proxy Library is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 Mtproto-proxy Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public License
15 along with Mtproto-proxy Library. If not, see <http://www.gnu.org/licenses/>.
16
17 Copyright 2009-2012 Vkontakte Ltd
18 2009-2012 Nikolai Durov
19 2009-2012 Andrey Lopatin
20 2012 Anton Maydell
21
22 Copyright 2014 Telegram Messenger Inc
23 2014 Anton Maydell
24*/
25
26#include <assert.h>
27#include <stdlib.h>
28#include <math.h>
29
30#include "crc32.h"
31#include "common/cpuid.h"
32#include "common/kprintf.h"
33
34
/*
 * Copies a 32-bit integer `src` into the low dword of the SSE register
 * variable `dst` with MOVD (the rest of the register is zeroed by the
 * instruction).
 *
 * The source operand is constrained to "rm" (register or memory): MOVD has no
 * immediate form, so the previous "g" constraint could make the compiler emit
 * `movd $imm, %xmm` for a constant argument, which does not assemble.
 * Constants are still accepted; the compiler materialises them first.
 * `__asm__` is used instead of `asm` so the macro also builds in strict ISO
 * modes (-std=c99/-std=c11).
 */
#define FASTMOV_RMI32_TO_SSE(dst, src) \
  __asm__ __volatile__ ("movd %1, %0\n\t" : "=x" (dst) : "rm" (src))
37
/*
 * FASTMOV_SSE_TO_LO_HI_DW(sse, lo, hi)
 *
 * Extracts the low 64 bits of the SSE register variable `sse` and stores
 * bits 0..31 into `lo` and bits 32..63 into `hi`.
 *
 * Both variants leave `sse` unmodified. (The 32-bit variant used to shift
 * `sse` in place, so the macro had a side effect on the caller's variable
 * only on non-LP64 builds.)
 */
#ifdef __LP64__

/* LP64: one MOVQ into a general-purpose register, then split in C. */
#define FASTMOV_SSE_TO_LO_HI_DW(sse, lo, hi)\
  do {\
    uint64_t fastmov_qw_; \
    __asm__ __volatile__ ("movq %1, %0\n\t" : "=r" (fastmov_qw_) : "x" (sse)); \
    (lo) = (uint32_t) fastmov_qw_; \
    (hi) = (uint32_t) (fastmov_qw_ >> 32); \
  } while(0)

#else

/* 32-bit: two MOVDs; the second reads a copy shifted right by 32 bits. */
#define FASTMOV_SSE_TO_LO_HI_DW(sse, lo, hi)\
  do {\
    __typeof__ (sse) fastmov_hi_ = __builtin_ia32_psrldqi128 ((sse), 32); \
    __asm__ __volatile__ ("movd %1, %0\n\t" : "=r" (lo) : "x" (sse)); \
    __asm__ __volatile__ ("movd %1, %0\n\t" : "=r" (hi) : "x" (fastmov_hi_)); \
  } while(0)

#endif
58
59
/*
 * RETURN_SSE_UINT64(sse)
 *   Returns (with a `return` statement, so it must be the tail of a function
 *   returning a 64-bit integer) the low 64 bits of the SSE variable `sse`.
 *
 * FASTMOV_RMI64_TO_SSE(dst, src)
 *   Loads the 64-bit integer `src` into the low qword of the SSE variable
 *   `dst`.
 */
#ifdef __LP64__

#define RETURN_SSE_UINT64(sse)\
  do {\
    uint64_t return_qw_; \
    __asm__ __volatile__ ("movq %1, %0\n\t" : "=r" (return_qw_) : "x" (sse)); \
    return return_qw_; \
  } while(0)

// RMI == reg, mem, imm
// The asm operand itself is limited to "rm": MOVQ to an XMM register has no
// immediate form, so an immediate accepted by "g" would fail to assemble.
// Constant arguments still work; the compiler places them in a register or
// memory first.
#define FASTMOV_RMI64_TO_SSE(dst, src) \
  __asm__ __volatile__ ("movq %1, %0\n\t" : "=x" (dst) : "rm" (src))

#else

/* 32-bit: rebuild the value from the two dwords extracted by FASTMOV_SSE_TO_LO_HI_DW. */
#define RETURN_SSE_UINT64(sse)\
  do {\
    uint32_t lo, hi;\
    FASTMOV_SSE_TO_LO_HI_DW(sse, lo, hi);\
    uint64_t return_qw_; \
    return_qw_ = (((uint64_t)hi)<<32) | lo; \
    return return_qw_; \
  } while(0)

/* 32-bit: spill the value to memory and load all 64 bits with one MOVSD. */
#define FASTMOV_RMI64_TO_SSE(dst, src) \
  do { \
    uint64_t fastmov_src_ = (src); \
    __asm__ __volatile__ ("movsd %1, %0\n\t" : "=x" (dst) : "m" (fastmov_src_)); \
  } while(0)

#endif
91
92
/*
 * Byte-wise CRC-32 lookup table. Entry 0x80 is 0xedb88320, the value of
 * CRC32_REFLECTED_POLY defined later in this file.
 *
 * Used by crc32_partial_generic in two ways: as the plain one-byte-at-a-time
 * table for trailing bytes, and as the table indexed by the most significant
 * byte (bits 24..31) in the four-bytes-per-step update.
 */
static const unsigned int crc32_table[256] =
{
  0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
  0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
  0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
  0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
  0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
  0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
  0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
  0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
  0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
  0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
  0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
  0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
  0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
  0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
  0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
  0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
  0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
  0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
  0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
  0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
  0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
  0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
  0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
  0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
  0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
  0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
  0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
  0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
  0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
  0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
  0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
  0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
  0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
  0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
  0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
  0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
  0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
  0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
  0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
  0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
  0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
  0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
  0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
  0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
  0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
  0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
  0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
  0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
  0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
  0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
  0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
  0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
  0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
  0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
  0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
  0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
  0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
  0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
  0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
  0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
  0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
  0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
  0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
  0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
160
/*
 * Second slicing table for the four-bytes-per-step CRC-32 update.
 * crc32_partial_generic indexes it with bits 16..23 of (crc ^ input word).
 */
static const unsigned int crc32_table2[256] =
{
  0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3,
  0x646cc504, 0x7d77f445, 0x565aa786, 0x4f4196c7,
  0xc8d98a08, 0xd1c2bb49, 0xfaefe88a, 0xe3f4d9cb,
  0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, 0x87981ccf,
  0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192,
  0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496,
  0x821b9859, 0x9b00a918, 0xb02dfadb, 0xa936cb9a,
  0xe6775d5d, 0xff6c6c1c, 0xd4413fdf, 0xcd5a0e9e,
  0x958424a2, 0x8c9f15e3, 0xa7b24620, 0xbea97761,
  0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265,
  0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69,
  0x39316bae, 0x202a5aef, 0x0b07092c, 0x121c386d,
  0xdf4636f3, 0xc65d07b2, 0xed705471, 0xf46b6530,
  0xbb2af3f7, 0xa231c2b6, 0x891c9175, 0x9007a034,
  0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38,
  0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c,
  0xf0794f05, 0xe9627e44, 0xc24f2d87, 0xdb541cc6,
  0x94158a01, 0x8d0ebb40, 0xa623e883, 0xbf38d9c2,
  0x38a0c50d, 0x21bbf44c, 0x0a96a78f, 0x138d96ce,
  0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca,
  0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97,
  0xded79850, 0xc7cca911, 0xece1fad2, 0xf5facb93,
  0x7262d75c, 0x6b79e61d, 0x4054b5de, 0x594f849f,
  0x160e1258, 0x0f152319, 0x243870da, 0x3d23419b,
  0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864,
  0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60,
  0xad24e1af, 0xb43fd0ee, 0x9f12832d, 0x8609b26c,
  0xc94824ab, 0xd05315ea, 0xfb7e4629, 0xe2657768,
  0x2f3f79f6, 0x362448b7, 0x1d091b74, 0x04122a35,
  0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31,
  0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d,
  0x838a36fa, 0x9a9107bb, 0xb1bc5478, 0xa8a76539,
  0x3b83984b, 0x2298a90a, 0x09b5fac9, 0x10aecb88,
  0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, 0x74c20e8c,
  0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180,
  0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484,
  0x71418a1a, 0x685abb5b, 0x4377e898, 0x5a6cd9d9,
  0x152d4f1e, 0x0c367e5f, 0x271b2d9c, 0x3e001cdd,
  0xb9980012, 0xa0833153, 0x8bae6290, 0x92b553d1,
  0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5,
  0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a,
  0xca6b79ed, 0xd37048ac, 0xf85d1b6f, 0xe1462a2e,
  0x66de36e1, 0x7fc507a0, 0x54e85463, 0x4df36522,
  0x02b2f3e5, 0x1ba9c2a4, 0x30849167, 0x299fa026,
  0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b,
  0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f,
  0x2c1c24b0, 0x350715f1, 0x1e2a4632, 0x07317773,
  0x4870e1b4, 0x516bd0f5, 0x7a468336, 0x635db277,
  0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, 0xe0d7848d,
  0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189,
  0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85,
  0x674f9842, 0x7e54a903, 0x5579fac0, 0x4c62cb81,
  0x8138c51f, 0x9823f45e, 0xb30ea79d, 0xaa1596dc,
  0xe554001b, 0xfc4f315a, 0xd7626299, 0xce7953d8,
  0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4,
  0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0,
  0x5e7ef3ec, 0x4765c2ad, 0x6c48916e, 0x7553a02f,
  0x3a1236e8, 0x230907a9, 0x0824546a, 0x113f652b,
  0x96a779e4, 0x8fbc48a5, 0xa4911b66, 0xbd8a2a27,
  0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23,
  0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e,
  0x70d024b9, 0x69cb15f8, 0x42e6463b, 0x5bfd777a,
  0xdc656bb5, 0xc57e5af4, 0xee530937, 0xf7483876,
  0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, 0x9324fd72,
};
228
/*
 * Third slicing table for the four-bytes-per-step CRC-32 update.
 * crc32_partial_generic indexes it with bits 8..15 of (crc ^ input word).
 */
static const unsigned int crc32_table1[256] =
{
  0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59,
  0x0709a8dc, 0x06cbc2eb, 0x048d7cb2, 0x054f1685,
  0x0e1351b8, 0x0fd13b8f, 0x0d9785d6, 0x0c55efe1,
  0x091af964, 0x08d89353, 0x0a9e2d0a, 0x0b5c473d,
  0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29,
  0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5,
  0x1235f2c8, 0x13f798ff, 0x11b126a6, 0x10734c91,
  0x153c5a14, 0x14fe3023, 0x16b88e7a, 0x177ae44d,
  0x384d46e0, 0x398f2cd7, 0x3bc9928e, 0x3a0bf8b9,
  0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065,
  0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901,
  0x3157bf84, 0x3095d5b3, 0x32d36bea, 0x331101dd,
  0x246be590, 0x25a98fa7, 0x27ef31fe, 0x262d5bc9,
  0x23624d4c, 0x22a0277b, 0x20e69922, 0x2124f315,
  0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71,
  0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad,
  0x709a8dc0, 0x7158e7f7, 0x731e59ae, 0x72dc3399,
  0x7793251c, 0x76514f2b, 0x7417f172, 0x75d59b45,
  0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, 0x7ccf6221,
  0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd,
  0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9,
  0x6bb5866c, 0x6a77ec5b, 0x68315202, 0x69f33835,
  0x62af7f08, 0x636d153f, 0x612bab66, 0x60e9c151,
  0x65a6d7d4, 0x6464bde3, 0x662203ba, 0x67e0698d,
  0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579,
  0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5,
  0x46c49a98, 0x4706f0af, 0x45404ef6, 0x448224c1,
  0x41cd3244, 0x400f5873, 0x4249e62a, 0x438b8c1d,
  0x54f16850, 0x55330267, 0x5775bc3e, 0x56b7d609,
  0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5,
  0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1,
  0x5deb9134, 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d,
  0xe1351b80, 0xe0f771b7, 0xe2b1cfee, 0xe373a5d9,
  0xe63cb35c, 0xe7fed96b, 0xe5b86732, 0xe47a0d05,
  0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461,
  0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd,
  0xfd13b8f0, 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9,
  0xfa1a102c, 0xfbd87a1b, 0xf99ec442, 0xf85cae75,
  0xf300e948, 0xf2c2837f, 0xf0843d26, 0xf1465711,
  0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd,
  0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339,
  0xde71f5bc, 0xdfb39f8b, 0xddf521d2, 0xdc374be5,
  0xd76b0cd8, 0xd6a966ef, 0xd4efd8b6, 0xd52db281,
  0xd062a404, 0xd1a0ce33, 0xd3e6706a, 0xd2241a5d,
  0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049,
  0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895,
  0xcb4dafa8, 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1,
  0xcc440774, 0xcd866d43, 0xcfc0d31a, 0xce02b92d,
  0x91af9640, 0x906dfc77, 0x922b422e, 0x93e92819,
  0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5,
  0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1,
  0x98b56f24, 0x99770513, 0x9b31bb4a, 0x9af3d17d,
  0x8d893530, 0x8c4b5f07, 0x8e0de15e, 0x8fcf8b69,
  0x8a809dec, 0x8b42f7db, 0x89044982, 0x88c623b5,
  0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1,
  0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d,
  0xa9e2d0a0, 0xa820ba97, 0xaa6604ce, 0xaba46ef9,
  0xaeeb787c, 0xaf29124b, 0xad6fac12, 0xacadc625,
  0xa7f18118, 0xa633eb2f, 0xa4755576, 0xa5b73f41,
  0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d,
  0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89,
  0xb2cddb0c, 0xb30fb13b, 0xb1490f62, 0xb08b6555,
  0xbbd72268, 0xba15485f, 0xb853f606, 0xb9919c31,
  0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, 0xbe9834ed,
};
296
/*
 * Fourth slicing table for the four-bytes-per-step CRC-32 update.
 * crc32_partial_generic indexes it with bits 0..7 of (crc ^ input word);
 * crc32_partial_clmul uses the same four-table lookup for its final
 * 32-bit reduction when CRC32_BARRETT_REDUCTION is not defined.
 */
static const unsigned int crc32_table0[256] = {
  0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee,
  0x8f629757, 0x37def032, 0x256b5fdc, 0x9dd738b9,
  0xc5b428ef, 0x7d084f8a, 0x6fbde064, 0xd7018701,
  0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, 0x58631056,
  0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871,
  0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26,
  0x95ad7f70, 0x2d111815, 0x3fa4b7fb, 0x8718d09e,
  0x1acfe827, 0xa2738f42, 0xb0c620ac, 0x087a47c9,
  0xa032af3e, 0x188ec85b, 0x0a3b67b5, 0xb28700d0,
  0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787,
  0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f,
  0xeae41086, 0x525877e3, 0x40edd80d, 0xf851bf68,
  0xf02bf8a1, 0x48979fc4, 0x5a22302a, 0xe29e574f,
  0x7f496ff6, 0xc7f50893, 0xd540a77d, 0x6dfcc018,
  0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0,
  0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7,
  0x9b14583d, 0x23a83f58, 0x311d90b6, 0x89a1f7d3,
  0x1476cf6a, 0xaccaa80f, 0xbe7f07e1, 0x06c36084,
  0x5ea070d2, 0xe61c17b7, 0xf4a9b859, 0x4c15df3c,
  0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b,
  0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c,
  0x446f98f5, 0xfcd3ff90, 0xee66507e, 0x56da371b,
  0x0eb9274d, 0xb6054028, 0xa4b0efc6, 0x1c0c88a3,
  0x81dbb01a, 0x3967d77f, 0x2bd27891, 0x936e1ff4,
  0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed,
  0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba,
  0xfe92dfec, 0x462eb889, 0x549b1767, 0xec277002,
  0x71f048bb, 0xc94c2fde, 0xdbf98030, 0x6345e755,
  0x6b3fa09c, 0xd383c7f9, 0xc1366817, 0x798a0f72,
  0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825,
  0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d,
  0x21e91f24, 0x99557841, 0x8be0d7af, 0x335cb0ca,
  0xed59b63b, 0x55e5d15e, 0x47507eb0, 0xffec19d5,
  0x623b216c, 0xda874609, 0xc832e9e7, 0x708e8e82,
  0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a,
  0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d,
  0xbd40e1a4, 0x05fc86c1, 0x1749292f, 0xaff54e4a,
  0x322276f3, 0x8a9e1196, 0x982bbe78, 0x2097d91d,
  0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, 0x6a4166a5,
  0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2,
  0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb,
  0xc2098e52, 0x7ab5e937, 0x680046d9, 0xd0bc21bc,
  0x88df31ea, 0x3063568f, 0x22d6f961, 0x9a6a9e04,
  0x07bda6bd, 0xbf01c1d8, 0xadb46e36, 0x15080953,
  0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174,
  0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623,
  0xd8c66675, 0x607a0110, 0x72cfaefe, 0xca73c99b,
  0x57a4f122, 0xef189647, 0xfdad39a9, 0x45115ecc,
  0x764dee06, 0xcef18963, 0xdc44268d, 0x64f841e8,
  0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf,
  0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907,
  0x3c9b51be, 0x842736db, 0x96929935, 0x2e2efe50,
  0x2654b999, 0x9ee8defc, 0x8c5d7112, 0x34e11677,
  0xa9362ece, 0x118a49ab, 0x033fe645, 0xbb838120,
  0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98,
  0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf,
  0xd67f4138, 0x6ec3265d, 0x7c7689b3, 0xc4caeed6,
  0x591dd66f, 0xe1a1b10a, 0xf3141ee4, 0x4ba87981,
  0x13cb69d7, 0xab770eb2, 0xb9c2a15c, 0x017ec639,
  0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e,
  0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949,
  0x090481f0, 0xb1b8e695, 0xa30d497b, 0x1bb12e1e,
  0x43d23e48, 0xfb6e592d, 0xe9dbf6c3, 0x516791a6,
  0xccb0a91f, 0x740cce7a, 0x66b96194, 0xde0506f1,
};
363
/*
 * Function-pointer entry points for CRC-32 computation and CRC-32
 * combination. They are only defined here (zero-initialised); nothing in
 * this part of the file assigns them.
 * NOTE(review): presumably set at start-up to the generic or CLMUL variant
 * depending on CPU features -- confirm against the initialisation code.
 */
crc32_partial_func_t crc32_partial;
crc32_combine_func_t compute_crc32_combine;
366
367unsigned crc32_partial_generic (const void *data, long len, unsigned crc) {
368 const int *p = (const int *) data;
369 long x;
370#define DO_ONE(v) crc ^= v; crc = crc32_table0[crc & 0xff] ^ crc32_table1[(crc & 0xff00) >> 8] ^ crc32_table2[(crc & 0xff0000) >> 16] ^ crc32_table[crc >> 24];
371#define DO_FOUR(p) DO_ONE((p)[0]); DO_ONE((p)[1]); DO_ONE((p)[2]); DO_ONE((p)[3]);
372
373 for (x = (len >> 5); x > 0; x--) {
374 DO_FOUR (p);
375 DO_FOUR (p + 4);
376 p += 8;
377 }
378 if (len & 16) {
379 DO_FOUR (p);
380 p += 4;
381 }
382 if (len & 8) {
383 DO_ONE (p[0]);
384 DO_ONE (p[1]);
385 p += 2;
386 }
387 if (len & 4) {
388 DO_ONE (*p++);
389 }
390#undef DO_ONE
391#undef DO_FOUR
392 const char *q = (const char *) p;
393 if (len & 2) {
394 crc = crc32_table[(crc ^ q[0]) & 0xff] ^ (crc >> 8);
395 crc = crc32_table[(crc ^ q[1]) & 0xff] ^ (crc >> 8);
396 q += 2;
397 }
398 if (len & 1) {
399 crc = crc32_table[(crc ^ *q++) & 0xff] ^ (crc >> 8);
400 }
401 return crc;
402}
403
404/******************** CLMUL ********************/
405
/* CRC-32 generator polynomial: normal form, and the bit-reflected form that
   the tables and the gf32_* helpers in this file work with. */
#define CRC32_POLY 0x04c11db7u
#define CRC32_REFLECTED_POLY 0xedb88320u

/* Constants for the PCLMULQDQ CRC-32 path. They are grouped into the
   CRC32_K256 / CRC32_K128 / CRC32_K64 vectors below.
   NOTE(review): by their names these are powers of x (x^319 ... x^63) reduced
   modulo the polynomial, in reflected form and stored in the upper 32 bits --
   confirm against the derivation. */
#define CRC32_REFLECTED_X319 0x9570d49500000000ll
#define CRC32_REFLECTED_X255 0x01b5fd1d00000000ll
#define CRC32_REFLECTED_X191 0x65673b4600000000ll
#define CRC32_REFLECTED_X127 0x9ba54c6f00000000ll
#define CRC32_REFLECTED_X95 0xccaa009e00000000ll
#define CRC32_REFLECTED_X63 0xb8bc676500000000ll

/* Polynomial (33 bits) and mu constant used by the optional Barrett
   reduction step of crc32_partial_clmul (see CRC32_MU below). */
#define CRC32_REFLECTED_POLY_33_BIT 0x1db710641ll
#define CRC32_REFLECTED_MU 0x1f7011641ll

/* The same set of constants for the CRC-64 path (CRC64_K256, CRC64_K128 and
   CRC64_MU below). Note that X191 is listed before X255 here. */
#define CRC64_REFLECTED_X319 0x60095b008a9efa44ll
#define CRC64_REFLECTED_X191 0xe05dd497ca393ae4ll
#define CRC64_REFLECTED_X255 0x3be653a30fe1af51ll
#define CRC64_REFLECTED_X127 0xdabe95afc7875f40ll
#define CRC64_REFLECTED_X95 0x1dee8a5e222ca1dcll
/* Only the low 64 bits of the 65-bit values fit in these literals. */
#define CRC64_REFLECTED_POLY_65_BIT 0x92d8af2baf0e1e85ll
//mu(65-bit): 01001110000111110010001100110110000010111001010010110001111010101
#define CRC64_REFLECTED_MU 0x9c3e466c172963d5ll
427
/*
 * Source of PSHUFB control vectors. The CLMUL routines load 16 bytes from an
 * unaligned offset inside this table (mask + o, mask + 16 + o,
 * mask + 16 + len, mask + 32 + len) to obtain a byte-shift pattern.
 *
 * Layout: 16 zero bytes, 16 bytes of -1, the identity permutation 0..15,
 * 16 bytes of -1. A control byte with its top bit set (-1) makes PSHUFB
 * write zero, so a window that straddles a run of -1 and the 0..15 run
 * moves bytes by a variable amount and zero-fills the rest.
 */
static const char mask[64] __attribute__ ((aligned (64))) = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
434
/*
 * Constant pairs passed to PCLMULQDQ. Each vector holds two 64-bit
 * constants: element 0 is selected by a low-qword selector bit, element 1 by
 * a high-qword selector bit.
 *
 * K256 and K128 are handed to crcXX_partial_clmul; CRC32_K64 is used by
 * crc32_partial_clmul for its 128 -> 64 bit reduction.
 */
static const v2di CRC32_K256 = { CRC32_REFLECTED_X319, CRC32_REFLECTED_X255 };
static const v2di CRC32_K128 = { CRC32_REFLECTED_X191, CRC32_REFLECTED_X127 };
static const v2di CRC32_K64 = { CRC32_REFLECTED_X95, CRC32_REFLECTED_X63 };
/* CRC32_MU is referenced only when CRC32_BARRETT_REDUCTION is defined; this
   extra declaration marks it unused otherwise, to avoid a compiler warning. */
#ifndef CRC32_BARRETT_REDUCTION
static const v2di CRC32_MU __attribute__ ((unused));
#endif
static const v2di CRC32_MU = { CRC32_REFLECTED_MU, CRC32_REFLECTED_POLY_33_BIT };
static const v2di CRC64_K256 = { CRC64_REFLECTED_X319, CRC64_REFLECTED_X255 };
static const v2di CRC64_K128 = { CRC64_REFLECTED_X191, CRC64_REFLECTED_X127 };
static const v2di CRC64_MU = { CRC64_REFLECTED_MU, CRC64_REFLECTED_POLY_65_BIT };
445
/*
 * Folding loop shared by the CRC-32 and CRC-64 PCLMULQDQ implementations in
 * this file.
 *
 *   q     - next unread input; it is dereferenced as v2di / v16qi, and both
 *           callers pass a 16-byte aligned address
 *   len   - number of input bytes remaining at q
 *   D, E  - two 128-bit accumulators prepared by the caller from the first
 *           two 16-byte blocks (D is the earlier one)
 *   K256  - constant pair used to carry an accumulator 32 bytes forward
 *   K128  - constant pair used to carry an accumulator 16 bytes forward
 *
 * Returns one 128-bit value; the caller performs the final reduction.
 *
 * PCLMULQDQ selector: bit 0 picks the qword of the first operand, bit 4 the
 * qword of the second, so 0x00 is low*low and 0x11 is high*high.
 *
 * The prototype only attaches the alignment attribute to the function.
 */
static v2di crcXX_partial_clmul (const void *q, long len, v2di D, v2di E, v2di K256, v2di K128) __attribute__((aligned(32)));
v2di crcXX_partial_clmul (const void *q, long len, v2di D, v2di E, v2di K256, v2di K128) {
  v2di G, H;
  if (len >= 32) {
    /* e = q advanced by len rounded down to a multiple of 32; the loop below
       stops exactly there. */
    const void *e = ((const char *) q) + (len & -32l);
    do {
      /* Load the next 32 input bytes ... */
      G = *((v2di *) q);
      H = *((v2di *) (q + 16));
      /* ... and fold each accumulator into the block 32 bytes after it:
         new = lo(acc) * K256.lo  ^  hi(acc) * K256.hi  ^  data. */
      G ^= __builtin_ia32_pclmulqdq128 (D, K256, 0x00);
      H ^= __builtin_ia32_pclmulqdq128 (E, K256, 0x00);
      D = __builtin_ia32_pclmulqdq128 (D, K256, 0x11);
      E = __builtin_ia32_pclmulqdq128 (E, K256, 0x11);
      D ^= G;
      E ^= H;
      q += 32; /* arithmetic on void * (GNU extension): advances by 32 bytes */
    } while (q != e);
  }

  if (len & 16) {
    /* One more whole 16-byte block: D is carried with K256 and E with K128,
       and both are merged with that block into D. */
    G = __builtin_ia32_pclmulqdq128 (D, K256, 0x00);
    H = __builtin_ia32_pclmulqdq128 (E, K128, 0x00);
    D = __builtin_ia32_pclmulqdq128 (D, K256, 0x11);
    E = __builtin_ia32_pclmulqdq128 (E, K128, 0x11);
    D ^= *((v2di *) q) ^ G ^ H ^ E;
    q += 16;
  } else {
    /* No whole block left: carry D with K128 and merge it with E. */
    G = __builtin_ia32_pclmulqdq128 (D, K128, 0x00);
    D = __builtin_ia32_pclmulqdq128 (D, K128, 0x11);
    D ^= G ^ E;
  }

  if ((len &= 15)) {
    /* 1..15 trailing bytes. Two windows of mask[] split D:
       E = D moved down by len bytes (zero-filled at the top),
       D = the lowest len bytes of D moved to the top (zero-filled below).
       The 16-byte load at q is permuted with the same control H, so only its
       first len bytes are used; note that a full 16 bytes are read at q
       regardless of len. */
    E = (v2di) __builtin_ia32_pshufb128 ( (v16qi) D, __builtin_ia32_loaddqu (mask + 32 + len));
    H = (v2di) __builtin_ia32_loaddqu (mask + 16 + len);
    D = (v2di) __builtin_ia32_pshufb128 ( (v16qi) D, (v16qi) H);
    E ^= (v2di) __builtin_ia32_pshufb128 (*((v16qi *) q), (v16qi) H);
    /* Final fold of the split-off part into E, using K128. */
    G = __builtin_ia32_pclmulqdq128 (D, K128, 0x00);
    D = __builtin_ia32_pclmulqdq128 (D, K128, 0x11);
    D ^= G ^ E;
  }
  return D;
}
488
/*
 * CRC-32 update using PCLMULQDQ folding.
 *
 *   data - input bytes (any alignment)
 *   len  - number of bytes; inputs shorter than 40 bytes are delegated to
 *          crc32_partial_generic
 *   crc  - running CRC state before these bytes
 *
 * Returns the running CRC state after the bytes.
 *
 * The first load starts at data rounded down to a 16-byte boundary, so up to
 * 15 bytes in front of data are read (and then masked off).
 */
unsigned crc32_partial_clmul (const void *data, long len, unsigned crc) {
  if (len < 40) {
    return crc32_partial_generic (data, len, crc);
  }

  /* works only for len >= 32 */
  /* q = data rounded down to 16 bytes; o = number of input bytes contained in
     the two aligned blocks [q, q + 32), i.e. 17..32. */
  const char *q = (const char *) (((uintptr_t) data) & -16L);
  int o = (int)(32 - ((uintptr_t) data & 15));

  v2di D = (* (v2di *) q), E = (*(v2di *)(q + 16)), G, H;

  {
    v2di C;
    FASTMOV_RMI32_TO_SSE(C, crc);
    asm volatile ("pcmpeqw %0, %0\n\t" : "=x" (G)); //G := 2 ^ 128 - 1

    /* H shifts bytes up by (32 - o) positions, zero-filling below. Applied to
       all-ones it gives a byte mask G that clears the bytes in front of data;
       applied to C it places the incoming crc at the first input byte. */
    H = (v2di) __builtin_ia32_loaddqu (mask + o);
    G = (v2di) __builtin_ia32_pshufb128 ((v16qi) G, (v16qi) H );
    D ^= (v2di) __builtin_ia32_pshufb128 ((v16qi) C, (v16qi) H );
    D &= G;

    /* o <= 19: fewer than four input bytes lie in the first block, so the
       remaining crc bytes are XORed into the start of the second block. */
    if (__builtin_expect (o <= 19, 0)) {
      E ^= (v2di) __builtin_ia32_pshufb128 ((v16qi) C, (v16qi) __builtin_ia32_loaddqu (mask + 16 + o));
    }
  }

  /* The first two blocks are now held in D and E; continue after them. */
  len -= o;
  q += 32;

  D = crcXX_partial_clmul (q, len, D, E, CRC32_K256, CRC32_K128);

  /* Reduce the 128-bit result in two steps with the CRC32_K64 pair
     (selector 0x00: low qwords; 0x10: low qword of D, high qword of K64). */
  D = __builtin_ia32_pslldqi128 (__builtin_ia32_psrldqi128 (D, 64), 32) ^ (v2di) __builtin_ia32_pclmulqdq128 (D, CRC32_K64, 0x00);

  D ^= (v2di) __builtin_ia32_pclmulqdq128 (D, CRC32_K64, 0x10);

  unsigned lo, hi;
#ifdef CRC32_BARRETT_REDUCTION
  /* Barrett reduction: two multiplications by the CRC32_MU pair (mu, then the
     33-bit polynomial); the result is read from the upper dword. */
  H = (v2di) __builtin_ia32_punpckhdq128 ((v4si) (G ^ G), (v4si) D);
  H = (v2di) __builtin_ia32_pclmulqdq128 (H, CRC32_MU, 0x00);
  H = (v2di) __builtin_ia32_pclmulqdq128 (H, CRC32_MU, 0x10);
  D ^= __builtin_ia32_pslldqi128 (H, 32);
  D = __builtin_ia32_punpckhqdq128 (D, D);

  FASTMOV_SSE_TO_LO_HI_DW(D, lo, hi);
  return hi;
#else
  /* Table reduction: take the high qword of D as (lo, hi) and push lo through
     the same four-table step that crc32_partial_generic applies per word. */
  D = __builtin_ia32_punpckhqdq128 (D, D);
  FASTMOV_SSE_TO_LO_HI_DW(D, lo, hi);
  return crc32_table0[lo & 0xff] ^ crc32_table1[(lo & 0xff00) >> 8] ^ crc32_table2[(lo & 0xff0000) >> 16] ^ crc32_table[lo >> 24] ^ ((unsigned) hi);
#endif
}
540
541/******************** CRC-64 ********************/
542
/*
 * Byte-wise CRC-64 lookup table used by crc64_feed_byte and
 * crc64_partial_one_table (index = low byte of crc ^ input byte, result
 * XORed with crc >> 8).
 */
static const uint64_t crc64_table[256] = {
  0x0000000000000000LL, 0xb32e4cbe03a75f6fLL, 0xf4843657a840a05bLL, 0x47aa7ae9abe7ff34LL,
  0x7bd0c384ff8f5e33LL, 0xc8fe8f3afc28015cLL, 0x8f54f5d357cffe68LL, 0x3c7ab96d5468a107LL,
  0xf7a18709ff1ebc66LL, 0x448fcbb7fcb9e309LL, 0x0325b15e575e1c3dLL, 0xb00bfde054f94352LL,
  0x8c71448d0091e255LL, 0x3f5f08330336bd3aLL, 0x78f572daa8d1420eLL, 0xcbdb3e64ab761d61LL,
  0x7d9ba13851336649LL, 0xceb5ed8652943926LL, 0x891f976ff973c612LL, 0x3a31dbd1fad4997dLL,
  0x064b62bcaebc387aLL, 0xb5652e02ad1b6715LL, 0xf2cf54eb06fc9821LL, 0x41e11855055bc74eLL,
  0x8a3a2631ae2dda2fLL, 0x39146a8fad8a8540LL, 0x7ebe1066066d7a74LL, 0xcd905cd805ca251bLL,
  0xf1eae5b551a2841cLL, 0x42c4a90b5205db73LL, 0x056ed3e2f9e22447LL, 0xb6409f5cfa457b28LL,
  0xfb374270a266cc92LL, 0x48190ecea1c193fdLL, 0x0fb374270a266cc9LL, 0xbc9d3899098133a6LL,
  0x80e781f45de992a1LL, 0x33c9cd4a5e4ecdceLL, 0x7463b7a3f5a932faLL, 0xc74dfb1df60e6d95LL,
  0x0c96c5795d7870f4LL, 0xbfb889c75edf2f9bLL, 0xf812f32ef538d0afLL, 0x4b3cbf90f69f8fc0LL,
  0x774606fda2f72ec7LL, 0xc4684a43a15071a8LL, 0x83c230aa0ab78e9cLL, 0x30ec7c140910d1f3LL,
  0x86ace348f355aadbLL, 0x3582aff6f0f2f5b4LL, 0x7228d51f5b150a80LL, 0xc10699a158b255efLL,
  0xfd7c20cc0cdaf4e8LL, 0x4e526c720f7dab87LL, 0x09f8169ba49a54b3LL, 0xbad65a25a73d0bdcLL,
  0x710d64410c4b16bdLL, 0xc22328ff0fec49d2LL, 0x85895216a40bb6e6LL, 0x36a71ea8a7ace989LL,
  0x0adda7c5f3c4488eLL, 0xb9f3eb7bf06317e1LL, 0xfe5991925b84e8d5LL, 0x4d77dd2c5823b7baLL,
  0x64b62bcaebc387a1LL, 0xd7986774e864d8ceLL, 0x90321d9d438327faLL, 0x231c512340247895LL,
  0x1f66e84e144cd992LL, 0xac48a4f017eb86fdLL, 0xebe2de19bc0c79c9LL, 0x58cc92a7bfab26a6LL,
  0x9317acc314dd3bc7LL, 0x2039e07d177a64a8LL, 0x67939a94bc9d9b9cLL, 0xd4bdd62abf3ac4f3LL,
  0xe8c76f47eb5265f4LL, 0x5be923f9e8f53a9bLL, 0x1c4359104312c5afLL, 0xaf6d15ae40b59ac0LL,
  0x192d8af2baf0e1e8LL, 0xaa03c64cb957be87LL, 0xeda9bca512b041b3LL, 0x5e87f01b11171edcLL,
  0x62fd4976457fbfdbLL, 0xd1d305c846d8e0b4LL, 0x96797f21ed3f1f80LL, 0x2557339fee9840efLL,
  0xee8c0dfb45ee5d8eLL, 0x5da24145464902e1LL, 0x1a083bacedaefdd5LL, 0xa9267712ee09a2baLL,
  0x955cce7fba6103bdLL, 0x267282c1b9c65cd2LL, 0x61d8f8281221a3e6LL, 0xd2f6b4961186fc89LL,
  0x9f8169ba49a54b33LL, 0x2caf25044a02145cLL, 0x6b055fede1e5eb68LL, 0xd82b1353e242b407LL,
  0xe451aa3eb62a1500LL, 0x577fe680b58d4a6fLL, 0x10d59c691e6ab55bLL, 0xa3fbd0d71dcdea34LL,
  0x6820eeb3b6bbf755LL, 0xdb0ea20db51ca83aLL, 0x9ca4d8e41efb570eLL, 0x2f8a945a1d5c0861LL,
  0x13f02d374934a966LL, 0xa0de61894a93f609LL, 0xe7741b60e174093dLL, 0x545a57dee2d35652LL,
  0xe21ac88218962d7aLL, 0x5134843c1b317215LL, 0x169efed5b0d68d21LL, 0xa5b0b26bb371d24eLL,
  0x99ca0b06e7197349LL, 0x2ae447b8e4be2c26LL, 0x6d4e3d514f59d312LL, 0xde6071ef4cfe8c7dLL,
  0x15bb4f8be788911cLL, 0xa6950335e42fce73LL, 0xe13f79dc4fc83147LL, 0x521135624c6f6e28LL,
  0x6e6b8c0f1807cf2fLL, 0xdd45c0b11ba09040LL, 0x9aefba58b0476f74LL, 0x29c1f6e6b3e0301bLL,
  0xc96c5795d7870f42LL, 0x7a421b2bd420502dLL, 0x3de861c27fc7af19LL, 0x8ec62d7c7c60f076LL,
  0xb2bc941128085171LL, 0x0192d8af2baf0e1eLL, 0x4638a2468048f12aLL, 0xf516eef883efae45LL,
  0x3ecdd09c2899b324LL, 0x8de39c222b3eec4bLL, 0xca49e6cb80d9137fLL, 0x7967aa75837e4c10LL,
  0x451d1318d716ed17LL, 0xf6335fa6d4b1b278LL, 0xb199254f7f564d4cLL, 0x02b769f17cf11223LL,
  0xb4f7f6ad86b4690bLL, 0x07d9ba1385133664LL, 0x4073c0fa2ef4c950LL, 0xf35d8c442d53963fLL,
  0xcf273529793b3738LL, 0x7c0979977a9c6857LL, 0x3ba3037ed17b9763LL, 0x888d4fc0d2dcc80cLL,
  0x435671a479aad56dLL, 0xf0783d1a7a0d8a02LL, 0xb7d247f3d1ea7536LL, 0x04fc0b4dd24d2a59LL,
  0x3886b22086258b5eLL, 0x8ba8fe9e8582d431LL, 0xcc0284772e652b05LL, 0x7f2cc8c92dc2746aLL,
  0x325b15e575e1c3d0LL, 0x8175595b76469cbfLL, 0xc6df23b2dda1638bLL, 0x75f16f0cde063ce4LL,
  0x498bd6618a6e9de3LL, 0xfaa59adf89c9c28cLL, 0xbd0fe036222e3db8LL, 0x0e21ac88218962d7LL,
  0xc5fa92ec8aff7fb6LL, 0x76d4de52895820d9LL, 0x317ea4bb22bfdfedLL, 0x8250e80521188082LL,
  0xbe2a516875702185LL, 0x0d041dd676d77eeaLL, 0x4aae673fdd3081deLL, 0xf9802b81de97deb1LL,
  0x4fc0b4dd24d2a599LL, 0xfceef8632775faf6LL, 0xbb44828a8c9205c2LL, 0x086ace348f355aadLL,
  0x34107759db5dfbaaLL, 0x873e3be7d8faa4c5LL, 0xc094410e731d5bf1LL, 0x73ba0db070ba049eLL,
  0xb86133d4dbcc19ffLL, 0x0b4f7f6ad86b4690LL, 0x4ce50583738cb9a4LL, 0xffcb493d702be6cbLL,
  0xc3b1f050244347ccLL, 0x709fbcee27e418a3LL, 0x3735c6078c03e797LL, 0x841b8ab98fa4b8f8LL,
  0xadda7c5f3c4488e3LL, 0x1ef430e13fe3d78cLL, 0x595e4a08940428b8LL, 0xea7006b697a377d7LL,
  0xd60abfdbc3cbd6d0LL, 0x6524f365c06c89bfLL, 0x228e898c6b8b768bLL, 0x91a0c532682c29e4LL,
  0x5a7bfb56c35a3485LL, 0xe955b7e8c0fd6beaLL, 0xaeffcd016b1a94deLL, 0x1dd181bf68bdcbb1LL,
  0x21ab38d23cd56ab6LL, 0x9285746c3f7235d9LL, 0xd52f0e859495caedLL, 0x6601423b97329582LL,
  0xd041dd676d77eeaaLL, 0x636f91d96ed0b1c5LL, 0x24c5eb30c5374ef1LL, 0x97eba78ec690119eLL,
  0xab911ee392f8b099LL, 0x18bf525d915feff6LL, 0x5f1528b43ab810c2LL, 0xec3b640a391f4fadLL,
  0x27e05a6e926952ccLL, 0x94ce16d091ce0da3LL, 0xd3646c393a29f297LL, 0x604a2087398eadf8LL,
  0x5c3099ea6de60cffLL, 0xef1ed5546e415390LL, 0xa8b4afbdc5a6aca4LL, 0x1b9ae303c601f3cbLL,
  0x56ed3e2f9e224471LL, 0xe5c372919d851b1eLL, 0xa26908783662e42aLL, 0x114744c635c5bb45LL,
  0x2d3dfdab61ad1a42LL, 0x9e13b115620a452dLL, 0xd9b9cbfcc9edba19LL, 0x6a978742ca4ae576LL,
  0xa14cb926613cf817LL, 0x1262f598629ba778LL, 0x55c88f71c97c584cLL, 0xe6e6c3cfcadb0723LL,
  0xda9c7aa29eb3a624LL, 0x69b2361c9d14f94bLL, 0x2e184cf536f3067fLL, 0x9d36004b35545910LL,
  0x2b769f17cf112238LL, 0x9858d3a9ccb67d57LL, 0xdff2a94067518263LL, 0x6cdce5fe64f6dd0cLL,
  0x50a65c93309e7c0bLL, 0xe388102d33392364LL, 0xa4226ac498dedc50LL, 0x170c267a9b79833fLL,
  0xdcd7181e300f9e5eLL, 0x6ff954a033a8c131LL, 0x28532e49984f3e05LL, 0x9b7d62f79be8616aLL,
  0xa707db9acf80c06dLL, 0x14299724cc279f02LL, 0x5383edcd67c06036LL, 0xe0ada17364673f59LL
};
609
/*
 * Function-pointer entry points for CRC-64 computation and CRC-64
 * combination. They are only defined here (zero-initialised); nothing in
 * this part of the file assigns them.
 * NOTE(review): presumably set at start-up to the table or CLMUL variant --
 * confirm against the initialisation code.
 */
crc64_partial_func_t crc64_partial;
crc64_combine_func_t compute_crc64_combine;
612
613uint64_t crc64_feed_byte (uint64_t crc, unsigned char b) {
614 return crc64_table[(crc ^ b) & 0xff] ^ (crc >> 8);
615}
616
617uint64_t crc64_partial_one_table (const void *data, long len, uint64_t crc) {
618 const char *p = data;
619 for (; len > 0; len--) {
620 crc = crc64_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
621 }
622 return crc;
623}
624
/*
 * Final step of crc64_partial_clmul: reduces the 128-bit value D to the
 * 64-bit result using the CRC64_MU pair (element 0 = mu, element 1 = the
 * polynomial constant) and returns it.
 */
static uint64_t crc64_barrett_reduction (v2di D) {
  /* After reflection mu constant is 64 bit */
  /* E = low qword of D times mu (selector 0x00: low qwords of both). */
  v2di E = __builtin_ia32_pclmulqdq128 (D, CRC64_MU, 0x00);
  /* The carry-less multiplication has to be performed with a PCLMULQDQ and an XOR operation
     since P(x) is 65 bit constant. */
  /* Selector 0x10: low qword of E times the high qword of CRC64_MU. */
  D ^= __builtin_ia32_pclmulqdq128 (E, CRC64_MU, 0x10);
  /* Move the high qword of D into the low qword, then apply the XOR half of
     the 65-bit multiplication. */
  D = __builtin_ia32_punpckhqdq128 (D, D);
  D ^= E;
  RETURN_SSE_UINT64(D);
}
635
/*
 * CRC-64 update using PCLMULQDQ folding.
 *
 *   data - input bytes (any alignment)
 *   len  - number of bytes; inputs of 31 bytes or fewer are delegated to
 *          crc64_partial_one_table
 *   crc  - running state before these bytes
 *
 * Returns the running state after the bytes.
 *
 * The first load starts at data rounded down to a 16-byte boundary, so up to
 * 15 bytes in front of data are read (and then masked off).
 */
uint64_t crc64_partial_clmul (const void *data, long len, uint64_t crc) {
  if (len <= 31) {
    return crc64_partial_one_table (data, len, crc);
  }

  /* works only for len >= 32 */
  /* q = data rounded down to 16 bytes; o = number of input bytes contained in
     the two aligned blocks [q, q + 32), i.e. 17..32. */
  const char *q = (const char *) (((uintptr_t) data) & -16L);
  int o = (int)(32 - ((uintptr_t) data & 15));

  v2di D = (* (v2di *) q), E = (*(v2di *)(q + 16)), C, G, H;
  FASTMOV_RMI64_TO_SSE(C, crc);
  asm volatile ("pcmpeqw %0, %0\n\t" : "=x" (G)); //G := 2 ^ 128 - 1

  /* H shifts bytes up by (32 - o) positions, zero-filling below. Applied to
     all-ones it gives a byte mask G that clears the bytes in front of data;
     applied to C it places the incoming crc at the first input byte. */
  H = (v2di) __builtin_ia32_loaddqu (mask + o);
  G = (v2di) __builtin_ia32_pshufb128 ((v16qi) G, (v16qi) H );
  D ^= (v2di) __builtin_ia32_pshufb128 ((v16qi) C, (v16qi) H );
  D &= G;

  /* o <= 23: fewer than eight input bytes lie in the first block, so the
     remaining crc bytes are XORed into the start of the second block. */
  if (o <= (32 - 9)) {
    E ^= (v2di) __builtin_ia32_pshufb128 ((v16qi) C, (v16qi) __builtin_ia32_loaddqu (mask + 16 + o));
  }

  /* The first two blocks are now held in D and E; continue after them. */
  len -= o;
  q += 32;

  D = crcXX_partial_clmul (q, len, D, E, CRC64_K256, CRC64_K128);

  /* 128 -> 64 bit step: high qword of CRC64_K128 times the low qword of D
     (selector 0x01), XORed with the high qword of D. */
  D = (v2di) __builtin_ia32_pclmulqdq128 (CRC64_K128, D, 0x01) ^ __builtin_ia32_psrldqi128 (D, 64);

  return crc64_barrett_reduction (D);
}
667
668/* {{{ GF-32 */
669
/*
 * One "multiply by x" step in the bit-reflected representation: shift the
 * value right by one bit and, if the bit shifted out was set, XOR in poly.
 *
 *   a    - value to advance
 *   poly - reduction polynomial (reflected form)
 */
unsigned gf32_mulx (unsigned a, unsigned poly) {
  const unsigned shifted = a >> 1;
  return (a & 1) ? (shifted ^ poly) : shifted;
}
677
/*
 * Multiplies a by b modulo poly in the bit-reflected representation
 * (0x80000000 is the multiplicative identity used by gf32_pow).
 *
 * The bits of b are consumed from least to most significant; before each
 * one the accumulator is advanced with gf32_mulx, and a is XORed in when
 * the bit is set. Always runs exactly 32 steps.
 */
unsigned gf32_mul (unsigned a, unsigned b, unsigned poly) {
  unsigned acc = 0;
  int bit;
  for (bit = 0; bit < 32; bit++, b >>= 1) {
    acc = gf32_mulx (acc, poly);
    if (b & 1) {
      acc ^= a;
    }
  }
  return acc;
}
690
/*
 * Raises a to the k-th power with gf32_mul (square-and-multiply).
 *
 *   a    - base, bit-reflected representation
 *   k    - exponent; must be >= 0
 *   poly - reduction polynomial (reflected form)
 *
 * Returns 0x80000000 (the multiplicative identity) for k == 0.
 *
 * Iterative on purpose: the recursive form halved k with `k >> 1`, which
 * never reaches 0 for a negative k and recursed without bound. A negative k
 * now trips the assertion; with NDEBUG it yields the identity.
 */
unsigned gf32_pow (unsigned a, int k, unsigned poly) {
  unsigned result = 0x80000000;
  assert (k >= 0);
  while (k > 0) {
    if (k & 1) {
      result = gf32_mul (result, a, poly);
    }
    k >>= 1;
    if (k) {
      /* Square the base only when another exponent bit follows. */
      a = gf32_mul (a, a, poly);
    }
  }
  return result;
}
699
/*
 * Multiplies a bit-matrix by a bit-vector over GF(2): XORs together
 * matrix[i] for every set bit i of vector.
 *
 * Rows are read only up to the highest set bit of vector, so a vector of 0
 * touches no row at all.
 */
static unsigned gf32_matrix_times (unsigned *matrix, unsigned vector) {
  unsigned acc = 0;
  unsigned row;
  for (row = 0; vector != 0; row++, vector >>= 1) {
    if (vector & 1) {
      acc ^= matrix[row];
    }
  }
  return acc;
}
711
/*
 * Writes the square of a 32x32 GF(2) bit-matrix: row n of square is
 * matrix applied to row n of matrix. Both arguments point to 32 entries.
 */
static void gf32_matrix_square (unsigned *square, unsigned *matrix) {
  int row;
  for (row = 0; row < 32; row++) {
    square[row] = gf32_matrix_times (matrix, matrix[row]);
  }
}
718
/*
 * Fills P with size / 32 consecutive 32x32 GF(2) matrices.
 *
 * Matrix 0 is the operator that advances a CRC state by one bit: row 0 is
 * poly and row n + 1 is 1 << n. Every following matrix is the square of the
 * one before it, so matrix k advances the state by 2^k bits.
 *
 *   P    - output buffer of `size` entries
 *   size - number of entries; a positive multiple of 32
 *   poly - reduction polynomial (reflected form)
 *
 * The precondition used to accept size == 0, although the function always
 * writes P[0..31] and then reads P[size - 1]; at least one full matrix is
 * now required.
 */
void gf32_compute_powers_generic (unsigned *P, int size, unsigned poly) {
  int n;
  assert (size >= 32 && !(size & 31));
  P[0] = poly;
  for (n = 0; n < 31; n++) {
    P[n+1] = 1U << n;
  }
  for (n = 1; n < (size / 32); n++) {
    gf32_matrix_square (P + (n << 5), P + ((n - 1) << 5));
  }
  /* The last entry is expected to be non-zero; compute_crc32_combine_generic
     tests it to see whether the table has been built. */
  assert (P[size - 1]);
}
731
/*
 * Fills P with 63 four-word entries (252 unsigned values) for the CLMUL
 * combine routine. P must be 16-byte aligned.
 *
 * Each entry is { 0, cur * poly^2, 0, cur }. cur starts at 1 << (31 - 7) and
 * is replaced by gf32_mulx (cur * cur) after every entry, i.e. squared and
 * then advanced by one more factor of x.
 */
void gf32_compute_powers_clmul (unsigned *P, unsigned poly) {
  assert (!((uintptr_t) P & 15l));
  const unsigned poly_sq = gf32_mul (poly, poly, poly);
  unsigned cur = 1 << (31-7);
  unsigned *entry = P;
  unsigned *const end = P + 4 * 63;
  while (entry != end) {
    entry[0] = 0;
    entry[1] = gf32_mul (cur, poly_sq, poly);
    entry[2] = 0;
    entry[3] = cur;
    cur = gf32_mulx (gf32_mul (cur, cur, poly), poly);
    entry += 4;
  }
}
746
/*
 * Apply to crc1 the matrices selected by the set bits of len2.
 * Bit 0 of len2 selects the matrix at powers + 96, and every further bit the
 * matrix 32 entries later. The lowest bit is always examined, so len2 == 0
 * returns crc1 unchanged.
 */
unsigned gf32_combine_generic (unsigned *powers, unsigned crc1, int64_t len2) {
  unsigned *op = powers + 64 + 32;
  for (;;) {
    if (len2 & 1) {
      crc1 = gf32_matrix_times (op, crc1);
    }
    len2 >>= 1;
    if (!len2) {
      break;
    }
    op += 32;
  }
  return crc1;
}
758
/*
 * Carry-less-multiply variant of the CRC32 combine step.
 *
 * powers - table of 16-byte entries (four unsigned each), read through
 *          v2di pointers; see gf32_compute_powers_clmul for the layout
 * crc1   - CRC value to advance
 * len2   - selects the table entries by its set bits; the lowest set bit
 *          picks entry (n - 1)
 *
 * Returns a 64-bit value that the caller still has to reduce
 * (compute_crc32_combine_clmul splits it into two 32-bit halves).
 *
 * NOTE(review): for len2 == 0 __builtin_ffsll returns 0 and p would point
 * before the table; the visible caller rejects len2 <= 0 first.
 */
uint64_t gf32_combine_clmul (unsigned *powers, unsigned crc1, int64_t len2) {
  v2di D;
  /* place crc1 in the topmost 32 bits of the 128-bit register */
  FASTMOV_RMI32_TO_SSE(D, crc1);
  D = __builtin_ia32_pslldqi128 (D, 96);

  /* n is the 1-based position of the lowest set bit of len2 */
  int n = __builtin_ffsll (len2);
  unsigned int *p = powers + (4 * (n - 1));
  len2 >>= n;

  /* first multiplication: only the high qword of D is populated (0x11 = high x high) */
  D = __builtin_ia32_pclmulqdq128 ( * ((v2di *) p), D, 0x11);

  while (len2) {
    /* advance to the next entry before testing the bit: the shift above already consumed bit n - 1 */
    p += 4;
    if (len2 & 1) {
      v2di E = *((v2di *) p);
      /* high x high xor low x low */
      D = __builtin_ia32_pclmulqdq128 (E, D, 0x11) ^ __builtin_ia32_pclmulqdq128 (E, D, 0x00);
    }
    len2 >>= 1;
  }

  /* NOTE(review): final fold with the fixed entry at powers + 12 (entry 3);
     presumably folds the low qword of D into the high one - confirm */
  D ^= (v2di) __builtin_ia32_pclmulqdq128 (* ((v2di *) (powers + 12)), D, 0x01);
  /* copy the high qword into the low one so that it is what gets returned */
  D = __builtin_ia32_punpckhqdq128 (D, D);
  RETURN_SSE_UINT64(D);
}
783
784/* }}} */
785
786static unsigned compute_crc32_combine_generic (unsigned crc1, unsigned crc2, int64_t len2) {
787 #define N (32*67)
788 static unsigned crc32_powers[N];
789 /* degenerate case (also disallow negative lengths) */
790 if (len2 <= 0) {
791 return crc1;
792 }
793 if (!crc32_powers[N-1]) {
794 gf32_compute_powers_generic (crc32_powers, N, CRC32_REFLECTED_POLY);
795 assert (crc32_powers[N-1]);
796 }
797 return gf32_combine_generic (crc32_powers, crc1, len2) ^ crc2;
798 #undef N
799}
800
801static unsigned compute_crc32_combine_clmul (unsigned crc1, unsigned crc2, int64_t len2) {
802 static unsigned int crc32_powers[252] __attribute__ ((aligned(16)));
803 if (len2 <= 0) {
804 return crc1;
805 }
806 unsigned int *p;
807 if (!crc32_powers[251]) {
808 gf32_compute_powers_clmul (crc32_powers, CRC32_REFLECTED_POLY);
809 p = crc32_powers + 8;
810 assert ( *((uint64_t *) (p + 0)) == CRC32_REFLECTED_X95);
811 assert ( *((uint64_t *) (p + 4)) == CRC32_REFLECTED_X127);
812 assert ( *((uint64_t *) (p + 6)) == CRC32_REFLECTED_X63);
813 assert ( *((uint64_t *) (p + 8)) == CRC32_REFLECTED_X191);
814 assert ( *((uint64_t *) (p + 10)) == CRC32_REFLECTED_X127);
815 assert ( *((uint64_t *) (p + 12)) == CRC32_REFLECTED_X319);
816 assert ( *((uint64_t *) (p + 14)) == CRC32_REFLECTED_X255);
817 assert (crc32_powers[251]);
818 }
819
820 uint64_t T = gf32_combine_clmul (crc32_powers, crc1, len2);
821 crc1 = (unsigned) T;
822 crc2 ^= (unsigned) (T >> 32);
823 return (crc32_table0[crc1 & 0xff] ^ crc32_table1[(crc1 & 0xff00) >> 8] ^ crc32_table2[(crc1 & 0xff0000) >> 16] ^ crc32_table[crc1 >> 24]) ^ crc2;
824}
825
826/******************** GF-64 (reversed) ********************/
827
/*
 * 64-bit counterpart of gf32_mulx with a fixed polynomial: shift right by
 * one and xor in 0xc96c5795d7870f42 when the bit shifted out was set.
 */
static uint64_t gf64_mulx (uint64_t a) __attribute__ ((pure));
static uint64_t gf64_mulx (uint64_t a) {
  const uint64_t shifted = a >> 1;
  if (!(a & 1)) {
    return shifted;
  }
  return shifted ^ 0xc96c5795d7870f42ll;
}
836
/*
 * Multiply two 64-bit field elements (same scheme as gf32_mul): for each of
 * the 64 bits of b, lowest first, advance the accumulator with gf64_mulx and
 * xor in a when the bit is set.
 */
static uint64_t gf64_mul (uint64_t a, uint64_t b) {
  uint64_t acc = 0;
  int step;
  for (step = 0; step < 64; step++) {
    acc = gf64_mulx (acc);
    if ((b >> step) & 1) {
      acc ^= a;
    }
  }
  return acc;
}
849
850static uint64_t crc64_power_buf[126] __attribute__ ((aligned(16)));
851
852void crc64_init_power_buf (void) {
853 int n;
854 uint64_t *p = crc64_power_buf;
855 assert (!((uintptr_t) p & 15l));
856 uint64_t a = 1ll << (63 - 7);
857 const uint64_t b = 0xc96c5795d7870f42ll;
858 for (n = 0; n < 63; n++) {
859 p[0] = gf64_mul (a, b);
860 p[1] = a;
861 a = gf64_mulx (gf64_mul (a, a));
862 p += 2;
863 }
864 p = crc64_power_buf;
865 assert (p[3] == 1ll << (63 - 15));
866 assert (p[4] == CRC64_REFLECTED_X95);
867 assert (p[5] == 1ll << (63 - 31));
868 assert (p[6] == CRC64_REFLECTED_X127);
869 assert (p[7] == 1ll);
870 assert (p[8] == CRC64_REFLECTED_X191);
871 assert (p[9] == CRC64_REFLECTED_X127);
872 assert (p[10] == CRC64_REFLECTED_X319);
873 assert (p[11] == CRC64_REFLECTED_X255);
874 assert (crc64_power_buf[125]);
875}
876
877static uint64_t compute_crc64_combine_clmul (uint64_t crc1, uint64_t crc2, int64_t len2) {
878 if (len2 <= 0) {
879 return crc1;
880 }
881 if (!crc64_power_buf[125]) {
882 crc64_init_power_buf ();
883 }
884 v2di D;
885 FASTMOV_RMI64_TO_SSE(D, crc1);
886 D = __builtin_ia32_pslldqi128 (D, 64);
887
888 int n = __builtin_ffsll (len2);
889 uint64_t *p = crc64_power_buf + (2 * (n - 1));
890 len2 >>= n;
891
892 D = __builtin_ia32_pclmulqdq128 ( * ((v2di *) p), D, 0x11);
893
894 while (len2) {
895 p += 2;
896 if (len2 & 1) {
897 v2di E = *((v2di *) p);
898 D = __builtin_ia32_pclmulqdq128 (E, D, 0x11) ^ __builtin_ia32_pclmulqdq128 (E, D, 0x00);
899 }
900 len2 >>= 1;
901 }
902 return crc64_barrett_reduction (D) ^ crc2;
903}
904
905static uint64_t compute_crc64_combine_generic (uint64_t crc1, uint64_t crc2, int64_t len2) {
906 if (len2 <= 0) {
907 return crc1;
908 }
909 if (!crc64_power_buf[125]) {
910 crc64_init_power_buf ();
911 }
912
913 int n = __builtin_ffsll (len2);
914 uint64_t *p = crc64_power_buf + ((2 * (n - 1)) + 1);
915 len2 >>= n;
916
917 crc1 = gf64_mul (crc1, gf64_mulx (*p));
918
919 while (len2) {
920 p += 2;
921 if (len2 & 1) {
922 crc1 = gf64_mul (crc1, gf64_mulx (*p));
923 }
924 len2 >>= 1;
925 }
926 return crc1 ^ crc2;
927}
928
929/********************************* crc32 repair ************************/
/* One entry of the lookup table used by crc32_find_corrupted_bit. */
struct fcb_table_entry {
  unsigned p; /* field element, zeta ^ k */
  int i;      /* index the element was generated at */
};

/* qsort comparator: ascending by p, ties broken by ascending i. */
static int cmp_fcb_table_entry (const void *a, const void *b) {
  const struct fcb_table_entry *lhs = a;
  const struct fcb_table_entry *rhs = b;
  if (lhs->p != rhs->p) {
    return lhs->p < rhs->p ? -1 : 1;
  }
  if (lhs->i != rhs->i) {
    return lhs->i < rhs->i ? -1 : 1;
  }
  return 0;
}
944
945int crc32_find_corrupted_bit (int size, unsigned d) {
946 int i, j;
947 size += 4;
948 int n = size << 3;
949 int r = (int) (sqrt (n) + 0.5);
950 vkprintf (3, "n = %d, r = %d, d = 0x%08x\n", n, r, d);
951 struct fcb_table_entry *T = calloc (r, sizeof (struct fcb_table_entry));
952 assert (T != NULL);
953 T[0].i = 0;
954 T[0].p = 0x80000000u;
955 for (i = 1; i < r; i++) {
956 T[i].i = i;
957 T[i].p = gf32_mulx (T[i-1].p, CRC32_REFLECTED_POLY);
958 }
959 assert (gf32_mulx (0xdb710641, CRC32_REFLECTED_POLY) == 0x80000000);
960 qsort (T, r, sizeof (T[0]), cmp_fcb_table_entry);
961 const unsigned q = gf32_pow (0xdb710641, r, CRC32_REFLECTED_POLY);
962
963 unsigned A[32];
964 A[31] = q;
965 for (i = 30; i >= 0; i--) {
966 A[i] = gf32_mulx (A[i+1], CRC32_REFLECTED_POLY);
967 }
968
969 unsigned x = d;
970 int max_j = n / r, res = -1;
971 for (j = 0; j <= max_j; j++) {
972 int a = -1, b = r;
973 while (b - a > 1) {
974 int c = ((a + b) >> 1);
975 if (T[c].p <= x) { a = c; } else { b = c; }
976 }
977 if (a >= 0 && T[a].p == x) {
978 res = T[a].i + r * j;
979 break;
980 }
981 x = gf32_matrix_times (A, x);
982 }
983 free (T);
984 return res;
985}
986
/*
 * Flip the bit of input that corresponds to bit index k as reported by
 * crc32_find_corrupted_bit.
 *
 * k is split into a 32-bit word number (k >> 5) and a bit number inside the
 * word (k & 31). Word 1 ends at the last byte of input, and each higher word
 * lies 4 bytes earlier; inside a byte the bits are counted from the most
 * significant one.
 *
 * Returns 0 on success, -1 if k is negative, -2 if the position lies before
 * the start of input, -3 if it lies at or beyond input + l.
 */
int crc32_repair_bit (unsigned char *input, int l, int k) {
  if (k < 0) {
    return -1;
  }
  const int word = k >> 5;
  const int bit_in_word = k & 31;
  const int pos = (l - 1) - (word - 1) * 4 - (bit_in_word >> 3);
  if (pos < 0) {
    return -2;
  }
  if (pos >= l) {
    return -3;
  }
  const int shift = 7 - (bit_in_word & 7);
  input[pos] ^= (unsigned char) (1 << shift);
  return 0;
}
1006
/*
 * Verify the crc32 of a block and try to undo a single flipped bit.
 *
 * input       - block data, modified in place when a data bit is repaired
 * l           - block length in bytes
 * input_crc32 - expected crc32; overwritten with the computed value when the
 *               data could not be repaired
 * force_exit  - when non-zero, successful repairs are reported via kprintf
 *               and an unrepairable block trips an assertion
 *
 * Returns 0 if the crc already matches, 1 if one bit of the data was
 * repaired, 2 if the difference is a single bit (the crc itself is taken to
 * be corrupted and is replaced), -1 otherwise.
 */
int crc32_check_and_repair (void *input, int l, unsigned *input_crc32, int force_exit) {
  const unsigned computed_crc32 = compute_crc32 (input, l);
  const unsigned crc32_diff = computed_crc32 ^ (*input_crc32);
  if (crc32_diff == 0) {
    return 0;
  }

  const int bit = crc32_find_corrupted_bit (l, crc32_diff);
  vkprintf (3, "find_corrupted_bit returns %d.\n", bit);
  const int status = crc32_repair_bit (input, l, bit);
  vkprintf (3, "repair_bit returns %d.\n", status);

  if (status == 0) {
    assert (compute_crc32 (input, l) == *input_crc32);
    if (force_exit) {
      kprintf ("crc32_check_and_repair successfully repair one bit in %d bytes block.\n", l);
    }
    return 1;
  }

  /* more than one bit differs: neither the data nor the crc can be fixed */
  if ((crc32_diff & (crc32_diff - 1)) != 0) {
    assert (!force_exit);
    *input_crc32 = computed_crc32;
    return -1;
  }

  /* exactly one bit differs: accept the computed crc */
  *input_crc32 = computed_crc32;
  if (force_exit) {
    kprintf ("crc32_check_and_repair successfully repair one bit in crc32 (%d bytes block).\n", l);
  }
  return 2;
}
1035
1036static void crc32_init (void) __attribute__ ((constructor));
1037void crc32_init (void) {
1038 kdb_cpuid_t *p = kdb_cpuid ();
1039 if (p->ecx & 2) {
1040 crc32_partial = crc32_partial_clmul;
1041 crc64_partial = crc64_partial_clmul;
1042 compute_crc32_combine = compute_crc32_combine_clmul;
1043 compute_crc64_combine = compute_crc64_combine_clmul;
1044 } else {
1045 crc32_partial = crc32_partial_generic;
1046 crc64_partial = crc64_partial_one_table;
1047 compute_crc32_combine = compute_crc32_combine_generic;
1048 compute_crc64_combine = compute_crc64_combine_generic;
1049 }
1050}
1051