1 | /* Author: Justin Cappos |
---|
2 | * File: fastsimplexordatastore.c |
---|
3 | * Purpose: The fastsimplexordatastore. A simple, C-based datastore |
---|
4 | */ |
---|
5 | |
---|
6 | #include "Python.h" |
---|
7 | #include "fastsimplexordatastore.h" |
---|
8 | |
---|
9 | /* I've decided not to mess with making this a Python object. |
---|
10 | * Undoubtably I could do so, but it is harder to understand and verify |
---|
11 | * it doesn't have some sort of bug. I'll make the C <-> Python portion |
---|
12 | * straightforward and have an intermediate module that makes this |
---|
13 | * more Pythonic. |
---|
14 | */ |
---|
15 | |
---|
16 | |
---|
17 | // This should be *waaaay* more than we would ever need |
---|
18 | #define STARTING_XORDATASTORE_TABLESIZE 16 |
---|
19 | |
---|
20 | static int xordatastorestablesize = STARTING_XORDATASTORE_TABLESIZE; |
---|
21 | static int xordatastoreinited = 0; |
---|
22 | |
---|
23 | static XORDatastore xordatastoretable[STARTING_XORDATASTORE_TABLESIZE]; |
---|
24 | |
---|
25 | |
---|
26 | |
---|
27 | // Helper |
---|
28 | static inline void XOR_fullblocks(uint64_t *dest, uint64_t *data, long count) { |
---|
29 | register long i; |
---|
30 | for (i=0; i<count; i++) { |
---|
31 | *dest ^= *data; |
---|
32 | dest++; |
---|
33 | data++; |
---|
34 | } |
---|
35 | } |
---|
36 | |
---|
37 | // Helper |
---|
38 | static inline void XOR_byteblocks(char *dest, const char *data, long count) { |
---|
39 | register long i; |
---|
40 | for (i=0; i<count; i++) { |
---|
41 | *dest ^= *data; |
---|
42 | dest++; |
---|
43 | data++; |
---|
44 | } |
---|
45 | } |
---|
46 | |
---|
47 | |
---|
48 | |
---|
49 | // Moves ptr to the next DWORD aligned address. If ptr is DWORD aligned, |
---|
50 | // return ptr. |
---|
51 | static inline char *dword_align(char *ptr) { |
---|
52 | return ptr + (sizeof(uint64_t) - (((long)ptr) % sizeof(uint64_t))) % sizeof(uint64_t); |
---|
53 | } |
---|
54 | |
---|
55 | |
---|
56 | // A helper function that checks to see if the table entry is used or free |
---|
57 | static int is_table_entry_used(int i) { |
---|
58 | return (xordatastoretable[i].raw_datastore != NULL); |
---|
59 | } |
---|
60 | |
---|
61 | |
---|
62 | |
---|
63 | |
---|
64 | // This allocates memory and stores the size / num_blocks for |
---|
65 | // error checking later |
---|
66 | static datastore_descriptor allocate(long block_size, long num_blocks) { |
---|
67 | int i; |
---|
68 | |
---|
69 | // If it isn't inited, let's fill in the table with empty entries |
---|
70 | if (!xordatastoreinited) { |
---|
71 | // start the table as entry |
---|
72 | for (i=0; i<STARTING_XORDATASTORE_TABLESIZE; i++) { |
---|
73 | xordatastoretable[i].numberofblocks = 0; |
---|
74 | xordatastoretable[i].sizeofablock = 0; |
---|
75 | xordatastoretable[i].raw_datastore = NULL; |
---|
76 | xordatastoretable[i].datastore = NULL; |
---|
77 | } |
---|
78 | // We've initted now! |
---|
79 | xordatastoreinited = 1; |
---|
80 | } |
---|
81 | |
---|
82 | for (i=0; i<xordatastorestablesize; i++) { |
---|
83 | // Look for an empty entry |
---|
84 | if (!is_table_entry_used(i)) { |
---|
85 | xordatastoretable[i].numberofblocks = num_blocks; |
---|
86 | xordatastoretable[i].sizeofablock = block_size; |
---|
87 | |
---|
88 | // I allocate a little bit extra so that I can DWORD align it |
---|
89 | xordatastoretable[i].raw_datastore = malloc(num_blocks * block_size + sizeof(uint64_t)); |
---|
90 | // Zero it out |
---|
91 | bzero(xordatastoretable[i].raw_datastore, num_blocks * block_size + sizeof(uint64_t)); |
---|
92 | |
---|
93 | // and align it... |
---|
94 | xordatastoretable[i].datastore = (uint64_t *) dword_align(xordatastoretable[i].raw_datastore); |
---|
95 | return i; |
---|
96 | } |
---|
97 | } |
---|
98 | |
---|
99 | // The table is full! I really should expand it... |
---|
100 | printf("Internal Error: I need to expand the table size (unimplemented)\n"); |
---|
101 | return -1; |
---|
102 | } |
---|
103 | |
---|
104 | |
---|
105 | |
---|
106 | // Python wrapper... |
---|
107 | static PyObject *Allocate(PyObject *module, PyObject *args) { |
---|
108 | long blocksize, numblocks; |
---|
109 | |
---|
110 | if (!PyArg_ParseTuple(args, "ll", &blocksize,&numblocks)) { |
---|
111 | // Incorrect args... |
---|
112 | return NULL; |
---|
113 | } |
---|
114 | |
---|
115 | // This needs to be 64 byte aligned... |
---|
116 | if (blocksize % 64) { |
---|
117 | PyErr_SetString(PyExc_ValueError, "Block size must be a multiple of 64"); |
---|
118 | return NULL; |
---|
119 | } |
---|
120 | |
---|
121 | |
---|
122 | return Py_BuildValue("i",allocate(blocksize, numblocks)); |
---|
123 | |
---|
124 | } |
---|
125 | |
---|
126 | |
---|
127 | |
---|
128 | |
---|
129 | |
---|
130 | |
---|
131 | |
---|
132 | |
---|
133 | |
---|
134 | // This function needs to be fast. It is a good candidate for releasing |
---|
135 | // Python's GIL |
---|
136 | |
---|
137 | static void bitstring_xor_worker(int ds, char *bit_string, long bit_string_length, uint64_t *resultbuffer) { |
---|
138 | long remaininglength = bit_string_length * 8; // convert bytes to bits |
---|
139 | char *current_bit_string_pos; |
---|
140 | current_bit_string_pos = bit_string; |
---|
141 | long long offset = 0; |
---|
142 | int block_size = xordatastoretable[ds].sizeofablock; |
---|
143 | char *datastorebase; |
---|
144 | datastorebase = (char *) xordatastoretable[ds].datastore; |
---|
145 | |
---|
146 | int dwords_per_block = block_size / sizeof(uint64_t); |
---|
147 | |
---|
148 | int bit = 128; |
---|
149 | |
---|
150 | |
---|
151 | while (remaininglength >0) { |
---|
152 | if ((*current_bit_string_pos) & bit) { |
---|
153 | XOR_fullblocks(resultbuffer, (uint64_t *) (datastorebase+offset), dwords_per_block); |
---|
154 | } |
---|
155 | offset += block_size; |
---|
156 | bit /= 2; |
---|
157 | remaininglength -=1; |
---|
158 | if (bit == 0) { |
---|
159 | bit = 128; |
---|
160 | current_bit_string_pos++; |
---|
161 | } |
---|
162 | } |
---|
163 | } |
---|
164 | |
---|
165 | |
---|
166 | |
---|
167 | |
---|
168 | // Does XORs given a bit string. This is the common case and so should be |
---|
169 | // optimized. |
---|
170 | |
---|
171 | // Python Wrapper object |
---|
172 | static PyObject *Produce_Xor_From_Bitstring(PyObject *module, PyObject *args) { |
---|
173 | datastore_descriptor ds; |
---|
174 | int bitstringlength; |
---|
175 | char *bitstringbuffer; |
---|
176 | char *raw_resultbuffer; |
---|
177 | uint64_t *resultbuffer; |
---|
178 | |
---|
179 | if (!PyArg_ParseTuple(args, "is#", &ds, &bitstringbuffer, &bitstringlength)) { |
---|
180 | // Incorrect args... |
---|
181 | return NULL; |
---|
182 | } |
---|
183 | |
---|
184 | // Is the ds valid? |
---|
185 | if (!is_table_entry_used(ds)) { |
---|
186 | PyErr_SetString(PyExc_ValueError, "Bad index for Produce_Xor_From_Bitstring"); |
---|
187 | return NULL; |
---|
188 | } |
---|
189 | |
---|
190 | // Let's prepare a place to put this... |
---|
191 | raw_resultbuffer = malloc(xordatastoretable[ds].sizeofablock+sizeof(uint64_t)); |
---|
192 | // ...and zero it out... |
---|
193 | bzero(raw_resultbuffer, xordatastoretable[ds].sizeofablock+sizeof(uint64_t)); |
---|
194 | |
---|
195 | // ... now let's get a DWORD aligned offset |
---|
196 | resultbuffer = (uint64_t *) dword_align(raw_resultbuffer); |
---|
197 | |
---|
198 | // Let's actually calculate this! |
---|
199 | bitstring_xor_worker(ds, bitstringbuffer, bitstringlength, resultbuffer); |
---|
200 | |
---|
201 | |
---|
202 | |
---|
203 | // okay, let's put it in a buffer |
---|
204 | PyObject *return_str_obj = Py_BuildValue("s#",(char *)resultbuffer,xordatastoretable[ds].sizeofablock); |
---|
205 | |
---|
206 | // clear the buffer |
---|
207 | free(raw_resultbuffer); |
---|
208 | |
---|
209 | return return_str_obj; |
---|
210 | } |
---|
211 | |
---|
212 | |
---|
213 | |
---|
214 | |
---|
215 | |
---|
216 | |
---|
217 | // This is used to populate the datastore. It can also be used to add |
---|
218 | // memoization data. |
---|
219 | |
---|
220 | // Python wrapper (only)... |
---|
221 | static PyObject *SetData(PyObject *module, PyObject *args) { |
---|
222 | long long offset; |
---|
223 | datastore_descriptor ds; |
---|
224 | char *stringbuffer; |
---|
225 | int quantity; // BUG: This probably should be a larger type. |
---|
226 | |
---|
227 | |
---|
228 | if (!PyArg_ParseTuple(args, "iLs#", &ds, &offset, &stringbuffer, &quantity)) { |
---|
229 | // Incorrect args... |
---|
230 | return NULL; |
---|
231 | } |
---|
232 | |
---|
233 | // Is the ds valid? |
---|
234 | if (!is_table_entry_used(ds)) { |
---|
235 | PyErr_SetString(PyExc_ValueError, "Bad index for SetData"); |
---|
236 | return NULL; |
---|
237 | } |
---|
238 | |
---|
239 | // Is this outside of the bounds... |
---|
240 | if (offset + quantity > xordatastoretable[ds].numberofblocks * xordatastoretable[ds].sizeofablock) { |
---|
241 | PyErr_SetString(PyExc_ValueError, "SetData out of bounds"); |
---|
242 | return NULL; |
---|
243 | } |
---|
244 | |
---|
245 | memcpy(((char *)xordatastoretable[ds].datastore)+offset, stringbuffer, quantity); |
---|
246 | |
---|
247 | return Py_BuildValue(""); |
---|
248 | |
---|
249 | } |
---|
250 | |
---|
251 | |
---|
252 | |
---|
253 | |
---|
254 | |
---|
255 | // Returns the data stored at an offset. Note that we move away from |
---|
256 | // blocks here. We might as well do the math in Python. We use this to do |
---|
257 | // integrity checking and serve legacy clients. It is not needed for the |
---|
258 | // usual mirror actions. |
---|
259 | |
---|
260 | // Python wrapper (only)... |
---|
261 | static PyObject *GetData(PyObject *module, PyObject *args) { |
---|
262 | long long offset, quantity; |
---|
263 | datastore_descriptor ds; |
---|
264 | |
---|
265 | if (!PyArg_ParseTuple(args, "iLL", &ds, &offset, &quantity)) { |
---|
266 | // Incorrect args... |
---|
267 | return NULL; |
---|
268 | } |
---|
269 | |
---|
270 | // Is the ds valid? |
---|
271 | if (!is_table_entry_used(ds)) { |
---|
272 | PyErr_SetString(PyExc_ValueError, "Bad index for GetData"); |
---|
273 | return NULL; |
---|
274 | } |
---|
275 | |
---|
276 | // Is this outside of the bounds... |
---|
277 | if (offset + quantity > xordatastoretable[ds].numberofblocks * xordatastoretable[ds].sizeofablock) { |
---|
278 | PyErr_SetString(PyExc_ValueError, "GetData out of bounds"); |
---|
279 | return NULL; |
---|
280 | } |
---|
281 | |
---|
282 | return Py_BuildValue("s#", ((char *)xordatastoretable[ds].datastore)+offset, quantity); |
---|
283 | |
---|
284 | } |
---|
285 | |
---|
286 | |
---|
287 | |
---|
288 | |
---|
289 | // Cleans up the datastore. I don't know when or why this would be used, but |
---|
290 | // it is included for completeness. |
---|
291 | static void deallocate(datastore_descriptor ds){ |
---|
292 | if (!is_table_entry_used(ds)) { |
---|
293 | printf("Error, double deallocate on %d. Ignoring.\n",ds); |
---|
294 | } |
---|
295 | else { |
---|
296 | free(xordatastoretable[ds].raw_datastore); |
---|
297 | xordatastoretable[ds].numberofblocks = 0; |
---|
298 | xordatastoretable[ds].sizeofablock = 0; |
---|
299 | xordatastoretable[ds].raw_datastore = NULL; |
---|
300 | xordatastoretable[ds].datastore = NULL; |
---|
301 | } |
---|
302 | } |
---|
303 | |
---|
304 | |
---|
305 | |
---|
306 | // Python wrapper... |
---|
307 | static PyObject *Deallocate(PyObject *module, PyObject *args) { |
---|
308 | datastore_descriptor ds; |
---|
309 | |
---|
310 | if (!PyArg_ParseTuple(args, "i", &ds)) { |
---|
311 | // Incorrect args... |
---|
312 | return NULL; |
---|
313 | } |
---|
314 | |
---|
315 | deallocate(ds); |
---|
316 | |
---|
317 | return Py_BuildValue(""); |
---|
318 | |
---|
319 | } |
---|
320 | |
---|
321 | |
---|
322 | |
---|
323 | |
---|
324 | // I just have this around for testing |
---|
325 | static char *slow_XOR(char *dest, const char *data, long stringlength) { |
---|
326 | XOR_byteblocks(dest,data,stringlength); |
---|
327 | return dest; |
---|
328 | } |
---|
329 | |
---|
330 | |
---|
331 | // This XORs data with the starting data in dest |
---|
332 | static char *fast_XOR(char *dest, const char *data, long stringlength) { |
---|
333 | int leadingmisalignedbytes; |
---|
334 | long fulllengthblocks; |
---|
335 | int remainingbytes; |
---|
336 | |
---|
337 | // If it's shorter than a block, use char-based XOR |
---|
338 | if (stringlength <= sizeof(uint64_t)) { |
---|
339 | return slow_XOR(dest,data,stringlength); |
---|
340 | } |
---|
341 | |
---|
342 | // I would guess these should be similarly DWORD aligned... |
---|
343 | if (((long) dest) % sizeof(uint64_t) == ((long) data) % sizeof(uint64_t)) { |
---|
344 | printf("Error, assumed that dest and data are identically DWORD aligned!\n"); |
---|
345 | return NULL; |
---|
346 | } |
---|
347 | |
---|
348 | |
---|
349 | // Let's XOR any stray bytes at the front... |
---|
350 | |
---|
351 | // This is the number of bytes that are before we get DWORD aligned |
---|
352 | // To compute this we do (8 - (pos % 8)) % 8) |
---|
353 | leadingmisalignedbytes = (sizeof(uint64_t) - (((long)data) % sizeof(uint64_t))) % sizeof(uint64_t); |
---|
354 | |
---|
355 | XOR_byteblocks(dest, data, leadingmisalignedbytes); |
---|
356 | |
---|
357 | |
---|
358 | // The middle will be done with full sized blocks... |
---|
359 | fulllengthblocks = (stringlength-leadingmisalignedbytes) / sizeof(uint64_t); |
---|
360 | |
---|
361 | XOR_fullblocks((uint64_t *) (dest+leadingmisalignedbytes), (uint64_t *) (data + leadingmisalignedbytes), fulllengthblocks); |
---|
362 | |
---|
363 | |
---|
364 | |
---|
365 | // XOR anything left over at the end... |
---|
366 | remainingbytes = stringlength - (leadingmisalignedbytes + fulllengthblocks * sizeof(uint64_t)); |
---|
367 | XOR_byteblocks(dest+stringlength-remainingbytes, data+stringlength-remainingbytes, remainingbytes); |
---|
368 | |
---|
369 | return dest; |
---|
370 | |
---|
371 | } |
---|
372 | |
---|
373 | |
---|
374 | // A convenience function for XORing blocks of data together. It is used by |
---|
375 | // the client to compute the result and XOR bitstrings |
---|
376 | static PyObject *do_xor(PyObject *module, PyObject *args) { |
---|
377 | const char *str1, *str2; |
---|
378 | long length; |
---|
379 | char *destbuffer; |
---|
380 | char *useddestbuffer; |
---|
381 | |
---|
382 | |
---|
383 | // Parse the calling arguments |
---|
384 | if (!PyArg_ParseTuple(args, "ssl", &str1,&str2, &length)) { |
---|
385 | return NULL; |
---|
386 | } |
---|
387 | |
---|
388 | // Allocate enough memory to hold the result... |
---|
389 | destbuffer = malloc(length+sizeof(uint64_t)); |
---|
390 | |
---|
391 | if (destbuffer == NULL) { |
---|
392 | PyErr_NoMemory(); |
---|
393 | PyErr_SetString(PyExc_MemoryError, "Could not allocate memory for XOR."); |
---|
394 | return NULL; |
---|
395 | } |
---|
396 | |
---|
397 | // let's align this... |
---|
398 | useddestbuffer = destbuffer + sizeof(uint64_t) - ((long) destbuffer %sizeof(uint64_t)); |
---|
399 | |
---|
400 | // ... copy str1 over |
---|
401 | memcpy(useddestbuffer, str1, length); |
---|
402 | |
---|
403 | // Now, let's do the XOR... |
---|
404 | fast_XOR(useddestbuffer, str2, length); |
---|
405 | |
---|
406 | // Okay, let's return the answer! |
---|
407 | PyObject *return_str_obj = Py_BuildValue("s#",useddestbuffer,length); |
---|
408 | |
---|
409 | // (after freeing the buffer) |
---|
410 | free(destbuffer); |
---|
411 | |
---|
412 | return return_str_obj; |
---|
413 | |
---|
414 | } |
---|
415 | |
---|
416 | |
---|
417 | |
---|
418 | static PyMethodDef MyFastSimpleXORDatastoreMethods [] = { |
---|
419 | {"Allocate", Allocate, METH_VARARGS, "Allocate a datastore."}, |
---|
420 | {"Deallocate", Deallocate, METH_VARARGS, "Deallocate a datastore."}, |
---|
421 | {"GetData", GetData, METH_VARARGS, "Reads data out of a datastore."}, |
---|
422 | {"SetData", SetData, METH_VARARGS, "Puts data into the datastore."}, |
---|
423 | {"Produce_Xor_From_Bitstring", Produce_Xor_From_Bitstring, METH_VARARGS, "Extract XOR from datastore."}, |
---|
424 | {"do_xor", do_xor, METH_VARARGS, "does the XOR of two equal length strings."}, |
---|
425 | {NULL, NULL, 0, NULL} |
---|
426 | }; |
---|
427 | |
---|
428 | |
---|
429 | PyMODINIT_FUNC initfastsimplexordatastore_c(void) { |
---|
430 | Py_InitModule("fastsimplexordatastore_c", MyFastSimpleXORDatastoreMethods); |
---|
431 | } |
---|
432 | |
---|
433 | |
---|