source: code/fastsimplexordatastore.c

Last change on this file was 23, checked in by trishank, 7 years ago

Add pre-release upPIR from June 2011.

File size: 11.4 KB
Line 
1/* Author: Justin Cappos
2 * File: fastsimplexordatastore.c
3 * Purpose: The fastsimplexordatastore.   A simple, C-based datastore
4 */
5
6#include "Python.h"
7#include "fastsimplexordatastore.h"
8
9/* I've decided not to mess with making this a Python object.   
10 * Undoubtably I could do so, but it is harder to understand and verify
11 * it doesn't have some sort of bug.   I'll make the C <-> Python portion
12 * straightforward and have an intermediate module that makes this
13 * more Pythonic.
14 */
15
16
17// This should be *waaaay* more than we would ever need
18#define STARTING_XORDATASTORE_TABLESIZE 16
19
20static int xordatastorestablesize = STARTING_XORDATASTORE_TABLESIZE;
21static int xordatastoreinited = 0;
22
23static XORDatastore xordatastoretable[STARTING_XORDATASTORE_TABLESIZE];
24
25
26
27// Helper
28static inline void XOR_fullblocks(uint64_t *dest, uint64_t *data, long count) {
29  register long i;
30  for (i=0; i<count; i++) {
31    *dest ^= *data;
32    dest++;
33    data++;
34  }
35}
36
37// Helper
38static inline void XOR_byteblocks(char *dest, const char *data, long count) {
39  register long i;
40  for (i=0; i<count; i++) {
41    *dest ^= *data;
42    dest++;
43    data++;
44  }
45}
46
47
48
49// Moves ptr to the next DWORD aligned address.   If ptr is DWORD aligned,
50// return ptr.
51static inline char *dword_align(char *ptr) {
52  return ptr + (sizeof(uint64_t) - (((long)ptr) % sizeof(uint64_t))) % sizeof(uint64_t);
53}
54
55
56// A helper function that checks to see if the table entry is used or free
57static int is_table_entry_used(int i) {
58  return (xordatastoretable[i].raw_datastore != NULL);
59}
60
61
62
63
64// This allocates memory and stores the size / num_blocks for
65// error checking later
66static datastore_descriptor allocate(long block_size, long num_blocks)  {
67  int i;
68
69  // If it isn't inited, let's fill in the table with empty entries
70  if (!xordatastoreinited) {
71    // start the table as entry
72    for (i=0; i<STARTING_XORDATASTORE_TABLESIZE; i++) {
73      xordatastoretable[i].numberofblocks = 0;
74      xordatastoretable[i].sizeofablock = 0;
75      xordatastoretable[i].raw_datastore = NULL;
76      xordatastoretable[i].datastore = NULL;
77    }
78    // We've initted now!
79    xordatastoreinited = 1;
80  }
81
82  for (i=0; i<xordatastorestablesize; i++) {
83    // Look for an empty entry
84    if (!is_table_entry_used(i)) {
85      xordatastoretable[i].numberofblocks = num_blocks;
86      xordatastoretable[i].sizeofablock = block_size;
87
88      // I allocate a little bit extra so that I can DWORD align it
89      xordatastoretable[i].raw_datastore = malloc(num_blocks * block_size + sizeof(uint64_t));
90      // Zero it out
91      bzero(xordatastoretable[i].raw_datastore, num_blocks * block_size + sizeof(uint64_t));
92     
93      // and align it...
94      xordatastoretable[i].datastore = (uint64_t *) dword_align(xordatastoretable[i].raw_datastore);
95      return i;
96    }
97  }
98
99  // The table is full!   I really should expand it...
100  printf("Internal Error: I need to expand the table size (unimplemented)\n");
101  return -1;
102}
103
104
105
106// Python wrapper...
107static PyObject *Allocate(PyObject *module, PyObject *args) {
108  long blocksize, numblocks;
109
110  if (!PyArg_ParseTuple(args, "ll", &blocksize,&numblocks)) {
111    // Incorrect args...
112    return NULL;
113  }
114
115  // This needs to be 64 byte aligned...
116  if (blocksize % 64) {
117    PyErr_SetString(PyExc_ValueError, "Block size must be a multiple of 64");
118    return NULL;
119  }
120
121
122  return Py_BuildValue("i",allocate(blocksize, numblocks));
123
124}
125
126
127
128
129
130
131
132
133
134// This function needs to be fast.   It is a good candidate for releasing
135// Python's GIL
136
137static void bitstring_xor_worker(int ds, char *bit_string, long bit_string_length, uint64_t *resultbuffer) {
138  long remaininglength = bit_string_length * 8;  // convert bytes to bits
139  char *current_bit_string_pos;
140  current_bit_string_pos = bit_string;
141  long long offset = 0;
142  int block_size = xordatastoretable[ds].sizeofablock;
143  char *datastorebase;
144  datastorebase = (char *) xordatastoretable[ds].datastore;
145
146  int dwords_per_block = block_size / sizeof(uint64_t);
147
148  int bit = 128;
149
150
151  while (remaininglength >0) {
152    if ((*current_bit_string_pos) & bit) {
153      XOR_fullblocks(resultbuffer, (uint64_t *) (datastorebase+offset), dwords_per_block);
154    }
155    offset += block_size;
156    bit /= 2;
157    remaininglength -=1;
158    if (bit == 0) {
159      bit = 128;
160      current_bit_string_pos++;
161    }
162  }
163}
164
165
166
167
168// Does XORs given a bit string.   This is the common case and so should be
169// optimized.
170
171// Python Wrapper object
172static PyObject *Produce_Xor_From_Bitstring(PyObject *module, PyObject *args) {
173  datastore_descriptor ds;
174  int bitstringlength;
175  char *bitstringbuffer;
176  char *raw_resultbuffer;
177  uint64_t *resultbuffer;
178
179  if (!PyArg_ParseTuple(args, "is#", &ds, &bitstringbuffer, &bitstringlength)) {
180    // Incorrect args...
181    return NULL;
182  }
183
184  // Is the ds valid?
185  if (!is_table_entry_used(ds)) {
186    PyErr_SetString(PyExc_ValueError, "Bad index for Produce_Xor_From_Bitstring");
187    return NULL;
188  }
189
190  // Let's prepare a place to put this...
191  raw_resultbuffer = malloc(xordatastoretable[ds].sizeofablock+sizeof(uint64_t));
192  // ...and zero it out...
193  bzero(raw_resultbuffer, xordatastoretable[ds].sizeofablock+sizeof(uint64_t));
194
195  // ... now let's get a DWORD aligned offset
196  resultbuffer = (uint64_t *) dword_align(raw_resultbuffer);
197
198  // Let's actually calculate this!
199  bitstring_xor_worker(ds, bitstringbuffer, bitstringlength, resultbuffer);
200
201
202
203  // okay, let's put it in a buffer
204  PyObject *return_str_obj = Py_BuildValue("s#",(char *)resultbuffer,xordatastoretable[ds].sizeofablock);
205
206  // clear the buffer
207  free(raw_resultbuffer);
208
209  return return_str_obj;
210}
211
212
213
214
215
216
217// This is used to populate the datastore.   It can also be used to add
218// memoization data.
219
220// Python wrapper (only)...
221static PyObject *SetData(PyObject *module, PyObject *args) {
222  long long offset;
223  datastore_descriptor ds;
224  char *stringbuffer;
225  int quantity; // BUG: This probably should be a larger type.   
226
227
228  if (!PyArg_ParseTuple(args, "iLs#", &ds, &offset, &stringbuffer, &quantity)) {
229    // Incorrect args...
230    return NULL;
231  }
232
233  // Is the ds valid?
234  if (!is_table_entry_used(ds)) {
235    PyErr_SetString(PyExc_ValueError, "Bad index for SetData");
236    return NULL;
237  }
238
239  // Is this outside of the bounds...
240  if (offset + quantity > xordatastoretable[ds].numberofblocks * xordatastoretable[ds].sizeofablock) {
241    PyErr_SetString(PyExc_ValueError, "SetData out of bounds");
242    return NULL;
243  }
244
245  memcpy(((char *)xordatastoretable[ds].datastore)+offset, stringbuffer, quantity);
246
247  return Py_BuildValue("");
248
249}
250
251
252
253
254
255// Returns the data stored at an offset.   Note that we move away from
256// blocks here.   We might as well do the math in Python.   We use this to do
257// integrity checking and serve legacy clients.   It is not needed for the
258// usual mirror actions.
259
260// Python wrapper (only)...
261static PyObject *GetData(PyObject *module, PyObject *args) {
262  long long offset, quantity;
263  datastore_descriptor ds;
264
265  if (!PyArg_ParseTuple(args, "iLL", &ds, &offset, &quantity)) {
266    // Incorrect args...
267    return NULL;
268  }
269
270  // Is the ds valid?
271  if (!is_table_entry_used(ds)) {
272    PyErr_SetString(PyExc_ValueError, "Bad index for GetData");
273    return NULL;
274  }
275
276  // Is this outside of the bounds...
277  if (offset + quantity > xordatastoretable[ds].numberofblocks * xordatastoretable[ds].sizeofablock) {
278    PyErr_SetString(PyExc_ValueError, "GetData out of bounds");
279    return NULL;
280  }
281
282  return Py_BuildValue("s#", ((char *)xordatastoretable[ds].datastore)+offset, quantity);
283
284}
285
286
287
288
289// Cleans up the datastore.   I don't know when or why this would be used, but
290// it is included for completeness.
291static void deallocate(datastore_descriptor ds){
292  if (!is_table_entry_used(ds)) {
293    printf("Error, double deallocate on %d.   Ignoring.\n",ds);
294  }
295  else {
296    free(xordatastoretable[ds].raw_datastore);
297    xordatastoretable[ds].numberofblocks = 0;
298    xordatastoretable[ds].sizeofablock = 0;
299    xordatastoretable[ds].raw_datastore = NULL;
300    xordatastoretable[ds].datastore = NULL;
301  }
302}
303
304
305
306// Python wrapper...
307static PyObject *Deallocate(PyObject *module, PyObject *args) {
308  datastore_descriptor ds;
309
310  if (!PyArg_ParseTuple(args, "i", &ds)) {
311    // Incorrect args...
312    return NULL;
313  }
314
315  deallocate(ds);
316
317  return Py_BuildValue("");
318
319}
320
321
322
323
324// I just have this around for testing
325static char *slow_XOR(char *dest, const char *data, long stringlength) {
326  XOR_byteblocks(dest,data,stringlength);
327  return dest;
328}
329
330
331// This XORs data with the starting data in dest
332static char *fast_XOR(char *dest, const char *data, long stringlength) {
333  int leadingmisalignedbytes;
334  long fulllengthblocks;
335  int remainingbytes;
336
337  // If it's shorter than a block, use char-based XOR
338  if (stringlength <= sizeof(uint64_t)) {
339    return slow_XOR(dest,data,stringlength);
340  }
341
342  // I would guess these should be similarly DWORD aligned...
343  if (((long) dest) % sizeof(uint64_t) == ((long) data) % sizeof(uint64_t)) {
344    printf("Error, assumed that dest and data are identically DWORD aligned!\n");
345    return NULL;
346  }
347
348
349  // Let's XOR any stray bytes at the front...
350
351  // This is the number of bytes that are before we get DWORD aligned
352  // To compute this we do (8 - (pos % 8)) % 8)
353  leadingmisalignedbytes = (sizeof(uint64_t) - (((long)data) % sizeof(uint64_t))) % sizeof(uint64_t);
354
355  XOR_byteblocks(dest, data, leadingmisalignedbytes);
356
357
358  // The middle will be done with full sized blocks...
359  fulllengthblocks = (stringlength-leadingmisalignedbytes) / sizeof(uint64_t);
360
361  XOR_fullblocks((uint64_t *) (dest+leadingmisalignedbytes), (uint64_t *) (data + leadingmisalignedbytes), fulllengthblocks);
362
363
364
365  // XOR anything left over at the end...
366  remainingbytes = stringlength - (leadingmisalignedbytes + fulllengthblocks * sizeof(uint64_t));
367  XOR_byteblocks(dest+stringlength-remainingbytes, data+stringlength-remainingbytes, remainingbytes);
368
369  return dest;
370
371}
372
373
374// A convenience function for XORing blocks of data together.   It is used by
375// the client to compute the result and XOR bitstrings
376static PyObject *do_xor(PyObject *module, PyObject *args) {
377  const char *str1, *str2;
378  long length;
379  char *destbuffer;
380  char *useddestbuffer;
381
382
383  // Parse the calling arguments
384  if (!PyArg_ParseTuple(args, "ssl", &str1,&str2, &length)) {
385    return NULL;
386  }
387
388  // Allocate enough memory to hold the result...
389  destbuffer = malloc(length+sizeof(uint64_t));
390
391  if (destbuffer == NULL) {
392    PyErr_NoMemory();
393    PyErr_SetString(PyExc_MemoryError, "Could not allocate memory for XOR.");
394    return NULL;
395  }
396
397  // let's align this...
398  useddestbuffer = destbuffer + sizeof(uint64_t) - ((long) destbuffer %sizeof(uint64_t));
399
400  // ... copy str1 over
401  memcpy(useddestbuffer, str1, length);
402
403  // Now, let's do the XOR...
404  fast_XOR(useddestbuffer, str2, length);
405 
406  // Okay, let's return the answer!
407  PyObject *return_str_obj = Py_BuildValue("s#",useddestbuffer,length);
408
409  // (after freeing the buffer)
410  free(destbuffer);
411
412  return return_str_obj;
413
414}
415
416
417
418static PyMethodDef MyFastSimpleXORDatastoreMethods [] = {
419  {"Allocate", Allocate, METH_VARARGS, "Allocate a datastore."},
420  {"Deallocate", Deallocate, METH_VARARGS, "Deallocate a datastore."},
421  {"GetData", GetData, METH_VARARGS, "Reads data out of a datastore."},
422  {"SetData", SetData, METH_VARARGS, "Puts data into the datastore."},
423  {"Produce_Xor_From_Bitstring", Produce_Xor_From_Bitstring, METH_VARARGS, "Extract XOR from datastore."},
424  {"do_xor", do_xor, METH_VARARGS, "does the XOR of two equal length strings."},
425  {NULL, NULL, 0, NULL}
426};
427
428
429PyMODINIT_FUNC initfastsimplexordatastore_c(void) {
430  Py_InitModule("fastsimplexordatastore_c", MyFastSimpleXORDatastoreMethods);
431}
432
433
Note: See TracBrowser for help on using the repository browser.