source: code/simplexordatastore.py

Last change on this file was 23, checked in by trishank, 7 years ago

Add pre-release upPIR from June 2011.

File size: 9.8 KB
Line 
1"""
2<Author>
3  Justin Cappos
4  (inspired from a previous version by Geremy Condra)
5
6<Start Date>
7  May 15th, 2011
8
9<Description>
10  Library code that emulates the upPIR XORdatastore.   This is likely to only
11  be used for testing.   There will be a C version that will be much, much
12  faster
13
14"""
15
16import math
17
18
19def do_xor(string_a, string_b):
20  """
21  <Purpose>
22    Produce the XOR of two equal length strings
23
24  <Arguments>
25    string_a, string_b: the strings to XOR
26 
27  <Side Effects>
28    None
29
30  <Exceptions>   
31    ValueError if the strings are of unequal lengths
32    TypeError if the strings are not strings
33
34  <Returns>
35    The XORed result.   
36  """
37  if type(string_a) != str or type(string_b) != str:
38    raise TypeError("do_xor called with a non-string")
39
40  if len(string_a) != len(string_b):
41    raise ValueError("do_xor requires strings of the same length")
42
43  result = ''
44
45  for pos in range(len(string_a)):
46    # add the XOR of the two characters.   I must convert them to do so...
47    result = result + chr( ord(string_a[pos]) ^ ord(string_b[pos]) )
48
49  return result
50
51
52class XORDatastore:
53  """
54  <Purpose>
55    Class that has information for an XORdatastore.   This data structure can
56    quickly XOR blocks of data that it stores.   
57
58  <Side Effects>
59    None.
60
61  <Example Use>
62    # build an XORdatastore with 16 1KB blocks.
63    letterxordatastore = XORDatastore(1024, 16)
64
65    # fill the XORdatastore with 1KB blocks with "A"-"P"
66    startpos = 0
67    for char in range(ord("A"), ord("Q")):
68      # put 1K of those chars in...
69      letterxordatastore.set_data(startpos, chr(char) * 1024)
70      startpos = startpos + 1024
71
72    # can read data out...
73    print letterxordatastore.get_data(2000, 1)
74    # Should print 'B' as this is the 1 thousandth letter...
75
76    # let's create a bitstring that uses A, C, and P.   
77    bitstring = chr(int('10100000', 2)) + chr(int('00000001',2))
78    xorresult = letterxordatastore.produce_xor_from_bitstring(bitstring)
79
80    print xorresult[0]
81    # this should be 'A' ^ 'C' ^ 'P' which is 'R'
82
83  """
84
85  # this is the private, internal storage area for data...
86  _blocks = []
87
88  # these are public so that a caller can read information about a created
89  # datastore.   They should not be changed.   
90  numberofblocks = None
91  sizeofblocks = None
92 
93  def __init__(self, block_size, num_blocks):  # allocate
94    """
95    <Purpose>
96      Allocate a place to store data for efficient XOR.   
97
98    <Arguments>
99      block_size: the size of each block.   This must be a positive int / long.
100                  The value must be a multiple of 64
101     
102      num_blocks: the number of blocks.   This must be a positive integer
103
104    <Exceptions>
105      TypeError is raised if invalid parameters are given.
106
107    """
108
109    if type(block_size) != int and type(block_size) != long:
110      raise TypeError("Block size must be an integer")
111
112    if block_size <= 0:
113      raise TypeError("Block size must be positive")
114
115    if block_size %64 != 0:
116      raise TypeError("Block size must be a multiple of 64")
117
118    if type(num_blocks) != int and type(num_blocks) != long:
119      raise TypeError("Number of blocks must be an integer")
120
121    if num_blocks <= 0:
122      raise TypeError("Number of blocks must be positive")
123
124
125    self.numberofblocks = num_blocks
126    self.sizeofblocks = block_size
127
128    # let's create appropriately sized strings of all zero characters.   This
129    # will serve as padding if there are 'gaps' in the data added.
130    self._blocks = []
131    for currentblocknumber in range(self.numberofblocks):
132      self._blocks.append(chr(0) * self.sizeofblocks)
133
134   
135
136  def produce_xor_from_bitstring(self, bitstring):
137    """
138    <Purpose>
139      Returns an XORed block from an XORdatastore.   It will always return
140      a string of the size of the datastore blocks
141
142    <Arguments>
143      bitstring: a string of bits that indicates what to XOR.   The length
144                 of this string must be ceil(numberofblocks / 8.0).   Extra
145                 bits are ignored (e.g. if are 10 blocks, the last
146                 six bits are ignored).
147     
148    <Exceptions>
149      TypeError is raised if the bitstring is invalid
150
151    <Returns>
152      The XORed block.
153
154    """
155    if type(bitstring) != str:
156      raise TypeError("bitstring must be a string")
157
158    if len(bitstring) != math.ceil(self.numberofblocks/8.0):
159      raise TypeError("bitstring is not of the correct length")
160
161    # start with an empty string of the right size...
162    currentblock = chr(0) * self.sizeofblocks
163   
164    # This is the block we are looking at now
165    currentblocknumber = 0
166
167    for thisbyte in bitstring:
168
169      for bitnumber in range(0,8):
170
171        # find the value we need to & the data with.
172        bitvalue = 2**(7-bitnumber)
173       
174        # If it's a one...
175        if ord(thisbyte) & bitvalue:
176   
177          # ... and we're not past the end of the string...
178          if currentblocknumber < self.numberofblocks:
179            # ... do the xor
180            currentblock = do_xor(currentblock, self._blocks[currentblocknumber])
181
182        # regardless, we are done with this block.
183        currentblocknumber = currentblocknumber + 1
184
185    # let's return the result!
186    return currentblock
187     
188
189
190
191  def set_data(self, offset, data_to_add):
192    """
193    <Purpose>
194      Sets the raw data in an XORdatastore.   It ignores block layout, etc.
195
196    <Arguments>
197      offset: this is a non-negative integer that must be less than the
198              numberofblocks * blocksize.   
199     
200      data_to_add: the string that should be added.   offset + len(data_to_add)
201                must be less than the numberofblocks * blocksize.
202     
203    <Exceptions>
204      TypeError if the arguments are the wrong type or have invalid values.
205
206    <Returns>
207      None
208
209    """
210    if type(offset) != int and type(offset) != long:
211      raise TypeError("Offset must be an integer")
212
213    if offset < 0:
214      raise TypeError("Offset must be non-negative")
215
216    if type(data_to_add) != str:
217      raise TypeError("Data_to_add to XORdatastore must be a string.")
218
219    if offset + len(data_to_add) > self.numberofblocks * self.sizeofblocks:
220      raise TypeError("Offset + added data overflows the XORdatastore")
221
222    (startblock,startoffset) = self._find_blockloc_from_offset(offset)
223    (endblock, endoffset) = self._find_blockloc_from_offset(offset+len(data_to_add))
224   
225    # Case 1: this does not cross blocks
226    if startblock == endblock:
227
228      # insert the string into the block...
229      self._blocks[startblock] = self._blocks[startblock][:startoffset] + data_to_add + self._blocks[startblock][endoffset:]
230 
231      return
232
233    # Case 2: this crosses blocks (or ends a single block)
234
235    # we'll add the string starting with the first block...
236    self._blocks[startblock] = self._blocks[startblock][:startoffset] + data_to_add[:self.sizeofblocks-startoffset]
237
238    amountadded = self.sizeofblocks - startoffset
239
240    # now add in the 'middle' blocks.   This is all of the blocks
241    # after the start and before the end
242    for currentblock in range(startblock+1, endblock):
243      self._blocks[currentblock] = data_to_add[amountadded:amountadded+self.sizeofblocks]
244      amountadded = amountadded + self.sizeofblocks
245
246
247    # finally, add the end block.   Add everything left over and pad with the
248    # previous block data.
249    if endoffset > 0:
250      self._blocks[endblock] = data_to_add[amountadded:] + self._blocks[endblock][len(data_to_add)-amountadded:]
251   
252
253
254
255
256
257
258  def get_data(self, offset, quantity):
259    """
260    <Purpose>
261      Returns raw data from an XORdatastore.   It ignores block layout, etc.
262
263    <Arguments>
264      offset: this is a non-negative integer that must be less than the
265              numberofblocks * blocksize.   
266     
267      quantity: quantity must be a positive integer.   offset + quantity
268                must be less than the numberofblocks * blocksize.
269     
270    <Exceptions>
271      TypeError if the arguments are the wrong type or have invalid values.
272
273    <Returns>
274      A string containing the data.
275
276    """
277    if type(offset) != int and type(offset) != long:
278      raise TypeError("Offset must be an integer")
279
280    if offset < 0:
281      raise TypeError("Offset must be non-negative")
282
283    if type(quantity) != int and type(quantity) != long:
284      raise TypeError("Quantity must be an integer")
285
286    if quantity <= 0:
287      raise TypeError("Quantity must be positive")
288
289    if offset + quantity > self.numberofblocks * self.sizeofblocks:
290      raise TypeError("Quantity + offset is larger than XORdatastore")
291
292
293    # Let's get the block information
294    (startblock,startoffset) = self._find_blockloc_from_offset(offset)
295    (endblock, endoffset) = self._find_blockloc_from_offset(offset+quantity)
296
297    # Case 1: this does not cross blocks
298    if startblock == endblock:
299      return self._blocks[startblock][startoffset:endoffset]
300
301    # Case 2: this crosses blocks
302
303    # we'll build up the string starting with the first block...
304    currentstring = self._blocks[startblock][startoffset:]
305
306    # now add in the 'middle' blocks.   This is all of the blocks
307    # after the start and before the end
308    for currentblock in range(startblock+1, endblock):
309      currentstring += self._blocks[currentblock]
310
311    # this check is needed because we might be past the last block.
312    if endoffset > 0:
313      # finally, add the end block.
314      currentstring += self._blocks[endblock][:endoffset]
315   
316    # and return the result
317    return currentstring
318
319
320
321  def _find_blockloc_from_offset(self, offset):
322    # Private helper function that translates an offset into (block, offset)
323    assert(offset >=0)
324    assert(offset <= self.numberofblocks * self.sizeofblocks)
325
326    return (offset / self.sizeofblocks, offset % self.sizeofblocks)
327
328
329
330  def __del__(self):   # deallocate
331    """
332    <Purpose>
333      Deallocate the XORdatastore
334
335    <Arguments>
336      None
337
338    <Exceptions>
339      None
340
341    """
342    # Other implementations may need to do something here.   We need not...
343    pass
344
345
Note: See TracBrowser for help on using the repository browser.