source: code/uppirlib.py @ 204

Last change on this file since 204 was 23, checked in by trishank, 7 years ago

Add pre-release upPIR from June 2011.

File size: 23.8 KB
Line 
1"""
2<Author>  Justin Cappos
3  (inspired from a previous version by Geremy Condra)
4
5<Start Date>
6  May 16th, 2011
7
8<Description>
  Lots of helper code for upPIR.   Much of this code will be used in multiple
  places, but some may not.   Anything that is at least somewhat general will
  live here.
12 
13"""
14
15import sys
16
17# used for os.path.exists, os.path.join and os.walk
18import os
19
20# only need ceil
21import math
22
23
24import socket
25
26# use this to turn the stream abstraction into a message abstraction...
27import session
28
29
# Check the python version.   It's pretty crappy to do this from a library,
# but it's an easy way to check this universally.
#
# sys.stdout.write is used instead of the print statement on purpose: a print
# statement is a SyntaxError on Python 3, so the file would fail to parse
# there and the message below could never be shown to the user.
if sys.version_info[0] != 2 or sys.version_info[1] < 5:
  sys.stdout.write("Requires Python >= 2.5 and < 3.0\n")
  sys.exit(1)

# A safe way to serialize / deserialize network data
if sys.version_info[1] == 5:
  try:
    import simplejson as json
  except ImportError:
    # This may have plausibly been forgotten
    sys.stdout.write("Requires simplejson on Python 2.5.X\n")
    sys.exit(1)
else:
  # This really should be there.   Let's ignore the try-except block...
  import json
47
48
49import hashlib
50
51
52# Exceptions...
53
class FileNotFound(Exception):
  """Raised when a file listed in a manifest cannot be found on disk."""
56
class IncorrectFileContents(Exception):
  """Raised when a file's size or hash differs from its manifest entry."""
59
60
61
62# these keys must exist in a manifest dictionary.
63_required_manifest_keys = ['manifestversion', 'blocksize', 'blockcount',
64                           'blockhashlist', 'hashalgorithm',
65                           'vendorhostname', 'vendorport',
66                           'manifesthash', 'fileinfolist' ]
67
# an example manifest might look like:
# {'manifestversion':"1.0", 'blocksize':1024, 'blockcount':100,
#  'blockhashlist':['ab3...', '2de...', ...], 'hashalgorithm':'sha1-base64',
#  'vendorhostname':'blackbox.cs.washington.edu', 'vendorport':62293,
#  'manifesthash':'42a...',
#  'fileinfolist':[{'filename':'file1',
#                   'hash':'a8...',
#                   'offset':1584,
#                   'length':1023},   # (do I need this?)
#                  {'filename':'foo/file2', # (next file listed...)
#                   'hash':'4f...',
#                   'offset':2607,
#                   'length':63451},  #  (do I need this?)
#                   ...]}
82
83def _validate_manifest(manifest):
84  # private function that validates the manifest is okay
85  # it raises a TypeError if it's not valid for some reason
86  if type(manifest) != dict:
87    raise TypeError("Manifest must be a dict!")
88
89  # check for the required keys
90  for key in _required_manifest_keys:
91    if key not in manifest:
92      raise TypeError("Manifest must contain key: "+key+"!")
93
94  # check specific things
95  if len(manifest['blockhashlist']) != manifest['blockcount']:
96    raise TypeError("There must be a hash for every manifest block")
97
98  # otherwise, I guess I'll let this slide.   I don't want the checking to
99  # be too version specific 
100  # JAC: Is this a dumb idea?   Should I just check it all?   Do I want
101  # this to fail later?   Can the version be used as a proxy check for this?
102
103
# hash algorithm names (the part before the '-') that find_hash accepts
_supported_hashalgorithms = ['md5', 'sha1', 'sha224', 'sha256', 'sha384',
                             'sha512']

# digest encodings (the part after the '-') that find_hash accepts
_supported_hashencodings = ['hex','raw']

def find_hash(contents, algorithm):
  """
  <Purpose>
    Helper function for hashing.   Computes the hash of some data.

  <Arguments>
    contents: the data to hash.   It is passed unchanged to the hashlib
              object's update() method.

    algorithm: a string naming the hash to compute.   This is either a name
               from _supported_hashalgorithms (e.g. "sha1"), or such a name
               followed by '-' and an encoding from _supported_hashencodings
               (e.g. "sha256-raw").   With no encoding given, 'hex' is used.
               The special value 'noop' is FOR TESTING ONLY.

  <Exceptions>
    TypeError if the algorithm name or the encoding is not supported.   This
    includes strings that contain more than one '-'.

  <Side Effects>
    None

  <Returns>
    The hex digest or the raw digest, depending on the encoding.   An empty
    string is returned for the 'noop' algorithm.
  """

  # first, if it's a noop, do nothing.   THIS IS FOR TESTING ONLY
  if algorithm == 'noop':
    return ''

  # accept things like: "sha1", "sha256-raw", etc.
  # Before the '-' is one of the types known to hashlib.   After it is the
  # encoding of the result.   partition always binds the algorithm name, even
  # when there is no '-' at all (a plain "sha1" must work).
  hashalgorithmname, separator, hashencoding = algorithm.partition('-')
  if not separator:
    hashencoding = 'hex'

  # check the args
  if hashalgorithmname not in _supported_hashalgorithms:
    raise TypeError("Do not understand hash algorithm: '"+algorithm+"'")

  # anything after a second '-' stays in hashencoding and is rejected here
  if hashencoding not in _supported_hashencodings:
    raise TypeError("Do not understand hash algorithm: '"+algorithm+"'")

  hashobj = hashlib.new(hashalgorithmname)
  hashobj.update(contents)

  if hashencoding == 'raw':
    return hashobj.digest()
  elif hashencoding == 'hex':
    return hashobj.hexdigest()
  else:
    # only reachable if an encoding is added to the list above without a
    # matching branch here
    raise Exception("Internal Error!   Unknown hashencoding '"+hashencoding+"'")
142 
143
144 
def transmit_mirrorinfo(mirrorinfo, vendorlocation, defaultvendorport=62293):
  """
  <Purpose>
    Advertises this mirror to a vendor by sending it our mirror information.

  <Arguments>
    mirrorinfo: a dictionary describing the mirror.   It is serialized with
                json before it is sent.

    vendorlocation: A string that contains the vendor location.   This can be
                    of the form "IP:port", "hostname:port", "IP", or "hostname"

    defaultvendorport: the port to use if the vendorlocation does not include
                       one.

  <Exceptions>
    TypeError if the args are the wrong types or malformed...

    various socket errors if the connection fails.

    ValueError if the vendor answers with anything other than "OK".   The
    vendor's answer is the exception's argument.

  <Side Effects>
    Contacts the vendor and sends data to it

  <Returns>
    None
  """
  if type(mirrorinfo) is not dict:
    raise TypeError("Mirror information must be a dictionary")

  # the request is the command word immediately followed by the JSON payload
  request = "MIRRORADVERTISE" + json.dumps(mirrorinfo)
  vendorreply = _remote_query_helper(vendorlocation, request, defaultvendorport)

  if vendorreply == "OK":
    return

  # JAC: I don't really like using ValueError.   I should define a new one
  raise ValueError(vendorreply)
180
181
182
def retrieve_rawmanifest(vendorlocation, defaultvendorport=62293):
  """
  <Purpose>
    Asks a vendor for its manifest and hands back the answer untouched.   No
    parsing or checking of the data is done here.

  <Arguments>
    vendorlocation: A string that contains the vendor location.   This can be
                    of the form "IP:port", "hostname:port", "IP", or "hostname"

    defaultvendorport: the port to use if the vendorlocation does not include
                       one.

  <Exceptions>
    TypeError if the vendorlocation is the wrong type or malformed.

    various socket errors if the connection fails.

  <Side Effects>
    Contacts the vendor and retrieves data from it

  <Returns>
    A string containing the manifest data (unprocessed).   It is a good idea
    to use parse_manifest to ensure this data is correct.
  """
  rawmanifest = _remote_query_helper(vendorlocation, "GET MANIFEST",
                                     defaultvendorport)
  return rawmanifest
210
211
212
213
def retrieve_xorblock_from_mirror(mirrorip, mirrorport,bitstring):
  """
  <Purpose>
    Sends an XORBLOCK request to a mirror and returns the mirror's answer.

  <Arguments>
    mirrorip: the mirror's IP address or hostname

    mirrorport: the mirror's port number

    bitstring: a bit string that contains an appropriately sized request that
               specifies which blocks to combine.

  <Exceptions>
    TypeError if the arguments are the wrong types.

    ValueError if the mirror answers 'Invalid request length' (the bitstring
    is the wrong size).

    various socket errors if the connection fails.

  <Side Effects>
    Contacts the mirror and retrieves data from it

  <Returns>
    The string the mirror sent back for this request (presumably the XOR of
    the blocks selected by the bitstring -- the mirror side is not visible
    from here).
  """
  request = "XORBLOCK" + bitstring
  mirroranswer = _remote_query_helper(mirrorip, request, mirrorport)

  # the mirror reports a badly sized bitstring in-band, with this exact text
  if mirroranswer != 'Invalid request length':
    return mirroranswer

  raise ValueError(mirroranswer)
246
247
248
249
250
def retrieve_mirrorinfolist(vendorlocation, defaultvendorport=62293):
  """
  <Purpose>
    Asks a vendor for its list of mirrors and decodes the answer.

  <Arguments>
    vendorlocation: A string that contains the vendor location.   This can be
                    of the form "IP:port", "hostname:port", "IP", or "hostname"

    defaultvendorport: the port to use if the vendorlocation does not include
                       one.

  <Exceptions>
    TypeError if the vendorlocation is the wrong type or malformed, or if the
    decoded answer is not a list of dictionaries.

    various socket errors if the connection fails.

    SessionEOF or ValueError may be raised if the other end is not speaking the
    correct protocol

  <Side Effects>
    Contacts the vendor and retrieves data from it

  <Returns>
    A list of mirror information dictionaries.
  """
  vendoranswer = _remote_query_helper(vendorlocation, "GET MIRRORLIST", defaultvendorport)
  decodedanswer = json.loads(vendoranswer)

  # the outer container has to be a list...
  answertype = type(decodedanswer)
  if answertype is not list:
    raise TypeError("Malformed mirror list from vendor.   Is a "+str(answertype)+" not a list")

  # ...and every entry in it has to be a dictionary
  for mirrorentry in decodedanswer:
    entrytype = type(mirrorentry)
    if entrytype is not dict:
      raise TypeError("Malformed mirrorlocation from vendor.   Is a "+str(entrytype)+" not a dict")

  # everything checked out
  return decodedanswer
292
293
294 
295
296
297
298def _remote_query_helper(serverlocation, command, defaultserverport):
299  # private function that contains the guts of server communication.   It
300  # issues a single query and then closes the connection.   This is used
301  # both to talk to the vendor and also to talk to mirrors
302  if type(serverlocation) != str and type(serverlocation) != unicode:
303    raise TypeError("Server location must be a string, not "+str(type(serverlocation)))
304
305  # now let's split it and ensure there are 0 or 1 colons
306  splitlocationlist = serverlocation.split(':')
307 
308  if len(splitlocationlist) >2:
309    raise TypeError("Server location may not contain more than one colon")
310
311
312  # now either set the port or use the default
313  if len(splitlocationlist) == 2:
314    serverport = int(splitlocationlist[1])
315  else:
316    serverport = defaultserverport
317
318  # check that this port is in the right range
319  if serverport <= 0 or serverport > 65535:
320    raise TypeError("Server location's port is not in the allowed range")
321
322  serverhostname = splitlocationlist[0]
323
324
325  # now we actually download the information...
326 
327  # first open the socket
328  serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
329  serversocket.connect((serverhostname, serverport))
330
331  # then issue the relevant command
332  session.sendmessage(serversocket, command)
333
334  # and return the answer
335  rawanswer = session.recvmessage(serversocket)
336
337  serversocket.close()
338
339  return rawanswer
340
341
342
343
344
345
346
def parse_manifest(rawmanifestdata):
  """
  <Purpose>
    Decodes raw manifest data and checks that the result is a usable
    manifest dictionary.

  <Arguments>
    rawmanifestdata: a string containing the raw manifest data as is produced
                     by the json module.

  <Exceptions>
    TypeError or ValueError if the manifest data is corrupt

  <Side Effects>
    None

  <Returns>
    A dictionary containing the manifest.
  """
  if type(rawmanifestdata) is not str:
    raise TypeError("Raw manifest data must be a string")

  decodedmanifest = json.loads(rawmanifestdata)

  # raises if the decoded data does not look like a manifest
  _validate_manifest(decodedmanifest)

  return decodedmanifest
375
376
377
378
379
def populate_xordatastore(manifestdict, xordatastore, rootdir="."):
  """
  <Purpose>
    Adds the files listed in the manifestdict to the datastore and then
    checks every block of the datastore against the manifest's block hashes.

  <Arguments>
    manifestdict: a manifest dictionary.

    xordatastore: the XOR datastore that we should populate.

    rootdir: The location to look for the files mentioned in the manifest

  <Exceptions>
    TypeError if the manifest is corrupt, the rootdir is the wrong type, or
    a block of the populated datastore does not match its hash in the
    manifest.

    FileNotFound if a file listed in the manifest is not found under rootdir.

    IncorrectFileContents if the file listed in the manifest file has the
                          wrong size or hash

  <Side Effects>
    Writes the contents of the listed files into xordatastore.

  <Returns>
    None
  """

  if not isinstance(manifestdict, dict):
    raise TypeError("Manifest dict must be a dictionary")

  if type(rootdir) != str and type(rootdir) != unicode:
    raise TypeError("Mirror root must be a string")

  _add_data_to_datastore(xordatastore,manifestdict['fileinfolist'],rootdir, manifestdict['hashalgorithm'])

  hashlist = _compute_block_hashlist(xordatastore, manifestdict['blockcount'], manifestdict['blocksize'], manifestdict['hashalgorithm'])

  # the per-file hashes were checked while adding the data; this compares the
  # resulting blocks with the block hashes recorded in the manifest
  for blocknum in range(manifestdict['blockcount']):

    if hashlist[blocknum] != manifestdict['blockhashlist'][blocknum]:
      raise TypeError("Despite matching file hashes, block '"+str(blocknum)+"' has an invalid hash.\nCorrupt manifest or dirty xordatastore")

  # We're done!
423
424
425def _add_data_to_datastore(xordatastore, fileinfolist, rootdir, hashalgorithm):
426  # Private helper to populate the datastore
427
428  # go through the files one at a time and populate the xordatastore
429  for thisfiledict in fileinfolist:
430   
431    thisrelativefilename = thisfiledict['filename']
432    thisfilehash = thisfiledict['hash']
433    thisoffset = thisfiledict['offset']
434    thisfilelength = thisfiledict['length']
435
436   
437    thisfilename = os.path.join(rootdir, thisrelativefilename)
438
439    # read in the files and populate the xordatastore
440    if not os.path.exists(thisfilename):
441      raise FileNotFound("File '"+thisrelativefilename+"' listed in manifest cannot be found in manifest root: '"+rootdir+"'.")
442
443    # can't go above the root!
444    # JAC: I would use relpath, but it's 2.6 and on
445    if not os.path.normpath(os.path.abspath(thisfilename)).startswith(os.path.abspath(rootdir)):
446      raise TypeError("File in manifest cannot go back from the root dir!!!")
447
448    # get the relevant data
449    thisfilecontents = open(thisfilename).read()
450   
451    # let's see if this has the right size
452    if len(thisfilecontents) != thisfilelength:
453      raise IncorrectFileContents("File '"+thisrelativefilename+"' has the wrong size")
454   
455    # let's see if this has the right hash
456    if thisfilehash != find_hash(thisfilecontents, hashalgorithm):
457      raise IncorrectFileContents("File '"+thisrelativefilename+"' has the wrong hash")
458
459
460    # and add it to the datastore
461    xordatastore.set_data(thisoffset, thisfilecontents)
462     
463
464
465def _compute_block_hashlist(xordatastore, blockcount, blocksize, hashalgorithm):
466  # private helper, used both the compute and check hashes
467
468  currenthashlist = []
469 
470  # Now I'll check the blocks have the right hash...
471  for blocknum in range(blockcount):
472    # read the block ...
473    thisblock = xordatastore.get_data(blocksize*blocknum, blocksize)
474   
475    # ... and check its hash
476    currenthashlist.append(find_hash(thisblock, hashalgorithm))
477   
478  return currenthashlist
479   
480
481
482
483
def nogaps_offset_assignment_function(fileinfolist, rootdir, blocksize):
  """
  <Purpose>
    Decides where each file goes in an xordatastore.   This simple version
    packs the files back to back, in list order, starting at offset 0.

  <Arguments>
    fileinfolist: a list of dictionaries with file information.   Each one
                  must have a 'length' entry.

    rootdir: the root directory where the files live (not used here)

    blocksize: The size of a block of data (not used here)

  <Exceptions>
    TypeError, IndexError, or KeyError if the arguments are incorrect

  <Side Effects>
    Modifies the fileinfolist to add offset elements to each dict

  <Returns>
    None
  """

  # Neither the blocksize nor the rootdir matter for this layout.   Most
  # other assignment algorithms will want at least the blocksize.

  nextoffset = 0

  for entry in fileinfolist:
    # each file starts right where the previous one ended
    entry['offset'] = nextoffset
    nextoffset += entry['length']
515
516
517
518
519def _find_blockloc_from_offset(offset, sizeofblocks):
520  # Private helper function that translates an offset into (block, offset)
521  assert(offset >=0)
522
523  return (offset / sizeofblocks, offset % sizeofblocks)
524
525
526
def extract_file_from_blockdict(filename, manifestdict, blockdict):
  """
  <Purpose>
    Reconstitutes a file from a block dict

  <Arguments>
    filename: the file within the release we are asking about

    manifestdict: the manifest for the release

    blockdict: a dictionary of blocknum -> blockcontents

  <Exceptions>
    TypeError if the file is not listed in the manifest.

    TypeError, IndexError, or KeyError if the args are incorrect (for
    example, KeyError if a needed block is missing from blockdict).

  <Side Effects>
    None

  <Returns>
    A string containing the file contents
  """

  blocksize = manifestdict['blocksize']

  for fileinfo in manifestdict['fileinfolist']:
    if filename != fileinfo['filename']:
      continue

    offset = fileinfo['offset']
    quantity = fileinfo['length']

    # Translate the first byte and the byte just past the end into
    # (block number, offset within block).   divmod floors, so the block
    # numbers stay integers on every Python version.
    startblock, startoffset = divmod(offset, blocksize)
    endblock, endoffset = divmod(offset + quantity, blocksize)

    # Case 1: this does not cross blocks
    if startblock == endblock:
      return blockdict[startblock][startoffset:endoffset]

    # Case 2: this crosses blocks

    # start with the tail of the first block...
    pieces = [blockdict[startblock][startoffset:]]

    # ...then all of the 'middle' blocks, i.e. the blocks after the start
    # and before the end...
    for currentblock in range(startblock+1, endblock):
      pieces.append(blockdict[currentblock])

    # ...and finally the head of the end block.   The check is needed because
    # a file that ends exactly on a block boundary needs nothing from the
    # next block (which might be past the last block).
    if endoffset > 0:
      pieces.append(blockdict[endblock][:endoffset])

    # Join once instead of repeated concatenation.   The separator is an
    # empty slice of a block so it has the same type as the block contents.
    return pieces[0][:0].join(pieces)

  # Mirror get_blocklist_for_file: an unknown file is an error rather than a
  # silent None.
  raise TypeError("File is not in manifest")
582
583     
584
585 
586
587
588
589
590
591
def get_blocklist_for_file(filename, manifestdict):
  """
  <Purpose>
    Get the list of blocks needed to reconstruct a file

  <Arguments>
    filename: the file within the release we are asking about

    manifestdict: the manifest for the release

  <Exceptions>
    TypeError if the file is not listed in the manifest.

    TypeError, IndexError, or KeyError if the manifestdict / filename are
    corrupt

  <Side Effects>
    None

  <Returns>
    A list of blocks numbers
  """

  for fileinfo in manifestdict['fileinfolist']:
    if filename == fileinfo['filename']:
      blocksize = manifestdict['blocksize']

      # The first block holds the starting offset, the last block holds the
      # final byte (ending offset - 1).   '//' keeps the block numbers
      # integers even where '/' means true division.
      firstblock = fileinfo['offset'] // blocksize
      lastblock = (fileinfo['offset'] + fileinfo['length'] - 1) // blocksize

      # + 1 because range will otherwise omit the last block.   list() so a
      # real list is returned even where range is lazy.
      return list(range(firstblock, lastblock + 1))

  raise TypeError("File is not in manifest")
621 
622
623
624
625
626
627
def get_filenames_in_release(manifestdict):
  """
  <Purpose>
    Lists the names of all files a manifest describes.

  <Arguments>
    manifestdict: the manifest for the release

  <Exceptions>
    TypeError, IndexError, or KeyError if the manifestdict is corrupt

  <Side Effects>
    None

  <Returns>
    A list of file names, in the order they appear in the manifest's
    'fileinfolist'.
  """
  return [entry['filename'] for entry in manifestdict['fileinfolist']]
652
653
654
655
656def _generate_fileinfolist(startdirectory, hashalgorithm="sha1-base64"):
657  # private helper.   Generates a list of file information dictionaries for
658  # all files under startdirectory.
659
660  fileinfo_list = []
661
662  # let's walk through the directories and add the files + sizes
663  for parentdir, junkchilddirectories, filelist in os.walk(startdirectory):
664    for filename in filelist:
665      thisfiledict = {}
666      # we want the relative name in the manifest, not the actual path / name
667      thisfiledict['filename'] = filename
668      fullfilename = os.path.join(parentdir, filename)
669
670      thisfiledict['length'] = os.path.getsize(fullfilename)
671
672      # get the hash
673      filecontents = open(fullfilename).read()
674      thisfiledict['hash'] = find_hash(filecontents, hashalgorithm)
675
676      fileinfo_list.append(thisfiledict)
677     
678
679  return fileinfo_list
680 
681
def compute_bitstring_length(num_blocks):
  # Number of bytes needed for a bitstring with one bit per block: the
  # block count divided by eight, rounded up.
  bytesneeded = math.ceil(num_blocks / 8.0)
  return int(bytesneeded)
685
def set_bitstring_bit(bitstring, bitnum,valuetoset):
  """
  <Purpose>
    Quick function to set a bit in a bitstring.

  <Arguments>
    bitstring: a string where each character holds eight bits.   Bit 0 is
               the most significant bit of the first character.

    bitnum: the number of the bit to change.

    valuetoset: the bit is set to 1 if this is true, and to 0 otherwise.

  <Exceptions>
    IndexError if bitnum is past the end of the bitstring.

  <Returns>
    The resulting bitstring.   The bitstring passed in is returned as is
    when the bit already has the requested value.
  """
  # '//' so the byte index stays an integer even where '/' means true
  # division
  bytepos = bitnum // 8
  # bits are numbered from the most significant end of each byte
  bitmask = 1 << (7 - (bitnum % 8))

  bytevalue = ord(bitstring[bytepos])

  if valuetoset:
    newbytevalue = bytevalue | bitmask
  else:
    newbytevalue = bytevalue & ~bitmask

  if newbytevalue == bytevalue:
    # nothing to do, the bit already has this value
    return bitstring

  return bitstring[:bytepos] + chr(newbytevalue) + bitstring[bytepos+1:]
707   
708
def get_bitstring_bit(bitstring, bitnum):
  # Returns bit number bitnum of the bitstring, as the integer 0 or 1.
  # Bit 0 is the most significant bit of the first character.

  # '//' so the byte index stays an integer even where '/' means true
  # division
  bytepos = bitnum // 8
  bitpos = 7 - (bitnum % 8)

  # shift the wanted bit down to the lowest position and mask off the rest
  return (ord(bitstring[bytepos]) >> bitpos) & 1
716
717
def flip_bitstring_bit(bitstring, bitnum):
  # Returns the bitstring with bit number bitnum inverted.

  # 1 - bit maps 0 -> 1 and 1 -> 0
  invertedbit = 1 - get_bitstring_bit(bitstring, bitnum)

  return set_bitstring_bit(bitstring, bitnum, invertedbit)
726
727
728
def create_manifest(rootdir=".", hashalgorithm="sha1-base64", block_size=1024*1024, offset_assignment_function=nogaps_offset_assignment_function, vendorhostname=None, vendorport=62293):
  """
  <Purpose>
    Create a manifest  (and an xordatastore ?)

  <Arguments>
    rootdir: The area to walk looking for files to add to the manifest

    hashalgorithm: The hash algorithm to use to validate file contents.
                   NOTE(review): find_hash in this file only accepts the
                   'hex' and 'raw' encodings, so the default "sha1-base64"
                   looks like it would be rejected -- confirm and pass an
                   explicit value until this is settled.

    block_size: The size of a block of data.

    offset_assignment_function: specifies how to lay out the files in blocks.
                   It is called with (fileinfolist, rootdir, block_size) and
                   must add an 'offset' entry to every file dictionary.

    vendorhostname: the vendor's host name.   It is required and must not
                    contain a ':'.

    vendorport: the vendor's port.   It is stored in the manifest as is.

  <Exceptions>
    TypeError if the arguments are corrupt or of the wrong type, or if the
    offset assignment produced negative or overlapping placements.

    FileNotFound if a file disappears from rootdir while the manifest is
    being built.

    IncorrectFileContents if a file's size or hash changes while the manifest
    is being built.

  <Side Effects>
    This function creates an XORdatastore while processing.   This may use
    a very large amount of memory.   (This is not fundamental, and is done only
    for convenience).

  <Returns>
    The manifest dictionary
  """

  if vendorhostname is None:
    raise TypeError("Must specify vendor server name")

  if ':' in vendorhostname:
    raise TypeError("Vendor server name must not contain ':'")

  # general workflow:
  #   set the global parameters
  #   build an xordatastore and add file information as you go...
  #   derive hash information from the xordatastore

  manifestdict = {}

  manifestdict['manifestversion'] = "1.0"
  manifestdict['hashalgorithm'] = hashalgorithm
  manifestdict['blocksize'] = block_size
  manifestdict['vendorhostname'] = vendorhostname
  manifestdict['vendorport'] = vendorport

  # first get the file information
  fileinfolist = _generate_fileinfolist(rootdir, manifestdict['hashalgorithm'])

  # now let's assign the files to offsets as the caller requests...
  offset_assignment_function(fileinfolist, rootdir, manifestdict['blocksize'])

  # let's ensure the offsets are valid...

  # build a list of (offset, length) tuples, sorted by offset so that it's
  # easy to walk down them and check for overlapping entries...
  offsetlengthtuplelist = sorted([(fileinfo['offset'], fileinfo['length'])
                                  for fileinfo in fileinfolist])

  # ...now, we need to ensure the values don't overlap.
  nextfreeoffset = 0
  for offset, length in offsetlengthtuplelist:
    if offset < 0:
      raise TypeError("Offset generation led to negative offset!")
    if length < 0:
      # zero length files are fine, only negative lengths are rejected
      raise TypeError("File lengths must not be negative!")

    if nextfreeoffset > offset:
      raise TypeError("Error! Offset generation led to overlapping files!")

    # since this list is sorted by offset, this should ensure the property we
    # want is upheld.
    nextfreeoffset = offset + length

  # great!   The fileinfolist is okay!
  manifestdict['fileinfolist'] = fileinfolist

  # The nextfreeoffset value is the end of the datastore...   Let's see how
  # many blocks we need
  manifestdict['blockcount'] = int(math.ceil(nextfreeoffset * 1.0 / manifestdict['blocksize']))

  # TODO: Improve this.  It really shouldn't use a datastore...
  import fastsimplexordatastore

  xordatastore = fastsimplexordatastore.XORDatastore(manifestdict['blocksize'], manifestdict['blockcount'])

  # now let's put the files in the datastore
  _add_data_to_datastore(xordatastore, manifestdict['fileinfolist'], rootdir, manifestdict['hashalgorithm'])

  # and it is time to get the blockhashlist...
  manifestdict['blockhashlist'] = _compute_block_hashlist(xordatastore, manifestdict['blockcount'], manifestdict['blocksize'], manifestdict['hashalgorithm'])

  # let's generate the manifest's hash.   Note that this is the hash of the
  # serialized manifest *before* the 'manifesthash' key itself is added.
  rawmanifest = json.dumps(manifestdict)
  manifestdict['manifesthash'] = find_hash(rawmanifest, manifestdict['hashalgorithm'])

  # we are done!
  return manifestdict
Note: See TracBrowser for help on using the repository browser.