source: code/uppir_client.py @ 133

Last change on this file since 133 was 23, checked in by trishank, 7 years ago

Add pre-release upPIR from June 2011.

File size: 10.1 KB
Line 
1"""
2<Author>
3  Justin Cappos
4  (inspired from a previous version by Geremy Condra)
5
6<Start Date>
7  May 15th, 2011
8
9<Description>
10  Client code for retrieving upPIR files.   This program uses a manifest
11  to communicate with a vendor and retrieve a list of mirrors.   The client
12  then _privately_ downloads the appropriate files from mirrors in the mirror
13  list.  None of the mirrors can tell what file or files were downloaded.
14
15  For more technical explanation, please see the upPIR papers on my website.
16 
17
18<Usage>
19  $ python uppir_client.py file1 [file2 ...]
20 
21  This will anonymously download file1, file2, ... from the vendor specified.
22
23
24<Options>
25  See below
26 
27
28"""
29
30
31# This file is laid out in two main parts.   First, there are some helper
32# functions to do moderately complex things like retrieving a block from a
33# mirror or splitting a file into blocks.   The second part contains the option
34# parsing and main.   To get an overall feel for the code, it is recommended
35# to follow the execution from main on.
36#
37# EXTENSION POINTS:
38#
39# Making the client extensible is a major problem.   In particular, we will
40# need to modify mirror selection, block selection, malicious mirror detection,
41# and avoiding slow nodes simultaneously.   To do this effectively, we need
42# some sort of mechanism that gives the programmer control over how to handle
43# these.
44#
45# The XORRequestor interface is used to address these issues.   The
46# programmer defines an object that is provided the manifest,
47# mirrorlist, and blocks to retrieve.   The XORRequestor object must support
48# several methods: get_next_xorrequest(), notify_failure(xorrequest),
49# notify_success(xorrequest, xordata), and return_block(blocknum).   The
50# request_blocks_from_mirrors function in this file will use threads to call
51# these methods to determine what to retrieve.   The notify_* routines are
52# used to inform the XORRequestor object of prior results so that it can
53# decide how to issue future block requests.   This separates out the 'what'
54# from the 'how' but has a slight loss of control.  Note that the block
55# reconstruction, etc. is done here to allow easy extensibility of malicious
56# mirror detection / vendor notification.
57#
58#
59# The manifest file could also be extended to support huge files (those that
60# span multiple releases).   The client would need to download files from
61# multiple releases and then stitch them back together.   This would require
62# minor changes (or possibly could be done using this code as a black box).
63#
64
65
66import sys
67
68import optparse
69
70
71# helper functions that are shared
72import uppirlib
73
74
75# used to issue requests in parallel
76import threading
77
78
79# I really should have a way to do this based upon command line options
80import simplexorrequestor
81
82
83# for basename
84import os.path
85
86
87
def _request_helper(rxgobj):
  """
  <Purpose>
    Private worker loop that services XOR requests.   Several threads run
    this concurrently against the same requestor object.

  <Arguments>
    rxgobj: the XORRequestor object.   This function calls its
            get_next_xorrequest(), notify_success(request, xorblock) and
            notify_failure(request) methods.

  <Side Effects>
    Contacts mirrors.   Writes a '.' to stdout for every successful request
    and an 'F' for every request that failed with a socket error.

  <Exceptions>
    Any exception from the retrieval that is not socket related is re-raised.

  <Returns>
    None
  """
  # Imported locally so this function does not rely on a module level import.
  import socket

  thisrequest = rxgobj.get_next_xorrequest()

  # an empty tuple signals that there are no more requests
  while thisrequest != ():
    # element 0 is indexed by 'ip' / 'port'; element 2 is the bitstring to send
    mirrorip = thisrequest[0]['ip']
    mirrorport = thisrequest[0]['port']
    bitstring = thisrequest[2]

    try:
      # request the XOR block...
      xorblock = uppirlib.retrieve_xorblock_from_mirror(mirrorip, mirrorport, bitstring)

    except Exception as e:
      # The previous check searched the message for 'socked' (a typo), so a
      # communication failure was never reported to the requestor.   Treat
      # real socket exceptions, and errors whose text mentions a socket, as
      # a failed request.
      if isinstance(e, socket.error) or 'socket' in str(e).lower():
        rxgobj.notify_failure(thisrequest)
        sys.stdout.write('F')
        sys.stdout.flush()
      else:
        # not a communication problem, so let the caller see it
        raise

    else:
      # we retrieved it successfully...
      rxgobj.notify_success(thisrequest, xorblock)
      sys.stdout.write('.')
      sys.stdout.flush()

    # regardless of failure or success, get another request...
    thisrequest = rxgobj.get_next_xorrequest()
122
123
def request_blocks_from_mirrors(requestedblocklist, manifestdict):
  """
  <Purpose>
    Retrieves blocks from mirrors

  <Arguments>
    requestedblocklist: the blocks to acquire

    manifestdict: the manifest with information about the release.   The
                  'vendorhostname' and 'vendorport' entries are used to
                  fetch the mirror list.

  <Side Effects>
    Contacts the vendor and the mirrors to retrieve blocks.   Prints the
    mirror list and progress characters to stdout.   Reads the global
    options _commandlineoptions.numberofmirrors and
    _commandlineoptions.numberofthreads.

  <Exceptions>
    TypeError may be raised if the provided lists are invalid.
    socket errors may be raised if communications fail.

  <Returns>
    A dict mapping blocknumber -> blockcontents.
  """

  # let's get the list of mirrors...
  mirrorinfolist = uppirlib.retrieve_mirrorinfolist(manifestdict['vendorhostname'], manifestdict['vendorport'])
  print("Mirrors:  %s" % (mirrorinfolist,))

  # let's set up a requestor object...
  rxgobj = simplexorrequestor.RandomXORRequestor(mirrorinfolist, requestedblocklist, manifestdict, _commandlineoptions.numberofmirrors)

  # Fire up the requested number of threads.   The calling thread also does
  # work, hence the -1.
  workerthreads = []
  for threadnum in range(_commandlineoptions.numberofthreads - 1):
    worker = threading.Thread(target=_request_helper, args=[rxgobj])
    worker.start()
    workerthreads.append(worker)

  _request_helper(rxgobj)

  # A worker may still be in the middle of a retrieval when this thread runs
  # out of requests.   Wait for all of them so that no block is read back
  # before its last response has been handed to the requestor.
  for worker in workerthreads:
    worker.join()

  # finish the line of progress characters
  sys.stdout.write('\n')

  # okay, now we have them all...   Let's get the returned dict ready...
  retdict = {}
  for blocknum in requestedblocklist:
    retdict[blocknum] = rxgobj.return_block(blocknum)

  return retdict
168 
169
170 
171
172
173
174
def request_files_from_mirrors(requestedfilelist, manifestdict):
  """
  <Purpose>
    Reconstitutes files by privately contacting mirrors

  <Arguments>
    requestedfilelist: the files to acquire

    manifestdict: the manifest with information about the release.   Its
                  'hashalgorithm' and 'fileinfolist' entries are used to
                  verify each reconstructed file.

  <Side Effects>
    Contacts mirrors to retrieve files.   Each file is written to the
    current directory under its base name (any directory part is dropped).
    Prints the block list for each file and a line per file written.

  <Exceptions>
    TypeError may be raised if the provided lists are invalid.
    socket errors may be raised if communications fail.
    Exception is raised if a file's hash does not match the manifest or if
    the file has no entry in the manifest.

  <Returns>
    None
  """

  # Work out which blocks are needed.   The list keeps first-seen order and
  # the set gives a cheap "already requested" test.
  neededblocks = []
  seenblocks = set()
  for filename in requestedfilelist:
    theseblocks = uppirlib.get_blocklist_for_file(filename, manifestdict)
    print("%s %s" % (filename, theseblocks))

    # add the blocks we don't already know we need to request
    for blocknum in theseblocks:
      if blocknum not in seenblocks:
        seenblocks.add(blocknum)
        neededblocks.append(blocknum)

  # do the actual retrieval work
  blockdict = request_blocks_from_mirrors(neededblocks, manifestdict)

  # now we should write out the files
  for filename in requestedfilelist:
    filedata = uppirlib.extract_file_from_blockdict(filename, manifestdict, blockdict)

    # let's check the hash
    thisfilehash = uppirlib.find_hash(filedata, manifestdict['hashalgorithm'])

    for fileinfo in manifestdict['fileinfolist']:
      if fileinfo['filename'] != filename:
        continue

      # this is the entry for our file; it must agree with what we rebuilt
      if thisfilehash != fileinfo['hash']:
        raise Exception("Corrupt manifest has incorrect file hash despite passing block hash checks")
      break
    else:
      # the loop finished without finding an entry for this file
      raise Exception("Internal Error: Cannot locate fileinfo in manifest")

    # Write under the name without the dir.   Binary mode keeps the payload
    # byte-exact on platforms that translate newlines in text mode, and the
    # with statement closes the file even if the write fails.
    filenamewithoutpath = os.path.basename(filename)
    with open(filenamewithoutpath, "wb") as outputfile:
      outputfile.write(filedata)
    print("wrote %s" % (filenamewithoutpath,))
234
235
236
237
238########################### Option parsing and main ###########################
# Parsed command line options.   Set exactly once by parse_options().
_commandlineoptions = None

def parse_options():
  """
  <Purpose>
    Parses command line arguments.

  <Arguments>
    None

  <Side Effects>
    All relevant data is added to _commandlineoptions.   The non-option
    arguments are stored there as filestoretrieve.

  <Exceptions>
    Malformed options are handled by optparse internally.   I believe it
    will print / exit itself without raising exceptions further.   This
    function prints an error and exits with status 1 if the mirror or
    thread count is not positive or if no files are listed.

  <Returns>
    The list of files to retrieve
  """
  global _commandlineoptions

  # should be true unless we're initing twice...
  assert _commandlineoptions is None

  parser = optparse.OptionParser()

  parser.add_option("--retrievemanifestfrom", dest="retrievemanifestfrom",
        type="string", metavar="vendorIP:port", default="",
        help="Specifies the vendor to retrieve the manifest from (default None).")

  parser.add_option("--manifestfile", dest="manifestfilename",
        type="string", default="manifest.dat",
        help="The manifest file to use (default manifest.dat).")

  parser.add_option("-n", "--numberofmirrors", dest="numberofmirrors",
        type="int", default=3,
        help="How many mirrors should need to collude to break privacy? (default 3)")

  parser.add_option("--numberofthreads", dest="numberofthreads",
        type="int", default=None,
        help="How many threads should concurrently contact mirrors? (default numberofmirrors)")

  # let's parse the args
  (_commandlineoptions, remainingargs) = parser.parse_args()

  if _commandlineoptions.numberofmirrors < 1:
    print("Mirrors to contact must be positive")
    sys.exit(1)

  # the thread count defaults to the mirror count when it is not given
  if _commandlineoptions.numberofthreads is None:
    _commandlineoptions.numberofthreads = _commandlineoptions.numberofmirrors

  if _commandlineoptions.numberofthreads < 1:
    print("Number of threads must be positive")
    sys.exit(1)

  if len(remainingargs) == 0:
    print("Must specify some files to retrieve!")
    sys.exit(1)

  _commandlineoptions.filestoretrieve = remainingargs

  # the docstring has always promised this list, so hand it back as well
  return remainingargs
305
306
307
308
309
310
def main():
  """
  <Purpose>
    Obtains the manifest, checks that every requested file is listed in it
    and then retrieves the files from the mirrors.

  <Arguments>
    None.   The settings are read from _commandlineoptions, so
    parse_options() must have been called first.

  <Side Effects>
    If a vendor was given with --retrievemanifestfrom, the manifest is
    downloaded and written to the manifest file.   Otherwise the manifest
    file is read from disk.   Prints the list of files in the release.
    Exits with status 2 if a requested file is not in the manifest.

  <Exceptions>
    Whatever the uppirlib calls or the file operations raise.

  <Returns>
    None
  """

  # If we were asked to retrieve the manifest file, do so...
  if _commandlineoptions.retrievemanifestfrom:
    # We need to download this file...
    rawmanifestdata = uppirlib.retrieve_rawmanifest(_commandlineoptions.retrievemanifestfrom)

    # ...make sure it is valid...
    manifestdict = uppirlib.parse_manifest(rawmanifestdata)

    # ...and write it out if it's okay.   The with statement closes the file
    # instead of leaving that to the garbage collector.
    with open(_commandlineoptions.manifestfilename, "w") as manifestfile:
      manifestfile.write(rawmanifestdata)

  else:
    # Simply read it in from disk
    with open(_commandlineoptions.manifestfilename) as manifestfile:
      rawmanifestdata = manifestfile.read()

    manifestdict = uppirlib.parse_manifest(rawmanifestdata)

  # we will check that the files are in the release

  # find the list of files
  manifestfilelist = uppirlib.get_filenames_in_release(manifestdict)

  print(manifestfilelist)

  # ensure the requested files are in there...
  for filename in _commandlineoptions.filestoretrieve:
    if filename not in manifestfilelist:
      print("File: %s is not listed in the manifest." % (filename,))
      sys.exit(2)

  request_files_from_mirrors(_commandlineoptions.filestoretrieve, manifestdict)
351
352
353
if __name__ == "__main__":
  # The options have to be parsed first because main() reads them from the
  # module level _commandlineoptions.
  parse_options()
  main()
357
Note: See TracBrowser for help on using the repository browser.