You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							106 lines
						
					
					
						
							2.9 KiB
						
					
					
				
			
		
		
	
	
							106 lines
						
					
					
						
							2.9 KiB
						
					
					
				| import os
 | |
| import sys
 | |
| import time
 | |
| import tempfile
 | |
| import threading
 | |
| import urllib.parse
 | |
| import pycurl
 | |
| import hashlib
 | |
| from io import BytesIO
 | |
| from tenacity import retry, wait_random_exponential, stop_after_attempt
 | |
| from common.file_helpers import mkdirs_exists_ok, atomic_write_in_dir
 | |
| 
 | |
class URLFile(object):
  """Minimal read-only, file-like wrapper around a remote URL.

  Data is fetched with HTTP ``Range`` requests through pycurl, starting at an
  internal position that ``read`` advances and ``seek`` sets.  The ``name``
  property downloads the remaining content into a temporary file so the
  object can be handed to code that needs a real path; that file is deleted
  when the object is used as a context manager and the ``with`` block exits.
  """

  # One curl handle per thread, shared by every URLFile created on that thread.
  _tlocal = threading.local()

  def __init__(self, url, debug=False):
    """Create a reader for *url*.

    Args:
      url: address of the remote resource.
      debug: when true, ``read`` prints timing and verbose curl output.
    """
    self._url = url
    self._pos = 0
    self._local_file = None
    self._debug = debug

    # Reuse this thread's curl handle if one exists, otherwise create it.
    try:
      self._curl = self._tlocal.curl
    except AttributeError:
      self._curl = self._tlocal.curl = pycurl.Curl()

  def __enter__(self):
    return self

  def __exit__(self, type, value, traceback):
    """Close and delete the temporary file created by ``name``, if any."""
    local_file, self._local_file = self._local_file, None
    if local_file is not None:
      # Close before unlinking: some platforms refuse to delete an open file,
      # and the finally clause guarantees removal is attempted even if close()
      # raises.
      try:
        local_file.close()
      finally:
        os.remove(local_file.name)

  @retry(wait=wait_random_exponential(multiplier=1, max=5), stop=stop_after_attempt(3), reraise=True)
  def read(self, ll=None):
    """Read bytes from the current position and advance it.

    Args:
      ll: number of bytes to request; ``None`` requests everything from the
        current position to the end of the resource.

    Returns:
      The bytes received.  ``b""`` when the server answers 416 (the position
      is at or past the end) or when ``ll`` is 0.

    Raises:
      Exception: if the HTTP status is neither 200, 206 nor 416.  Failures
        are retried (3 attempts in total) before the last error is re-raised.
    """
    if ll is None:
      trange = 'bytes=%d-' % self._pos
    elif ll == 0:
      # A zero-length read would otherwise produce the malformed range
      # "bytes=N-(N-1)"; behave like a regular file and return nothing.
      return b""
    else:
      trange = 'bytes=%d-%d' % (self._pos, self._pos+ll-1)

    dats = BytesIO()
    c = self._curl
    c.setopt(pycurl.URL, self._url)
    c.setopt(pycurl.WRITEDATA, dats)
    c.setopt(pycurl.NOSIGNAL, 1)
    c.setopt(pycurl.TIMEOUT_MS, 500000)
    c.setopt(pycurl.HTTPHEADER, ["Range: " + trange, "Connection: keep-alive"])
    c.setopt(pycurl.FOLLOWLOCATION, True)

    if self._debug:
      # NOTE(review): these options are set on the thread-shared handle and
      # are never cleared, so they stay active for later non-debug URLFile
      # instances on the same thread.
      print("downloading", self._url)
      def header(x):
        # Only surface header lines containing "MISS".
        if b'MISS' in x:
          print(x.strip())
      c.setopt(pycurl.HEADERFUNCTION, header)
      def test(debug_type, debug_msg):
        print("  debug(%d): %s" % (debug_type, debug_msg.strip()))
      c.setopt(pycurl.VERBOSE, 1)
      c.setopt(pycurl.DEBUGFUNCTION, test)
      t1 = time.time()

    c.perform()

    if self._debug:
      t2 = time.time()
      if t2-t1 > 0.1:
        print("get %s %r %.f slow" % (self._url, trange, t2-t1))

    response_code = c.getinfo(pycurl.RESPONSE_CODE)
    if response_code == 416: #  Requested Range Not Satisfiable
      # Return bytes, like every other path, so callers (including ``name``,
      # which writes the result to a file descriptor) never receive a str.
      return b""
    if response_code != 206 and response_code != 200:
      raise Exception("Error {}: {}".format(response_code, repr(dats.getvalue())[:500]))

    ret = dats.getvalue()
    self._pos += len(ret)
    return ret

  def seek(self, pos):
    """Set the position the next ``read`` starts from."""
    self._pos = pos

  @property
  def name(self):
    """Returns a local path to file with the URLFile's contents.

       This can be used to interface with modules that require local files.

       The first access downloads the data into a temporary file (keeping the
       URL's file extension) and rebinds ``read`` and ``seek`` on this
       instance to that file; later accesses return the same path.
    """
    if self._local_file is None:
      _, ext = os.path.splitext(urllib.parse.urlparse(self._url).path)
      local_fd, local_path = tempfile.mkstemp(suffix=ext)
      try:
        # NOTE(review): the download starts at the current position, so bytes
        # before an earlier read()/seek() offset are not part of the local
        # copy -- confirm this is intended.
        remaining = memoryview(self.read())
        # os.write may write fewer bytes than requested; loop until done.
        while remaining:
          written = os.write(local_fd, remaining)
          remaining = remaining[written:]
        local_file = open(local_path, "rb")
      except BaseException:
        # Do not leave a partial temp file behind on any failure.
        os.remove(local_path)
        raise
      finally:
        os.close(local_fd)

      self._local_file = local_file
      # From now on serve reads and seeks from the local copy.
      self.read = self._local_file.read
      self.seek = self._local_file.seek

    return self._local_file.name
 | |
| 
 |