You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							106 lines
						
					
					
						
							2.9 KiB
						
					
					
				
			
		
		
	
	
							106 lines
						
					
					
						
							2.9 KiB
						
					
					
				import os
 | 
						|
import sys
 | 
						|
import time
 | 
						|
import tempfile
 | 
						|
import threading
 | 
						|
import urllib.parse
 | 
						|
import pycurl
 | 
						|
import hashlib
 | 
						|
from io import BytesIO
 | 
						|
from tenacity import retry, wait_random_exponential, stop_after_attempt
 | 
						|
from common.file_helpers import mkdirs_exists_ok, atomic_write_in_dir
 | 
						|
 | 
						|
class URLFile(object):
  """Read-only, file-like access to a remote URL using HTTP Range requests.

  The pycurl handle is stored in a class-level ``threading.local``, so every
  URLFile created on the same thread reuses the same handle.

  Can be used as a context manager; leaving the context deletes the local
  temporary copy created by the ``name`` property, if any.
  """

  _tlocal = threading.local()

  def __init__(self, url, debug=False):
    """Create a reader positioned at byte 0 of ``url``.

    Args:
      url: URL to fetch data from.
      debug: when true, ``read`` prints the URL, cache-miss headers, verbose
        curl output and slow-request timings.
    """
    self._url = url
    self._pos = 0
    self._local_file = None
    self._debug = debug

    # Reuse this thread's curl handle if one was already created.
    try:
      self._curl = self._tlocal.curl
    except AttributeError:
      self._curl = self._tlocal.curl = pycurl.Curl()

  def __enter__(self):
    return self

  def __exit__(self, type, value, traceback):
    """Close and delete the local temporary copy, if ``name`` created one."""
    if self._local_file is not None:
      local_path = self._local_file.name
      # Close before removing: some platforms refuse to delete an open file,
      # and this way the handle is released even if the removal fails.
      self._local_file.close()
      self._local_file = None
      os.remove(local_path)

  @retry(wait=wait_random_exponential(multiplier=1, max=5), stop=stop_after_attempt(3), reraise=True)
  def read(self, ll=None):
    """Read bytes starting at the current position and advance the position.

    The call is attempted up to 3 times with randomized exponential backoff;
    the exception of the final attempt is re-raised.

    Args:
      ll: number of bytes to read. ``None`` or a negative value reads to the
        end of the resource; ``0`` returns ``b""`` without a request.

    Returns:
      The bytes received. ``b""`` when the server answers 416 (the position
      is past the end of the resource).

    Raises:
      Exception: if the HTTP status is neither 200, 206 nor 416.
      pycurl.error: if the transfer itself fails.
    """
    if ll is not None and ll == 0:
      # "bytes=N-(N-1)" is not a valid range; a server may ignore it and
      # send the whole body, so answer a zero-length read locally.
      return b""

    if ll is None or ll < 0:
      trange = 'bytes=%d-' % self._pos
    else:
      # HTTP byte ranges are inclusive on both ends.
      trange = 'bytes=%d-%d' % (self._pos, self._pos+ll-1)

    dats = BytesIO()
    c = self._curl
    c.setopt(pycurl.URL, self._url)
    c.setopt(pycurl.WRITEDATA, dats)
    c.setopt(pycurl.NOSIGNAL, 1)
    c.setopt(pycurl.TIMEOUT_MS, 500000)  # 500 seconds
    c.setopt(pycurl.HTTPHEADER, ["Range: " + trange, "Connection: keep-alive"])
    c.setopt(pycurl.FOLLOWLOCATION, True)

    if self._debug:
      print("downloading", self._url)

      def header(x):
        # Only surface response headers mentioning a cache MISS.
        if b'MISS' in x:
          print(x.strip())
      c.setopt(pycurl.HEADERFUNCTION, header)

      def test(debug_type, debug_msg):
        print("  debug(%d): %s" % (debug_type, debug_msg.strip()))
      c.setopt(pycurl.VERBOSE, 1)
      c.setopt(pycurl.DEBUGFUNCTION, test)
      t1 = time.time()

    c.perform()

    if self._debug:
      t2 = time.time()
      if t2-t1 > 0.1:
        print("get %s %r %.f slow" % (self._url, trange, t2-t1))

    response_code = c.getinfo(pycurl.RESPONSE_CODE)
    if response_code == 416:  # Requested Range Not Satisfiable
      # Return bytes (not str) so every code path has the same return type.
      return b""
    if response_code != 206 and response_code != 200:
      raise Exception("Error {}: {}".format(response_code, repr(dats.getvalue())[:500]))

    ret = dats.getvalue()
    self._pos += len(ret)
    return ret

  def seek(self, pos):
    """Set the absolute byte offset used by the next ``read``."""
    self._pos = pos

  @property
  def name(self):
    """Returns a local path to file with the URLFile's contents.

       This can be used to interface with modules that require local files.

       On first access the data returned by ``read()`` (everything from the
       current position onward) is written to a temporary file, and this
       object's ``read`` and ``seek`` are rebound to that local file.
    """
    if self._local_file is None:
      # Keep the URL's extension so consumers that sniff by suffix still work.
      _, ext = os.path.splitext(urllib.parse.urlparse(self._url).path)
      local_fd, local_path = tempfile.mkstemp(suffix=ext)
      try:
        # os.write may write fewer bytes than requested, so loop until done.
        remaining = memoryview(self.read())
        while len(remaining) > 0:
          written = os.write(local_fd, remaining)
          remaining = remaining[written:]
        local_file = open(local_path, "rb")
      except BaseException:
        # Do not leave a partial temp file behind; the error still propagates.
        os.remove(local_path)
        raise
      finally:
        os.close(local_fd)

      self._local_file = local_file
      # From now on serve reads and seeks from the local copy.
      self.read = self._local_file.read
      self.seek = self._local_file.seek

    return self._local_file.name
 | 
						|
 |