# pylint: skip-file
import os
import time
import tempfile
import threading
import urllib . parse
import pycurl
from io import BytesIO
from tenacity import retry , wait_random_exponential , stop_after_attempt
class URLFile ( object ) :
_tlocal = threading . local ( )
def __init__ ( self , url , debug = False ) :
self . _url = url
self . _pos = 0
self . _local_file = None
self . _debug = debug
try :
self . _curl = self . _tlocal . curl
except AttributeError :
self . _curl = self . _tlocal . curl = pycurl . Curl ( )
def __enter__ ( self ) :
return self
def __exit__ ( self , type , value , traceback ) :
if self . _local_file is not None :
os . remove ( self . _local_file . name )
self . _local_file . close ( )
self . _local_file = None
@retry ( wait = wait_random_exponential ( multiplier = 1 , max = 5 ) , stop = stop_after_attempt ( 3 ) , reraise = True )
def read ( self , ll = None ) :
if ll is None :
trange = ' bytes= %d - ' % self . _pos
else :
trange = ' bytes= %d - %d ' % ( self . _pos , self . _pos + ll - 1 )
dats = BytesIO ( )
c = self . _curl
c . setopt ( pycurl . URL , self . _url )
c . setopt ( pycurl . WRITEDATA , dats )
c . setopt ( pycurl . NOSIGNAL , 1 )
c . setopt ( pycurl . TIMEOUT_MS , 500000 )
c . setopt ( pycurl . HTTPHEADER , [ " Range: " + trange , " Connection: keep-alive " ] )
c . setopt ( pycurl . FOLLOWLOCATION , True )
if self . _debug :
print ( " downloading " , self . _url )
def header ( x ) :
if b ' MISS ' in x :
print ( x . strip ( ) )
c . setopt ( pycurl . HEADERFUNCTION , header )
def test ( debug_type , debug_msg ) :
print ( " debug( %d ): %s " % ( debug_type , debug_msg . strip ( ) ) )
c . setopt ( pycurl . VERBOSE , 1 )
c . setopt ( pycurl . DEBUGFUNCTION , test )
t1 = time . time ( )
c . perform ( )
if self . _debug :
t2 = time . time ( )
if t2 - t1 > 0.1 :
print ( " get %s %r % .f slow " % ( self . _url , trange , t2 - t1 ) )
response_code = c . getinfo ( pycurl . RESPONSE_CODE )
if response_code == 416 : # Requested Range Not Satisfiable
return " "
if response_code != 206 and response_code != 200 :
raise Exception ( " Error {} ( {} ): {} " . format ( response_code , self . _url , repr ( dats . getvalue ( ) ) [ : 500 ] ) )
ret = dats . getvalue ( )
self . _pos + = len ( ret )
return ret
def seek ( self , pos ) :
self . _pos = pos
@property
def name ( self ) :
""" Returns a local path to file with the URLFile ' s contents.
This can be used to interface with modules that require local files .
"""
if self . _local_file is None :
_ , ext = os . path . splitext ( urllib . parse . urlparse ( self . _url ) . path )
local_fd , local_path = tempfile . mkstemp ( suffix = ext )
try :
os . write ( local_fd , self . read ( ) )
local_file = open ( local_path , " rb " )
except :
os . remove ( local_path )
raise
finally :
os . close ( local_fd )
self . _local_file = local_file
self . read = self . _local_file . read
self . seek = self . _local_file . seek
return self . _local_file . name