@@ -11,17 +11,21 @@ class CacheBlock:
1111 data : bytes
1212
1313class URLCache :
14- CACHESIZE = 64
14+ CACHESIZE = 128
1515 BLOCKSIZE = 1 * 1024 * 1024
16- READAHEAD = 20
1716 TIMEOUT = 30
17+ MIN_READAHEAD = 8
18+ MAX_READAHEAD = 64
19+ SPINNER = "/-\\ |"
1820
def __init__(self, url):
    """Initialise an empty block cache for the remote resource at ``url``.

    Args:
        url: Location of the remote resource; stored as ``self.url``.
    """
    self.url = url
    # get_size() is defined elsewhere in the class; it is called after
    # self.url is set (presumably because it needs the URL -- confirm).
    self.size = self.get_size()
    self.p = 0  # current read position, in bytes
    self.cache = {}  # fetched blocks, keyed by block index
    self.blocks_read = 0  # fetch counter, reset by flush_progress()
    # Start optimistic; the retry path drops this to MIN_READAHEAD.
    self.readahead = self.MAX_READAHEAD
    self.spin = 0  # index of the current SPINNER frame
2529
def seekable(self):
    """Return True unconditionally: the object always reports itself seekable."""
    return True
@@ -37,23 +41,26 @@ def get_partial(self, off, size):
3741 req .add_header ("Range" , f"bytes={ off } -{ off + size - 1 } " )
3842 fd = request .urlopen (req , timeout = self .TIMEOUT )
3943
40- if size <= self . BLOCKSIZE :
41- sys . stdout . write ( "." )
42- else :
43- sys .stdout .write ("+ " )
44+ d = fd . read ()
45+
46+ self . spin = ( self . spin + 1 ) % len ( self . SPINNER )
47+ sys .stdout .write (f" \r { self . SPINNER [ self . spin ] } " )
4448 sys .stdout .flush ()
4549 self .blocks_read += 1
4650
47- return fd . read ()
51+ return d
4852
49- def get_block (self , blk ):
53+ def get_block (self , blk , readahead = 1 ):
5054 if blk in self .cache :
5155 return self .cache [blk ]
5256
5357 off = blk * self .BLOCKSIZE
5458 size = self .BLOCKSIZE
5559
56- for i in range (self .READAHEAD ):
60+ blocks = max (self .MIN_READAHEAD ,
61+ min (readahead , self .readahead )) - 1
62+
63+ for i in range (blocks ):
5764 if blk + i in self .cache :
5865 break
5966 size += self .BLOCKSIZE
@@ -71,6 +78,9 @@ def get_block(self, blk):
7178 p_warning (f"Error downloading data ({ e } ), retrying... ({ retry + 1 } /{ retries } )" )
7279 time .sleep (sleep )
7380 sleep += 1
81+ # Retry in smaller chunks
82+ self .readahead = self .MIN_READAHEAD
83+ size = min (size , self .readahead * self .BLOCKSIZE )
7484 else :
7585 break
7686
@@ -102,9 +112,14 @@ def read(self, count=None):
102112 blk_start = self .p // self .BLOCKSIZE
103113 blk_end = (self .p + count - 1 ) // self .BLOCKSIZE
104114
115+ blocks = blk_end - blk_start + 1
116+
105117 d = []
106118 for blk in range (blk_start , blk_end + 1 ):
107- d .append (self .get_block (blk ).data )
119+ readahead = blk_end - blk + 1
120+ d .append (self .get_block (blk , readahead ).data )
121+ prog = (blk - blk_start + 1 ) / blocks * 100
122+ self .blocks_read += 1
108123
109124 trim = self .p - (blk_start * self .BLOCKSIZE )
110125 d [0 ] = d [0 ][trim :]
@@ -117,8 +132,10 @@ def read(self, count=None):
def flush_progress(self):
    """End the progress line on stdout if any blocks were fetched.

    When ``self.blocks_read`` is positive, a newline is written to stdout
    and the counter is reset to zero.

    Returns:
        True if a newline was written, False if there was nothing to flush.
    """
    if self.blocks_read <= 0:
        return False

    # Step past the line the progress indicator was drawn on.
    sys.stdout.write("\n")
    self.blocks_read = 0
    return True
122139
123140if __name__ == "__main__" :
124141 import sys , zipfile
0 commit comments