Skip to content

Commit 6e04afd

Browse files
committed
urlcache: Redo readahead logic, add spinner
Now avoids reading ahead uselessly, uses a larger blocksize by default, and shows a spinner.

Signed-off-by: Hector Martin <[email protected]>
1 parent e59c111 commit 6e04afd

1 file changed

Lines changed: 29 additions & 12 deletions

File tree

src/urlcache.py

Lines changed: 29 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -11,17 +11,21 @@ class CacheBlock:
1111
data: bytes
1212

1313
class URLCache:
14-
CACHESIZE = 64
14+
CACHESIZE = 128
1515
BLOCKSIZE = 1 * 1024 * 1024
16-
READAHEAD = 20
1716
TIMEOUT = 30
17+
MIN_READAHEAD = 8
18+
MAX_READAHEAD = 64
19+
SPINNER = "/-\\|"
1820

1921
def __init__(self, url):
2022
self.url = url
2123
self.size = self.get_size()
2224
self.p = 0
2325
self.cache = {}
2426
self.blocks_read = 0
27+
self.readahead = self.MAX_READAHEAD
28+
self.spin = 0
2529

2630
def seekable(self):
2731
return True
@@ -37,23 +41,26 @@ def get_partial(self, off, size):
3741
req.add_header("Range", f"bytes={off}-{off+size-1}")
3842
fd = request.urlopen(req, timeout=self.TIMEOUT)
3943

40-
if size <= self.BLOCKSIZE:
41-
sys.stdout.write(".")
42-
else:
43-
sys.stdout.write("+")
44+
d = fd.read()
45+
46+
self.spin = (self.spin + 1) % len(self.SPINNER)
47+
sys.stdout.write(f"\r{self.SPINNER[self.spin]} ")
4448
sys.stdout.flush()
4549
self.blocks_read += 1
4650

47-
return fd.read()
51+
return d
4852

49-
def get_block(self, blk):
53+
def get_block(self, blk, readahead=1):
5054
if blk in self.cache:
5155
return self.cache[blk]
5256

5357
off = blk * self.BLOCKSIZE
5458
size = self.BLOCKSIZE
5559

56-
for i in range(self.READAHEAD):
60+
blocks = max(self.MIN_READAHEAD,
61+
min(readahead, self.readahead)) - 1
62+
63+
for i in range(blocks):
5764
if blk + i in self.cache:
5865
break
5966
size += self.BLOCKSIZE
@@ -71,6 +78,9 @@ def get_block(self, blk):
7178
p_warning(f"Error downloading data ({e}), retrying... ({retry + 1}/{retries})")
7279
time.sleep(sleep)
7380
sleep += 1
81+
# Retry in smaller chunks
82+
self.readahead = self.MIN_READAHEAD
83+
size = min(size, self.readahead * self.BLOCKSIZE)
7484
else:
7585
break
7686

@@ -102,9 +112,14 @@ def read(self, count=None):
102112
blk_start = self.p // self.BLOCKSIZE
103113
blk_end = (self.p + count - 1) // self.BLOCKSIZE
104114

115+
blocks = blk_end - blk_start + 1
116+
105117
d = []
106118
for blk in range(blk_start, blk_end + 1):
107-
d.append(self.get_block(blk).data)
119+
readahead = blk_end - blk + 1
120+
d.append(self.get_block(blk, readahead).data)
121+
prog = (blk - blk_start + 1) / blocks * 100
122+
self.blocks_read += 1
108123

109124
trim = self.p - (blk_start * self.BLOCKSIZE)
110125
d[0] = d[0][trim:]
@@ -117,8 +132,10 @@ def read(self, count=None):
117132
def flush_progress(self):
118133
if self.blocks_read > 0:
119134
sys.stdout.write("\n")
120-
self.blocks_read = 0
121-
135+
self.blocks_read = 0
136+
return True
137+
else:
138+
return False
122139

123140
if __name__ == "__main__":
124141
import sys, zipfile

0 commit comments

Comments
 (0)