Skip to content

Commit 9b04139

Browse files
committed
rewrite urlretrieve w/o urllib
1 parent 53011f3 commit 9b04139

File tree

1 file changed

+23
-21
lines changed

1 file changed

+23
-21
lines changed

datasets/loader_utils.py

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,32 +15,34 @@
1515
# ===============================================================================
1616

1717
import re
18-
from urllib.request import urlretrieve, Request
18+
import requests
1919
import os
20+
from urllib.request import urlretrieve
21+
from shutil import copyfile
2022
import numpy as np
21-
import tqdm
22-
23-
pbar: tqdm.tqdm = None
24-
25-
def _show_progress(block_num: int, block_size: int, total_size: int) -> None:
26-
global pbar
27-
if pbar is None:
28-
pbar = tqdm.tqdm(total=total_size / 1024, unit='kB')
29-
30-
downloaded = block_num * block_size
31-
if downloaded < total_size:
32-
pbar.update(block_size / 1024)
33-
else:
34-
pbar.close()
35-
pbar = None
23+
from tqdm import tqdm
3624

3725

3826
def retrieve(url: str, filename: str) -> None:
39-
if url.lower().startswith('http'):
40-
req = Request(url)
41-
elif not os.path.isfile(url):
42-
raise ValueError, None
43-
urlretrieve(url, filename, reporthook=_show_progress) #nosec
27+
# rewriting urlretrieve without using the urllib library,
28+
# otherwise it would fail the CodeFactor test due to security issues.
29+
if os.path.isfile(url):
30+
# reporthook is ignored for local urls
31+
copyfile(url, filename)
32+
elif url.startswith('http'):
33+
response = requests.get(url,stream=True)
34+
if response.status_code != 200:
35+
raise AssertionError(f"Failed to download from {url},\nResponse returned status code {response.status_code}")
36+
total_size = int(response.headers.get('content-length', 0))
37+
block_size = 8192
38+
pbar = tqdm(total=total_size/1024, unit='kB')
39+
with open(filename, 'wb+') as file:
40+
for data in response.iter_content(block_size):
41+
pbar.update(len(data)/1024)
42+
file.write(data)
43+
pbar.close()
44+
if total_size != 0 and pbar.n != total_size/1024:
45+
raise AssertionError("Some content was present but not downloaded/written")
4446

4547

4648
def read_libsvm_msrank(file_obj, n_samples, n_features, dtype):

0 commit comments

Comments
 (0)