-
Notifications
You must be signed in to change notification settings - Fork 6
/
prepare.py
75 lines (62 loc) · 1.93 KB
/
prepare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
'''
Author: Tong Yu
Copyright (c) University of Strasbourg. All Rights Reserved.
'''
import json
from tqdm import tqdm
import argparse
import requests
import hashlib
import tarfile
import os
# Location of the Cholec80 archive and the chunk size (in bytes, 1 MiB) used
# both for streaming the download and for hashing the archive.
URL = "https://s3.unistra.fr/camma_public/datasets/cholec80/cholec80.tar.gz"
CHUNK_SIZE = 2 ** 20

parser = argparse.ArgumentParser()
# Required: both output paths below are derived from it, and a missing value
# would otherwise surface as an opaque TypeError from os.path.join(None, ...).
parser.add_argument(
    "--data_rootdir",
    required=True,
    help="directory that receives cholec80.tar.gz and the cholec80/ folder",
)
parser.add_argument(
    "--verify_checksum",
    action="store_true",
    help="compute the MD5 checksum of the downloaded archive before extracting",
)
parser.add_argument(
    "--keep_archive",
    action="store_true",
    help="keep the downloaded archive instead of deleting it after extraction",
)
args = parser.parse_args()

# Archive file and extraction directory, both placed under the data root.
outfile = os.path.join(args.data_rootdir, "cholec80.tar.gz")
outdir = os.path.join(args.data_rootdir, "cholec80")
# Download
# Stream the archive to disk in CHUNK_SIZE pieces so it is never held in
# memory as a whole.
print("Downloading archive to {}".format(outfile))
with requests.get(URL, stream=True) as r:
    r.raise_for_status()
    # Content-Length may be absent from the response; in that case the
    # progress bar runs without a known total instead of crashing on
    # float(None).
    content_length = r.headers.get("content-length")
    if content_length is None:
        total_length = None
    else:
        total_length = int(float(content_length) / 10 ** 6)
    # Using the bar as a context manager closes it even if the transfer fails.
    with tqdm(unit="MB", total=total_length) as progress_bar:
        with open(outfile, "wb") as f:
            for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                progress_bar.update(len(chunk) / 10 ** 6)
                f.write(chunk)
# Optional checksum verification
# Hash the downloaded archive with MD5 and compare it against the reference
# digest stored in checksum.txt (read from the current working directory).
if args.verify_checksum:
    print("Verifying checksum")
    m = hashlib.md5()
    with open(outfile, "rb") as f:
        # Read in fixed-size chunks until read() returns the empty bytes
        # sentinel, so the archive is never loaded into memory at once.
        for data in iter(lambda: f.read(CHUNK_SIZE), b""):
            m.update(data)
    chk = m.hexdigest()
    with open("checksum.txt") as f:
        true_chk = f.read()
    print("Checksum: {}".format(chk))
    # Compare against the reference digest; the previous check compared the
    # computed digest with itself and therefore could never fail.
    # Only the first whitespace-separated token is used, so a trailing
    # newline or an `md5sum`-style "<digest>  <name>" line is tolerated.
    # NOTE(review): confirm the exact format of checksum.txt.
    reference_tokens = true_chk.split()
    expected_chk = reference_tokens[0].lower() if reference_tokens else ""
    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    if chk != expected_chk:
        raise RuntimeError(
            "Checksum mismatch for {}: expected {!r}, got {!r}".format(
                outfile, expected_chk, chk
            )
        )
# Extraction
# Unpack the downloaded archive into outdir; mode "r" lets tarfile detect the
# compression transparently.
print("Extracting files to {}".format(outdir))
with tarfile.open(outfile, "r") as t:
    # The archive comes from the network, so prefer the "data" extraction
    # filter where this Python provides it; it refuses members that would be
    # written outside outdir. Older interpreters fall back to the plain call.
    if hasattr(tarfile, "data_filter"):
        t.extractall(outdir, filter="data")
    else:
        t.extractall(outdir)
# Cleanup
# The archive is deleted after extraction unless --keep_archive was given.
if not args.keep_archive:
    os.remove(outfile)
# Config setup
# Record the extraction directory under the "cholec80_dir" key of the project
# config. The path is relative, so it is resolved against the current working
# directory.
config_path = "tf_cholec80/configs/config.json"
with open(config_path, "r") as f:
    config = json.load(f)
config["cholec80_dir"] = outdir
# Serialize before reopening the file for writing, so a serialization error
# cannot leave the config truncated.
json_string = json.dumps(config, indent=2, sort_keys=True)
with open(config_path, "w") as f:
    f.write(json_string)
# Report the file that was actually written; the previous message pointed at
# <cwd>/config.json, which is not where the config lives.
print("All done - config saved to {}".format(os.path.abspath(config_path)))