-
Notifications
You must be signed in to change notification settings - Fork 41
/
Copy pathparse_dupes.py
executable file
·38 lines (30 loc) · 1001 Bytes
/
parse_dupes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Parse the result of the unix fdupes program
fdupes finds duplicated files in a given set of directories.
Usage:
%program <input_file> <min_bytes> <output_file>"""
import sys
import re
# Header that fdupes prints before each group when sizes are shown, e.g.
# "1024 bytes each:".  Anchored at the start of the line (used with .match)
# so that a file name merely *containing* "<n> bytes" is not taken for a
# header.  The "s" is optional because one-byte groups are reported in the
# singular ("1 byte each:").
_SIZE_HEADER = re.compile(r"([0-9]+) bytes?\b")


def filter_dupes(lines, min_bytes):
    """Return the fdupes groups whose file size is at least *min_bytes*.

    *lines* is an iterable of raw lines from an fdupes report that includes
    size headers; surrounding whitespace is stripped from every line.
    Each retained group is returned as its size header, its file names and
    one empty string standing for the blank separator line.  Groups below
    the threshold, and any lines that precede the first size header, are
    dropped.

    The result is a list of lines without trailing newlines.
    """
    kept = []
    in_kept_group = False
    for raw in lines:
        line = raw.strip()
        if not line:
            # A blank line ends the current group; only echo the separator
            # if the group itself was written.
            if in_kept_group:
                kept.append("")
                in_kept_group = False
            continue
        if in_kept_group:
            kept.append(line)
            continue
        header = _SIZE_HEADER.match(line)
        if header and int(header.group(1)) >= min_bytes:
            kept.append(line)
            in_kept_group = True
    return kept


def main(argv):
    """Run the filter for the command line *argv*; return the exit status.

    Expects ``argv[1]`` = input fdupes file, ``argv[2]`` = minimum number of
    bytes, ``argv[3]`` = output file.  On missing or malformed arguments, or
    an unreadable input file, the usage text is printed and 1 is returned so
    that calling scripts can detect the failure.
    """
    try:
        in_path = argv[1]           # Input fdupes file
        min_bytes = int(argv[2])    # Minimum number of bytes to keep
        out_path = argv[3]          # Output filtered fdupes file
    except (IndexError, ValueError):
        print(__doc__)
        return 1

    try:
        with open(in_path) as in_file:
            lines = in_file.readlines()
    except IOError as err:
        sys.stderr.write("%s\n" % err)
        print(__doc__)
        return 1

    with open(out_path, "w") as out_file:
        out_file.writelines(
            line + "\n" for line in filter_dupes(lines, min_bytes)
        )
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))