forked from wesnoth/wesnoth
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupdate_copyrights
executable file
·173 lines (150 loc) · 6.25 KB
/
update_copyrights
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/env python3
# encoding: utf-8
##
# This script checks every sound/music file in the repository and reports files that were added or modified without a matching update to the CSV file that tracks sound and music copyrights.
# This check was used in continuous integration for image files as well prior to September 2024.
##
import argparse
import contextlib
import csv
import hashlib
from operator import itemgetter
import os
from pathlib import Path
from subprocess import check_output
import sys
##
# csv file layout:
# [0] = current git commit date
# [1] = file path, relative to the repository root
# [2] = license name(s)
# [3] = authorship information
# [4] = notes
# [5] = new git commit date, if different from the value in [0]
# [6] = current md5 hash
##
##
# Add new licenses to this list:
# Avoid combined entries like "GNU GPL v2+;CC BY-SA 4.0" unless you mean to
# dual license, i.e. the file may be used under either GNU GPL v2+ or CC BY-SA 4.0.
# Requiring both GNU GPL v2+ and CC BY-SA 4.0 at once (e.g. a GNU GPL v2+ file
# with CC BY-SA 4.0 modifications) isn't legally possible.
##
# Accepted values for the license column ([2]) of the CSV file. The check
# further down is an exact match of the whole field against this tuple, so a
# combined value such as "A;B" is only accepted if that exact string is listed.
known_licenses = (
"CC BY-SA 4.0",
"CC0",
"GNU GPL v2+",
)
def do_git(file):
    """Return the date of the most recent git commit that touched ``file``.

    Runs ``git log -1`` in the current working directory and returns the
    date it prints, formatted as ``YYYY/MM/DD``, with the trailing newline
    removed. The result is an empty string when git prints nothing for the
    path (presumably when the path has no commits yet).

    Raises ``subprocess.CalledProcessError`` if git exits with a non-zero
    status.
    """
    # "--" ends option parsing, so a path that starts with "-" or that also
    # happens to name a revision is always treated as a path.
    output = check_output(["git", "log", "-1", "--format=%ad", "--date=format:%Y/%m/%d", "--", file])
    return str(output, 'UTF-8').rstrip('\n')
def do_hash(file):
    """Return the MD5 digest of ``file`` as a hexadecimal string.

    The file is opened in binary mode and consumed in 65536-byte pieces, so
    it is never held in memory all at once.
    """
    digest = hashlib.md5()
    with open(file, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns b"",
        # which is what read() yields at end of file.
        for chunk in iter(lambda: stream.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
##
# program logic start
##
args = argparse.ArgumentParser()
args.add_argument("--repo", default=".", help="The directory of the Wesnoth repository to run this script against.")
args.add_argument("--output", default="copyrights.csv", help="The file to write the results of this script to.")
args.add_argument("--input", default="copyrights.csv", help="The file to read the existing copyright data from.")
options = args.parse_args()

# From here on every relative path (CSV entries, --input, --output) is
# resolved against the repository root.
os.chdir(options.repo)

# Rows of the input CSV whose file still exists, keyed by file path
csv_data = {}

# Too few fields
missing_fields = []
# Too many fields, possibly due to an unquoted comma
extra_fields = []
# New files
added = []
# Changed files
changed = []
# Already mentioned in the CSV file, but lacking something in either the license or author fields
incomplete = []
# Already mentioned in the CSV file, but have something in the needs update field
update = []
unchanged = []
# Mentioned in the CSV file, but no longer present on disk
removed = []
# Sanity-check for known licenses
unknown_licenses = []

# newline="" lets the csv module do its own line-ending handling, as its
# documentation requires for files passed to csv.reader.
with open(options.input, encoding="utf-8", newline="") as csvfile:
    for row in csv.reader(csvfile):
        # A blank line is parsed as an empty row; there is nothing to track for it.
        if not row:
            continue
        # Skip the header row
        if row[0] == "Date":
            continue

        file = row[1]
        if not os.path.exists(file):
            removed.append(file)
            continue

        csv_data[file] = row

# Kept from the original flow: the previous output file is removed before the
# new one is generated. An empty --output (meaning stdout) is ignored here too.
with contextlib.suppress(FileNotFoundError):
    os.remove(options.output)

# The working directory already is the repository root, so walk "." instead of
# options.repo: a relative --repo would otherwise be resolved a second time,
# and an absolute one would produce absolute paths that never match the
# repository-relative paths stored in the CSV.
for root, _, files in os.walk("."):
    for filename in files:
        if Path(filename).suffix not in (".wav", ".ogg"):
            continue

        file_path = os.path.normpath(os.path.join(root, filename))
        if os.path.sep != '/':
            # Always use slashes for the file path irrespective of OS used to run the update
            file_path = file_path.replace(os.path.sep, '/')

        file_hash = do_hash(file_path)
        row = csv_data.get(file_path)

        # The order of these checks decides which single category a file is reported in.
        if row is None:
            added.append(["", file_path, "", "", "", do_git(file_path), file_hash])
        elif len(row) < 7:
            missing_fields.append(row)
        elif len(row) > 7:
            extra_fields.append(row)
        elif row[5] != "":
            update.append(row)
        elif row[6] != file_hash:
            # Content changed: record the new commit date and hash so the row
            # shows up with a non-empty "Needs Update" field.
            row[5] = do_git(file_path)
            row[6] = file_hash
            changed.append(row)
        elif row[2].strip() == "" or row[3].strip() == "":
            incomplete.append(row)
        elif row[2] not in known_licenses:
            unknown_licenses.append(row[2])
            incomplete.append(row)
        else:
            unchanged.append(row)

final_output = missing_fields + extra_fields + added + changed + incomplete + update + unchanged
final_output.sort(key=itemgetter(1))

if options.output != "":
    # newline="" stops the text layer from translating the "\n" line
    # terminator, so the file gets the same line endings on every OS.
    with open(options.output, 'w', encoding="utf-8", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["Date", "File", "License", "Author - Real Name(other name);Real Name(other name);etc", "Notes", "Needs Update", "MD5"])
        writer.writerows(final_output)
else:
    # An empty --output sends the rows (without the header) to stdout
    writer = csv.writer(sys.stdout, lineterminator="\n")
    writer.writerows(final_output)

any_check_failed = False

if removed:
    any_check_failed = True
    print("There are {} removed files".format(len(removed)))
    print(",".join(removed))

count_missing_fields = len(missing_fields)
count_extra_fields = len(extra_fields)
count_added = len(added)
count_changed = len(changed)
count_incomplete = len(incomplete)
count_update = len(update)

# The full report is printed as soon as any one of the categories is non-empty
if missing_fields or extra_fields or added or changed or incomplete or update:
    any_check_failed = True
    print("\nThere are {} rows with too few fields:\n{}".format(count_missing_fields, "\n".join(",".join(a) for a in missing_fields)))
    print("\nThere are {} rows with too many fields, possibly due to an unquoted comma:\n{}".format(count_extra_fields, "\n".join(",".join(a) for a in extra_fields)))
    print("\nThere are {} new files:\n{}".format(count_added, "\n".join(a[1] for a in added)))
    print("\nThere are {} changed files:\n{}".format(count_changed, "\n".join(a[1] for a in changed)))
    print("\nThere are {} files that lack license or author information:\n{}".format(count_incomplete, "\n".join(a[1] for a in incomplete)))
    print("\nThere are {} files that need updated information:\n{}".format(count_update, "\n".join(a[1] for a in update)))

if unknown_licenses:
    any_check_failed = True
    print("Unknown licenses:")
    print(" " + "\n ".join(unknown_licenses))

# A non-zero exit status signals that at least one check failed
if any_check_failed:
    sys.exit(1)