Check for and fix copyrights.csv rows with wrong numbers of fields

This check should catch accidental use of unquoted commas in fields.
It would have caught the previous issue of str.join() not quoting fields
that contain commas.  For now, though, it found a different issue: three
rows added in commit c631345314 had duplicated MD5 fields.
This commit is contained in:
P. J. McDermott 2024-02-05 22:08:55 -05:00 committed by Steve Cotton
parent 5a048ad303
commit 1f1a68e94e
2 changed files with 19 additions and 5 deletions

View file

@ -3066,9 +3066,9 @@ Date,File,License,Author - Real Name(other name);Real Name(other name);etc,Notes
2015/02/20,data/core/images/help/l10n/es/hpxp.png,GNU GPL v2+,unknown,,,e8dbef425934eff5c9f734aabe960214 2015/02/20,data/core/images/help/l10n/es/hpxp.png,GNU GPL v2+,unknown,,,e8dbef425934eff5c9f734aabe960214
2015/02/20,data/core/images/help/l10n/es/recruit.png,GNU GPL v2+,unknown,,,d7f9b5862bc9465c4e1bc2513a00263f 2015/02/20,data/core/images/help/l10n/es/recruit.png,GNU GPL v2+,unknown,,,d7f9b5862bc9465c4e1bc2513a00263f
2015/02/20,data/core/images/help/l10n/es/tooltip.png,GNU GPL v2+,unknown,,,f3721f44c30dfdb6f9c6086be30835c5 2015/02/20,data/core/images/help/l10n/es/tooltip.png,GNU GPL v2+,unknown,,,f3721f44c30dfdb6f9c6086be30835c5
2023/12/16,data/core/images/help/l10n/fi/hpxp.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,bab0586e45fcad729ee1b2b76cb5177c,bab0586e45fcad729ee1b2b76cb5177c 2023/12/16,data/core/images/help/l10n/fi/hpxp.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,bab0586e45fcad729ee1b2b76cb5177c
2023/12/16,data/core/images/help/l10n/fi/recruit.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,1b4e518b8a1829ecbd867ab71e6c54fb,1b4e518b8a1829ecbd867ab71e6c54fb 2023/12/16,data/core/images/help/l10n/fi/recruit.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,1b4e518b8a1829ecbd867ab71e6c54fb
2023/12/16,data/core/images/help/l10n/fi/tooltip.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,5c7299de7c5f8cb200a921b521f2cc1b,5c7299de7c5f8cb200a921b521f2cc1b 2023/12/16,data/core/images/help/l10n/fi/tooltip.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,5c7299de7c5f8cb200a921b521f2cc1b
2015/02/20,data/core/images/help/l10n/fr/hpxp.png,GNU GPL v2+,unknown,,,3a123d59308bbedb3d6ee617ff77f8d1 2015/02/20,data/core/images/help/l10n/fr/hpxp.png,GNU GPL v2+,unknown,,,3a123d59308bbedb3d6ee617ff77f8d1
2015/02/20,data/core/images/help/l10n/fr/recruit.png,GNU GPL v2+,unknown,,,137b03b4cd2569e00d9f7e70d87d6be5 2015/02/20,data/core/images/help/l10n/fr/recruit.png,GNU GPL v2+,unknown,,,137b03b4cd2569e00d9f7e70d87d6be5
2015/02/20,data/core/images/help/l10n/fr/tooltip.png,GNU GPL v2+,unknown,,,0dd5ff8f1536ce9dee90c6a7f26a0862 2015/02/20,data/core/images/help/l10n/fr/tooltip.png,GNU GPL v2+,unknown,,,0dd5ff8f1536ce9dee90c6a7f26a0862

Can't render this file because it is too large.

View file

@ -66,7 +66,13 @@ with contextlib.suppress(FileNotFoundError):
os.remove(options.output) os.remove(options.output)
csv_data = {} csv_data = {}
# Too few fields
missing_fields = []
# Too many fields, possibly due to an unquoted comma
extra_fields = []
# New images
added = [] added = []
# Changed images
changed = [] changed = []
# Already mentioned in the CSV file, but lacking something in either the license or author fields # Already mentioned in the CSV file, but lacking something in either the license or author fields
incomplete = [] incomplete = []
@ -106,6 +112,10 @@ for root, _, files in os.walk(options.repo):
if not file in csv_data: if not file in csv_data:
added.append(["", file, "", "", "", do_git(file), hash]) added.append(["", file, "", "", "", do_git(file), hash])
elif len(csv_data[file]) < 7:
missing_fields.append(csv_data[file])
elif len(csv_data[file]) > 7:
extra_fields.append(csv_data[file])
elif csv_data[file][5] != "": elif csv_data[file][5] != "":
update.append(csv_data[file]) update.append(csv_data[file])
elif csv_data[file][6] != hash: elif csv_data[file][6] != hash:
@ -120,13 +130,15 @@ for root, _, files in os.walk(options.repo):
else: else:
unchanged.append(csv_data[file]) unchanged.append(csv_data[file])
missing_fields.sort(key=itemgetter(1))
extra_fields.sort(key=itemgetter(1))
added.sort(key=itemgetter(1)) added.sort(key=itemgetter(1))
changed.sort(key=itemgetter(1)) changed.sort(key=itemgetter(1))
incomplete.sort(key=itemgetter(1)) incomplete.sort(key=itemgetter(1))
update.sort(key=itemgetter(1)) update.sort(key=itemgetter(1))
unchanged.sort(key=itemgetter(1)) unchanged.sort(key=itemgetter(1))
final_output = added + changed + incomplete + update + unchanged final_output = missing_fields + extra_fields + added + changed + incomplete + update + unchanged
if options.output != "": if options.output != "":
with open(options.output, 'w') as f: with open(options.output, 'w') as f:
@ -142,8 +154,10 @@ if len(removed) > 0:
any_check_failed = True any_check_failed = True
print("There are "+str(len(removed))+" removed images") print("There are "+str(len(removed))+" removed images")
if len(added) > 0 or len(changed) > 0 or len(incomplete) > 0 or len(update) > 0: if len(missing_fields) > 0 or len(extra_fields) > 0 or len(added) > 0 or len(changed) > 0 or len(incomplete) > 0 or len(update) > 0:
any_check_failed = True any_check_failed = True
print("There are "+str(len(missing_fields))+" rows with too few fields")
print("There are "+str(len(extra_fields))+" rows with too many fields, possibly due to an unquoted comma")
print("There are "+str(len(added))+" new images") print("There are "+str(len(added))+" new images")
print("There are "+str(len(changed))+" changed images") print("There are "+str(len(changed))+" changed images")
print("There are "+str(len(incomplete))+" images that lack license or author information") print("There are "+str(len(incomplete))+" images that lack license or author information")