Check for and fix copyrights.csv rows with wrong numbers of fields

This should prevent the accidental use of unquoted commas inside fields. It would have caught the earlier problem where str.join() did not quote fields that contain commas. For now, though, it found a different issue: three rows added in commit c631345314 had duplicated MD5 fields.
2024-02-05 22:08:55 -05:00 · 2024-02-05 22:08:55 -05:00 · 1f1a68e94e
commit 1f1a68e94e
parent 5a048ad303
2 changed files with 19 additions and 5 deletions
--- a/copyrights.csv
+++ b/copyrights.csv
@@ -3066,9 +3066,9 @@ Date,File,License,Author - Real Name(other name);Real Name(other name);etc,Notes
 2015/02/20,data/core/images/help/l10n/es/hpxp.png,GNU GPL v2+,unknown,,,e8dbef425934eff5c9f734aabe960214
 2015/02/20,data/core/images/help/l10n/es/recruit.png,GNU GPL v2+,unknown,,,d7f9b5862bc9465c4e1bc2513a00263f
 2015/02/20,data/core/images/help/l10n/es/tooltip.png,GNU GPL v2+,unknown,,,f3721f44c30dfdb6f9c6086be30835c5
-2023/12/16,data/core/images/help/l10n/fi/hpxp.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,bab0586e45fcad729ee1b2b76cb5177c,bab0586e45fcad729ee1b2b76cb5177c
+2023/12/16,data/core/images/help/l10n/fi/hpxp.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,bab0586e45fcad729ee1b2b76cb5177c
-2023/12/16,data/core/images/help/l10n/fi/recruit.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,1b4e518b8a1829ecbd867ab71e6c54fb,1b4e518b8a1829ecbd867ab71e6c54fb
+2023/12/16,data/core/images/help/l10n/fi/recruit.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,1b4e518b8a1829ecbd867ab71e6c54fb
-2023/12/16,data/core/images/help/l10n/fi/tooltip.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,5c7299de7c5f8cb200a921b521f2cc1b,5c7299de7c5f8cb200a921b521f2cc1b
+2023/12/16,data/core/images/help/l10n/fi/tooltip.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,5c7299de7c5f8cb200a921b521f2cc1b
 2015/02/20,data/core/images/help/l10n/fr/hpxp.png,GNU GPL v2+,unknown,,,3a123d59308bbedb3d6ee617ff77f8d1
 2015/02/20,data/core/images/help/l10n/fr/recruit.png,GNU GPL v2+,unknown,,,137b03b4cd2569e00d9f7e70d87d6be5
 2015/02/20,data/core/images/help/l10n/fr/tooltip.png,GNU GPL v2+,unknown,,,0dd5ff8f1536ce9dee90c6a7f26a0862
--- a/18
+++ b/18
@@ -66,7 +66,13 @@ with contextlib.suppress(FileNotFoundError):
    os.remove(options.output)
 csv_data = {}
 # Too few fields
 missing_fields = []
 # Too many fields, possibly due to an unquoted comma
 extra_fields = []
 # New images
 added = []
 # Changed images
 changed = []
 # Already mentioned in the CSV file, but lacking something in either the license or author fields
 incomplete = []
@@ -106,6 +112,10 @@ for root, _, files in os.walk(options.repo):
            if not file in csv_data:
                added.append(["", file, "", "", "", do_git(file), hash])
            elif len(csv_data[file]) < 7:
                missing_fields.append(csv_data[file])
            elif len(csv_data[file]) > 7:
                extra_fields.append(csv_data[file])
            elif csv_data[file][5] != "":
                update.append(csv_data[file])
            elif csv_data[file][6] != hash:
@@ -120,13 +130,15 @@ for root, _, files in os.walk(options.repo):
            else:
                unchanged.append(csv_data[file])
 missing_fields.sort(key=itemgetter(1))
 extra_fields.sort(key=itemgetter(1))
 added.sort(key=itemgetter(1))
 changed.sort(key=itemgetter(1))
 incomplete.sort(key=itemgetter(1))
 update.sort(key=itemgetter(1))
 unchanged.sort(key=itemgetter(1))
-final_output = added + changed + incomplete + update + unchanged
+final_output = missing_fields + extra_fields + added + changed + incomplete + update + unchanged
 if options.output != "":
    with open(options.output, 'w') as f:
@@ -142,8 +154,10 @@ if len(removed) > 0:
    any_check_failed = True
    print("There are "+str(len(removed))+" removed images")
-if len(added) > 0 or len(changed) > 0 or len(incomplete) > 0 or len(update) > 0:
+if len(missing_fields) > 0 or len(extra_fields) > 0 or len(added) > 0 or len(changed) > 0 or len(incomplete) > 0 or len(update) > 0:
    any_check_failed = True
    print("There are "+str(len(missing_fields))+" rows with too few fields")
    print("There are "+str(len(extra_fields))+" rows with too many fields, possibly due to an unquoted comma")
    print("There are "+str(len(added))+" new images")
    print("There are "+str(len(changed))+" changed images")
    print("There are "+str(len(incomplete))+" images that lack license or author information")