Update i18n scripts to run on Python3

The `pology` library runs on Python2 and development appears stalled. It is also not available on pip. The `babel` library, BSD-licensed, provides, amongst many other things, a replacement for .POT / .PO manipulation. The `poediff` tool that we used to detect spurious i18n change is replaced with a Python script that does a simpler but good enough job (it is also much, much faster). These replacements let the i18n scripts run on Python3 entirely. Makes D506 redundant. Comments by: Itms Reviewed By: Gallaecio Refs #5694 Differential Revision: https://code.wildfiregames.com/D2757 This was SVN commit r24313.
2020-12-02 10:05:27 +00:00
parent 783e77cc8d
commit 1b150b303f
15 changed files with 475 additions and 240 deletions
@@ -0,0 +1,11 @@
+# i18n helper
+
+This is a collection of scripts to automate 0 A.D.'s i18n process.
+
+See `maintenanceTasks.sh` for the full process.
+
+### Run tests
+```sh
+pip3 install pytest
+python3 -m pytest
+```
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2020 Wildfire Games.
+# This file is part of 0 A.D.
+#
+# 0 A.D. is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# 0 A.D. is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
+
+import io
+import os
+import subprocess
+from typing import List
+
+from i18n_helper import projectRootDirectory
+
+def get_diff():
+    """Run `svn diff binaries` from the project root and wrap its output.
+
+    Changes the current working directory to projectRootDirectory as a side
+    effect. Returns an io.StringIO over the decoded diff text, or None (after
+    printing the svn error output) when svn exits with a non-zero status.
+    """
+    os.chdir(projectRootDirectory)
+
+    svn_result = subprocess.run(["svn", "diff", "binaries"], capture_output=True)
+    if svn_result.returncode == 0:
+        return io.StringIO(svn_result.stdout.decode())
+
+    print(f"Error running svn diff: {svn_result.stderr.decode()}. Exiting.")
+    return None
+
+def check_diff(diff : io.StringIO, verbose = False) -> List[str]:
+    """Return the .po/.pot files of an svn diff whose changes are only noise.
+
+    A file is considered really changed as soon as one of its added or removed
+    lines is neither empty, nor a comment, nor one of the date / translator
+    header lines. The algorithm isn't extremely clever, but it is quite fast.
+
+    diff: file-like object positioned at the start of the svn diff output.
+    verbose: accepted for symmetry with the other helpers; not used here.
+
+    Returns the paths (as written after "Index: ") of the noise-only files.
+    The list is built from a set, so its order is not specified.
+    """
+    noise_markers = ("POT-Creation-Date:", "PO-Revision-Date", "Last-Translator")
+    keep = set()
+    files = set()
+
+    curfile = None
+    # Iterate with readline so the header lines can be consumed from the same stream.
+    lines = iter(diff.readline, "")
+    for line in lines:
+        if line.startswith("Index: binaries"):
+            if line.endswith((".pot\n", ".po\n")):
+                curfile = line[len("Index: "):-1]
+                files.add(curfile)
+            else:
+                # Not a translation file: ignore its changes.
+                curfile = None
+            # skip patch header (the four lines following "Index:")
+            for _ in range(4):
+                next(lines, None)
+            continue
+        if line[0] not in "-+":
+            continue
+        # Empty or comment-only change. A bare "+"/"-" without a trailing
+        # newline (truncated diff) is treated as empty instead of raising.
+        if len(line) < 2 or line[1] in "\n#":
+            continue
+        if any(marker in line for marker in noise_markers):
+            continue
+        # We've hit a real line
+        if curfile:
+            keep.add(curfile)
+            curfile = None
+
+    return list(files.difference(keep))
+
+
+def revert_files(files: List[str], verbose = False):
+    """Revert the given working-copy paths with a single `svn revert` call.
+
+    files: paths to revert; nothing is run when the list is empty.
+    verbose: when true, print one "Reverted ..." line per requested path.
+
+    A warning with svn's error output is printed when svn exits with a
+    non-zero status; no exception is raised in that case.
+    """
+    if not files:
+        # `svn revert` without any target fails, which would print a
+        # misleading warning although there is simply nothing to do.
+        return
+    revert_process = subprocess.run(["svn", "revert", *files], capture_output=True)
+    if revert_process.returncode != 0:
+        print(f"Warning: Some files could not be reverted. Error: {revert_process.stderr.decode()}")
+    if verbose:
+        for file in files:
+            print(f"Reverted {file}")
+
+
+def add_untracked(verbose = False):
+    """Run `svn add` on every untracked .po / .pot file reported under binaries.
+
+    Prints an error and returns early when `svn st` writes anything to
+    stderr. With verbose set, prints one "Added ..." line per processed file.
+    """
+    status = subprocess.run(["svn", "st", "binaries"], capture_output=True)
+    if status.stderr:
+        print(f"Error running svn st: {status.stderr.decode('utf-8')}. Exiting.")
+        return
+
+    for entry in status.stdout.decode('utf-8').split('\n'):
+        # Only status lines starting with "?" are candidates.
+        is_untracked = entry.startswith("?")
+        # Ignore non PO files. This is important so that the translator credits
+        # correctly be updated, note however the script assumes a pristine SVN otherwise.
+        is_catalog = entry.endswith((".po", ".pot"))
+        if not (is_untracked and is_catalog):
+            continue
+        path = entry[1:].strip()
+        add_result = subprocess.run(["svn", "add", path, "--parents"], capture_output=True)
+        if add_result.stderr:
+            print(f"Warning: file {path} could not be added.")
+        if verbose:
+            print(f"Added {path}")
+
+
+if __name__ == '__main__':
+    # Script entry point: drop noise-only translation changes, then add new files.
+    import argparse
+    import sys
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--verbose", help="Print reverted files.", action='store_true')
+    args = parser.parse_args()
+    diff = get_diff()
+    if diff is None:
+        # get_diff() has already printed the svn error; stop here instead of
+        # crashing in check_diff() on a None stream.
+        sys.exit(1)
+    need_revert = check_diff(diff, args.verbose)
+    revert_files(need_revert, args.verbose)
+    add_untracked(args.verbose)
@@ -1,7 +1,6 @@
-#!/usr/bin/env python2
-# -*- coding:utf-8 -*-
+#!/usr/bin/env python3
 #
-# Copyright (C) 2014 Wildfire Games.
+# Copyright (C) 2020 Wildfire Games.
 # This file is part of 0 A.D.
 #
 # 0 A.D. is free software: you can redistribute it and/or modify
@@ -17,84 +16,75 @@
 # You should have received a copy of the GNU General Public License
 # along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.

-from __future__ import absolute_import, division, print_function, unicode_literals
+import os, re, sys
+import multiprocessing

-import codecs, os, re, sys
+from i18n_helper import l10nToolsDirectory, projectRootDirectory
+from i18n_helper.catalog import Catalog
+from i18n_helper.globber import getCatalogs

-from pology.catalog import Catalog
-from pology.message import Message
-
-
-l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
-projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir))
 l10nFolderName = "l10n"

-
 def checkTranslationsForSpam(inputFilePath):
-
-    print(u"Checking", inputFilePath)
-    templateCatalog = Catalog(inputFilePath)
+    """Report URLs found in translations that are absent from the template.
+
+    inputFilePath: path of the .pot template; the .po catalogs are looked up
+    next to it through getCatalogs(), filtered by the language codes given on
+    the command line (sys.argv[1:]) when there are any.
+
+    Findings are printed; nothing is returned.
+    """
+    print(f"Checking {inputFilePath}")
+    # The template path is required: readFrom() has no default for it.
+    templateCatalog = Catalog.readFrom(inputFilePath)

    # If language codes were specified on the command line, filter by those.
    filters = sys.argv[1:]

    # Load existing translation catalogs.
-    existingTranslationCatalogs = []
-    l10nFolderPath = os.path.dirname(inputFilePath)
+    existingTranslationCatalogs = getCatalogs(inputFilePath, filters)

-    # .pot is one letter longer than .po, but the dot that separates the locale
-    # code from the rest of the filename in .po files makes up for that.
-    charactersToSkip = len(os.path.basename(inputFilePath))
+    urlPattern = re.compile(r"https?://(?:[a-z0-9-_$@./&+]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", re.IGNORECASE)

-    for filename in os.listdir(l10nFolderPath):
-        if len(filename) > 3 and filename[-3:] == ".po" and filename[:4] != "long":
-            if not filters or filename[:-charactersToSkip] in filters:
-                if os.path.basename(inputFilePath)[:-4] == filename.split('.')[-2]:
-                    existingTranslationCatalogs.append([filename[:-charactersToSkip], os.path.join(l10nFolderPath, filename)])
+    # Check that there are no spam URLs.
+    # Loop through all messages in the .POT catalog for URLs.
+    # For each, check for the corresponding key in the .PO catalogs.
+    # If found, check that URLS in the .PO keys are the same as those in the .POT key.
+    for templateMessage in templateCatalog:
+        templateUrls = set(urlPattern.findall(
+            templateMessage.id[0] if templateMessage.pluralizable else templateMessage.id
+        ))
+        # As a sanity check, verify that the template message is coherent
+        if templateMessage.pluralizable:
+            pluralUrls = set(urlPattern.findall(templateMessage.id[1]))
+            if pluralUrls.difference(templateUrls):
+                print(f"{inputFilePath} - Different URLs in singular and plural source strings "
+                      f"for '{templateMessage}' in '{inputFilePath}'")

-    urlPattern = re.compile(u"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
-
-    # Check the URLs in translations against the URLs in the translation template.
-    for languageCode, pofile in existingTranslationCatalogs:
-        translationCatalog = Catalog(pofile)
-        for templateMessage in templateCatalog:
-            translationMessage = translationCatalog.select_by_key(templateMessage.msgctxt, templateMessage.msgid)
-            if translationMessage:
-                templateSingularString = templateMessage.msgid
-                templateUrls = urlPattern.findall(templateMessage.msgid)
-                # Assert that the same URL is used in both the plural and singular forms.
-                if templateMessage.msgid_plural and len(templateMessage.msgstr) > 1:
-                    pluralUrls = urlPattern.findall(templateMessage.msgstr[0])
-                    for url in pluralUrls:
-                        if url not in templateUrls:
-                            print(u"Different URLs in singular and plural source strings for ‘{}’ in ‘{}’".format(
-                                templateMessage.msgid,
-                                inputFilePath))
-                for translationString in translationMessage[0].msgstr:
-                    translationUrls = urlPattern.findall(translationString)
-                    for translationUrl in translationUrls:
-                        if translationUrl not in templateUrls:
-                            print(u"{}: Found the “{}” URL in the translation, which does not match any of the URLs in the translation template: {}".format(
-                                    languageCode,
-                                    translationUrl,
-                                    u", ".join(templateUrls)))
+        for translationCatalog in existingTranslationCatalogs:
+            translationMessage = translationCatalog.get(templateMessage.id, templateMessage.context)
+            if not translationMessage:
+                continue

+            # Scan every plural form, not only the first one, so that a URL
+            # hidden in a later form is reported as well.
+            translationStrings = (
+                translationMessage.string if translationMessage.pluralizable
+                else [translationMessage.string]
+            )
+            translationUrls = set()
+            for translationString in translationStrings:
+                translationUrls.update(urlPattern.findall(translationString))
+            unknown_urls = translationUrls.difference(templateUrls)
+            if unknown_urls:
+                print(f'{inputFilePath} - {translationCatalog.locale}: '
+                      f'Found unknown URL(s) {", ".join(unknown_urls)} in the translation '
+                      f'which do not match any of the URLs in the template: {", ".join(templateUrls)}')
+    print(f"Done checking {inputFilePath}")

 def main():
-
-    print(u"\n    WARNING: Remember to regenerate the POT files with “updateTemplates.py” before you run this script.\n    POT files are not in the repository.\n")
-
+    """Check every .pot template found in an "l10n" folder under the project root.
+
+    One process is started per template; a help message is printed when no
+    template is found at all.
+    """
+    print("\n\tWARNING: Remember to regenerate the POT files with “updateTemplates.py” "
+          "before you run this script.\n\tPOT files are not in the repository.\n")
    foundPots = 0
    for root, folders, filenames in os.walk(projectRootDirectory):
-        root = root.decode("utf-8")
        for filename in filenames:
            if len(filename) > 4 and filename[-4:] == ".pot" and os.path.basename(root) == "l10n":
                foundPots += 1
-                checkTranslationsForSpam(os.path.join(root, filename))
+                # Each template is checked in its own process; the processes
+                # are started here and not joined explicitly.
+                multiprocessing.Process(
+                    target=checkTranslationsForSpam,
+                    args=(os.path.join(root, filename), )
+                ).start()
    if foundPots == 0:
-        print(u"This script did not work because no ‘.pot’ files were found.")
-        print(u"Please, run ‘updateTemplates.py’ to generate the ‘.pot’ files, and run ‘pullTranslations.py’ to pull the latest translations from Transifex.")
-        print(u"Then you can run this script to generate ‘.po’ files with the longest strings.")
+        print(
+            "This script did not work because no '.pot' files were found. "
+            "Please run 'updateTemplates.py' to generate the '.pot' files, "
+            "and run 'pullTranslations.py' to pull the latest translations from Transifex. "
+            "Then you can run this script to check for spam in translations.")


 if __name__ == "__main__":
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
-# -*- coding:utf-8 -*-
 #
-# Copyright (C) 2019 Wildfire Games.
+# Copyright (C) 2020 Wildfire Games.
 # This file is part of 0 A.D.
 #
 # 0 A.D. is free software: you can redistribute it and/or modify
@@ -1,5 +1,3 @@
-# -*- coding:utf-8 -*-
-#
 # Copyright (C) 2016 Wildfire Games.
 # All rights reserved.
 #
@@ -20,8 +18,6 @@
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import codecs, re, os, sys
 import json as jsonParser

@@ -39,7 +35,7 @@ def pathmatch(mask, path):
    """
    s = re.split(r"([*][*]?)", mask)
    p = ""
-    for i in xrange(len(s)):
+    for i in range(len(s)):
        if i % 2 != 0:
            p = p + "[^/]+"
            if len(s[i]) == 2:
@@ -327,7 +323,7 @@ class json(Extractor):
    def extractFromFile(self, filepath):
        with codecs.open(filepath, "r", 'utf-8') as fileObject:
            for message, breadcrumbs in self.extractFromString(fileObject.read()):
-                yield message, None, self.context, self.formatBreadcrumbs(breadcrumbs), -1, self.comments
+                yield message, None, self.context, self.formatBreadcrumbs(breadcrumbs), None, self.comments

    def extractFromString(self, string):
        self.breadcrumbs = []
@@ -360,7 +356,7 @@ class json(Extractor):
        for keyword in dictionary:
            self.breadcrumbs.append(keyword)
            if keyword in self.keywords:
-                if isinstance(dictionary[keyword], unicode):
+                if isinstance(dictionary[keyword], str):
                    yield dictionary[keyword], self.breadcrumbs
                elif isinstance(dictionary[keyword], list):
                    for message, breadcrumbs in self.extractList(dictionary[keyword]):
@@ -380,7 +376,7 @@ class json(Extractor):
        index = 0
        for listItem in itemsList:
            self.breadcrumbs.append(index)
-            if isinstance(listItem, unicode):
+            if isinstance(listItem, str):
                yield listItem, self.breadcrumbs
            del self.breadcrumbs[-1]
            index += 1
@@ -388,7 +384,7 @@ class json(Extractor):
    def extractDictionary(self, dictionary):
        for keyword in dictionary:
            self.breadcrumbs.append(keyword)
-            if isinstance(dictionary[keyword], unicode):
+            if isinstance(dictionary[keyword], str):
                yield dictionary[keyword], self.breadcrumbs
            del self.breadcrumbs[-1]

@@ -429,7 +425,7 @@ class xml(Extractor):
                                attributes = [element.get(attribute) for attribute in self.keywords[keyword]["locationAttributes"] if attribute in element.attrib]
                                breadcrumb = "({attributes})".format(attributes=", ".join(attributes))
                            if "context" in element.attrib:
-                                context = unicode(element.get("context"))
+                                context = str(element.get("context"))
                            elif "tagAsContext" in self.keywords[keyword]:
                                context = keyword
                            elif "customContext" in self.keywords[keyword]:
@@ -442,9 +438,9 @@ class xml(Extractor):
                                for splitText in element.text.split():
                                    # split on whitespace is used for token lists, there, a leading '-' means the token has to be removed, so it's not to be processed here either
                                    if splitText[0] != "-":
-                                        yield unicode(splitText), None, context, breadcrumb, position, comments
+                                        yield str(splitText), None, context, breadcrumb, position, comments
                            else:
-                                yield unicode(element.text), None, context, breadcrumb, position, comments
+                                yield str(element.text), None, context, breadcrumb, position, comments


 # Hack from http://stackoverflow.com/a/2819788
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-#
 # Copyright (C) 2008-2011 Edgewall Software
 # Copyright (C) 2013-2014 Wildfire Games.
 # All rights reserved.
@@ -31,8 +29,6 @@
 extractor.
 """

-from __future__ import absolute_import, division, print_function, unicode_literals
-
 from operator import itemgetter
 import re

@@ -128,7 +124,7 @@ def unquote_string(string):
                escaped_value = escaped.group()
                if len(escaped_value) == 4:
                    try:
-                        add(unichr(int(escaped_value, 16)))
+                        add(chr(int(escaped_value, 16)))
                    except ValueError:
                        pass
                    else:
@@ -1,7 +1,6 @@
-#!/usr/bin/env python2
-# -*- coding:utf-8 -*-
+#!/usr/bin/env python3
 #
-# Copyright (C) 2014 Wildfire Games.
+# Copyright (C) 2020 Wildfire Games.
 # This file is part of 0 A.D.
 #
 # 0 A.D. is free software: you can redistribute it and/or modify
@@ -17,67 +16,50 @@
 # You should have received a copy of the GNU General Public License
 # along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.

-from __future__ import absolute_import, division, print_function, unicode_literals
+import os, sys
+import multiprocessing

-import codecs, json, os, sys, textwrap
+from i18n_helper import l10nToolsDirectory, projectRootDirectory
+from i18n_helper.catalog import Catalog
+from i18n_helper.globber import getCatalogs

-from pology.catalog import Catalog
-from pology.message import Message
-
-
-l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
-projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir))
 l10nFolderName = "l10n"


 def generateLongStringTranslationFromPotIntoPo(inputFilePath, outputFilePath):
-
-    templateCatalog = Catalog(inputFilePath)
-    longStringCatalog = Catalog(outputFilePath, create=True, truncate=True)
+    templateCatalog = Catalog.readFrom(inputFilePath)
+    longStringCatalog = Catalog(locale="en") # Pretend we write English to get plurals.

    # Fill catalog with English strings.
    for message in templateCatalog:
-        longStringCatalog.add(message)
+        longStringCatalog.add(id=message.id, string=message.id, context=message.context)

    # If language codes were specified on the command line, filter by those.
    filters = sys.argv[1:]

    # Load existing translation catalogs.
-    existingTranslationCatalogs = []
-    l10nFolderPath = os.path.dirname(inputFilePath)
-
-    # .pot is one letter longer than .po, but the dot that separates the locale
-    # code from the rest of the filename in .po files makes up for that.
-    charactersToSkip = len(os.path.basename(inputFilePath))
-
-    for filename in os.listdir(l10nFolderPath):
-        if len(filename) > 3 and filename[-3:] == ".po" and filename[:4] != "long":
-            if not filters or filename[:-charactersToSkip] in filters:
-                if os.path.basename(inputFilePath)[:-4] == filename.split('.')[-2]:
-                    existingTranslationCatalogs.append(os.path.join(l10nFolderPath, filename))
+    existingTranslationCatalogs = getCatalogs(inputFilePath, filters)

    # If any existing translation has more characters than the average expansion, use that instead.
-    for pofile in existingTranslationCatalogs:
-        print(u"Merging", pofile)
-        translationCatalog = Catalog(pofile)
+    for translationCatalog in existingTranslationCatalogs:
        for longStringCatalogMessage in longStringCatalog:
-            translationMessage = translationCatalog.select_by_key(longStringCatalogMessage.msgctxt, longStringCatalogMessage.msgid)
-            if not translationMessage:
+            translationMessage = translationCatalog.get(longStringCatalogMessage.id, longStringCatalogMessage.context)
+            if not translationMessage or not translationMessage.string:
                continue

-            if not longStringCatalogMessage.msgid_plural:
-                if len(translationMessage[0].msgstr[0]) > len(longStringCatalogMessage.msgstr[0]):
-                    longStringCatalogMessage.msgstr = translationMessage[0].msgstr
-                    translationMessage = longStringCatalogMessage
+            if not longStringCatalogMessage.pluralizable or not translationMessage.pluralizable:
+                if len(translationMessage.string) > len(longStringCatalogMessage.string):
+                    longStringCatalogMessage.string = translationMessage.string
                continue

-            longestSingularString = translationMessage[0].msgstr[0]
-            longestPluralString = translationMessage[0].msgstr[1] if len(translationMessage[0].msgstr) > 1 else longestSingularString
+            longestSingularString = translationMessage.string[0]
+            longestPluralString = translationMessage.string[1 if len(translationMessage.string) > 1 else 0]

-            candidateSingularString = longStringCatalogMessage.msgstr[0]
+            candidateSingularString = longStringCatalogMessage.string[0]
            candidatePluralString = "" # There might be between 0 and infinite plural forms.
-            for candidateString in longStringCatalogMessage.msgstr[1:]:
-                if len(candidateString) > len(candidatePluralString): candidatePluralString = candidateString
+            for candidateString in longStringCatalogMessage.string[1:]:
+                if len(candidateString) > len(candidatePluralString):
+                    candidatePluralString = candidateString

            changed = False
            if len(candidateSingularString) > len(longestSingularString):
@@ -88,27 +70,28 @@ def generateLongStringTranslationFromPotIntoPo(inputFilePath, outputFilePath):
                changed = True

            if changed:
-                longStringCatalogMessage.msgstr = [longestSingularString, longestPluralString]
+                longStringCatalogMessage.string = [longestSingularString, longestPluralString]
                translationMessage = longStringCatalogMessage
-
-    longStringCatalog.set_encoding("utf-8")
-    longStringCatalog.sync()
+    longStringCatalog.writeTo(outputFilePath)


 def main():
+    """Generate a "long.*.po" catalog for every .pot found in an "l10n" folder.
+
+    Walks the tree under projectRootDirectory and starts one process per
+    template; prints a help message when no template is found.
+    """

    foundPots = 0
    for root, folders, filenames in os.walk(projectRootDirectory):
-        root = root.decode("utf-8")
        for filename in filenames:
            if len(filename) > 4 and filename[-4:] == ".pot" and os.path.basename(root) == "l10n":
                foundPots += 1
-                print(u"Generating", "long." + filename[:-1])
-                generateLongStringTranslationFromPotIntoPo(os.path.join(root, filename), os.path.join(root, "long." + filename[:-1]))
+                # filename[:-1] drops the final "t": "x.pot" becomes "long.x.po".
+                print("Generating", "long." + filename[:-1])
+                multiprocessing.Process(
+                    target=generateLongStringTranslationFromPotIntoPo,
+                    args=(os.path.join(root, filename), os.path.join(root, "long." + filename[:-1]))
+                ).start()
+
    if foundPots == 0:
-        print(u"This script did not work because no ‘.pot’ files were found.")
-        print(u"Please, run ‘updateTemplates.py’ to generate the ‘.pot’ files, and run ‘pullTranslations.py’ to pull the latest translations from Transifex.")
-        print(u"Then you can run this script to generate ‘.po’ files with the longest strings.")
+        print("This script did not work because no ‘.pot’ files were found. "
+              "Please, run ‘updateTemplates.py’ to generate the ‘.pot’ files, and run ‘pullTranslations.py’ to pull the latest translations from Transifex. "
+              "Then you can run this script to generate ‘.po’ files with the longest strings.")


 if __name__ == "__main__":
@@ -0,0 +1,4 @@
+import os
+
+# Directory that contains this file (symlinks resolved by realpath).
+l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
+# Four directory levels above l10nToolsDirectory.
+projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir, os.pardir))
@@ -0,0 +1,49 @@
+"""Wrapper around babel Catalog / .po handling"""
+from datetime import datetime
+
+from babel.messages.catalog import Catalog as BabelCatalog
+from babel.messages.pofile import read_po, write_po
+
+class Catalog(BabelCatalog):
+    """Wraps a BabelCatalog for convenience.
+
+    Sets a fixed header comment, UTF-8 charset and current creation/revision
+    dates, trims the MIME headers that get written, and adds helpers to read
+    and write .po/.pot files.
+    """
+    def __init__(self, *args, project=None, copyright_holder=None, **other_kwargs):
+        """Create a catalog for `project`.
+
+        project: name used in the header comment and as Project-Id-Version.
+        copyright_holder: name used in the header comment's copyright line.
+        Positional and remaining keyword arguments go to BabelCatalog.
+        """
+        date = datetime.now()
+        super().__init__(*args, header_comment=(
+                f"# Translation template for {project}.\n"
+                f"# Copyright (C) {date.year} {copyright_holder}\n"
+                f"# This file is distributed under the same license as the {project} project."
+            ),
+            copyright_holder=copyright_holder,
+            fuzzy=False,
+            charset="utf-8",
+            creation_date=date,
+            revision_date=date,
+            **other_kwargs)
+        self._project = project
+
+    # Only the getter of the inherited property is replaced.
+    @BabelCatalog.mime_headers.getter
+    def mime_headers(self):
+        """Return Project-Id-Version followed by a fixed subset of the base headers."""
+        headers = []
+        for name, value in super().mime_headers:
+            if name in {
+                "PO-Revision-Date",
+                "POT-Creation-Date",
+                "MIME-Version",
+                "Content-Type",
+                "Content-Transfer-Encoding",
+                "Plural-Forms"}:
+                headers.append((name, value))
+
+        return [('Project-Id-Version', self._project)] + headers
+
+    @staticmethod
+    def readFrom(file_path, locale = None):
+        """Parse the .po/.pot file at `file_path` and return the catalog.
+
+        The file is opened read-only as UTF-8 and closed once parsed.
+        NOTE(review): the catalog object is created by babel's read_po, so it
+        is presumably a plain BabelCatalog rather than this subclass - confirm.
+        """
+        with open(file_path, "r", encoding="utf-8") as po_file:
+            return read_po(po_file, locale=locale)
+
+    def writeTo(self, file_path):
+        """Write this catalog to `file_path`, replacing any existing content.
+
+        The file is closed (and therefore flushed) before returning.
+        """
+        with open(file_path, "wb+") as po_file:
+            return write_po(
+                fileobj=po_file,
+                catalog=self,
+                width=90,
+                sort_by_file=True,
+            )
@@ -0,0 +1,22 @@
+"""Utils to list .po"""
+import os
+from typing import List
+
+from i18n_helper.catalog import Catalog
+
+def getCatalogs(inputFilePath, filters = None) -> List[Catalog]:
+    """Returns a list of "real" catalogs (.po) in the folder of the given file.
+
+    A file is loaded when its name ends in ".po", does not start with "long",
+    and its second dot-separated part equals the first part of the input
+    file's name. When `filters` is non-empty, the first part of the file name
+    (passed to readFrom as the locale) must also be listed in it.
+    """
+    l10nFolderPath = os.path.dirname(inputFilePath)
+    templateName = os.path.basename(inputFilePath).split(".")[0]
+
+    catalogs = []
+    for filename in os.listdir(str(l10nFolderPath)):
+        if filename.startswith("long") or not filename.endswith(".po"):
+            continue
+        nameParts = filename.split(".")
+        if nameParts[1] != templateName:
+            continue
+        localeCode = nameParts[0]
+        if filters and localeCode not in filters:
+            continue
+        catalogs.append(
+            Catalog.readFrom(os.path.join(l10nFolderPath, filename), locale=localeCode))
+
+    return catalogs
@@ -9,33 +9,19 @@ SCRIPT_PATH="`dirname \"$0\"`"
 # POT Generation ##############################################################

 echo ":: Regenerating the translation templates…"
-python2 "${SCRIPT_PATH}/updateTemplates.py"
+python3 "${SCRIPT_PATH}/updateTemplates.py"


 # PO Download #################################################################

 echo ":: Downloading translations from Transifex…"
-python2 "${SCRIPT_PATH}/pullTranslations.py"
+python3 "${SCRIPT_PATH}/pullTranslations.py"


 # Pre-Commit Cleanup  #########################################################

-# Note: I (Gallaecio) tried using GNU parallel for this, the problem is that
-# poediff accesses Subversion, and when you use Subversion more than once
-# simultaneously you end up with commands not running properly due to the
-# Subversion database being locked. So just take a beverage, put some music on
-# and wait for the task to eventually finish.
-
 echo ":: Reverting unnecessary changes…"
-for FILE_PATH in $(find "${SCRIPT_PATH}/../../../binaries/data" -name "*.pot" -o -name "*.po")
-do
-    if [ -z "$(poediff -c svn -qs "${FILE_PATH}")" ]; then
-        svn revert "${FILE_PATH}"
-    else
-        svn add "${FILE_PATH}" 2> /dev/null
-    fi
-done
-
+python3 "${SCRIPT_PATH}/checkDiff.py"

 # Commit ######################################################################

@@ -1,7 +1,6 @@
-#!/usr/bin/env python2
-# -*- coding:utf-8 -*-
+#!/usr/bin/env python3
 #
-# Copyright (C) 2014 Wildfire Games.
+# Copyright (C) 2020 Wildfire Games.
 # This file is part of 0 A.D.
 #
 # 0 A.D. is free software: you can redistribute it and/or modify
@@ -17,36 +16,17 @@
 # You should have received a copy of the GNU General Public License
 # along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.

-"""
-    Although this script itself should work with both Python 2 and Python 3, it relies on the Transifex Client, which at
-    this moment (2014-10-23) does not support Python 3 in the latest stable release (0.10).
-
-    As soon as Transifex Client supports Python 3, simply updating its folder should be enough to make this script work
-    with Python 3 as well.
-"""
-
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import os, sys

-# Python version check.
-if sys.version_info[0] != 2:
-    print(__doc__)
-    sys.exit()
-
 from txclib.project import Project

+from i18n_helper import l10nToolsDirectory, projectRootDirectory

 def main():
-
-
-    l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
-    projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir))
    l10nFolderName = "l10n"
    transifexClientFolder = ".tx"

    for root, folders, filenames in os.walk(projectRootDirectory):
-        root = root.decode('utf-8')
        for folder in folders:
            if folder == l10nFolderName:
                if os.path.exists(os.path.join(root, folder, transifexClientFolder)):
@@ -0,0 +1,3 @@
+babel~=2.6
+lxml~=4.5
+transifex-client>=0.14
@@ -0,0 +1,105 @@
+import io
+import pytest
+from checkDiff import check_diff
+from unittest import mock
+from types import SimpleNamespace
+
+# Sample `svn diff` outputs fed to check_diff(), one string per test case.
+# Inside each string the first column is the unified-diff marker:
+# ' ' for context, '-' for a removed line and '+' for an added line.
+PATCHES = [
+"""
+Index: binaries/data/l10n/en_GB.engine.po
+===================================================================
+--- binaries/data/l10n/en_GB.engine.po
++++ binaries/data/l10n/en_GB.engine.po
+@@ -103,7 +103,7 @@
+
+ #: lobby/XmppClient.cpp:1291
+ msgid "Stream error"
+-msgstr "Stream error"
++msgstr "Some Error"
+
+ #: lobby/XmppClient.cpp:1292
+ msgid "The incoming stream version is unsupported"
+
+""",
+"""
+Index: binaries/data/l10n/en_GB.engine.po
+===================================================================
+--- binaries/data/l10n/en_GB.engine.po
++++ binaries/data/l10n/en_GB.engine.po
+@@ -103,7 +103,7 @@
+
+-#: lobby/XmppClient.cpp:1291
++#: lobby/XmppClient.cpp:1295
+ msgid "Stream error"
+ msgstr "Stream error"
+""",
+"""
+Index: binaries/data/l10n/en_GB.engine.po
+===================================================================
+--- binaries/data/l10n/en_GB.engine.po
++++ binaries/data/l10n/en_GB.engine.po
+@@ -103,7 +103,7 @@
+
+-#: lobby/XmppClient.cpp:1291
++#: lobby/XmppClient.cpp:1295
+ msgid "Stream error"
+ msgstr "Stream error"
+Index: binaries/data/l10n/en_GB_2.engine.po
+===================================================================
+--- binaries/data/l10n/en_GB_2.engine.po
++++ binaries/data/l10n/en_GB_2.engine.po
+@@ -103,7 +103,7 @@
+
+ #: lobby/XmppClient.cpp:1291
+ #: lobby/XmppClient.cpp:1295
+-msgid "Stream error"
++msgstr "Stretotoro"
+Index: binaries/data/l10n/en_GB_3.engine.po
+===================================================================
+--- binaries/data/l10n/en_GB_3.engine.po
++++ binaries/data/l10n/en_GB_3.engine.po
+@@ -103,7 +103,7 @@
+
+-#: lobby/XmppClient.cpp:1291
++#: lobby/XmppClient.cpp:1295
+ msgid "Stream error"
+ msgstr "Stream error"
+""",
+"""
+Index: binaries/data/l10n/bar.engine.po
+===================================================================
+--- binaries/data/l10n/bar.engine.po
++++ binaries/data/l10n/bar.engine.po
+@@ -3,13 +3,13 @@
+ # This file is distributed under the same license as the Pyrogenesis project.
+ #
+ # Translators:
+-# Benedikt Wagner <holledau1@gmx.de>, 2020
++# dabene1408 <holledau1@gmx.de>, 2020
+ msgid ""
+ msgstr ""
+ "Project-Id-Version: 0 A.D.\n"
+ "POT-Creation-Date: 2020-05-22 07:08+0000\n"
+ "PO-Revision-Date: 2020-06-22 16:38+0000\n"
+-"Last-Translator: Benedikt Wagner <holledau1@gmx.de>\n"
++"Last-Translator: dabene1408 <holledau1@gmx.de>\n"
+ "Language-Team: Bavarian (http://www.transifex.com/wildfire-games/0ad/language/bar/)\n"
+ "MIME-Version: 1.0\n"
+ "Content-Type: text/plain; charset=UTF-8\n"
+"""
+]
+
+# Expected check_diff() result for each entry of PATCHES, in the same order.
+# Each list is turned into a set and compared with the returned value;
+# presumably these are the files whose changes are spurious and get reverted.
+PATCHES_EXPECT_REVERT = [
+    [],
+    ["binaries/data/l10n/en_GB.engine.po"],
+    ["binaries/data/l10n/en_GB.engine.po", "binaries/data/l10n/en_GB_3.engine.po"],
+    ["binaries/data/l10n/bar.engine.po"]
+]
+
+@pytest.fixture(params=zip(PATCHES, PATCHES_EXPECT_REVERT))
+def patch(request):
+    """Yield one test case: [diff text wrapped in a StringIO, expected result as a set]."""
+    diff_text, expected_paths = request.param
+    return [io.StringIO(diff_text), set(expected_paths)]
+
+
+def test_checkdiff(patch):
+    """check_diff() must return exactly the expected set of paths for each sample diff."""
+    diff_stream, expected_paths = patch
+    assert check_diff(diff_stream) == expected_paths
@@ -1,7 +1,6 @@
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 #
-# Copyright (C) 2018 Wildfire Games.
+# Copyright (C) 2020 Wildfire Games.
 # This file is part of 0 A.D.
 #
 # 0 A.D. is free software: you can redistribute it and/or modify
@@ -17,19 +16,16 @@
 # You should have received a copy of the GNU General Public License
 # along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.

-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import codecs, datetime, json, os, string, textwrap
-
-from pology.catalog import Catalog
-from pology.message import Message
-from pology.monitored import Monpair, Monlist
+import json, os
+import multiprocessing
+from importlib import import_module

 from lxml import etree

+from i18n_helper import l10nToolsDirectory, projectRootDirectory
+from i18n_helper.catalog import Catalog
+from extractors import extractors

-l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
-projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir))
 l10nFolderName = "l10n"
 messagesFilename = "messages.json"

@@ -41,83 +37,82 @@ def warnAboutUntouchedMods():
    modsRootFolder = os.path.join(projectRootDirectory, "binaries", "data", "mods")
    untouchedMods = {}
    for modFolder in os.listdir(modsRootFolder):
-        if modFolder[0] != "_":
+        if modFolder[0] != "_" and modFolder[0] != '.':
            if not os.path.exists(os.path.join(modsRootFolder, modFolder, l10nFolderName)):
                untouchedMods[modFolder] = "There is no '{folderName}' folder in the root folder of this mod.".format(folderName=l10nFolderName)
            elif not os.path.exists(os.path.join(modsRootFolder, modFolder, l10nFolderName, messagesFilename)):
                untouchedMods[modFolder] = "There is no '{filename}' file within the '{folderName}' folder in the root folder of this mod.".format(folderName=l10nFolderName, filename=messagesFilename)
    if untouchedMods:
-        print(textwrap.dedent("""
-                Warning: No messages were extracted from the following mods:
-            """))
+        print(""
+            "Warning: No messages were extracted from the following mods:"
+            "")
        for mod in untouchedMods:
            print("• {modName}: {warningMessage}".format(modName=mod, warningMessage=untouchedMods[mod]))
-        print(textwrap.dedent("""
-                For this script to extract messages from a mod folder, this mod folder must contain a '{folderName}'
-                folder, and this folder must contain a '{filename}' file that describes how to extract messages for the
-                mod. See the folder of the main mod ('public') for an example, and see the documentation for more
-                information.
-                """.format(folderName=l10nFolderName, filename=messagesFilename)
-             ))
+        print(""
+            f"For this script to extract messages from a mod folder, this mod folder must contain a '{l10nFolderName}' "
+            f"folder, and this folder must contain a '{messagesFilename}' file that describes how to extract messages for the "
+            f"mod. See the folder of the main mod ('public') for an example, and see the documentation for more "
+            f"information."
+             )

+def generatePOT(templateSettings, rootPath):
+    """Generate one .pot template from a single entry of a messages.json file.
+
+    templateSettings is a dict. The keys read here are "project",
+    "copyrightHolder", "rules" and "output", plus the optional "skip" and
+    "inputRoot". Each rule provides "extractor" and "filemasks" and may
+    provide "options" and "skip".
+
+    rootPath is the directory against which "inputRoot" and "output" are
+    resolved.
+
+    Nothing is written when the template itself is marked "skip": "yes".
+    """
+    if templateSettings.get("skip") == "yes":
+        return
+
+    inputRootPath = rootPath
+    if "inputRoot" in templateSettings:
+        inputRootPath = os.path.join(rootPath, templateSettings["inputRoot"])
+
+    template = Catalog(
+        project=templateSettings["project"],
+        copyright_holder=templateSettings["copyrightHolder"],
+        locale='en',
+    )
+
+    # Extractor classes are looked up by name in this module.
+    extractorsModule = import_module("extractors.extractors")
+
+    for rule in templateSettings["rules"]:
+        # A skipped rule only skips itself: the remaining rules still run and
+        # the template is still written.
+        if rule.get("skip") == "yes":
+            continue
+
+        options = rule.get("options", {})
+        extractorClass = getattr(extractorsModule, rule['extractor'])
+        extractor = extractorClass(inputRootPath, rule["filemasks"], options)
+        formatFlag = options.get("format")
+        for message, plural, context, location, comments in extractor.run():
+            # Plural messages are identified by a (singular, plural) tuple.
+            message_id = (message, plural) if plural else message
+
+            # Reuse the entry if this message was already extracted, so that
+            # every occurrence ends up as a location of the same entry.
+            saved_message = template.get(message_id, context) or template.add(
+                id=message_id,
+                context=context,
+                auto_comments=comments,
+                flags=[formatFlag] if formatFlag and "%" in message else []
+            )
+            saved_message.locations.append(location)
+            # Only the format flag requested by the rule options is wanted;
+            # presumably the catalog library adds 'python-format' on its own.
+            saved_message.flags.discard('python-format')
+
+    template.writeTo(os.path.join(rootPath, templateSettings["output"]))
+    print(u"Generated \"{}\" with {} messages.".format(templateSettings["output"], len(template)))

 def generateTemplatesForMessagesFile(messagesFilePath):
+    """Load the JSON settings at messagesFilePath and generate every template they list.
+
+    Each item of the loaded settings is passed to generatePOT() in its own
+    process, together with the directory containing the messages file.
+    The processes are started here but not joined.
+    """

    with open(messagesFilePath, 'r') as fileObject:
        settings = json.load(fileObject)

-    rootPath = os.path.dirname(messagesFilePath)
-
    for templateSettings in settings:
-        if "skip" in templateSettings and templateSettings["skip"] == "yes":
-            continue
-
-        inputRootPath = rootPath
-        if "inputRoot" in templateSettings:
-            inputRootPath = os.path.join(rootPath, templateSettings["inputRoot"])
-
-        template = Catalog(os.path.join(rootPath, templateSettings["output"]), create=True, truncate=True)
-        h = template.update_header(
-            templateSettings["project"],
-            "Translation template for %project.",
-            "Copyright (C) {year} {holder}".format(
-                year=datetime.datetime.now().year,
-                holder=templateSettings["copyrightHolder"]
-            ),
-            "This file is distributed under the same license as the %project project.",
-            plforms="nplurals=2; plural=(n != 1);"
-        )
-        h.remove_field("Report-Msgid-Bugs-To")
-        h.remove_field("Last-Translator")
-        h.remove_field("Language-Team")
-        h.remove_field("Language")
-        h.author = Monlist()
-
-        for rule in templateSettings["rules"]:
-            if "skip" in rule and rule["skip"] == "yes":
-                continue
-
-            options = rule.get("options", {})
-            extractorClass = getattr(__import__("extractors.extractors", {}, {}, [rule["extractor"]]), rule["extractor"])
-            extractor = extractorClass(inputRootPath, rule["filemasks"], options)
-            formatFlag = None
-            if "format" in options:
-                formatFlag = options["format"]
-            for message, plural, context, location, comments in extractor.run():
-                msg = Message({"msgid": message, "msgid_plural": plural, "msgctxt": context, "auto_comment": comments, "flag": [formatFlag] if formatFlag and string.find(message, "%") != -1 else None, "source": [location]})
-                if template.get(msg):
-                    template.get(msg).source.append(Monpair(location))
-                else:
-                    template.add(msg)
-
-        template.set_encoding("utf-8")
-        template.sync(fitplural=True)
-        print(u"Generated \"{}\" with {} messages.".format(templateSettings["output"], len(template)))
+        # One process per template, so templates are generated in parallel.
+        multiprocessing.Process(
+            target=generatePOT,
+            args=(templateSettings, os.path.dirname(messagesFilePath))
+        ).start()


 def main():
-
-    for root, folders, filenames in os.walk(projectRootDirectory):
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--scandir", help="Directory to start scanning for l10n folders in. "
+                                          "Type '.' for current working directory")
+    args = parser.parse_args()
+    for root, folders, filenames in os.walk(args.scandir or projectRootDirectory):
        for folder in folders:
            if folder == l10nFolderName:
                messagesFilePath = os.path.join(root, folder, messagesFilename)