1
0
forked from mirrors/0ad

Improve performance and output of entvalidate.py

The most notable changes are:

- improved performance thanks to using `lxml` instead of `xmllint` for
  validating the templates
- improved performance by parallelizing the validation across multiple
  threads
- more meaningful output for detected validation errors
- easier to call from other scripts (like checkrefs.py)
This commit is contained in:
Dunedan
2025-03-18 12:57:01 +01:00
parent f68a8c2532
commit e755ef7155
2 changed files with 118 additions and 90 deletions
+117 -90
View File
@@ -1,56 +1,116 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import argparse import argparse
import logging import logging
import shutil
import sys import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path from pathlib import Path
from subprocess import CalledProcessError, run
from xml.etree import ElementTree as ET from xml.etree import ElementTree as ET
from lxml import etree
from lxml.etree import DocumentInvalid, ElementTree
from scriptlib import SimulTemplateEntity, find_files from scriptlib import SimulTemplateEntity, find_files
SIMUL_TEMPLATES_PATH = Path("simulation/templates") SIMUL_TEMPLATES_PATH = Path("simulation/templates")
ENTITY_RELAXNG_FNAME = "entity.rng" ENTITY_RELAXNG_FNAME = "entity.rng"
RELAXNG_SCHEMA_ERROR_MSG = """Relax NG schema non existant.
Please create the file: {}
You can do that by running 'pyrogenesis -dumpSchema' in the 'system' directory
"""
XMLLINT_ERROR_MSG = (
"xmllint not found in your PATH, please install it (usually in libxml2 package)"
)
class SingleLevelFilter(logging.Filter): def init_logger(log_level) -> logging.Logger:
def __init__(self, passlevel, reject): """Initialize a logger."""
self.passlevel = passlevel logger = logging.getLogger(__name__)
self.reject = reject logger.setLevel(log_level)
handler = logging.StreamHandler(sys.stdout)
def filter(self, record): handler.setLevel(log_level)
if self.reject: handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
return record.levelno != self.passlevel logger.addHandler(handler)
return record.levelno == self.passlevel return logger
logger = logging.getLogger(__name__) def validate_template(
logger.setLevel(logging.INFO) simulation_template_entity: SimulTemplateEntity,
# create a console handler, seems nicer to Windows and for future uses template_path: Path,
ch = logging.StreamHandler(sys.stdout) relaxng_schema: ElementTree,
ch.setLevel(logging.INFO) mod_name: str,
ch.setFormatter(logging.Formatter("%(levelname)s - %(message)s")) ) -> None:
f1 = SingleLevelFilter(logging.INFO, False) """Validate a single template."""
ch.addFilter(f1) entity = simulation_template_entity.load_inherited(
logger.addHandler(ch) SIMUL_TEMPLATES_PATH, str(template_path.relative_to(SIMUL_TEMPLATES_PATH)), [mod_name]
errorch = logging.StreamHandler(sys.stderr) )
errorch.setLevel(logging.WARNING) relaxng_schema.assertValid(etree.fromstring(ET.tostring(entity, encoding="utf-8")))
errorch.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
logger.addHandler(errorch)
def main() -> int: class ValidationError(Exception):
parser = argparse.ArgumentParser(description="Validate templates") pass
def validate_templates(
    logger: logging.Logger,
    vfs_root: Path,
    mod_name: str,
    relaxng_schema_path: Path,
    templates: list[Path] | None,
) -> None:
    """Validate simulation templates against a RELAX NG schema.

    If ``templates`` is empty or ``None``, the candidates are the XML files
    that ``find_files`` reports for ``mod_name`` below ``SIMUL_TEMPLATES_PATH``.
    Candidates whose file stem starts with ``template_`` and candidates located
    in the ``mixins/`` or ``special/`` sub-directories are skipped. Every
    remaining template is handed to ``validate_template`` on a thread pool.

    A ``DocumentInvalid`` raised for a template is logged as an error and
    counted; any other exception raised by a worker propagates to the caller.
    A summary line with the total and failed counts is logged at the end.

    Raises:
        ValidationError: If at least one template failed validation.
    """
    if templates:
        # Mirror the (path, extra) pairs that find_files yields.
        candidates = [(template, None) for template in templates]
    else:
        candidates = find_files(vfs_root, [mod_name], SIMUL_TEMPLATES_PATH.as_posix(), "xml")

    templates_root = SIMUL_TEMPLATES_PATH.as_posix()
    excluded_prefixes = (f"{templates_root}/mixins/", f"{templates_root}/special/")
    templates_to_validate = [
        path
        for path, _ in candidates
        if not path.stem.startswith("template_")
        and not path.as_posix().startswith(excluded_prefixes)
    ]

    simulation_template_entity = SimulTemplateEntity(vfs_root, logger)
    relaxng_schema = etree.RelaxNG(file=relaxng_schema_path)

    processed = 0
    failures = 0
    with ThreadPoolExecutor() as executor:
        # Map each future back to its template so failures can be attributed.
        pending = {
            executor.submit(
                validate_template,
                simulation_template_entity,
                path,
                relaxng_schema,
                mod_name,
            ): path
            for path in templates_to_validate
        }

        for finished in as_completed(pending):
            processed += 1
            path = pending[finished]
            logger.debug("Processed %s", path)
            try:
                finished.result()
            except DocumentInvalid as e:
                failures += 1
                logger.error("%s: %s", path, e)  # noqa: TRY400

    logger.info("Total: %s; failed: %s", processed, failures)

    if failures:
        raise ValidationError
def main() -> None:
parser = argparse.ArgumentParser(description="Validate templates against a RELAX NG schema.")
parser.add_argument("-m", "--mod-name", required=True, help="The name of the mod to validate.") parser.add_argument("-m", "--mod-name", required=True, help="The name of the mod to validate.")
parser.add_argument( parser.add_argument(
"-r", "-r",
@@ -65,72 +125,39 @@ def main() -> int:
"--relaxng-schema", "--relaxng-schema",
default=Path() / ENTITY_RELAXNG_FNAME, default=Path() / ENTITY_RELAXNG_FNAME,
type=Path, type=Path,
help="The path to mod's root location.", help="The path to the RELAX NG schema.",
) )
parser.add_argument( parser.add_argument(
"-t", "--templates", nargs="*", help="Optionally, a list of templates to validate." "-t",
"--templates",
nargs="*",
type=Path,
help="A list of templates to validate. If omitted all templates will be validated.",
)
parser.add_argument(
"-v", "--verbose", help="Be verbose about the output.", action="store_true"
) )
parser.add_argument("-v", "--verbose", help="Be verbose about the output.", default=False)
args = parser.parse_args() args = parser.parse_args()
log_level = logging.DEBUG if args.verbose else logging.INFO
logger = init_logger(log_level)
if not args.relaxng_schema.exists(): if not args.relaxng_schema.exists():
logging.error(RELAXNG_SCHEMA_ERROR_MSG.format(args.relaxng_schema)) logging.error(
return 1 "RELAX NG schema file doesn't exist. Please create the file: %s. You can do that by "
'running "pyrogenesis -dumpSchema" in the "binaries/system" directory',
if not shutil.which("xmllint"): args.relaxng_schema,
logging.error(XMLLINT_ERROR_MSG)
return 2
if args.templates:
templates = sorted([(Path(t), None) for t in args.templates])
else:
templates = sorted(
find_files(args.vfs_root, [args.mod_name], SIMUL_TEMPLATES_PATH.as_posix(), "xml")
) )
sys.exit(1)
simul_template_entity = SimulTemplateEntity(args.vfs_root, logger) try:
count, failed = 0, 0 validate_templates(
for fp, _ in templates: logger, args.vfs_root, args.mod_name, args.relaxng_schema, args.templates
if fp.stem.startswith("template_"):
continue
path = fp.as_posix()
if path.startswith(
(
f"{SIMUL_TEMPLATES_PATH.as_posix()}/mixins/",
f"{SIMUL_TEMPLATES_PATH.as_posix()}/special/",
)
):
continue
if args.verbose:
logger.info("Parsing %s...", fp)
count += 1
entity = simul_template_entity.load_inherited(
SIMUL_TEMPLATES_PATH, str(fp.relative_to(SIMUL_TEMPLATES_PATH)), [args.mod_name]
) )
xmlcontent = ET.tostring(entity, encoding="unicode") except ValidationError:
try: sys.exit(1)
run(
["xmllint", "--relaxng", str(args.relaxng_schema.resolve()), "-"],
input=xmlcontent,
encoding="utf-8",
capture_output=True,
text=True,
check=True,
)
except CalledProcessError as e:
failed += 1
if e.stderr:
logger.exception(e.stderr)
if e.stdout:
logger.info(e.stdout)
logger.info("Total: %s; failed: %s", count, failed)
return 0
if __name__ == "__main__": if __name__ == "__main__":
raise SystemExit(main()) main()
+1
View File
@@ -0,0 +1 @@
lxml>=5.0.0