From d1ca249f2a1b7387fb87bdcde14b4f9e41b54cb2 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 7 Oct 2021 11:22:00 -0400 Subject: [PATCH 1/8] Add mypy source review to current state This patch is to show what is needed to add static type checking with `mypy`. The brunt of the effort is adding `#type: ignore` annotations to `rdflib`. These can be removed once an `rdflib` release with the merged PR 1407 is issued. The additional package in `tests/requirements.txt` pertaining to `dateutil` was reported by `mypy`. DISCLAIMER: Participation by NIST in the creation of the documentation of mentioned software is not intended to imply a recommendation or endorsement by the National Institute of Standards and Technology, nor is it intended to imply that any specific software is necessarily the best available for the purpose. References: * [AC-211] Add static type checking to CASE-Utilities-Python * https://github.com/RDFLib/rdflib/pull/1407 Signed-off-by: Alex Nelson --- case_utils/__init__.py | 2 +- case_utils/case_file/__init__.py | 2 +- case_utils/case_sparql_construct/__init__.py | 2 +- case_utils/case_sparql_select/__init__.py | 4 ++-- tests/Makefile | 14 ++++++++++++++ tests/case_file/test_case_file.py | 2 +- .../test_case_sparql_construct.py | 2 +- tests/case_utils/test_guess_format.py | 2 +- tests/hexbinary/test_hexbinary.py | 2 +- tests/requirements.txt | 2 ++ tests/src/compact.py | 2 +- tests/src/glom_graph.py | 2 +- tests/src/isomorphic_diff.py | 2 +- 13 files changed, 28 insertions(+), 12 deletions(-) diff --git a/case_utils/__init__.py b/case_utils/__init__.py index ee43a52..275e665 100644 --- a/case_utils/__init__.py +++ b/case_utils/__init__.py @@ -15,7 +15,7 @@ import warnings -import rdflib.util +import rdflib.util # type: ignore from . 
import local_uuid diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index 7d07afb..369595d 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -21,7 +21,7 @@ import hashlib import os -import rdflib +import rdflib # type: ignore import case_utils diff --git a/case_utils/case_sparql_construct/__init__.py b/case_utils/case_sparql_construct/__init__.py index fcc9434..dcd110b 100644 --- a/case_utils/case_sparql_construct/__init__.py +++ b/case_utils/case_sparql_construct/__init__.py @@ -21,7 +21,7 @@ import os import logging -import rdflib.plugins.sparql +import rdflib.plugins.sparql # type: ignore import case_utils diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index d3d6f51..e99d79b 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -33,8 +33,8 @@ import os import logging -import pandas as pd -import rdflib.plugins.sparql +import pandas as pd # type: ignore +import rdflib.plugins.sparql # type: ignore import case_utils diff --git a/tests/Makefile b/tests/Makefile index 949f671..2d09c4b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -32,6 +32,7 @@ all: \ check-case_sparql_construct \ check-case_sparql_select \ check-isomorphic_diff \ + check-mypy \ download .venv.done.log: \ @@ -79,6 +80,7 @@ all-case_sparql_select: \ # These check calls are provided in preferred run-order. check: \ check-isomorphic_diff \ + check-mypy \ check-case_file \ check-case_sparql_construct \ check-case_sparql_select @@ -116,6 +118,18 @@ check-isomorphic_diff: \ --directory isomorphic_diff \ check +# mypy is called against specific members of the tests directory to avoid descending into the virtual environment. 
+check-mypy: \ + .venv.done.log + source venv/bin/activate \ + && mypy \ + $(top_srcdir)/case_utils \ + case_file \ + case_sparql_construct \ + case_utils \ + hexbinary \ + src + clean: @$(MAKE) \ --directory case_sparql_select \ diff --git a/tests/case_file/test_case_file.py b/tests/case_file/test_case_file.py index 8d3bd10..cb63002 100644 --- a/tests/case_file/test_case_file.py +++ b/tests/case_file/test_case_file.py @@ -16,7 +16,7 @@ import os import pytest -import rdflib.plugins.sparql +import rdflib.plugins.sparql # type: ignore _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/tests/case_sparql_construct/test_case_sparql_construct.py b/tests/case_sparql_construct/test_case_sparql_construct.py index 68f6757..c8268af 100644 --- a/tests/case_sparql_construct/test_case_sparql_construct.py +++ b/tests/case_sparql_construct/test_case_sparql_construct.py @@ -11,7 +11,7 @@ # # We would appreciate acknowledgement if the software is used. -import rdflib.plugins.sparql +import rdflib.plugins.sparql # type: ignore import case_utils diff --git a/tests/case_utils/test_guess_format.py b/tests/case_utils/test_guess_format.py index d33bea5..bf11558 100644 --- a/tests/case_utils/test_guess_format.py +++ b/tests/case_utils/test_guess_format.py @@ -12,7 +12,7 @@ # We would appreciate acknowledgement if the software is used. 
import pytest -import rdflib +import rdflib # type: ignore import case_utils diff --git a/tests/hexbinary/test_hexbinary.py b/tests/hexbinary/test_hexbinary.py index 04784bc..ad7d307 100644 --- a/tests/hexbinary/test_hexbinary.py +++ b/tests/hexbinary/test_hexbinary.py @@ -50,7 +50,7 @@ import os import pytest -import rdflib.plugins.sparql +import rdflib.plugins.sparql # type: ignore _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/tests/requirements.txt b/tests/requirements.txt index b31bac3..f913d96 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,3 +1,5 @@ PyLD +mypy pytest python-dateutil +types-python-dateutil diff --git a/tests/src/compact.py b/tests/src/compact.py index 16c25ce..82e0f92 100644 --- a/tests/src/compact.py +++ b/tests/src/compact.py @@ -24,7 +24,7 @@ import os import json -import pyld +import pyld # type: ignore _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/tests/src/glom_graph.py b/tests/src/glom_graph.py index cf101ab..0962a80 100644 --- a/tests/src/glom_graph.py +++ b/tests/src/glom_graph.py @@ -17,7 +17,7 @@ __version__ = "0.1.0" -import rdflib +import rdflib # type: ignore import case_utils diff --git a/tests/src/isomorphic_diff.py b/tests/src/isomorphic_diff.py index 02b411c..e770a10 100644 --- a/tests/src/isomorphic_diff.py +++ b/tests/src/isomorphic_diff.py @@ -34,7 +34,7 @@ import os import sys -import rdflib.compare +import rdflib.compare # type: ignore import case_utils From 9a582d78025475f7752d377313ecdd6ceb89e522 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 7 Oct 2021 11:29:28 -0400 Subject: [PATCH 2/8] Designate case_utils as a typed package References: * [AC-211] Add static type checking to CASE-Utilities-Python * https://www.python.org/dev/peps/pep-0561/ Signed-off-by: Alex Nelson --- case_utils/py.typed | 13 +++++++++++++ setup.cfg | 4 ++++ 2 files changed, 17 insertions(+) create mode 100644 case_utils/py.typed diff --git a/case_utils/py.typed 
b/case_utils/py.typed new file mode 100644 index 0000000..3ecd1f5 --- /dev/null +++ b/case_utils/py.typed @@ -0,0 +1,13 @@ +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +# This file is defined to support PEP 561: +# https://www.python.org/dev/peps/pep-0561/ diff --git a/setup.cfg b/setup.cfg index ca1a136..0f51d10 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,6 +15,7 @@ classifiers = Programming Language :: Python :: 3 [options] +include_package_data = true # TODO The constraint on pyparsing can be removed when rdflib Issue #1190 is resolved. # https://github.com/RDFLib/rdflib/issues/1190 install_requires = @@ -32,3 +33,6 @@ console_scripts = case_sparql_construct = case_utils.case_sparql_construct:main # Note that numpy (pandas dependency, and pandas is dependency of case_sparql_select) is only supported in Python >= 3.7. case_sparql_select = case_utils.case_sparql_select:main + +[options.package_data] +case_utils = py.typed From f99bceaea0080f479aa88d98680bbf0cac49109d Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 7 Oct 2021 11:55:18 -0400 Subject: [PATCH 3/8] Add minimal type signatures to test scripts An observed behavior is that mypy will not perform type signature analysis until one is added in the call path, e.g. designating `def main() -> None`. This patch is the minimal set of effects of adding a None return type to unit test functions. 
References: * [AC-211] Add static type checking to CASE-Utilities-Python Signed-off-by: Alex Nelson --- tests/case_file/test_case_file.py | 12 +++--- .../test_case_sparql_construct.py | 11 ++++-- tests/case_utils/test_guess_format.py | 24 ++++++------ tests/hexbinary/test_hexbinary.py | 37 ++++++++++--------- tests/src/compact.py | 2 +- tests/src/glom_graph.py | 2 +- tests/src/isomorphic_diff.py | 2 +- 7 files changed, 48 insertions(+), 42 deletions(-) diff --git a/tests/case_file/test_case_file.py b/tests/case_file/test_case_file.py index cb63002..29cd749 100644 --- a/tests/case_file/test_case_file.py +++ b/tests/case_file/test_case_file.py @@ -37,20 +37,22 @@ SRCDIR = os.path.dirname(__file__) -def load_graph(filename): +def load_graph( + filename +) -> rdflib.Graph: in_graph = rdflib.Graph() in_graph.parse(filename) return in_graph @pytest.fixture -def graph_case_file(): +def graph_case_file() -> rdflib.Graph: return load_graph(os.path.join(SRCDIR, "sample.txt.ttl")) @pytest.fixture -def graph_case_file_disable_hashes(): +def graph_case_file_disable_hashes() -> rdflib.Graph: return load_graph(os.path.join(SRCDIR, "sample.txt-disable_hashes.ttl")) -def test_confirm_hashes(graph_case_file): +def test_confirm_hashes(graph_case_file) -> None: expected = { "MD5": "098F6BCD4621D373CADE4E832627B4F6", "SHA1": "A94A8FE5CCB19BA61C4C0873D391E987982FBBD3", @@ -91,7 +93,7 @@ def test_confirm_hashes(graph_case_file): assert expected == computed -def test_confirm_mtime(graph_case_file, graph_case_file_disable_hashes): +def test_confirm_mtime(graph_case_file, graph_case_file_disable_hashes) -> None: query_confirm_mtime = """ SELECT ?nFile WHERE { diff --git a/tests/case_sparql_construct/test_case_sparql_construct.py b/tests/case_sparql_construct/test_case_sparql_construct.py index c8268af..eec5216 100644 --- a/tests/case_sparql_construct/test_case_sparql_construct.py +++ b/tests/case_sparql_construct/test_case_sparql_construct.py @@ -11,16 +11,18 @@ # # We would appreciate 
acknowledgement if the software is used. +import typing + import rdflib.plugins.sparql # type: ignore import case_utils -def _test_templates_with_blank_nodes_result(filename): +def _test_templates_with_blank_nodes_result(filename) -> None: ground_truth_positive = { ("Alice", "Hacker"), ("Bob", "Hacker") } - ground_truth_negative = set() + ground_truth_negative : typing.Set[str] = set() graph = rdflib.Graph() graph.parse(filename) @@ -48,7 +50,8 @@ def _test_templates_with_blank_nodes_result(filename): )) assert computed == ground_truth_positive -def test_templates_with_blank_nodes_result_json(): +def test_templates_with_blank_nodes_result_json() -> None: _test_templates_with_blank_nodes_result("output.json") -def test_templates_with_blank_nodes_result_turtle(): + +def test_templates_with_blank_nodes_result_turtle() -> None: _test_templates_with_blank_nodes_result("output.ttl") diff --git a/tests/case_utils/test_guess_format.py b/tests/case_utils/test_guess_format.py index bf11558..3cb4261 100644 --- a/tests/case_utils/test_guess_format.py +++ b/tests/case_utils/test_guess_format.py @@ -22,45 +22,45 @@ PATH_TO_XHTML = "/nonexistent/foo.xhtml" FMAP_XHTML_GRDDL = {"xhtml": "grddl"} -def test_rdflib_util_guess_format_xhtml_default(): +def test_rdflib_util_guess_format_xhtml_default() -> None: assert rdflib.util.guess_format(PATH_TO_XHTML) == "rdfa", "Failed to reproduce rdflib.util.guess_format test" -def test_rdflib_util_guess_format_xhtml_fmap(): +def test_rdflib_util_guess_format_xhtml_fmap() -> None: """ This test implements one of the documented demonstrations in rdflib.util.guess_format. 
""" assert rdflib.util.guess_format(PATH_TO_XHTML, FMAP_XHTML_GRDDL) == "grddl", "Failed to reproduce rdflib.util.guess_format test" -def test_rdflib_util_guess_format_ttl_default(): +def test_rdflib_util_guess_format_ttl_default() -> None: assert rdflib.util.guess_format(PATH_TO_TTL) == "turtle", "Failed to recognize .ttl RDF file extension" @pytest.mark.xfail(reason="rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", strict=True) -def test_rdflib_util_guess_format_ttl_fmap(): +def test_rdflib_util_guess_format_ttl_fmap() -> None: assert rdflib.util.guess_format(PATH_TO_TTL, FMAP_XHTML_GRDDL) == "turtle", "Failed to recognize .ttl RDF file extension when using fmap" -def test_rdflib_util_guess_format_json(): +def test_rdflib_util_guess_format_json() -> None: assert rdflib.util.guess_format(PATH_TO_JSON) == "json-ld", "Failed to recognize .json RDF file extension" -def test_rdflib_util_guess_format_jsonld(): +def test_rdflib_util_guess_format_jsonld() -> None: assert rdflib.util.guess_format(PATH_TO_JSONLD) == "json-ld", "Failed to recognize .jsonld RDF file extension" -def test_case_utils_guess_format_ttl_default(): +def test_case_utils_guess_format_ttl_default() -> None: assert case_utils.guess_format(PATH_TO_TTL) == "turtle", "Failed to recognize .ttl RDF file extension" @pytest.mark.xfail(reason="Preserving behavior - rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", strict=True) -def test_case_utils_guess_format_ttl_fmap(): +def test_case_utils_guess_format_ttl_fmap() -> None: assert case_utils.guess_format(PATH_TO_TTL, FMAP_XHTML_GRDDL) == "turtle", "Failed to recognize .ttl RDF file extension when using fmap" -def test_case_utils_guess_format_json_default(): +def test_case_utils_guess_format_json_default() -> None: assert case_utils.guess_format(PATH_TO_JSON) == "json-ld", "Failed to recognize .json RDF file extension" @pytest.mark.xfail(reason="Preserving behavior - rdflib 5.0.0 guess_format fmap 
argument overwrites base module's extension map", strict=True) -def test_case_utils_guess_format_json_fmap(): +def test_case_utils_guess_format_json_fmap() -> None: assert case_utils.guess_format(PATH_TO_JSON, FMAP_XHTML_GRDDL) == "json-ld", "Failed to recognize .json RDF file extension when using fmap" -def test_case_utils_guess_format_jsonld_default(): +def test_case_utils_guess_format_jsonld_default() -> None: assert case_utils.guess_format(PATH_TO_JSONLD) == "json-ld", "Failed to recognize .jsonld RDF file extension" @pytest.mark.xfail(reason="Preserving behavior - rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", strict=True) -def test_case_utils_guess_format_jsonld_fmap(): +def test_case_utils_guess_format_jsonld_fmap() -> None: assert case_utils.guess_format(PATH_TO_JSONLD, FMAP_XHTML_GRDDL) == "json-ld", "Failed to recognize .jsonld RDF file extension when using fmap" diff --git a/tests/hexbinary/test_hexbinary.py b/tests/hexbinary/test_hexbinary.py index ad7d307..d2b96d5 100644 --- a/tests/hexbinary/test_hexbinary.py +++ b/tests/hexbinary/test_hexbinary.py @@ -48,6 +48,7 @@ import logging import os +import typing import pytest import rdflib.plugins.sparql # type: ignore @@ -64,7 +65,7 @@ n_uppercase1 = rdflib.URIRef("urn:example:uppercase1") p_predicate = rdflib.URIRef("urn:example:predicate1") -def test_sparql_syntax_bind_boolean(): +def test_sparql_syntax_bind_boolean() -> None: """ This test serves as a syntax reminder for binding boolean values. """ @@ -81,7 +82,7 @@ def test_sparql_syntax_bind_boolean(): assert confirmed @pytest.mark.xfail(reason="hard-coded failure") -def test_pytest_syntax_xfail(): +def test_pytest_syntax_xfail() -> None: """ This test serves as a syntax reminder for the XFail decorator. 
""" @@ -97,7 +98,7 @@ def test_pytest_syntax_xfail(): confirmed = l_value.toPython() assert confirmed -def test_sparql_syntax_integer_coercion(): +def test_sparql_syntax_integer_coercion() -> None: """ This test serves as a syntax reminder for type coercions. """ @@ -113,7 +114,7 @@ def test_sparql_syntax_integer_coercion(): confirmed = l_value.toPython() assert confirmed -def test_sparql_syntax_integer_cast(): +def test_sparql_syntax_integer_cast() -> None: """ This test serves as a syntax reminder for the casting form of type coercions. """ @@ -130,7 +131,7 @@ def test_sparql_syntax_integer_cast(): assert confirmed @pytest.mark.xfail -def test_sparql_cast_custom_type(): +def test_sparql_cast_custom_type() -> None: """ This test checks for nonexistent literal-datatype assignments. """ @@ -146,7 +147,7 @@ def test_sparql_cast_custom_type(): confirmed = l_value.toPython() assert confirmed -def test_sparql_compare_hexbinary_mixcase(): +def test_sparql_compare_hexbinary_mixcase() -> None: confirmed = None graph = rdflib.Graph() for result in graph.query("""\ @@ -159,7 +160,7 @@ def test_sparql_compare_hexbinary_mixcase(): confirmed = l_value.toPython() assert confirmed -def test_sparql_compare_hexbinary_matchcase(): +def test_sparql_compare_hexbinary_matchcase() -> None: confirmed = None graph = rdflib.Graph() for result in graph.query("""\ @@ -172,7 +173,7 @@ def test_sparql_compare_hexbinary_matchcase(): confirmed = l_value.toPython() assert confirmed -def test_sparql_compare_hexbinarycanonical_matchcase(): +def test_sparql_compare_hexbinarycanonical_matchcase() -> None: confirmed = None graph = rdflib.Graph() for result in graph.query("""\ @@ -186,7 +187,7 @@ def test_sparql_compare_hexbinarycanonical_matchcase(): assert confirmed @pytest.mark.xfail -def test_sparql_compare_hexbinarycanonical_mixcase(): +def test_sparql_compare_hexbinarycanonical_mixcase() -> None: """ This test shows hexBinaryCanonical does not induce a casing-insensitive comparison. 
""" @@ -203,7 +204,7 @@ def test_sparql_compare_hexbinarycanonical_mixcase(): assert confirmed @pytest.mark.xfail -def test_sparql_compare_hb_hbc_mixcase(): +def test_sparql_compare_hb_hbc_mixcase() -> None: """ This test confirms that literal-comparison takes into account datatype when one type is unknown. """ @@ -220,7 +221,7 @@ def test_sparql_compare_hb_hbc_mixcase(): assert confirmed @pytest.mark.xfail -def test_sparql_compare_hb_hbc_mixcase_cast(): +def test_sparql_compare_hb_hbc_mixcase_cast() -> None: """ This test is a bit redundant with test_sparql_cast_custom_type, but is here as an explicit demonstration of failure to cast a hexBinary value. """ @@ -236,7 +237,7 @@ def test_sparql_compare_hb_hbc_mixcase_cast(): confirmed = l_value.toPython() assert confirmed -def test_rdflib_literal_hexbinary(): +def test_rdflib_literal_hexbinary() -> None: _logger.debug("l_hb_lowercase = %r." % l_hb_lowercase) _logger.debug("l_hb_uppercase = %r." % l_hb_uppercase) _logger.debug("l_hb_lowercase.toPython() = %r." % l_hb_lowercase.toPython()) @@ -249,20 +250,20 @@ def test_rdflib_literal_hexbinary(): assert l_hb_lowercase.toPython() == l_hb_uppercase.toPython() @pytest.mark.xfail -def test_rdflib_literal_hexbinarycanonical(): +def test_rdflib_literal_hexbinarycanonical() -> None: _logger.debug("l_hb_uppercase = %r." % l_hb_uppercase) _logger.debug("l_hbc_uppercase = %r." % l_hbc_uppercase) assert l_hb_uppercase == l_hbc_uppercase @pytest.mark.xfail -def test_rdflib_literal_topython_hexbinarycanonical(): +def test_rdflib_literal_topython_hexbinarycanonical() -> None: _logger.debug("l_hb_lowercase.toPython() = %r." % l_hb_lowercase.toPython()) _logger.debug("l_hb_uppercase.toPython() = %r." 
% l_hb_uppercase.toPython()) assert l_hb_uppercase.toPython() == l_hbc_uppercase.toPython() -def _query_all_value_matches(graph): +def _query_all_value_matches(graph) -> typing.Set[str]: """ Return set of all node names (as strings) that have a matching value, where "matching" is determined by the SPARQL engine's type and data coercions. @@ -280,7 +281,7 @@ def _query_all_value_matches(graph): computed.add(n_node2.toPython()) return computed -def test_graph_repeat(): +def test_graph_repeat() -> None: """ Two nodes are given the same literal value, and are found to match on literal values. """ @@ -302,7 +303,7 @@ def test_graph_repeat(): computed = _query_all_value_matches(graph) assert computed == expected -def test_graph_all_hexbinary_literals(): +def test_graph_all_hexbinary_literals() -> None: """ Two nodes with the same literal value, and another node with the uppercase of the literal hexBinary value, are found to match on literal values. """ @@ -333,7 +334,7 @@ def test_graph_all_hexbinary_literals(): assert computed == expected @pytest.mark.xfail -def test_graph_hexbinarycanonical(): +def test_graph_hexbinarycanonical() -> None: graph = rdflib.Graph() graph.add(( n_lowercase1, diff --git a/tests/src/compact.py b/tests/src/compact.py index 82e0f92..6ca055a 100644 --- a/tests/src/compact.py +++ b/tests/src/compact.py @@ -28,7 +28,7 @@ _logger = logging.getLogger(os.path.basename(__file__)) -def main(): +def main() -> None: with open(args.out_json, "w") as out_fh: doc = None with open(args.in_json, "r") as in_fh: diff --git a/tests/src/glom_graph.py b/tests/src/glom_graph.py index 0962a80..0b4e5ad 100644 --- a/tests/src/glom_graph.py +++ b/tests/src/glom_graph.py @@ -21,7 +21,7 @@ import case_utils -def main(): +def main() -> None: g = rdflib.Graph() for in_graph in args.in_graph: g.parse(in_graph) diff --git a/tests/src/isomorphic_diff.py b/tests/src/isomorphic_diff.py index e770a10..08c3b3e 100644 --- a/tests/src/isomorphic_diff.py +++ 
b/tests/src/isomorphic_diff.py @@ -40,7 +40,7 @@ _logger = logging.getLogger(os.path.basename(__file__)) -def main(): +def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--debug", action="store_true") parser.add_argument("in_graph_1") From 97616b4d2c2e2a9788f9e2de49e0c12ff95a4b2c Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 7 Oct 2021 12:03:22 -0400 Subject: [PATCH 4/8] Add minimal type review to case_utils An observed behavior is that mypy will not perform type signature analysis until one is added in the call path, e.g. designating `def main() -> None`. This patch is the minimal set of effects of adding a None return type to unit test functions. No further changes needed. One test not committed is that, before this patch, this line could be put into a function (I chose the SPARQL selector's main()) without mypy complaining: x : str = 1 After requiring that function return a type, mypy appropriately raised an error. References: * [AC-211] Add static type checking to CASE-Utilities-Python Signed-off-by: Alex Nelson --- case_utils/case_file/__init__.py | 2 +- case_utils/case_sparql_construct/__init__.py | 2 +- case_utils/case_sparql_select/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index 369595d..de6d905 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -205,7 +205,7 @@ def create_file_node(graph, filepath, node_iri=None, node_prefix=DEFAULT_PREFIX, return n_file -def main(): +def main() -> None: import argparse parser = argparse.ArgumentParser() parser.add_argument("--base-prefix", default=DEFAULT_PREFIX) diff --git a/case_utils/case_sparql_construct/__init__.py b/case_utils/case_sparql_construct/__init__.py index dcd110b..797d625 100644 --- a/case_utils/case_sparql_construct/__init__.py +++ b/case_utils/case_sparql_construct/__init__.py @@ -27,7 +27,7 @@ _logger = 
logging.getLogger(os.path.basename(__file__)) -def main(): +def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("-d", "--debug", action="store_true") parser.add_argument("--disallow-empty-results", action="store_true", help="Raise error if no results are returned for query.") diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index e99d79b..2580b89 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -42,7 +42,7 @@ _logger = logging.getLogger(os.path.basename(__file__)) -def main(): +def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("-d", "--debug", action="store_true") parser.add_argument("--disallow-empty-results", action="store_true", help="Raise error if no results are returned for query.") From 43ff13072ea030e73d7f17e1ecebeefa8459f976 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 7 Oct 2021 12:23:47 -0400 Subject: [PATCH 5/8] Add type signatures to most of case_utils Some further signature work will come to case_file. References: * [AC-211] Add static type checking to CASE-Utilities-Python Signed-off-by: Alex Nelson --- case_utils/__init__.py | 6 +++++- case_utils/case_file/__init__.py | 3 ++- case_utils/case_sparql_construct/__init__.py | 3 ++- case_utils/local_uuid.py | 15 ++++++++------- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/case_utils/__init__.py b/case_utils/__init__.py index 275e665..e6bbc09 100644 --- a/case_utils/__init__.py +++ b/case_utils/__init__.py @@ -13,13 +13,17 @@ __version__ = "0.2.1" +import typing import warnings import rdflib.util # type: ignore from . import local_uuid -def guess_format(fpath, fmap=None): +def guess_format( + fpath : str, + fmap : typing.Optional[typing.Dict[str, str]] = None +) -> typing.Optional[str]: warnings.warn("The functionality in case_utils.guess_format is now upstream. Please revise your code to use rdflib.util.guess_format. 
The function arguments remain the same. case_utils.guess_format will be removed in case_utils 0.4.0.", DeprecationWarning) return rdflib.util.guess_format(fpath, fmap) diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index de6d905..395fa77 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -20,6 +20,7 @@ import datetime import hashlib import os +import typing import rdflib # type: ignore @@ -234,7 +235,7 @@ def main() -> None: else: output_format = args.output_format - serialize_kwargs = { + serialize_kwargs : typing.Dict[str, typing.Any] = { "format": output_format } if output_format == "json-ld": diff --git a/case_utils/case_sparql_construct/__init__.py b/case_utils/case_sparql_construct/__init__.py index 797d625..f53df06 100644 --- a/case_utils/case_sparql_construct/__init__.py +++ b/case_utils/case_sparql_construct/__init__.py @@ -20,6 +20,7 @@ import argparse import os import logging +import typing import rdflib.plugins.sparql # type: ignore @@ -74,7 +75,7 @@ def main() -> None: else: output_format = args.output_format - serialize_kwargs = { + serialize_kwargs : typing.Dict[str, typing.Any] = { "format": output_format } if output_format == "json-ld": diff --git a/case_utils/local_uuid.py b/case_utils/local_uuid.py index fea615b..c4cb5a1 100644 --- a/case_utils/local_uuid.py +++ b/case_utils/local_uuid.py @@ -21,17 +21,17 @@ import sys import uuid -USE_DEMO_UUID = False +USE_DEMO_UUID : bool = False -DEMO_UUID_COUNTER = 0 +DEMO_UUID_COUNTER : int = 0 -def configure(): +def configure() -> None: global USE_DEMO_UUID if os.getenv("DEMO_UUID_REQUESTING_NONRANDOM") == "NONRANDOM_REQUESTED": USE_DEMO_UUID = True -def demo_uuid(): +def demo_uuid() -> str: """ This function generates a repeatable UUID, drawing on non-varying elements of the environment and process call for entropy. 
@@ -52,16 +52,17 @@ def demo_uuid(): parts.append(str(DEMO_UUID_COUNTER)) # Component: Present working directory, replacing $HOME with '~'. - parts.append(os.getcwd().replace(os.getenv("HOME"), "~")) + env_HOME : str = os.getenv("HOME", "/nonexistent") + parts.append(os.getcwd().replace(env_HOME, "~")) # Component: Argument vector. parts.extend(sys.argv) return str(uuid.uuid5(uuid.NAMESPACE_URL, "/".join(parts))) -def local_uuid(): +def local_uuid() -> str: """ - Generate either a UUID4, or if requested via environment configuration, a non-random demo UUID. Returns a string. + Generate either a UUID4, or if requested via environment configuration, a non-random demo UUID. """ global USE_DEMO_UUID if USE_DEMO_UUID: From 8854561e57233dc6c32ce85968fc60dfa5091f3e Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 7 Oct 2021 12:46:22 -0400 Subject: [PATCH 6/8] Use warnings.warn instead of unused variable _logger After adding a type signature to create_file_node(), mypy complained that _logger was undefined. That was correct, as I'd copied and pasted that section from DFXML's walk_to_dfxml.py. Another issue around a multi-value-types dictionary will require a bigger patch before the type signature gets committed. References: * [AC-211] Add static type checking to CASE-Utilities-Python Signed-off-by: Alex Nelson --- case_utils/case_file/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index 395fa77..d373858 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -21,6 +21,7 @@ import hashlib import os import typing +import warnings import rdflib # type: ignore @@ -166,7 +167,7 @@ def create_file_node(graph, filepath, node_iri=None, node_prefix=DEFAULT_PREFIX, raise ValueError("Failed to confirm hashes of file %r." 
% filepath) if successful_hashdict["filesize"] != file_stat.st_size: # TODO - Discuss with AC whether this should be something stronger, like an assertion error. - _logger.warning( + warnings.warn( "Inode file size and hashed file sizes disagree: %d vs. %d.", file_stat.st_size, successful_hashdict["filesize"] From 7fb54ef3d535bd3f709b9e2d284b7257e838990a Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 7 Oct 2021 12:50:41 -0400 Subject: [PATCH 7/8] Add type signatures to case_utils.case_file.create_file_node This uses an alternative light-class definition style more focused on type signatures. The example origin is cited inline. References: * [AC-211] Add static type checking to CASE-Utilities-Python Signed-off-by: Alex Nelson --- case_utils/case_file/__init__.py | 55 ++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index d373858..a25481d 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -36,7 +36,24 @@ NS_UCO_VOCABULARY = rdflib.Namespace("https://unifiedcyberontology.org/ontology/uco/vocabulary#") NS_XSD = rdflib.XSD -def create_file_node(graph, filepath, node_iri=None, node_prefix=DEFAULT_PREFIX, disable_hashes=False, disable_mtime=False): +# Shortcut syntax for defining an immutable named tuple is noted here: +# https://docs.python.org/3/library/typing.html#typing.NamedTuple +# via the "See also" box here: https://docs.python.org/3/library/collections.html#collections.namedtuple +class HashDict(typing.NamedTuple): + filesize : int + md5 : str + sha1 : str + sha256 : str + sha512 : str + +def create_file_node( + graph : rdflib.Graph, + filepath : str, + node_iri : typing.Optional[str] = None, + node_prefix : str = DEFAULT_PREFIX, + disable_hashes : bool = False, + disable_mtime : bool = False +) -> rdflib.URIRef: r""" This function characterizes the file at filepath. 
@@ -121,10 +138,10 @@ def create_file_node(graph, filepath, node_iri=None, node_prefix=DEFAULT_PREFIX, )) # Compute hashes until they are re-computed and match once. (This is a lesson learned from working with a NAS that had a subtly faulty network cable.) - successful_hashdict = None - last_hashdict = dict() + + successful_hashdict : typing.Optional[HashDict] = None + last_hashdict : typing.Optional[HashDict] = None for attempt_no in [0, 1, 2, 3]: - current_hashdict = dict() # Hash file's contents. # This hashing logic was partially copied from DFXML's walk_to_dfxml.py. md5obj = hashlib.md5() @@ -132,9 +149,9 @@ def create_file_node(graph, filepath, node_iri=None, node_prefix=DEFAULT_PREFIX, sha256obj = hashlib.sha256() sha512obj = hashlib.sha512() stashed_error = None + byte_tally = 0 with open(filepath, "rb") as in_fh: chunk_size = 2**22 - byte_tally = 0 while True: buf = b"" try: @@ -149,13 +166,15 @@ def create_file_node(graph, filepath, node_iri=None, node_prefix=DEFAULT_PREFIX, sha1obj.update(buf) sha256obj.update(buf) sha512obj.update(buf) - current_hashdict["filesize"] = byte_tally if not stashed_error is None: raise stashed_error - current_hashdict["md5"] = md5obj.hexdigest() - current_hashdict["sha1"] = sha1obj.hexdigest() - current_hashdict["sha256"] = sha256obj.hexdigest() - current_hashdict["sha512"] = sha512obj.hexdigest() + current_hashdict = HashDict( + byte_tally, + md5obj.hexdigest(), + sha1obj.hexdigest(), + sha256obj.hexdigest(), + sha512obj.hexdigest() + ) if last_hashdict == current_hashdict: successful_hashdict = current_hashdict break @@ -165,22 +184,23 @@ def create_file_node(graph, filepath, node_iri=None, node_prefix=DEFAULT_PREFIX, del current_hashdict if successful_hashdict is None: raise ValueError("Failed to confirm hashes of file %r." 
% filepath) - if successful_hashdict["filesize"] != file_stat.st_size: + if successful_hashdict.filesize != file_stat.st_size: # TODO - Discuss with AC whether this should be something stronger, like an assertion error. warnings.warn( - "Inode file size and hashed file sizes disagree: %d vs. %d.", - file_stat.st_size, - successful_hashdict["filesize"] + "Inode file size and hashed file sizes disagree: %d vs. %d." % ( + file_stat.st_size, + successful_hashdict.filesize + ) ) # TODO - Discuss whether this property should be recorded even if hashes are not attempted. graph.add(( n_contentdata_facet, NS_UCO_OBSERVABLE.sizeInBytes, - rdflib.Literal(successful_hashdict["filesize"]) + rdflib.Literal(successful_hashdict.filesize) )) # Add confirmed hashes into graph. - for key in successful_hashdict: + for key in successful_hashdict._fields: if not key in ("md5", "sha1", "sha256", "sha512"): continue n_hash = rdflib.BNode() @@ -199,10 +219,11 @@ def create_file_node(graph, filepath, node_iri=None, node_prefix=DEFAULT_PREFIX, NS_UCO_TYPES.hashMethod, rdflib.Literal(key.upper(), datatype=NS_UCO_VOCABULARY.HashNameVocab) )) + hash_value = getattr(successful_hashdict, key) graph.add(( n_hash, NS_UCO_TYPES.hashValue, - rdflib.Literal(successful_hashdict[key].upper(), datatype=NS_XSD.hexBinary) + rdflib.Literal(hash_value.upper(), datatype=NS_XSD.hexBinary) )) return n_file From 42000c982e300cad0daf58b8234b8a9b0a968adf Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 7 Oct 2021 13:00:26 -0400 Subject: [PATCH 8/8] Add type signatures required by mypy --strict One more `# type: ignore` was added while awaiting rdflib PR #1407. These were all identified by adding the `--strict` flag to mypy for a run. I will leave it up for future discussion whether to use that flag, especially to wait for PR 1407 and to see if too much work would be induced versus runtime safety improvements. 
References: * [AC-211] Add static type checking to CASE-Utilities-Python * https://github.com/RDFLib/rdflib/pull/1407 Signed-off-by: Alex Nelson --- case_utils/__init__.py | 2 +- tests/case_file/test_case_file.py | 11 ++++++++--- .../test_case_sparql_construct.py | 4 +++- tests/hexbinary/test_hexbinary.py | 4 +++- tests/src/compact.py | 5 ++++- tests/src/isomorphic_diff.py | 5 ++++- 6 files changed, 23 insertions(+), 8 deletions(-) diff --git a/case_utils/__init__.py b/case_utils/__init__.py index e6bbc09..678e6e7 100644 --- a/case_utils/__init__.py +++ b/case_utils/__init__.py @@ -26,4 +26,4 @@ def guess_format( ) -> typing.Optional[str]: warnings.warn("The functionality in case_utils.guess_format is now upstream. Please revise your code to use rdflib.util.guess_format. The function arguments remain the same. case_utils.guess_format will be removed in case_utils 0.4.0.", DeprecationWarning) - return rdflib.util.guess_format(fpath, fmap) + return rdflib.util.guess_format(fpath, fmap) # type: ignore diff --git a/tests/case_file/test_case_file.py b/tests/case_file/test_case_file.py index 29cd749..acbad79 100644 --- a/tests/case_file/test_case_file.py +++ b/tests/case_file/test_case_file.py @@ -38,7 +38,7 @@ SRCDIR = os.path.dirname(__file__) def load_graph( - filename + filename : str ) -> rdflib.Graph: in_graph = rdflib.Graph() in_graph.parse(filename) @@ -52,7 +52,9 @@ def graph_case_file() -> rdflib.Graph: def graph_case_file_disable_hashes() -> rdflib.Graph: return load_graph(os.path.join(SRCDIR, "sample.txt-disable_hashes.ttl")) -def test_confirm_hashes(graph_case_file) -> None: +def test_confirm_hashes( + graph_case_file : rdflib.Graph +) -> None: expected = { "MD5": "098F6BCD4621D373CADE4E832627B4F6", "SHA1": "A94A8FE5CCB19BA61C4C0873D391E987982FBBD3", @@ -93,7 +95,10 @@ def test_confirm_hashes(graph_case_file) -> None: assert expected == computed -def test_confirm_mtime(graph_case_file, graph_case_file_disable_hashes) -> None: +def test_confirm_mtime( + 
graph_case_file : rdflib.Graph, + graph_case_file_disable_hashes : rdflib.Graph +) -> None: query_confirm_mtime = """ SELECT ?nFile WHERE { diff --git a/tests/case_sparql_construct/test_case_sparql_construct.py b/tests/case_sparql_construct/test_case_sparql_construct.py index eec5216..1d919c1 100644 --- a/tests/case_sparql_construct/test_case_sparql_construct.py +++ b/tests/case_sparql_construct/test_case_sparql_construct.py @@ -17,7 +17,9 @@ import case_utils -def _test_templates_with_blank_nodes_result(filename) -> None: +def _test_templates_with_blank_nodes_result( + filename : str +) -> None: ground_truth_positive = { ("Alice", "Hacker"), ("Bob", "Hacker") diff --git a/tests/hexbinary/test_hexbinary.py b/tests/hexbinary/test_hexbinary.py index d2b96d5..31188de 100644 --- a/tests/hexbinary/test_hexbinary.py +++ b/tests/hexbinary/test_hexbinary.py @@ -263,7 +263,9 @@ def test_rdflib_literal_topython_hexbinarycanonical() -> None: assert l_hb_uppercase.toPython() == l_hbc_uppercase.toPython() -def _query_all_value_matches(graph) -> typing.Set[str]: +def _query_all_value_matches( + graph : rdflib.Graph +) -> typing.Set[str]: """ Return set of all node names (as strings) that have a matching value, where "matching" is determined by the SPARQL engine's type and data coercions. diff --git a/tests/src/compact.py b/tests/src/compact.py index 6ca055a..f314004 100644 --- a/tests/src/compact.py +++ b/tests/src/compact.py @@ -23,6 +23,7 @@ import logging import os import json +import typing import pyld # type: ignore @@ -38,7 +39,9 @@ def main() -> None: # Grab the first occurrence of every key. 
total_context = dict() - def _accrue_local_context(doc_object): + def _accrue_local_context( + doc_object : typing.Dict[str, typing.Any] + ) -> None: local_context = doc_object.get("@context", dict()) for key in local_context.keys(): if not key in total_context: diff --git a/tests/src/isomorphic_diff.py b/tests/src/isomorphic_diff.py index 08c3b3e..7d0c7c7 100644 --- a/tests/src/isomorphic_diff.py +++ b/tests/src/isomorphic_diff.py @@ -70,7 +70,10 @@ def main() -> None: if i1 == i2: sys.exit(0) - def _report(diff_symbol, graph): + def _report( + diff_symbol : str, + graph : rdflib.Graph + ) -> None: """ This function copied in spirit from: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#module-rdflib.compare