From ddf7a3201406b68d25d2dc46d4d64bdbeb955801 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Mon, 21 Mar 2022 09:04:03 -0400 Subject: [PATCH 1/2] Add Python code formatting with Black as Make steps This patch adds two Make targets to the test environment so a manual format-review can be done: * Testing - `cd tests ; make check-black` * Formatting - `cd tests ; make format` Further discussion needs to be had before these are added to CI steps. A follow-on patch will apply Black, so mechanical effects can be separated from manual changes. References: * [AC-215] Evaluate pre-commit usage on casework repositories * [AC-216] Apply Black to all casework Python code bases Acked-by: kchason Signed-off-by: Alex Nelson --- tests/Makefile | 21 +++++++++++++++++++++ tests/requirements.txt | 1 + 2 files changed, 22 insertions(+) diff --git a/tests/Makefile b/tests/Makefile index 25fa731..3b0fe93 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -67,6 +67,17 @@ check: \ --ignore case_utils \ --log-level=DEBUG +# TODO - Need to settle on policy for incorporating this and 'format' recipe into CI. +# https://case.atlassian.net/browse/AC-215 +# https://case.atlassian.net/browse/AC-216 +check-black: \ + .venv.done.log + source venv/bin/activate \ + && black \ + --check \ + $(top_srcdir)/case_utils \ + $$PWD + check-case_utils: \ .venv.done.log $(MAKE) \ @@ -101,3 +112,13 @@ clean: download: \ .venv.done.log + +# TODO - Need to settle on policy for incorporating this and 'check-black' recipe into CI. +# https://case.atlassian.net/browse/AC-215 +# https://case.atlassian.net/browse/AC-216 +format: \ + .venv.done.log + source venv/bin/activate \ + && black \ + $(top_srcdir)/case_utils \ + $$PWD diff --git a/tests/requirements.txt b/tests/requirements.txt index f913d96..acd3ddc 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,5 @@ PyLD +black mypy pytest python-dateutil From 6b9093dd5976646abf7113bb64e5bd0e55d0744d Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Mon, 21 Mar 2022 09:07:16 -0400 Subject: [PATCH 2/2] Format Python code with Black References: * [AC-216] Apply Black to all casework Python code bases Acked-by: kchason Signed-off-by: Alex Nelson --- case_utils/__init__.py | 9 +- case_utils/case_file/__init__.py | 192 +++++++++--------- case_utils/case_sparql_construct/__init__.py | 52 +++-- case_utils/case_sparql_select/__init__.py | 56 +++-- case_utils/case_validate/__init__.py | 127 ++++++------ case_utils/local_uuid.py | 13 +- case_utils/ontology/__init__.py | 5 +- case_utils/ontology/src/subclasses_ttl.py | 10 +- case_utils/ontology/version_info.py | 8 +- tests/case_utils/case_file/test_case_file.py | 42 ++-- .../test_case_sparql_construct.py | 46 ++--- .../cli/test_format_output_flags.py | 62 ++++-- .../case_utils/ontology/test_version_info.py | 46 +++-- tests/case_utils/test_guess_format.py | 80 ++++++-- tests/hexbinary/test_hexbinary.py | 171 ++++++++-------- tests/src/compact.py | 13 +- tests/src/glom_graph.py | 3 + tests/src/isomorphic_diff.py | 29 ++- 18 files changed, 540 insertions(+), 424 deletions(-) diff --git a/case_utils/__init__.py b/case_utils/__init__.py index 8e770ec..1b2fc69 100644 --- a/case_utils/__init__.py +++ b/case_utils/__init__.py @@ -20,10 +20,13 @@ from . import local_uuid + def guess_format( - fpath : str, - fmap : typing.Optional[typing.Dict[str, str]] = None + fpath: str, fmap: typing.Optional[typing.Dict[str, str]] = None ) -> typing.Optional[str]: - warnings.warn("The functionality in case_utils.guess_format is now upstream. 
Please revise your code to use rdflib.util.guess_format. The function arguments remain the same. case_utils.guess_format will be removed in case_utils 0.4.0.", DeprecationWarning) + warnings.warn( + "The functionality in case_utils.guess_format is now upstream. Please revise your code to use rdflib.util.guess_format. The function arguments remain the same. case_utils.guess_format will be removed in case_utils 0.4.0.", + DeprecationWarning, + ) return rdflib.util.guess_format(fpath, fmap) # type: ignore diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index 1ad3ba3..c776c22 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -31,28 +31,33 @@ NS_RDF = rdflib.RDF NS_UCO_CORE = rdflib.Namespace("https://unifiedcyberontology.org/ontology/uco/core#") -NS_UCO_OBSERVABLE = rdflib.Namespace("https://unifiedcyberontology.org/ontology/uco/observable#") +NS_UCO_OBSERVABLE = rdflib.Namespace( + "https://unifiedcyberontology.org/ontology/uco/observable#" +) NS_UCO_TYPES = rdflib.Namespace("https://unifiedcyberontology.org/ontology/uco/types#") -NS_UCO_VOCABULARY = rdflib.Namespace("https://unifiedcyberontology.org/ontology/uco/vocabulary#") +NS_UCO_VOCABULARY = rdflib.Namespace( + "https://unifiedcyberontology.org/ontology/uco/vocabulary#" +) NS_XSD = rdflib.XSD # Shortcut syntax for defining an immutable named tuple is noted here: # https://docs.python.org/3/library/typing.html#typing.NamedTuple # via the "See also" box here: https://docs.python.org/3/library/collections.html#collections.namedtuple class HashDict(typing.NamedTuple): - filesize : int - md5 : str - sha1 : str - sha256 : str - sha512 : str + filesize: int + md5: str + sha1: str + sha256: str + sha512: str + def create_file_node( - graph : rdflib.Graph, - filepath : str, - node_iri : typing.Optional[str] = None, - node_prefix : str = DEFAULT_PREFIX, - disable_hashes : bool = False, - disable_mtime : bool = False + graph: rdflib.Graph, + filepath: str, + node_iri: typing.Optional[str] = None, + node_prefix: str = DEFAULT_PREFIX, + disable_hashes: bool = False, + disable_mtime: bool = False, ) -> rdflib.URIRef: r""" This function characterizes the file at filepath. 
@@ -82,65 +87,49 @@ def create_file_node( node_slug = "file-" + case_utils.local_uuid.local_uuid() node_iri = rdflib.Namespace(node_prefix)[node_slug] n_file = rdflib.URIRef(node_iri) - graph.add(( - n_file, - NS_RDF.type, - NS_UCO_OBSERVABLE.File - )) + graph.add((n_file, NS_RDF.type, NS_UCO_OBSERVABLE.File)) basename = os.path.basename(filepath) literal_basename = rdflib.Literal(basename) file_stat = os.stat(filepath) n_file_facet = rdflib.BNode() - graph.add(( - n_file_facet, - NS_RDF.type, - NS_UCO_OBSERVABLE.FileFacet, - )) - graph.add(( - n_file_facet, - NS_UCO_OBSERVABLE.fileName, - literal_basename - )) - graph.add(( - n_file_facet, - NS_UCO_OBSERVABLE.sizeInBytes, - rdflib.Literal(int(file_stat.st_size)) - )) - graph.add(( - n_file, - NS_UCO_CORE.hasFacet, - n_file_facet - )) + graph.add( + ( + n_file_facet, + NS_RDF.type, + NS_UCO_OBSERVABLE.FileFacet, + ) + ) + graph.add((n_file_facet, NS_UCO_OBSERVABLE.fileName, literal_basename)) + graph.add( + ( + n_file_facet, + NS_UCO_OBSERVABLE.sizeInBytes, + rdflib.Literal(int(file_stat.st_size)), + ) + ) + graph.add((n_file, NS_UCO_CORE.hasFacet, n_file_facet)) if not disable_mtime: - mtime_datetime = datetime.datetime.fromtimestamp(file_stat.st_mtime, tz=datetime.timezone.utc) + mtime_datetime = datetime.datetime.fromtimestamp( + file_stat.st_mtime, tz=datetime.timezone.utc + ) str_mtime = mtime_datetime.isoformat() literal_mtime = rdflib.Literal(str_mtime, datatype=NS_XSD.dateTime) - graph.add(( - n_file_facet, - NS_UCO_OBSERVABLE.modifiedTime, - literal_mtime - )) + graph.add((n_file_facet, NS_UCO_OBSERVABLE.modifiedTime, literal_mtime)) if not disable_hashes: n_contentdata_facet = rdflib.BNode() - graph.add(( - n_file, - NS_UCO_CORE.hasFacet, - n_contentdata_facet - )) - graph.add(( - n_contentdata_facet, - NS_RDF.type, - NS_UCO_OBSERVABLE.ContentDataFacet - )) + graph.add((n_file, NS_UCO_CORE.hasFacet, n_contentdata_facet)) + graph.add( + (n_contentdata_facet, NS_RDF.type, NS_UCO_OBSERVABLE.ContentDataFacet) + ) # Compute hashes until they are re-computed and match once. (This is a lesson learned from working with a NAS that had a subtly faulty network cable.) - successful_hashdict : typing.Optional[HashDict] = None - last_hashdict : typing.Optional[HashDict] = None + successful_hashdict: typing.Optional[HashDict] = None + last_hashdict: typing.Optional[HashDict] = None for attempt_no in [0, 1, 2, 3]: # Hash file's contents. # This hashing logic was partially copied from DFXML's walk_to_dfxml.py. @@ -169,11 +158,11 @@ def create_file_node( if not stashed_error is None: raise stashed_error current_hashdict = HashDict( - byte_tally, - md5obj.hexdigest(), - sha1obj.hexdigest(), - sha256obj.hexdigest(), - sha512obj.hexdigest() + byte_tally, + md5obj.hexdigest(), + sha1obj.hexdigest(), + sha256obj.hexdigest(), + sha512obj.hexdigest(), ) if last_hashdict == current_hashdict: successful_hashdict = current_hashdict @@ -187,54 +176,56 @@ def create_file_node( if successful_hashdict.filesize != file_stat.st_size: # TODO - Discuss with AC whether this should be something stronger, like an assertion error. warnings.warn( - "Inode file size and hashed file sizes disagree: %d vs. %d." % ( - file_stat.st_size, - successful_hashdict.filesize - ) + "Inode file size and hashed file sizes disagree: %d vs. %d." + % (file_stat.st_size, successful_hashdict.filesize) ) # TODO - Discuss whether this property should be recorded even if hashes are not attempted. 
- graph.add(( - n_contentdata_facet, - NS_UCO_OBSERVABLE.sizeInBytes, - rdflib.Literal(successful_hashdict.filesize) - )) + graph.add( + ( + n_contentdata_facet, + NS_UCO_OBSERVABLE.sizeInBytes, + rdflib.Literal(successful_hashdict.filesize), + ) + ) # Add confirmed hashes into graph. for key in successful_hashdict._fields: if not key in ("md5", "sha1", "sha256", "sha512"): continue n_hash = rdflib.BNode() - graph.add(( - n_contentdata_facet, - NS_UCO_OBSERVABLE.hash, - n_hash - )) - graph.add(( - n_hash, - NS_RDF.type, - NS_UCO_TYPES.Hash - )) - graph.add(( - n_hash, - NS_UCO_TYPES.hashMethod, - rdflib.Literal(key.upper(), datatype=NS_UCO_VOCABULARY.HashNameVocab) - )) + graph.add((n_contentdata_facet, NS_UCO_OBSERVABLE.hash, n_hash)) + graph.add((n_hash, NS_RDF.type, NS_UCO_TYPES.Hash)) + graph.add( + ( + n_hash, + NS_UCO_TYPES.hashMethod, + rdflib.Literal( + key.upper(), datatype=NS_UCO_VOCABULARY.HashNameVocab + ), + ) + ) hash_value = getattr(successful_hashdict, key) - graph.add(( - n_hash, - NS_UCO_TYPES.hashValue, - rdflib.Literal(hash_value.upper(), datatype=NS_XSD.hexBinary) - )) + graph.add( + ( + n_hash, + NS_UCO_TYPES.hashValue, + rdflib.Literal(hash_value.upper(), datatype=NS_XSD.hexBinary), + ) + ) return n_file + def main() -> None: import argparse + parser = argparse.ArgumentParser() parser.add_argument("--base-prefix", default=DEFAULT_PREFIX) parser.add_argument("--disable-hashes", action="store_true") parser.add_argument("--disable-mtime", action="store_true") - parser.add_argument("--output-format", help="Override extension-based format guesser.") + parser.add_argument( + "--output-format", help="Override extension-based format guesser." + ) parser.add_argument("out_graph") parser.add_argument("in_file") args = parser.parse_args() @@ -257,24 +248,23 @@ def main() -> None: else: output_format = args.output_format - serialize_kwargs : typing.Dict[str, typing.Any] = { - "format": output_format - } + serialize_kwargs: typing.Dict[str, typing.Any] = {"format": output_format} if output_format == "json-ld": - context_dictionary = {k:v for (k,v) in graph.namespace_manager.namespaces()} + context_dictionary = {k: v for (k, v) in graph.namespace_manager.namespaces()} serialize_kwargs["context"] = context_dictionary node_iri = NS_BASE["file-" + case_utils.local_uuid.local_uuid()] n_file = create_file_node( - graph, - args.in_file, - node_iri=node_iri, - node_prefix=args.base_prefix, - disable_hashes=args.disable_hashes, - disable_mtime=args.disable_mtime + graph, + args.in_file, + node_iri=node_iri, + node_prefix=args.base_prefix, + disable_hashes=args.disable_hashes, + disable_mtime=args.disable_mtime, ) graph.serialize(args.out_graph, **serialize_kwargs) + if __name__ == "__main__": main() diff --git a/case_utils/case_sparql_construct/__init__.py b/case_utils/case_sparql_construct/__init__.py index c35f60e..4b6b3eb 100644 --- a/case_utils/case_sparql_construct/__init__.py +++ b/case_utils/case_sparql_construct/__init__.py @@ -31,36 +31,39 @@ _logger = logging.getLogger(os.path.basename(__file__)) + def main() -> None: parser = argparse.ArgumentParser() # Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser. 
- logging.basicConfig(level=logging.DEBUG if ("--debug" in sys.argv or "-d" in sys.argv) else logging.INFO) + logging.basicConfig( + level=logging.DEBUG + if ("--debug" in sys.argv or "-d" in sys.argv) + else logging.INFO + ) built_version_choices_list = ["none", "case-" + CURRENT_CASE_VERSION] + parser.add_argument("-d", "--debug", action="store_true") parser.add_argument( - "-d", - "--debug", - action="store_true" + "--built-version", + choices=tuple(built_version_choices_list), + default="case-" + CURRENT_CASE_VERSION, + help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release.", ) parser.add_argument( - "--built-version", - choices=tuple(built_version_choices_list), - default="case-"+CURRENT_CASE_VERSION, - help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release." + "--disallow-empty-results", + action="store_true", + help="Raise error if no results are returned for query.", ) parser.add_argument( - "--disallow-empty-results", - action="store_true", - help="Raise error if no results are returned for query." + "--output-format", help="Override extension-based format guesser." ) + parser.add_argument("out_graph") parser.add_argument( - "--output-format", - help="Override extension-based format guesser." + "in_sparql", + help="File containing a SPARQL CONSTRUCT query. Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.", ) - parser.add_argument("out_graph") - parser.add_argument("in_sparql", help="File containing a SPARQL CONSTRUCT query. Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.") parser.add_argument("in_graph", nargs="+") args = parser.parse_args() @@ -72,7 +75,7 @@ def main() -> None: out_graph = rdflib.Graph() # Inherit prefixes defined in input context dictionary. 
- nsdict = {k:v for (k,v) in in_graph.namespace_manager.namespaces()} + nsdict = {k: v for (k, v) in in_graph.namespace_manager.namespaces()} for prefix in sorted(nsdict.keys()): out_graph.bind(prefix, nsdict[prefix]) @@ -83,9 +86,13 @@ def main() -> None: assert not construct_query_text is None if "subClassOf" in construct_query_text: - case_utils.ontology.load_subclass_hierarchy(in_graph, built_version=args.built_version) + case_utils.ontology.load_subclass_hierarchy( + in_graph, built_version=args.built_version + ) - construct_query_object = rdflib.plugins.sparql.prepareQuery(construct_query_text, initNs=nsdict) + construct_query_object = rdflib.plugins.sparql.prepareQuery( + construct_query_text, initNs=nsdict + ) # https://rdfextras.readthedocs.io/en/latest/working_with.html construct_query_result = in_graph.query(construct_query_object) @@ -102,14 +109,15 @@ def main() -> None: else: output_format = args.output_format - serialize_kwargs : typing.Dict[str, typing.Any] = { - "format": output_format - } + serialize_kwargs: typing.Dict[str, typing.Any] = {"format": output_format} if output_format == "json-ld": - context_dictionary = {k:v for (k,v) in out_graph.namespace_manager.namespaces()} + context_dictionary = { + k: v for (k, v) in out_graph.namespace_manager.namespaces() + } serialize_kwargs["context"] = context_dictionary out_graph.serialize(args.out_graph, **serialize_kwargs) + if __name__ == "__main__": main() diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index 57c7b54..23c4d2f 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -46,35 +46,39 @@ _logger = logging.getLogger(os.path.basename(__file__)) + def main() -> None: parser = argparse.ArgumentParser() # Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser. - logging.basicConfig(level=logging.DEBUG if ("--debug" in sys.argv or "-d" in sys.argv) else logging.INFO) + logging.basicConfig( + level=logging.DEBUG + if ("--debug" in sys.argv or "-d" in sys.argv) + else logging.INFO + ) built_version_choices_list = ["none", "case-" + CURRENT_CASE_VERSION] + parser.add_argument("-d", "--debug", action="store_true") parser.add_argument( - "-d", - "--debug", - action="store_true" + "--built-version", + choices=tuple(built_version_choices_list), + default="case-" + CURRENT_CASE_VERSION, + help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release.", ) parser.add_argument( - "--built-version", - choices=tuple(built_version_choices_list), - default="case-"+CURRENT_CASE_VERSION, - help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release." + "--disallow-empty-results", + action="store_true", + help="Raise error if no results are returned for query.", ) parser.add_argument( - "--disallow-empty-results", - action="store_true", - help="Raise error if no results are returned for query." + "out_table", + help="Expected extensions are .html for HTML tables or .md for Markdown tables.", ) parser.add_argument( - "out_table", - help="Expected extensions are .html for HTML tables or .md for Markdown tables." + "in_sparql", + help="File containing a SPARQL SELECT query. 
Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.", ) - parser.add_argument("in_sparql", help="File containing a SPARQL SELECT query. Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.") parser.add_argument("in_graph", nargs="+") args = parser.parse_args() @@ -83,7 +87,7 @@ def main() -> None: graph.parse(in_graph_filename) # Inherit prefixes defined in input context dictionary. - nsdict = {k:v for (k,v) in graph.namespace_manager.namespaces()} + nsdict = {k: v for (k, v) in graph.namespace_manager.namespaces()} select_query_text = None with open(args.in_sparql, "r") as in_fh: @@ -91,23 +95,32 @@ def main() -> None: _logger.debug("select_query_text = %r." % select_query_text) if "subClassOf" in select_query_text: - case_utils.ontology.load_subclass_hierarchy(graph, built_version=args.built_version) + case_utils.ontology.load_subclass_hierarchy( + graph, built_version=args.built_version + ) # Build columns list from SELECT line. select_query_text_lines = select_query_text.split("\n") - select_line = [line for line in select_query_text_lines if line.startswith("SELECT ")][0] + select_line = [ + line for line in select_query_text_lines if line.startswith("SELECT ") + ][0] variables = select_line.replace(" DISTINCT", "").replace("SELECT ", "").split(" ") tally = 0 records = [] - select_query_object = rdflib.plugins.sparql.prepareQuery(select_query_text, initNs=nsdict) + select_query_object = rdflib.plugins.sparql.prepareQuery( + select_query_text, initNs=nsdict + ) for (row_no, row) in enumerate(graph.query(select_query_object)): tally = row_no + 1 record = [] for (column_no, column) in enumerate(row): if column is None: column_value = "" - elif isinstance(column, rdflib.term.Literal) and column.datatype == NS_XSD.hexBinary: + elif ( + isinstance(column, rdflib.term.Literal) + and column.datatype == NS_XSD.hexBinary + ): # Use hexlify to convert xsd:hexBinary to ASCII. # The render to ASCII is in support of this script rendering results for website viewing. # .decode() is because hexlify returns bytes. @@ -135,10 +148,13 @@ def main() -> None: # Assume Github-flavored Markdown. table_text = df.to_markdown(tablefmt="github") if table_text is None: - raise NotImplementedError("Unsupported output extension for output filename %r.", args.out_table) + raise NotImplementedError( + "Unsupported output extension for output filename %r.", args.out_table + ) with open(args.out_table, "w") as out_fh: out_fh.write(table_text) + if __name__ == "__main__": main() diff --git a/case_utils/case_validate/__init__.py b/case_utils/case_validate/__init__.py index 97f7e66..e0a88c2 100644 --- a/case_utils/case_validate/__init__.py +++ b/case_utils/case_validate/__init__.py @@ -48,75 +48,79 @@ _logger = logging.getLogger(os.path.basename(__file__)) + def main() -> None: - parser = argparse.ArgumentParser(description="CASE wrapper to pySHACL command line tool.") + parser = argparse.ArgumentParser( + description="CASE wrapper to pySHACL command line tool." + ) # Configure debug logging before running parse_args, because there # could be an error raised before the construction of the argument # parser. 
- logging.basicConfig(level=logging.DEBUG if ("--debug" in sys.argv or "-d" in sys.argv) else logging.INFO) + logging.basicConfig( + level=logging.DEBUG + if ("--debug" in sys.argv or "-d" in sys.argv) + else logging.INFO + ) built_version_choices_list = ["none", "case-" + CURRENT_CASE_VERSION] # Add arguments specific to case_validate. parser.add_argument( - '-d', - '--debug', - action='store_true', - help='Output additional runtime messages.' + "-d", "--debug", action="store_true", help="Output additional runtime messages." ) parser.add_argument( - "--built-version", - choices=tuple(built_version_choices_list), - default="case-"+CURRENT_CASE_VERSION, - help="Monolithic aggregation of CASE ontology files at certain versions. Does not require networking to use. Default is most recent CASE release." + "--built-version", + choices=tuple(built_version_choices_list), + default="case-" + CURRENT_CASE_VERSION, + help="Monolithic aggregation of CASE ontology files at certain versions. Does not require networking to use. Default is most recent CASE release.", ) parser.add_argument( - "--ontology-graph", - action="append", - help="Combined ontology (i.e. subclass hierarchy) and shapes (SHACL) file, in any format accepted by rdflib recognized by file extension (e.g. .ttl). Will supplement ontology selected by --built-version. Can be given multiple times." + "--ontology-graph", + action="append", + help="Combined ontology (i.e. subclass hierarchy) and shapes (SHACL) file, in any format accepted by rdflib recognized by file extension (e.g. .ttl). Will supplement ontology selected by --built-version. Can be given multiple times.", ) # Inherit arguments from pyshacl. parser.add_argument( - '--abort', - action='store_true', - help='(As with pyshacl CLI) Abort on first invalid data.' + "--abort", + action="store_true", + help="(As with pyshacl CLI) Abort on first invalid data.", ) parser.add_argument( - '-w', - '--allow-warnings', - action='store_true', - help='(As with pyshacl CLI) Shapes marked with severity of Warning or Info will not cause result to be invalid.', + "-w", + "--allow-warnings", + action="store_true", + help="(As with pyshacl CLI) Shapes marked with severity of Warning or Info will not cause result to be invalid.", ) parser.add_argument( - "-f", - "--format", - choices=('human', 'turtle', 'xml', 'json-ld', 'nt', 'n3'), - default='human', - help="(ALMOST as with pyshacl CLI) Choose an output format. Default is \"human\". Difference: 'table' not provided." + "-f", + "--format", + choices=("human", "turtle", "xml", "json-ld", "nt", "n3"), + default="human", + help="(ALMOST as with pyshacl CLI) Choose an output format. Default is \"human\". Difference: 'table' not provided.", ) parser.add_argument( - '-im', - '--imports', - action='store_true', - help='(As with pyshacl CLI) Allow import of sub-graphs defined in statements with owl:imports.', + "-im", + "--imports", + action="store_true", + help="(As with pyshacl CLI) Allow import of sub-graphs defined in statements with owl:imports.", ) parser.add_argument( - '-i', - '--inference', - choices=('none', 'rdfs', 'owlrl', 'both'), - default='none', - help="(As with pyshacl CLI) Choose a type of inferencing to run against the Data Graph before validating. Default is \"none\".", + "-i", + "--inference", + choices=("none", "rdfs", "owlrl", "both"), + default="none", + help='(As with pyshacl CLI) Choose a type of inferencing to run against the Data Graph before validating. 
Default is "none".', ) parser.add_argument( - '-o', - '--output', - dest='output', - nargs='?', - type=argparse.FileType('x'), - help="(ALMOST as with pyshacl CLI) Send output to a file. If absent, output will be written to stdout. Difference: If specified, file is expected not to exist. Clarification: Does NOT influence --format flag's default value of \"human\". (I.e., any machine-readable serialization format must be specified with --format.)", - default=sys.stdout, + "-o", + "--output", + dest="output", + nargs="?", + type=argparse.FileType("x"), + help='(ALMOST as with pyshacl CLI) Send output to a file. If absent, output will be written to stdout. Difference: If specified, file is expected not to exist. Clarification: Does NOT influence --format flag\'s default value of "human". (I.e., any machine-readable serialization format must be specified with --format.)', + default=sys.stdout, ) parser.add_argument("in_graph", nargs="+") @@ -145,25 +149,23 @@ def main() -> None: # determination by output file extension. case_validate will defer # to pySHACL behavior, as other CASE tools don't (at the time of # this writing) have the value "human" as an output format. - validator_kwargs : typing.Dict[str, str] = dict() + validator_kwargs: typing.Dict[str, str] = dict() if args.format != "human": - validator_kwargs['serialize_report_graph'] = args.format + validator_kwargs["serialize_report_graph"] = args.format - validate_result : typing.Tuple[ - bool, - typing.Union[Exception, bytes, str, rdflib.Graph], - str + validate_result: typing.Tuple[ + bool, typing.Union[Exception, bytes, str, rdflib.Graph], str ] validate_result = pyshacl.validate( - data_graph, - shacl_graph=ontology_graph, - ont_graph=ontology_graph, - inference=args.inference, - abort_on_first=args.abort, - allow_warnings=True if args.allow_warnings else False, - debug=True if args.debug else False, - do_owl_imports=True if args.imports else False, - **validator_kwargs + data_graph, + shacl_graph=ontology_graph, + ont_graph=ontology_graph, + inference=args.inference, + abort_on_first=args.abort, + allow_warnings=True if args.allow_warnings else False, + debug=True if args.debug else False, + do_owl_imports=True if args.imports else False, + **validator_kwargs ) # Relieve RAM of the data graph after validation has run. @@ -179,15 +181,22 @@ def main() -> None: args.output.write(validation_text) else: if isinstance(validation_graph, rdflib.Graph): - raise NotImplementedError("rdflib.Graph expected not to be created from --format value %r." % args.format) + raise NotImplementedError( + "rdflib.Graph expected not to be created from --format value %r." + % args.format + ) elif isinstance(validation_graph, bytes): args.output.write(validation_graph.decode("utf-8")) elif isinstance(validation_graph, str): args.output.write(validation_graph) else: - raise NotImplementedError("Unexpected result type returned from validate: %r." % type(validation_graph)) + raise NotImplementedError( + "Unexpected result type returned from validate: %r." 
+ % type(validation_graph) + ) sys.exit(0 if conforms else 1) - + + if __name__ == "__main__": main() diff --git a/case_utils/local_uuid.py b/case_utils/local_uuid.py index f25daf3..7e526c0 100644 --- a/case_utils/local_uuid.py +++ b/case_utils/local_uuid.py @@ -21,9 +21,10 @@ import sys import uuid -USE_DEMO_UUID : bool = False +USE_DEMO_UUID: bool = False + +DEMO_UUID_COUNTER: int = 0 -DEMO_UUID_COUNTER : int = 0 def configure() -> None: global USE_DEMO_UUID @@ -31,6 +32,7 @@ def configure() -> None: if os.getenv("DEMO_UUID_REQUESTING_NONRANDOM") == "NONRANDOM_REQUESTED": USE_DEMO_UUID = True + def demo_uuid() -> str: """ This function generates a repeatable UUID, drawing on non-varying elements of the environment and process call for entropy. @@ -42,7 +44,9 @@ def demo_uuid() -> str: global DEMO_UUID_COUNTER if os.getenv("DEMO_UUID_REQUESTING_NONRANDOM") != "NONRANDOM_REQUESTED": - raise EnvironmentError("demo_uuid() called without DEMO_UUID_REQUESTING_NONRANDOM in environment.") + raise EnvironmentError( + "demo_uuid() called without DEMO_UUID_REQUESTING_NONRANDOM in environment." + ) # Component: An emphasis this is an example. parts = ["example.org"] @@ -52,7 +56,7 @@ def demo_uuid() -> str: parts.append(str(DEMO_UUID_COUNTER)) # Component: Present working directory, replacing $HOME with '~'. - env_HOME : str = os.getenv("HOME", "/nonexistent") + env_HOME: str = os.getenv("HOME", "/nonexistent") parts.append(os.getcwd().replace(env_HOME, "~")) # Component: Argument vector. @@ -60,6 +64,7 @@ def demo_uuid() -> str: return str(uuid.uuid5(uuid.NAMESPACE_URL, "/".join(parts))) + def local_uuid() -> str: """ Generate either a UUID4, or if requested via environment configuration, a non-random demo UUID. diff --git a/case_utils/ontology/__init__.py b/case_utils/ontology/__init__.py index eb14af5..7773fda 100644 --- a/case_utils/ontology/__init__.py +++ b/case_utils/ontology/__init__.py @@ -26,10 +26,9 @@ _logger = logging.getLogger(os.path.basename(__file__)) + def load_subclass_hierarchy( - graph : rdflib.Graph, - *, - built_version : str = "case-"+CURRENT_CASE_VERSION + graph: rdflib.Graph, *, built_version: str = "case-" + CURRENT_CASE_VERSION ) -> None: """ Adds all ontology rdfs:subClassOf statements from the version referred to by built_version. diff --git a/case_utils/ontology/src/subclasses_ttl.py b/case_utils/ontology/src/subclasses_ttl.py index 893d458..f7a25b5 100644 --- a/case_utils/ontology/src/subclasses_ttl.py +++ b/case_utils/ontology/src/subclasses_ttl.py @@ -21,6 +21,7 @@ import rdflib + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("out_ttl") @@ -30,18 +31,15 @@ def main() -> None: in_graph = rdflib.Graph() out_graph = rdflib.Graph() - in_ttl : str + in_ttl: str for in_ttl in args.in_ttl: in_graph.parse(in_ttl) - for triple in in_graph.triples(( - None, - rdflib.RDFS.subClassOf, - None - )): + for triple in in_graph.triples((None, rdflib.RDFS.subClassOf, None)): out_graph.add(triple) out_graph.serialize(args.out_ttl) + if __name__ == "__main__": main() diff --git a/case_utils/ontology/version_info.py b/case_utils/ontology/version_info.py index 5c8be8e..eba6089 100644 --- a/case_utils/ontology/version_info.py +++ b/case_utils/ontology/version_info.py @@ -21,15 +21,15 @@ __version__ = "0.1.0" -__all__ = [ - "CURRENT_CASE_VERSION" -] +__all__ = ["CURRENT_CASE_VERSION"] # Tested with CI to match versionInfo of . 
-CURRENT_CASE_VERSION : str = "0.5.0" +CURRENT_CASE_VERSION: str = "0.5.0" + def main() -> None: print(CURRENT_CASE_VERSION) + if __name__ == "__main__": main() diff --git a/tests/case_utils/case_file/test_case_file.py b/tests/case_utils/case_file/test_case_file.py index 19ba199..4eda521 100644 --- a/tests/case_utils/case_file/test_case_file.py +++ b/tests/case_utils/case_file/test_case_file.py @@ -32,38 +32,38 @@ NS_UCO_TYPES = rdflib.Namespace(IRI_UCO_TYPES) NSDICT = { - "uco-core": IRI_UCO_CORE, - "uco-observable": IRI_UCO_OBSERVABLE, - "uco-types": IRI_UCO_TYPES + "uco-core": IRI_UCO_CORE, + "uco-observable": IRI_UCO_OBSERVABLE, + "uco-types": IRI_UCO_TYPES, } SRCDIR = os.path.dirname(__file__) -def load_graph( - filename : str -) -> rdflib.Graph: + +def load_graph(filename: str) -> rdflib.Graph: in_graph = rdflib.Graph() in_graph.parse(filename) # The queries in this test rely on the subclass hierarchy. Load it. case_utils.ontology.load_subclass_hierarchy(in_graph) return in_graph + @pytest.fixture def graph_case_file() -> rdflib.Graph: return load_graph(os.path.join(SRCDIR, "sample.txt.ttl")) + @pytest.fixture def graph_case_file_disable_hashes() -> rdflib.Graph: return load_graph(os.path.join(SRCDIR, "sample.txt-disable_hashes.ttl")) -def test_confirm_hashes( - graph_case_file : rdflib.Graph -) -> None: + +def test_confirm_hashes(graph_case_file: rdflib.Graph) -> None: expected = { - "MD5": "098F6BCD4621D373CADE4E832627B4F6", - "SHA1": "A94A8FE5CCB19BA61C4C0873D391E987982FBBD3", - "SHA256": "9F86D081884C7D659A2FEAA0C55AD015A3BF4F1B2B0B822CD15D6C15B0F00A08", - "SHA512": "EE26B0DD4AF7E749AA1A8EE3C10AE9923F618980772E473F8819A5D4940E0DB27AC185F8A0E1D5F84F88BC887FD67B143732C304CC5FA9AD8E6F57F50028A8FF" + "MD5": "098F6BCD4621D373CADE4E832627B4F6", + "SHA1": "A94A8FE5CCB19BA61C4C0873D391E987982FBBD3", + "SHA256": "9F86D081884C7D659A2FEAA0C55AD015A3BF4F1B2B0B822CD15D6C15B0F00A08", + "SHA512": "EE26B0DD4AF7E749AA1A8EE3C10AE9923F618980772E473F8819A5D4940E0DB27AC185F8A0E1D5F84F88BC887FD67B143732C304CC5FA9AD8E6F57F50028A8FF", } computed = dict() @@ -99,9 +99,9 @@ def test_confirm_hashes( assert expected == computed + def test_confirm_mtime( - graph_case_file : rdflib.Graph, - graph_case_file_disable_hashes : rdflib.Graph + graph_case_file: rdflib.Graph, graph_case_file_disable_hashes: rdflib.Graph ) -> None: query_confirm_mtime = """ SELECT ?nFile @@ -117,14 +117,20 @@ def test_confirm_mtime( . } """ - query_object = rdflib.plugins.sparql.prepareQuery(query_confirm_mtime, initNs=NSDICT) + query_object = rdflib.plugins.sparql.prepareQuery( + query_confirm_mtime, initNs=NSDICT + ) n_observable_object = None for result in graph_case_file_disable_hashes.query(query_confirm_mtime): (n_observable_object,) = result - assert not n_observable_object is None, "File object with expected mtime not found in hashless graph." + assert ( + not n_observable_object is None + ), "File object with expected mtime not found in hashless graph." n_observable_object = None for result in graph_case_file.query(query_confirm_mtime): (n_observable_object,) = result - assert not n_observable_object is None, "File object with expected mtime not found in fuller graph." + assert ( + not n_observable_object is None + ), "File object with expected mtime not found in fuller graph." 
diff --git a/tests/case_utils/case_sparql_construct/test_case_sparql_construct.py b/tests/case_utils/case_sparql_construct/test_case_sparql_construct.py index b111d9e..f217ff0 100644 --- a/tests/case_utils/case_sparql_construct/test_case_sparql_construct.py +++ b/tests/case_utils/case_sparql_construct/test_case_sparql_construct.py @@ -17,11 +17,9 @@ import case_utils -def _test_subclass_templates_result( - filename : str, - expected : typing.Set[str] -) -> None: - computed : typing.Set[str] = set() + +def _test_subclass_templates_result(filename: str, expected: typing.Set[str]) -> None: + computed: typing.Set[str] = set() graph = rdflib.Graph() graph.parse(filename) @@ -38,14 +36,10 @@ def _test_subclass_templates_result( computed.add(n_entity.toPython()) assert expected == computed -def _test_w3_templates_with_blank_nodes_result( - filename : str -) -> None: - ground_truth_positive = { - ("Alice", "Hacker"), - ("Bob", "Hacker") - } - ground_truth_negative : typing.Set[str] = set() + +def _test_w3_templates_with_blank_nodes_result(filename: str) -> None: + ground_truth_positive = {("Alice", "Hacker"), ("Bob", "Hacker")} + ground_truth_negative: typing.Set[str] = set() graph = rdflib.Graph() graph.parse(filename) @@ -63,35 +57,27 @@ def _test_w3_templates_with_blank_nodes_result( } """ for result in graph.query(query_string): - ( - l_given_name, - l_family_name - ) = result - computed.add(( - l_given_name.toPython(), - l_family_name.toPython() - )) + (l_given_name, l_family_name) = result + computed.add((l_given_name.toPython(), l_family_name.toPython())) assert computed == ground_truth_positive + def test_w3_templates_with_blank_nodes_result_json() -> None: _test_w3_templates_with_blank_nodes_result("w3-output.json") + def test_w3_templates_with_blank_nodes_result_turtle() -> None: _test_w3_templates_with_blank_nodes_result("w3-output.ttl") + def test_subclass_templates_result_default_case() -> None: _test_subclass_templates_result( - "subclass-implicit-any.ttl", - { - "http://example.org/kb/file-1", - "http://example.org/kb/file-2" - } + "subclass-implicit-any.ttl", + {"http://example.org/kb/file-1", "http://example.org/kb/file-2"}, ) + def test_subclass_templates_result_no_case() -> None: _test_subclass_templates_result( - "subclass-explicit-none.ttl", - { - "http://example.org/kb/file-1" - } + "subclass-explicit-none.ttl", {"http://example.org/kb/file-1"} ) diff --git a/tests/case_utils/case_validate/cli/test_format_output_flags.py b/tests/case_utils/case_validate/cli/test_format_output_flags.py index 56e4d1e..f64d300 100644 --- a/tests/case_utils/case_validate/cli/test_format_output_flags.py +++ b/tests/case_utils/case_validate/cli/test_format_output_flags.py @@ -25,117 +25,147 @@ Conforms: True """.strip() -def _guess_format( - basename -) -> typing.Optional[str]: + +def _guess_format(basename) -> typing.Optional[str]: """ Guess format by file extension. 
""" filepath = srcdir / basename return rdflib.util.guess_format(str(filepath)) -def _parse_graph( - basename : str, - asserted_format : str -) -> rdflib.Graph: + +def _parse_graph(basename: str, asserted_format: str) -> rdflib.Graph: graph = rdflib.Graph() filepath = srcdir / basename graph.parse(str(filepath), format=asserted_format) return graph -def _verify_plaintext_report( - basename: str -) -> None: + +def _verify_plaintext_report(basename: str) -> None: filepath = srcdir / basename with filepath.open("r") as fh: assert PLAINTEXT_VALIDATION_PASS == fh.read(50)[:-1] -@pytest.mark.xfail(reason="Known mismatch", raises=json.decoder.JSONDecodeError, strict=True) + +@pytest.mark.xfail( + reason="Known mismatch", raises=json.decoder.JSONDecodeError, strict=True +) def test_format_human_output_jsonld() -> None: subject_file = "format_human_output_jsonld.jsonld" asserted_format = _guess_format(subject_file) assert asserted_format == "json-ld" _parse_graph(subject_file, asserted_format) -@pytest.mark.xfail(reason="Known mismatch", raises=rdflib.plugins.parsers.notation3.BadSyntax, strict=True) + +@pytest.mark.xfail( + reason="Known mismatch", + raises=rdflib.plugins.parsers.notation3.BadSyntax, + strict=True, +) def test_format_human_output_turtle() -> None: subject_file = "format_human_output_turtle.ttl" asserted_format = _guess_format(subject_file) assert asserted_format == "turtle" _parse_graph(subject_file, asserted_format) + def test_format_human_output_txt() -> None: _verify_plaintext_report("format_human_output_txt.txt") + def test_format_human_output_unspecified() -> None: _verify_plaintext_report("format_human_output_unspecified.txt") + def test_format_jsonld_output_jsonld() -> None: subject_file = "format_jsonld_output_jsonld.jsonld" asserted_format = _guess_format(subject_file) assert asserted_format == "json-ld" graph = _parse_graph(subject_file, asserted_format) -@pytest.mark.xfail(reason="Known mismatch", raises=rdflib.plugins.parsers.notation3.BadSyntax, strict=True) + +@pytest.mark.xfail( + reason="Known mismatch", + raises=rdflib.plugins.parsers.notation3.BadSyntax, + strict=True, +) def test_format_jsonld_output_turtle() -> None: subject_file = "format_jsonld_output_turtle.ttl" asserted_format = _guess_format(subject_file) assert asserted_format == "turtle" _parse_graph(subject_file, asserted_format) + def test_format_jsonld_output_txt() -> None: subject_file = "format_jsonld_output_txt.txt" asserted_format = _guess_format(subject_file) assert asserted_format is None _parse_graph(subject_file, "json-ld") + def test_format_jsonld_output_unspecified() -> None: subject_file = "format_jsonld_output_unspecified.jsonld" asserted_format = _guess_format(subject_file) assert asserted_format == "json-ld" graph = _parse_graph(subject_file, asserted_format) -@pytest.mark.xfail(reason="Known mismatch", raises=json.decoder.JSONDecodeError, strict=True) + +@pytest.mark.xfail( + reason="Known mismatch", raises=json.decoder.JSONDecodeError, strict=True +) def test_format_turtle_output_jsonld() -> None: subject_file = "format_turtle_output_jsonld.jsonld" asserted_format = _guess_format(subject_file) assert asserted_format == "json-ld" _parse_graph(subject_file, asserted_format) + def test_format_turtle_output_turtle() -> None: subject_file = "format_turtle_output_turtle.ttl" asserted_format = _guess_format(subject_file) assert asserted_format == "turtle" graph = _parse_graph(subject_file, asserted_format) + def test_format_turtle_output_txt() -> None: subject_file = 
"format_turtle_output_txt.txt" asserted_format = _guess_format(subject_file) assert asserted_format is None _parse_graph(subject_file, "turtle") + def test_format_turtle_output_unspecified() -> None: subject_file = "format_turtle_output_unspecified.ttl" asserted_format = _guess_format(subject_file) assert asserted_format == "turtle" graph = _parse_graph(subject_file, asserted_format) -@pytest.mark.xfail(reason="Known mismatch", raises=json.decoder.JSONDecodeError, strict=True) + +@pytest.mark.xfail( + reason="Known mismatch", raises=json.decoder.JSONDecodeError, strict=True +) def test_format_unspecified_output_jsonld() -> None: subject_file = "format_unspecified_output_jsonld.jsonld" asserted_format = _guess_format(subject_file) assert asserted_format == "json-ld" _parse_graph(subject_file, asserted_format) -@pytest.mark.xfail(reason="Known mismatch", raises=rdflib.plugins.parsers.notation3.BadSyntax, strict=True) + +@pytest.mark.xfail( + reason="Known mismatch", + raises=rdflib.plugins.parsers.notation3.BadSyntax, + strict=True, +) def test_format_unspecified_output_turtle() -> None: subject_file = "format_unspecified_output_turtle.ttl" asserted_format = _guess_format(subject_file) assert asserted_format == "turtle" _parse_graph(subject_file, asserted_format) + def test_format_unspecified_output_txt() -> None: _verify_plaintext_report("format_unspecified_output_txt.txt") + def test_format_unspecified_output_unspecified() -> None: _verify_plaintext_report("format_unspecified_output_unspecified.txt") diff --git a/tests/case_utils/ontology/test_version_info.py b/tests/case_utils/ontology/test_version_info.py index ebf58a8..fc53460 100644 --- a/tests/case_utils/ontology/test_version_info.py +++ b/tests/case_utils/ontology/test_version_info.py @@ -23,6 +23,7 @@ NS_OWL = rdflib.OWL + def test_case_ontology_version_info_versus_monolithic() -> None: ontology_graph = rdflib.Graph() @@ -30,34 +31,47 @@ def test_case_ontology_version_info_versus_monolithic() -> None: ttl_data = importlib.resources.read_text(case_utils.ontology, ttl_filename) ontology_graph.parse(data=ttl_data) - version_info : typing.Optional[str] = None - for triple in ontology_graph.triples(( - rdflib.URIRef("https://ontology.caseontology.org/case/case"), - NS_OWL.versionInfo, - None - )): + version_info: typing.Optional[str] = None + for triple in ontology_graph.triples( + ( + rdflib.URIRef("https://ontology.caseontology.org/case/case"), + NS_OWL.versionInfo, + None, + ) + ): version_info = str(triple[2]) assert not version_info is None, "Failed to retrieve owl:versionInfo" - assert CURRENT_CASE_VERSION == version_info, "Version recorded in case_utils.ontology.version_info does not match built ontology" + assert ( + CURRENT_CASE_VERSION == version_info + ), "Version recorded in case_utils.ontology.version_info does not match built ontology" + def test_case_ontology_version_info_versus_submodule() -> None: ontology_graph = rdflib.Graph() top_srcdir = pathlib.Path(__file__).parent / ".." / ".." / ".." 
- assert (top_srcdir / ".gitmodules").exists(), "Hard-coded path to top_srcdir no longer correct" + assert ( + top_srcdir / ".gitmodules" + ).exists(), "Hard-coded path to top_srcdir no longer correct" - ttl_filepath = top_srcdir / "dependencies" / "CASE" / "ontology" / "master" / "case.ttl" + ttl_filepath = ( + top_srcdir / "dependencies" / "CASE" / "ontology" / "master" / "case.ttl" + ) ontology_graph.parse(str(ttl_filepath)) - version_info : typing.Optional[str] = None - for triple in ontology_graph.triples(( - rdflib.URIRef("https://ontology.caseontology.org/case/case"), - NS_OWL.versionInfo, - None - )): + version_info: typing.Optional[str] = None + for triple in ontology_graph.triples( + ( + rdflib.URIRef("https://ontology.caseontology.org/case/case"), + NS_OWL.versionInfo, + None, + ) + ): version_info = str(triple[2]) assert not version_info is None, "Failed to retrieve owl:versionInfo" - assert CURRENT_CASE_VERSION == version_info, "Version recorded in case_utils.ontology.version_info does not match tracked ontology" + assert ( + CURRENT_CASE_VERSION == version_info + ), "Version recorded in case_utils.ontology.version_info does not match tracked ontology" diff --git a/tests/case_utils/test_guess_format.py b/tests/case_utils/test_guess_format.py index 3cb4261..35727bd 100644 --- a/tests/case_utils/test_guess_format.py +++ b/tests/case_utils/test_guess_format.py @@ -22,45 +22,93 @@ PATH_TO_XHTML = "/nonexistent/foo.xhtml" FMAP_XHTML_GRDDL = {"xhtml": "grddl"} + def test_rdflib_util_guess_format_xhtml_default() -> None: - assert rdflib.util.guess_format(PATH_TO_XHTML) == "rdfa", "Failed to reproduce rdflib.util.guess_format test" + assert ( + rdflib.util.guess_format(PATH_TO_XHTML) == "rdfa" + ), "Failed to reproduce rdflib.util.guess_format test" + def test_rdflib_util_guess_format_xhtml_fmap() -> None: """ This test implements one of the documented demonstrations in rdflib.util.guess_format. 
""" - assert rdflib.util.guess_format(PATH_TO_XHTML, FMAP_XHTML_GRDDL) == "grddl", "Failed to reproduce rdflib.util.guess_format test" + assert ( + rdflib.util.guess_format(PATH_TO_XHTML, FMAP_XHTML_GRDDL) == "grddl" + ), "Failed to reproduce rdflib.util.guess_format test" + def test_rdflib_util_guess_format_ttl_default() -> None: - assert rdflib.util.guess_format(PATH_TO_TTL) == "turtle", "Failed to recognize .ttl RDF file extension" + assert ( + rdflib.util.guess_format(PATH_TO_TTL) == "turtle" + ), "Failed to recognize .ttl RDF file extension" -@pytest.mark.xfail(reason="rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", strict=True) + +@pytest.mark.xfail( + reason="rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", + strict=True, +) def test_rdflib_util_guess_format_ttl_fmap() -> None: - assert rdflib.util.guess_format(PATH_TO_TTL, FMAP_XHTML_GRDDL) == "turtle", "Failed to recognize .ttl RDF file extension when using fmap" + assert ( + rdflib.util.guess_format(PATH_TO_TTL, FMAP_XHTML_GRDDL) == "turtle" + ), "Failed to recognize .ttl RDF file extension when using fmap" + def test_rdflib_util_guess_format_json() -> None: - assert rdflib.util.guess_format(PATH_TO_JSON) == "json-ld", "Failed to recognize .json RDF file extension" + assert ( + rdflib.util.guess_format(PATH_TO_JSON) == "json-ld" + ), "Failed to recognize .json RDF file extension" + def test_rdflib_util_guess_format_jsonld() -> None: - assert rdflib.util.guess_format(PATH_TO_JSONLD) == "json-ld", "Failed to recognize .jsonld RDF file extension" + assert ( + rdflib.util.guess_format(PATH_TO_JSONLD) == "json-ld" + ), "Failed to recognize .jsonld RDF file extension" + def test_case_utils_guess_format_ttl_default() -> None: - assert case_utils.guess_format(PATH_TO_TTL) == "turtle", "Failed to recognize .ttl RDF file extension" + assert ( + case_utils.guess_format(PATH_TO_TTL) == "turtle" + ), "Failed to recognize .ttl RDF file extension" + -@pytest.mark.xfail(reason="Preserving behavior - rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", strict=True) +@pytest.mark.xfail( + reason="Preserving behavior - rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", + strict=True, +) def test_case_utils_guess_format_ttl_fmap() -> None: - assert case_utils.guess_format(PATH_TO_TTL, FMAP_XHTML_GRDDL) == "turtle", "Failed to recognize .ttl RDF file extension when using fmap" + assert ( + case_utils.guess_format(PATH_TO_TTL, FMAP_XHTML_GRDDL) == "turtle" + ), "Failed to recognize .ttl RDF file extension when using fmap" + def test_case_utils_guess_format_json_default() -> None: - assert case_utils.guess_format(PATH_TO_JSON) == "json-ld", "Failed to recognize .json RDF file extension" + assert ( + case_utils.guess_format(PATH_TO_JSON) == "json-ld" + ), "Failed to recognize .json RDF file extension" -@pytest.mark.xfail(reason="Preserving behavior - rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", strict=True) + +@pytest.mark.xfail( + reason="Preserving behavior - rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", + strict=True, +) def test_case_utils_guess_format_json_fmap() -> None: - assert case_utils.guess_format(PATH_TO_JSON, FMAP_XHTML_GRDDL) == "json-ld", "Failed to recognize .json RDF file extension when using fmap" + assert ( + case_utils.guess_format(PATH_TO_JSON, FMAP_XHTML_GRDDL) == "json-ld" + ), "Failed to recognize .json RDF file extension when using fmap" 
+ def test_case_utils_guess_format_jsonld_default() -> None: - assert case_utils.guess_format(PATH_TO_JSONLD) == "json-ld", "Failed to recognize .jsonld RDF file extension" + assert ( + case_utils.guess_format(PATH_TO_JSONLD) == "json-ld" + ), "Failed to recognize .jsonld RDF file extension" + -@pytest.mark.xfail(reason="Preserving behavior - rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", strict=True) +@pytest.mark.xfail( + reason="Preserving behavior - rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", + strict=True, +) def test_case_utils_guess_format_jsonld_fmap() -> None: - assert case_utils.guess_format(PATH_TO_JSONLD, FMAP_XHTML_GRDDL) == "json-ld", "Failed to recognize .jsonld RDF file extension when using fmap" + assert ( + case_utils.guess_format(PATH_TO_JSONLD, FMAP_XHTML_GRDDL) == "json-ld" + ), "Failed to recognize .jsonld RDF file extension when using fmap" diff --git a/tests/hexbinary/test_hexbinary.py b/tests/hexbinary/test_hexbinary.py index 31188de..6b63175 100644 --- a/tests/hexbinary/test_hexbinary.py +++ b/tests/hexbinary/test_hexbinary.py @@ -65,22 +65,26 @@ n_uppercase1 = rdflib.URIRef("urn:example:uppercase1") p_predicate = rdflib.URIRef("urn:example:predicate1") + def test_sparql_syntax_bind_boolean() -> None: """ This test serves as a syntax reminder for binding boolean values. """ confirmed = None graph = rdflib.Graph() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?lValue WHERE { BIND( 1 = 1 AS ?lValue ) } -"""): +""" + ): (l_value,) = result confirmed = l_value.toPython() assert confirmed + @pytest.mark.xfail(reason="hard-coded failure") def test_pytest_syntax_xfail() -> None: """ @@ -88,48 +92,57 @@ def test_pytest_syntax_xfail() -> None: """ confirmed = None graph = rdflib.Graph() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?lValue WHERE { BIND( 1 = 2 AS ?lValue ) } -"""): +""" + ): (l_value,) = result confirmed = l_value.toPython() assert confirmed + def test_sparql_syntax_integer_coercion() -> None: """ This test serves as a syntax reminder for type coercions. """ confirmed = None graph = rdflib.Graph() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?lValue WHERE { BIND( 1 = "1"^^xsd:integer AS ?lValue ) } -"""): +""" + ): (l_value,) = result confirmed = l_value.toPython() assert confirmed + def test_sparql_syntax_integer_cast() -> None: """ This test serves as a syntax reminder for the casting form of type coercions. 
""" confirmed = None graph = rdflib.Graph() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?lValue WHERE { BIND( 1 = xsd:integer("1") AS ?lValue ) } -"""): +""" + ): (l_value,) = result confirmed = l_value.toPython() assert confirmed + @pytest.mark.xfail def test_sparql_cast_custom_type() -> None: """ @@ -137,55 +150,67 @@ def test_sparql_cast_custom_type() -> None: """ confirmed = None graph = rdflib.Graph() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?lValue WHERE { BIND( 1 = xsd:integer("1"^^xsd:hexBinaryTypoXXXX) AS ?lValue ) } -"""): +""" + ): (l_value,) = result confirmed = l_value.toPython() assert confirmed + def test_sparql_compare_hexbinary_mixcase() -> None: confirmed = None graph = rdflib.Graph() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?lValue WHERE { BIND( "ab"^^xsd:hexBinary = "AB"^^xsd:hexBinary AS ?lValue ) } -"""): +""" + ): (l_value,) = result confirmed = l_value.toPython() assert confirmed + def test_sparql_compare_hexbinary_matchcase() -> None: confirmed = None graph = rdflib.Graph() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?lValue WHERE { BIND( "AB"^^xsd:hexBinary = "AB"^^xsd:hexBinary AS ?lValue ) } -"""): +""" + ): (l_value,) = result confirmed = l_value.toPython() assert confirmed + def test_sparql_compare_hexbinarycanonical_matchcase() -> None: confirmed = None graph = rdflib.Graph() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?lValue WHERE { BIND( "AB"^^xsd:hexBinaryCanonical = "AB"^^xsd:hexBinaryCanonical AS ?lValue ) } -"""): +""" + ): (l_value,) = result confirmed = l_value.toPython() assert confirmed + @pytest.mark.xfail def test_sparql_compare_hexbinarycanonical_mixcase() -> None: """ @@ -193,16 +218,19 @@ def test_sparql_compare_hexbinarycanonical_mixcase() -> None: """ confirmed = None graph = rdflib.Graph() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?lValue WHERE { BIND( "ab"^^xsd:hexBinaryCanonical = "AB"^^xsd:hexBinaryCanonical AS ?lValue ) } -"""): +""" + ): (l_value,) = result confirmed = l_value.toPython() assert confirmed + @pytest.mark.xfail def test_sparql_compare_hb_hbc_mixcase() -> None: """ @@ -210,16 +238,19 @@ def test_sparql_compare_hb_hbc_mixcase() -> None: """ confirmed = None graph = rdflib.Graph() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?lValue WHERE { BIND( "AB"^^xsd:hexBinary = "AB"^^xsd:hexBinaryCanonical AS ?lValue ) } -"""): +""" + ): (l_value,) = result confirmed = l_value.toPython() assert confirmed + @pytest.mark.xfail def test_sparql_compare_hb_hbc_mixcase_cast() -> None: """ @@ -227,28 +258,32 @@ def test_sparql_compare_hb_hbc_mixcase_cast() -> None: """ confirmed = None graph = rdflib.Graph() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?lValue WHERE { BIND( "ab"^^xsd:hexBinary = xsd:hexBinary("AB"^^xsd:hexBinaryCanonical) AS ?lValue ) } -"""): +""" + ): (l_value,) = result confirmed = l_value.toPython() assert confirmed + def test_rdflib_literal_hexbinary() -> None: _logger.debug("l_hb_lowercase = %r." % l_hb_lowercase) _logger.debug("l_hb_uppercase = %r." % l_hb_uppercase) _logger.debug("l_hb_lowercase.toPython() = %r." % l_hb_lowercase.toPython()) _logger.debug("l_hb_uppercase.toPython() = %r." 
% l_hb_uppercase.toPython()) - assert l_hb_lowercase == l_hb_lowercase + assert l_hb_lowercase == l_hb_lowercase assert l_hb_lowercase.toPython() == l_hb_lowercase.toPython() - assert l_hb_lowercase == l_hb_uppercase + assert l_hb_lowercase == l_hb_uppercase assert l_hb_lowercase.toPython() == l_hb_uppercase.toPython() + @pytest.mark.xfail def test_rdflib_literal_hexbinarycanonical() -> None: _logger.debug("l_hb_uppercase = %r." % l_hb_uppercase) @@ -256,6 +291,7 @@ def test_rdflib_literal_hexbinarycanonical() -> None: assert l_hb_uppercase == l_hbc_uppercase + @pytest.mark.xfail def test_rdflib_literal_topython_hexbinarycanonical() -> None: _logger.debug("l_hb_lowercase.toPython() = %r." % l_hb_lowercase.toPython()) @@ -263,107 +299,72 @@ def test_rdflib_literal_topython_hexbinarycanonical() -> None: assert l_hb_uppercase.toPython() == l_hbc_uppercase.toPython() -def _query_all_value_matches( - graph : rdflib.Graph -) -> typing.Set[str]: + +def _query_all_value_matches(graph: rdflib.Graph) -> typing.Set[str]: """ Return set of all node names (as strings) that have a matching value, where "matching" is determined by the SPARQL engine's type and data coercions. """ computed = set() - for result in graph.query("""\ + for result in graph.query( + """\ SELECT ?nNode1 ?nNode2 WHERE { ?nNode1 ?p ?lValue . ?nNode2 ?p ?lValue . FILTER ( ?nNode1 != ?nNode2 ) -}"""): +}""" + ): (n_node1, n_node2) = result computed.add(n_node1.toPython()) computed.add(n_node2.toPython()) return computed + def test_graph_repeat() -> None: """ Two nodes are given the same literal value, and are found to match on literal values. """ graph = rdflib.Graph() - graph.add(( - n_lowercase1, - p_predicate, - l_hb_lowercase - )) - graph.add(( - n_lowercase2, - p_predicate, - l_hb_lowercase - )) - expected = { - "urn:example:lowercase1", - "urn:example:lowercase2" - } + graph.add((n_lowercase1, p_predicate, l_hb_lowercase)) + graph.add((n_lowercase2, p_predicate, l_hb_lowercase)) + expected = {"urn:example:lowercase1", "urn:example:lowercase2"} computed = _query_all_value_matches(graph) assert computed == expected + def test_graph_all_hexbinary_literals() -> None: """ Two nodes with the same literal value, and another node with the uppercase of the literal hexBinary value, are found to match on literal values. 
""" graph = rdflib.Graph() - graph.add(( - n_lowercase1, - p_predicate, - l_hb_lowercase - )) - graph.add(( - n_lowercase2, - p_predicate, - l_hb_lowercase - )) - graph.add(( - n_uppercase1, - p_predicate, - l_hb_uppercase - )) + graph.add((n_lowercase1, p_predicate, l_hb_lowercase)) + graph.add((n_lowercase2, p_predicate, l_hb_lowercase)) + graph.add((n_uppercase1, p_predicate, l_hb_uppercase)) expected = { - "urn:example:lowercase1", - "urn:example:lowercase2", - "urn:example:uppercase1" + "urn:example:lowercase1", + "urn:example:lowercase2", + "urn:example:uppercase1", } computed = _query_all_value_matches(graph) assert computed == expected + @pytest.mark.xfail def test_graph_hexbinarycanonical() -> None: graph = rdflib.Graph() - graph.add(( - n_lowercase1, - p_predicate, - l_hb_lowercase - )) - graph.add(( - n_lowercase2, - p_predicate, - l_hb_lowercase - )) - graph.add(( - n_uppercase1, - p_predicate, - l_hb_uppercase - )) - graph.add(( - n_canonical1, - p_predicate, - l_hbc_uppercase - )) + graph.add((n_lowercase1, p_predicate, l_hb_lowercase)) + graph.add((n_lowercase2, p_predicate, l_hb_lowercase)) + graph.add((n_uppercase1, p_predicate, l_hb_uppercase)) + graph.add((n_canonical1, p_predicate, l_hbc_uppercase)) expected = { - "urn:example:canonical1", - "urn:example:lowercase1", - "urn:example:lowercase2", - "urn:example:uppercase1" + "urn:example:canonical1", + "urn:example:lowercase1", + "urn:example:lowercase2", + "urn:example:uppercase1", } computed = _query_all_value_matches(graph) diff --git a/tests/src/compact.py b/tests/src/compact.py index e673edd..f20a3b8 100644 --- a/tests/src/compact.py +++ b/tests/src/compact.py @@ -29,24 +29,27 @@ _logger = logging.getLogger(os.path.basename(__file__)) + def main() -> None: with open(args.out_json, "w") as out_fh: doc = None with open(args.in_json, "r") as in_fh: doc = json.load(in_fh) assert not doc is None - assert isinstance(doc, (dict, list)), "JSON parsed top-level type assumption invalidated" + assert isinstance( + doc, (dict, list) + ), "JSON parsed top-level type assumption invalidated" # Grab the first occurrence of every key. total_context = dict() - def _accrue_local_context( - doc_object : typing.Dict[str, typing.Any] - ) -> None: + + def _accrue_local_context(doc_object: typing.Dict[str, typing.Any]) -> None: local_context = doc_object.get("@context", dict()) for key in local_context.keys(): if not key in total_context: # Accrue new key. total_context[key] = local_context[key] + if isinstance(doc, list): # Handle rdf-toolkit styled output, where graph is returned in a top-level list. 
for obj in doc: @@ -73,8 +76,10 @@ def _accrue_local_context( out_fh.write(json.dumps(compacted, indent=4)) + if __name__ == "__main__": import argparse + parser = argparse.ArgumentParser() parser.add_argument("--debug", action="store_true") parser.add_argument("out_json") diff --git a/tests/src/glom_graph.py b/tests/src/glom_graph.py index 2441954..df0fa1e 100644 --- a/tests/src/glom_graph.py +++ b/tests/src/glom_graph.py @@ -21,14 +21,17 @@ import case_utils + def main() -> None: g = rdflib.Graph() for in_graph in args.in_graph: g.parse(in_graph) g.serialize(args.out_graph) + if __name__ == "__main__": import argparse + parser = argparse.ArgumentParser() parser.add_argument("out_graph") parser.add_argument("in_graph", nargs="*") diff --git a/tests/src/isomorphic_diff.py b/tests/src/isomorphic_diff.py index 0c3fea1..f4eab89 100644 --- a/tests/src/isomorphic_diff.py +++ b/tests/src/isomorphic_diff.py @@ -40,6 +40,7 @@ _logger = logging.getLogger(os.path.basename(__file__)) + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--debug", action="store_true") @@ -55,25 +56,22 @@ def main() -> None: g1.parse(args.in_graph_1) g2.parse(args.in_graph_2) - #_logger.debug("type(g1) = %r.", type(g1)) - #_logger.debug("type(g2) = %r.", type(g2)) + # _logger.debug("type(g1) = %r.", type(g1)) + # _logger.debug("type(g2) = %r.", type(g2)) - #_logger.debug("len(g1) = %d.", len(g1)) - #_logger.debug("len(g2) = %d.", len(g2)) + # _logger.debug("len(g1) = %d.", len(g1)) + # _logger.debug("len(g2) = %d.", len(g2)) i1 = rdflib.compare.to_isomorphic(g1) i2 = rdflib.compare.to_isomorphic(g2) - #_logger.debug("type(i1) = %r.", type(i1)) - #_logger.debug("type(i2) = %r.", type(i2)) + # _logger.debug("type(i1) = %r.", type(i1)) + # _logger.debug("type(i2) = %r.", type(i2)) if i1 == i2: sys.exit(0) - def _report( - diff_symbol : str, - graph : rdflib.Graph - ) -> None: + def _report(diff_symbol: str, graph: rdflib.Graph) -> None: """ This function copied in spirit from: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#module-rdflib.compare @@ -83,19 +81,16 @@ def _report( continue _logger.debug("%s %s", diff_symbol, line) - #_report("1", g1) - #_report("2", g2) + # _report("1", g1) + # _report("2", g2) if args.debug: - ( - in_both, - in_first, - in_second - ) = rdflib.compare.graph_diff(i1, i2) + (in_both, in_first, in_second) = rdflib.compare.graph_diff(i1, i2) _report("<", in_first) _report(">", in_second) sys.exit(1) + if __name__ == "__main__": main()
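
Note on manual usage: until the CI policy questions tracked in AC-215 and
AC-216 are settled, the two new recipes above are run by hand. Inside
`tests/`, with the virtual environment built, they reduce to direct Black
invocations, e.g. `source venv/bin/activate && black --check ../case_utils .`
for `check-black`, and the same command without `--check` for `format`. (This
assumes `top_srcdir` in `tests/Makefile` resolves to the repository root, so
`$(top_srcdir)/case_utils` is `../case_utils` and `$$PWD` is the `tests`
directory.)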
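
Note for downstream callers: `case_utils.guess_format` remains deprecated in
favor of `rdflib.util.guess_format`, as the warning reformatted in
`case_utils/__init__.py` states, with an unchanged `(fpath, fmap)` argument
pair. A minimal migration sketch (the filename here is illustrative; the
`.ttl`-to-"turtle" mapping matches the assertions in
`tests/case_utils/test_guess_format.py`):

    import rdflib.util

    # Previously: case_utils.guess_format("evidence.ttl"), which is slated
    # for removal in case_utils 0.4.0.
    fmt = rdflib.util.guess_format("evidence.ttl")
    assert fmt == "turtle"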