casework · ajnelson-nist · Nov 10, 2021 · Nov 10, 2021 · Nov 10, 2021 · Nov 10, 2021
@@ -56,10 +56,12 @@ all: \
 	# Do not rebuild the current ontology file if it is already present.  It is expected not to change once built.
 	# touch -c: Do not create the file if it does not exist.  This will convince the recursive make nothing needs to be done if the file is present.
 	touch -c case_utils/ontology/case-$(case_version).ttl
+	touch -c case_utils/ontology/case-$(case_version)-subclasses.ttl
 	$(MAKE) \
 	  --directory case_utils/ontology
 	# Confirm the current monolithic file is in place.
 	test -r case_utils/ontology/case-$(case_version).ttl
+	test -r case_utils/ontology/case-$(case_version)-subclasses.ttl
 	touch $@
 
 check: \

@@ -66,6 +66,8 @@ case_file --disable-hashes sample.txt.json sample.txt
 
 Two commands are provided to generate output from a SPARQL query and one or more input graphs.  Input graphs can be any graph, such as instance data or supplementary ontology files that supply custom class definitions or other external ontologies.
 
+These commands can be used with any RDF files to run arbitrary SPARQL queries.  They have one additional behavior tailored to CASE: If the query text contains `subClassOf` (for example, in a property path such as `rdfs:subClassOf*`), the CASE subclass hierarchy is loaded to supplement the input graph.  An expected use case of this feature is querying for subclasses of `ObservableObject`.  For instance, if a data graph included an object with only the class `uco-observable:File` specified, the query pattern `?x a/rdfs:subClassOf* uco-observable:ObservableObject` would match `?x` against that object.  Passing `--built-version none` disables this supplementation.
+
 
 #### `case_sparql_construct`
 

@@ -85,7 +85,7 @@ def create_file_node(
     graph.add((
       n_file,
       NS_RDF.type,
-      NS_UCO_OBSERVABLE.ObservableObject
+      NS_UCO_OBSERVABLE.File
     ))
 
     basename = os.path.basename(filepath)

@@ -18,28 +18,52 @@
 __version__ = "0.1.0"
 
 import argparse
-import os
 import logging
+import os
+import sys
 import typing
 
 import rdflib.plugins.sparql  # type: ignore
 
-import case_utils
+import case_utils.ontology
+
+from case_utils.ontology.version_info import *
 
 _logger = logging.getLogger(os.path.basename(__file__))
 
 def main() -> None:
     parser = argparse.ArgumentParser()
-    parser.add_argument("-d", "--debug", action="store_true")
-    parser.add_argument("--disallow-empty-results", action="store_true", help="Raise error if no results are returned for query.")
-    parser.add_argument("--output-format", help="Override extension-based format guesser.")
+
+    # Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser.
+    logging.basicConfig(level=logging.DEBUG if ("--debug" in sys.argv or "-d" in sys.argv) else logging.INFO)
+
+    built_version_choices_list = ["none", "case-" + CURRENT_CASE_VERSION]
+
+    parser.add_argument(
+      "-d",
+      "--debug",
+      action="store_true"
+    )
+    parser.add_argument(
+      "--built-version",
+      choices=tuple(built_version_choices_list),
+      default="case-"+CURRENT_CASE_VERSION,
+      help="Ontology version to use to supplement query, such as for subclass querying.  Does not require networking to use.  Default is most recent CASE release."
+    )
+    parser.add_argument(
+      "--disallow-empty-results",
+      action="store_true",
+      help="Raise error if no results are returned for query."
+    )
+    parser.add_argument(
+      "--output-format",
+      help="Override extension-based format guesser."
+    )
     parser.add_argument("out_graph")
     parser.add_argument("in_sparql")
     parser.add_argument("in_graph", nargs="+")
     args = parser.parse_args()
 
-    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
-
     in_graph = rdflib.Graph()
     for in_graph_filename in args.in_graph:
         in_graph.parse(in_graph_filename)
@@ -58,6 +82,9 @@ def main() -> None:
         construct_query_text = in_fh.read().strip()
     assert not construct_query_text is None
 
+    if "subClassOf" in construct_query_text:
+        case_utils.ontology.load_subclass_hierarchy(in_graph, built_version=args.built_version)
+
     construct_query_object = rdflib.plugins.sparql.prepareQuery(construct_query_text, initNs=nsdict)
 
     # https://rdfextras.readthedocs.io/en/latest/working_with.html

@@ -30,29 +30,54 @@
 
 import argparse
 import binascii
+import importlib.resources
 import logging
 import os
+import sys
 
 import pandas as pd  # type: ignore
 import rdflib.plugins.sparql  # type: ignore
 
-import case_utils
+import case_utils.ontology
+
+from case_utils.ontology.version_info import *
 
 NS_XSD = rdflib.XSD
 
 _logger = logging.getLogger(os.path.basename(__file__))
 
 def main() -> None:
     parser = argparse.ArgumentParser()
-    parser.add_argument("-d", "--debug", action="store_true")
-    parser.add_argument("--disallow-empty-results", action="store_true", help="Raise error if no results are returned for query.")
-    parser.add_argument("out_table", help="Expected extensions are .html for HTML tables or .md for Markdown tables.")
+
+    # Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser.
+    logging.basicConfig(level=logging.DEBUG if ("--debug" in sys.argv or "-d" in sys.argv) else logging.INFO)
+
+    built_version_choices_list = ["none", "case-" + CURRENT_CASE_VERSION]
+
+    parser.add_argument(
+      "-d",
+      "--debug",
+      action="store_true"
+    )
+    parser.add_argument(
+      "--built-version",
+      choices=tuple(built_version_choices_list),
+      default="case-"+CURRENT_CASE_VERSION,
+      help="Ontology version to use to supplement query, such as for subclass querying.  Does not require networking to use.  Default is most recent CASE release."
+    )
+    parser.add_argument(
+      "--disallow-empty-results",
+      action="store_true",
+      help="Raise error if no results are returned for query."
+    )
+    parser.add_argument(
+      "out_table",
+      help="Expected extensions are .html for HTML tables or .md for Markdown tables."
+    )
     parser.add_argument("in_sparql")
     parser.add_argument("in_graph", nargs="+")
     args = parser.parse_args()
 
-    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
-
     graph = rdflib.Graph()
     for in_graph_filename in args.in_graph:
         graph.parse(in_graph_filename)
@@ -65,6 +90,9 @@ def main() -> None:
         select_query_text = in_fh.read().strip()
     _logger.debug("select_query_text = %r." % select_query_text)
 
+    if "subClassOf" in select_query_text:
+        case_utils.ontology.load_subclass_hierarchy(graph, built_version=args.built_version)
+
     # Build columns list from SELECT line.
     select_query_text_lines = select_query_text.split("\n")
     select_line = [line for line in select_query_text_lines if line.startswith("SELECT ")][0]

@@ -24,6 +24,9 @@ RDF_TOOLKIT_JAR := $(case_srcdir)/lib/rdf-toolkit.jar
 case_version := $(shell python3 version_info.py)
 
 all: \
+  case-$(case_version)-subclasses.ttl
+
+.PRECIOUS: \
   case-$(case_version).ttl
 
 case-$(case_version).ttl: \
@@ -45,5 +48,34 @@ case-$(case_version).ttl: \
 	  --target-format turtle
 	mv _$@ $@
 
+case-$(case_version)-subclasses.ttl: \
+  case-$(case_version).ttl \
+  src/subclasses_ttl.py
+	# The CASE ontology test venv is made by the earlier build step
+	# of case_monolithic.ttl.  However, unless a new ontology
+	# release is being made, that step will have been skipped.
+	# This recursive Make call guarantees the virtual environment is
+	# set up.
+	$(MAKE) \
+	  --directory $(case_srcdir)/tests \
+	    .venv.done.log
+	#TODO This cleanup step should be removed after the 0.3.0 release of CASE-Utility-SHACL-Inheritance-Reviewer.
+	test ! -d $(uco_srcdir)/dependencies/CASE-Utility-SHACL-Inheritance-Reviewer/build \
+	  || rm -rf \
+	    $(uco_srcdir)/dependencies/CASE-Utility-SHACL-Inheritance-Reviewer/build
+	source $(case_srcdir)/tests/venv/bin/activate \
+	  && python3 src/subclasses_ttl.py \
+	  __$@ \
+	 $<
+	java -jar $(RDF_TOOLKIT_JAR) \
+	  --inline-blank-nodes \
+	  --source __$@ \
+	  --source-format turtle \
+	  --target _$@ \
+	  --target-format turtle
+	rm __$@
+	mv _$@ $@
+
 clean:
-	@rm -f case-$(case_version).ttl
+	@rm -f \
+	  case-$(case_version)*.ttl
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 # This software was developed at the National Institute of Standards
 # and Technology by employees of the Federal Government in the course
 # of their official duties. Pursuant to title 17 Section 105 of the
@@ -8,3 +10,33 @@
 # reliability, or any other characteristic.
 #
 # We would appreciate acknowledgement if the software is used.
+
+__version__ = "0.1.0"
+
+import importlib.resources
+import logging
+import os
+
+import rdflib
+
+# Yes, this next import is self-referential (/circular).  But, it does work with importlib.
+import case_utils.ontology
+
+from .version_info import *
+
+_logger = logging.getLogger(os.path.basename(__file__))
+
+def load_subclass_hierarchy(
+  graph : rdflib.Graph,
+  *,
+  built_version : str = "case-"+CURRENT_CASE_VERSION
+) -> None:
+    """
+    Adds all ontology rdfs:subClassOf statements from the version referred to by built_version.
+
+    The statements are read from the packaged resource
+    "<built_version>-subclasses.ttl" in case_utils.ontology and parsed
+    into graph in place.  Nothing is returned.
+
+    :param graph: Graph that receives the statements.
+    :param built_version: Name of the built ontology version to load.  The special value "none" skips loading and leaves graph untouched.
+    """
+    if built_version == "none":
+        return
+
+    _logger.debug("Loading subclass hierarchy.")
+    ttl_filename = built_version + "-subclasses.ttl"
+    _logger.debug("ttl_filename = %r.", ttl_filename)
+    ttl_data = importlib.resources.read_text(case_utils.ontology, ttl_filename)
+    # Name the format explicitly: when parsing from a string there is no file extension to guess from, so without this the parser falls back to an rdflib default.
+    graph.parse(data=ttl_data, format="turtle")
Original file line number	Diff line number	Diff line change
Expand Up		@@ -66,6 +66,8 @@ case_file --disable-hashes sample.txt.json sample.txt

		Two commands are provided to generate output from a SPARQL query and one or more input graphs. Input graphs can be any graph, such as instance data or supplementary ontology files that supply custom class definitions or other external ontologies.

		These commands can be used with any RDF files to run arbitrary SPARQL queries. They have one additional behavior tailored to CASE: If the query text contains `subClassOf` (for example, in a property path such as `rdfs:subClassOf*`), the CASE subclass hierarchy is loaded to supplement the input graph. An expected use case of this feature is querying for subclasses of `ObservableObject`. For instance, if a data graph included an object with only the class `uco-observable:File` specified, the query pattern `?x a/rdfs:subClassOf* uco-observable:ObservableObject` would match `?x` against that object. Passing `--built-version none` disables this supplementation.


		#### `case_sparql_construct`

Expand Down