Skip to content

Commit c39a9cd

Browse files
Merge pull request #25 from casework/OC-65
Add subclass hierarchy knowledge to case_sparql_* commands
2 parents 3901840 + 6f9458f commit c39a9cd

37 files changed

+2032
-96
lines changed

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,12 @@ all: \
5656
# Do not rebuild the current ontology file if it is already present. It is expected not to change once built.
5757
# touch -c: Do not create the file if it does not exist. This will convince the recursive make nothing needs to be done if the file is present.
5858
touch -c case_utils/ontology/case-$(case_version).ttl
59+
touch -c case_utils/ontology/case-$(case_version)-subclasses.ttl
5960
$(MAKE) \
6061
--directory case_utils/ontology
6162
# Confirm the current monolithic file is in place.
6263
test -r case_utils/ontology/case-$(case_version).ttl
64+
test -r case_utils/ontology/case-$(case_version)-subclasses.ttl
6365
touch $@
6466

6567
check: \

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ case_file --disable-hashes sample.txt.json sample.txt
6666

6767
Two commands are provided to generate output from a SPARQL query and one or more input graphs. Input graphs can be any graph, such as instance data or supplementary ontology files that supply custom class definitions or other external ontologies.
6868

69+
These commands can be used with any RDF files to run arbitrary SPARQL queries. They have one additional behavior tailored to CASE: if the query text contains `subClassOf` (for example, in a property path such as `a/rdfs:subClassOf*`), the CASE subclass hierarchy is loaded to supplement the input graph. Passing `--built-version none` disables this supplementing. An expected use case of this feature is querying for subclasses of `ObservableObject`. For instance, if a data graph included an object with only the class `uco-observable:File` specified, the query pattern `?x a/rdfs:subClassOf* uco-observable:ObservableObject` would match `?x` against that object.
70+
6971

7072
#### `case_sparql_construct`
7173

case_utils/case_file/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def create_file_node(
8585
graph.add((
8686
n_file,
8787
NS_RDF.type,
88-
NS_UCO_OBSERVABLE.ObservableObject
88+
NS_UCO_OBSERVABLE.File
8989
))
9090

9191
basename = os.path.basename(filepath)

case_utils/case_sparql_construct/__init__.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,52 @@
1818
__version__ = "0.1.0"
1919

2020
import argparse
21-
import os
2221
import logging
22+
import os
23+
import sys
2324
import typing
2425

2526
import rdflib.plugins.sparql # type: ignore
2627

27-
import case_utils
28+
import case_utils.ontology
29+
30+
from case_utils.ontology.version_info import *
2831

2932
_logger = logging.getLogger(os.path.basename(__file__))
3033

3134
def main() -> None:
3235
parser = argparse.ArgumentParser()
33-
parser.add_argument("-d", "--debug", action="store_true")
34-
parser.add_argument("--disallow-empty-results", action="store_true", help="Raise error if no results are returned for query.")
35-
parser.add_argument("--output-format", help="Override extension-based format guesser.")
36+
37+
# Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser.
38+
logging.basicConfig(level=logging.DEBUG if ("--debug" in sys.argv or "-d" in sys.argv) else logging.INFO)
39+
40+
built_version_choices_list = ["none", "case-" + CURRENT_CASE_VERSION]
41+
42+
parser.add_argument(
43+
"-d",
44+
"--debug",
45+
action="store_true"
46+
)
47+
parser.add_argument(
48+
"--built-version",
49+
choices=tuple(built_version_choices_list),
50+
default="case-"+CURRENT_CASE_VERSION,
51+
help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release."
52+
)
53+
parser.add_argument(
54+
"--disallow-empty-results",
55+
action="store_true",
56+
help="Raise error if no results are returned for query."
57+
)
58+
parser.add_argument(
59+
"--output-format",
60+
help="Override extension-based format guesser."
61+
)
3662
parser.add_argument("out_graph")
3763
parser.add_argument("in_sparql")
3864
parser.add_argument("in_graph", nargs="+")
3965
args = parser.parse_args()
4066

41-
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
42-
4367
in_graph = rdflib.Graph()
4468
for in_graph_filename in args.in_graph:
4569
in_graph.parse(in_graph_filename)
@@ -58,6 +82,9 @@ def main() -> None:
5882
construct_query_text = in_fh.read().strip()
5983
assert not construct_query_text is None
6084

85+
if "subClassOf" in construct_query_text:
86+
case_utils.ontology.load_subclass_hierarchy(in_graph, built_version=args.built_version)
87+
6188
construct_query_object = rdflib.plugins.sparql.prepareQuery(construct_query_text, initNs=nsdict)
6289

6390
# https://rdfextras.readthedocs.io/en/latest/working_with.html

case_utils/case_sparql_select/__init__.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,29 +30,54 @@
3030

3131
import argparse
3232
import binascii
33+
import importlib.resources
3334
import logging
3435
import os
36+
import sys
3537

3638
import pandas as pd # type: ignore
3739
import rdflib.plugins.sparql # type: ignore
3840

39-
import case_utils
41+
import case_utils.ontology
42+
43+
from case_utils.ontology.version_info import *
4044

4145
NS_XSD = rdflib.XSD
4246

4347
_logger = logging.getLogger(os.path.basename(__file__))
4448

4549
def main() -> None:
4650
parser = argparse.ArgumentParser()
47-
parser.add_argument("-d", "--debug", action="store_true")
48-
parser.add_argument("--disallow-empty-results", action="store_true", help="Raise error if no results are returned for query.")
49-
parser.add_argument("out_table", help="Expected extensions are .html for HTML tables or .md for Markdown tables.")
51+
52+
# Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser.
53+
logging.basicConfig(level=logging.DEBUG if ("--debug" in sys.argv or "-d" in sys.argv) else logging.INFO)
54+
55+
built_version_choices_list = ["none", "case-" + CURRENT_CASE_VERSION]
56+
57+
parser.add_argument(
58+
"-d",
59+
"--debug",
60+
action="store_true"
61+
)
62+
parser.add_argument(
63+
"--built-version",
64+
choices=tuple(built_version_choices_list),
65+
default="case-"+CURRENT_CASE_VERSION,
66+
help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release."
67+
)
68+
parser.add_argument(
69+
"--disallow-empty-results",
70+
action="store_true",
71+
help="Raise error if no results are returned for query."
72+
)
73+
parser.add_argument(
74+
"out_table",
75+
help="Expected extensions are .html for HTML tables or .md for Markdown tables."
76+
)
5077
parser.add_argument("in_sparql")
5178
parser.add_argument("in_graph", nargs="+")
5279
args = parser.parse_args()
5380

54-
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
55-
5681
graph = rdflib.Graph()
5782
for in_graph_filename in args.in_graph:
5883
graph.parse(in_graph_filename)
@@ -65,6 +90,9 @@ def main() -> None:
6590
select_query_text = in_fh.read().strip()
6691
_logger.debug("select_query_text = %r." % select_query_text)
6792

93+
if "subClassOf" in select_query_text:
94+
case_utils.ontology.load_subclass_hierarchy(graph, built_version=args.built_version)
95+
6896
# Build columns list from SELECT line.
6997
select_query_text_lines = select_query_text.split("\n")
7098
select_line = [line for line in select_query_text_lines if line.startswith("SELECT ")][0]

case_utils/ontology/Makefile

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ RDF_TOOLKIT_JAR := $(case_srcdir)/lib/rdf-toolkit.jar
2424
case_version := $(shell python3 version_info.py)
2525

2626
all: \
27+
case-$(case_version)-subclasses.ttl
28+
29+
.PRECIOUS: \
2730
case-$(case_version).ttl
2831

2932
case-$(case_version).ttl: \
@@ -45,5 +48,34 @@ case-$(case_version).ttl: \
4548
--target-format turtle
4649
mv _$@ $@
4750

51+
case-$(case_version)-subclasses.ttl: \
52+
case-$(case_version).ttl \
53+
src/subclasses_ttl.py
54+
# The CASE ontology test venv is made by the earlier build step
55+
# of case_monolithic.ttl. However, unless a new ontology
56+
# release is being made, that step will have been skipped.
57+
# This recursive Make call guarantees the virtual environment is
58+
# set up.
59+
$(MAKE) \
60+
--directory $(case_srcdir)/tests \
61+
.venv.done.log
62+
#TODO This cleanup step should be removed after the 0.3.0 release of CASE-Utility-SHACL-Inheritance-Reviewer.
63+
test ! -d $(uco_srcdir)/dependencies/CASE-Utility-SHACL-Inheritance-Reviewer/build \
64+
|| rm -rf \
65+
$(uco_srcdir)/dependencies/CASE-Utility-SHACL-Inheritance-Reviewer/build
66+
source $(case_srcdir)/tests/venv/bin/activate \
67+
&& python3 src/subclasses_ttl.py \
68+
__$@ \
69+
$<
70+
java -jar $(RDF_TOOLKIT_JAR) \
71+
--inline-blank-nodes \
72+
--source __$@ \
73+
--source-format turtle \
74+
--target _$@ \
75+
--target-format turtle
76+
rm __$@
77+
mv _$@ $@
78+
4879
clean:
49-
@rm -f case-$(case_version).ttl
80+
@rm -f \
81+
case-$(case_version)*.ttl

case_utils/ontology/__init__.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#!/usr/bin/env python3
2+
13
# This software was developed at the National Institute of Standards
24
# and Technology by employees of the Federal Government in the course
35
# of their official duties. Pursuant to title 17 Section 105 of the
@@ -8,3 +10,33 @@
810
# reliability, or any other characteristic.
911
#
1012
# We would appreciate acknowledgement if the software is used.
13+
14+
__version__ = "0.1.0"
15+
16+
import importlib.resources
17+
import logging
18+
import os
19+
20+
import rdflib
21+
22+
# Yes, this next import is self-referential (/circular). But, it does work with importlib.
23+
import case_utils.ontology
24+
25+
from .version_info import *
26+
27+
_logger = logging.getLogger(os.path.basename(__file__))
28+
29+
def load_subclass_hierarchy(
    graph : rdflib.Graph,
    *,
    built_version : str = "case-"+CURRENT_CASE_VERSION
) -> None:
    """
    Add all ontology rdfs:subClassOf statements from the version referred to by built_version into graph.

    The statements are read from the packaged resource named
    ``<built_version>-subclasses.ttl`` inside the ``case_utils.ontology``
    package and parsed into ``graph`` in place.  Nothing is returned.

    :param graph: Graph to supplement.  It is modified in place.
    :param built_version: Prefix of the packaged subclasses file to load,
        e.g. ``"case-" + CURRENT_CASE_VERSION``.  The special value
        ``"none"`` disables loading, leaving ``graph`` untouched.

    Any error raised by ``importlib.resources.read_text`` (for instance
    when no resource with the computed name is packaged) or by
    ``graph.parse`` propagates to the caller.
    """
    # "none" is the documented opt-out value; there is nothing to load.
    if built_version == "none":
        return

    _logger.debug("Loading subclass hierarchy.")
    ttl_filename = built_version + "-subclasses.ttl"
    _logger.debug("ttl_filename = %r.", ttl_filename)
    ttl_data = importlib.resources.read_text(case_utils.ontology, ttl_filename)

    # The data is handed over as a string, so there is no file extension
    # for the parser to guess a serialization from.  State the format
    # explicitly instead of depending on the library's fallback.
    graph.parse(data=ttl_data, format="turtle")

0 commit comments

Comments
 (0)