Skip to content

Add subclass hierarchy knowledge to case_sparql_* commands #25

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Nov 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,12 @@ all: \
# Do not rebuild the current ontology file if it is already present. It is expected not to change once built.
# touch -c: Do not create the file if it does not exist. This will convince the recursive make nothing needs to be done if the file is present.
touch -c case_utils/ontology/case-$(case_version).ttl
touch -c case_utils/ontology/case-$(case_version)-subclasses.ttl
$(MAKE) \
--directory case_utils/ontology
# Confirm the current monolithic file is in place.
test -r case_utils/ontology/case-$(case_version).ttl
test -r case_utils/ontology/case-$(case_version)-subclasses.ttl
touch $@

check: \
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ case_file --disable-hashes sample.txt.json sample.txt

Two commands are provided to generate output from a SPARQL query and one or more input graphs. Input graphs can be any graph, such as instance data or supplementary ontology files that supply custom class definitions or other external ontologies.

These commands can be used with any RDF files to run arbitrary SPARQL queries. They have one additional behavior tailored to CASE: if the query text references `subClassOf` (for instance, in a property path over `rdfs:subClassOf`), the CASE subclass hierarchy is loaded to supplement the input graph. An expected use case of this feature is querying for subclasses of `ObservableObject`. For instance, if a data graph included an object with only the class `uco-observable:File` specified, the query pattern `?x a/rdfs:subClassOf* uco-observable:ObservableObject` would still match `?x` against that object.


#### `case_sparql_construct`

Expand Down
2 changes: 1 addition & 1 deletion case_utils/case_file/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def create_file_node(
graph.add((
n_file,
NS_RDF.type,
NS_UCO_OBSERVABLE.ObservableObject
NS_UCO_OBSERVABLE.File
))

basename = os.path.basename(filepath)
Expand Down
41 changes: 34 additions & 7 deletions case_utils/case_sparql_construct/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,52 @@
__version__ = "0.1.0"

import argparse
import os
import logging
import os
import sys
import typing

import rdflib.plugins.sparql # type: ignore

import case_utils
import case_utils.ontology

from case_utils.ontology.version_info import *

_logger = logging.getLogger(os.path.basename(__file__))

def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--debug", action="store_true")
parser.add_argument("--disallow-empty-results", action="store_true", help="Raise error if no results are returned for query.")
parser.add_argument("--output-format", help="Override extension-based format guesser.")

# Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser.
logging.basicConfig(level=logging.DEBUG if ("--debug" in sys.argv or "-d" in sys.argv) else logging.INFO)

built_version_choices_list = ["none", "case-" + CURRENT_CASE_VERSION]

parser.add_argument(
"-d",
"--debug",
action="store_true"
)
parser.add_argument(
"--built-version",
choices=tuple(built_version_choices_list),
default="case-"+CURRENT_CASE_VERSION,
help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release."
)
parser.add_argument(
"--disallow-empty-results",
action="store_true",
help="Raise error if no results are returned for query."
)
parser.add_argument(
"--output-format",
help="Override extension-based format guesser."
)
parser.add_argument("out_graph")
parser.add_argument("in_sparql")
parser.add_argument("in_graph", nargs="+")
args = parser.parse_args()

logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

in_graph = rdflib.Graph()
for in_graph_filename in args.in_graph:
in_graph.parse(in_graph_filename)
Expand All @@ -58,6 +82,9 @@ def main() -> None:
construct_query_text = in_fh.read().strip()
assert not construct_query_text is None

if "subClassOf" in construct_query_text:
case_utils.ontology.load_subclass_hierarchy(in_graph, built_version=args.built_version)

construct_query_object = rdflib.plugins.sparql.prepareQuery(construct_query_text, initNs=nsdict)

# https://rdfextras.readthedocs.io/en/latest/working_with.html
Expand Down
40 changes: 34 additions & 6 deletions case_utils/case_sparql_select/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,54 @@

import argparse
import binascii
import importlib.resources
import logging
import os
import sys

import pandas as pd # type: ignore
import rdflib.plugins.sparql # type: ignore

import case_utils
import case_utils.ontology

from case_utils.ontology.version_info import *

NS_XSD = rdflib.XSD

_logger = logging.getLogger(os.path.basename(__file__))

def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--debug", action="store_true")
parser.add_argument("--disallow-empty-results", action="store_true", help="Raise error if no results are returned for query.")
parser.add_argument("out_table", help="Expected extensions are .html for HTML tables or .md for Markdown tables.")

# Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser.
logging.basicConfig(level=logging.DEBUG if ("--debug" in sys.argv or "-d" in sys.argv) else logging.INFO)

built_version_choices_list = ["none", "case-" + CURRENT_CASE_VERSION]

parser.add_argument(
"-d",
"--debug",
action="store_true"
)
parser.add_argument(
"--built-version",
choices=tuple(built_version_choices_list),
default="case-"+CURRENT_CASE_VERSION,
help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release."
)
parser.add_argument(
"--disallow-empty-results",
action="store_true",
help="Raise error if no results are returned for query."
)
parser.add_argument(
"out_table",
help="Expected extensions are .html for HTML tables or .md for Markdown tables."
)
parser.add_argument("in_sparql")
parser.add_argument("in_graph", nargs="+")
args = parser.parse_args()

logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

graph = rdflib.Graph()
for in_graph_filename in args.in_graph:
graph.parse(in_graph_filename)
Expand All @@ -65,6 +90,9 @@ def main() -> None:
select_query_text = in_fh.read().strip()
_logger.debug("select_query_text = %r." % select_query_text)

if "subClassOf" in select_query_text:
case_utils.ontology.load_subclass_hierarchy(graph, built_version=args.built_version)

# Build columns list from SELECT line.
select_query_text_lines = select_query_text.split("\n")
select_line = [line for line in select_query_text_lines if line.startswith("SELECT ")][0]
Expand Down
34 changes: 33 additions & 1 deletion case_utils/ontology/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ RDF_TOOLKIT_JAR := $(case_srcdir)/lib/rdf-toolkit.jar
case_version := $(shell python3 version_info.py)

all: \
case-$(case_version)-subclasses.ttl

.PRECIOUS: \
case-$(case_version).ttl

case-$(case_version).ttl: \
Expand All @@ -45,5 +48,34 @@ case-$(case_version).ttl: \
--target-format turtle
mv _$@ $@

# Build the subclass-hierarchy Turtle file from the monolithic ontology file.
# Steps: ensure the CASE test virtual environment exists, run
# src/subclasses_ttl.py to write a scratch file (__$@), normalize that file
# with rdf-toolkit into a second scratch file (_$@), then move the normalized
# file into place.  The target name itself is only written by the final mv.
case-$(case_version)-subclasses.ttl: \
case-$(case_version).ttl \
src/subclasses_ttl.py
# The CASE ontology test venv is made by the earlier build step
# of case_monolithic.ttl. However, unless a new ontology
# release is being made, that step will have been skipped.
# This recursive Make call guarantees the virtual environment is
# set up.
$(MAKE) \
--directory $(case_srcdir)/tests \
.venv.done.log
#TODO This cleanup step should be removed after the 0.3.0 release of CASE-Utility-SHACL-Inheritance-Reviewer.
test ! -d $(uco_srcdir)/dependencies/CASE-Utility-SHACL-Inheritance-Reviewer/build \
|| rm -rf \
$(uco_srcdir)/dependencies/CASE-Utility-SHACL-Inheritance-Reviewer/build
# Arguments to the script: output path (__$@), then the first prerequisite
# ($<), which is case-$(case_version).ttl.
source $(case_srcdir)/tests/venv/bin/activate \
&& python3 src/subclasses_ttl.py \
__$@ \
$<
# Re-serialize the script output as Turtle with rdf-toolkit.
java -jar $(RDF_TOOLKIT_JAR) \
--inline-blank-nodes \
--source __$@ \
--source-format turtle \
--target _$@ \
--target-format turtle
rm __$@
mv _$@ $@

clean:
@rm -f case-$(case_version).ttl
@rm -f \
case-$(case_version)*.ttl
32 changes: 32 additions & 0 deletions case_utils/ontology/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

# This software was developed at the National Institute of Standards
# and Technology by employees of the Federal Government in the course
# of their official duties. Pursuant to title 17 Section 105 of the
Expand All @@ -8,3 +10,33 @@
# reliability, or any other characteristic.
#
# We would appreciate acknowledgement if the software is used.

__version__ = "0.1.0"

import importlib.resources
import logging
import os

import rdflib

# Yes, this next import is self-referential (/circular). But, it does work with importlib.
import case_utils.ontology

from .version_info import *

_logger = logging.getLogger(os.path.basename(__file__))

def load_subclass_hierarchy(
    graph : rdflib.Graph,
    *,
    built_version : str = "case-"+CURRENT_CASE_VERSION
) -> None:
    """
    Adds all ontology rdfs:subClassOf statements from the version referred to by built_version.

    The statements are read from the package resource named
    "<built_version>-subclasses.ttl" inside case_utils.ontology and are
    parsed into graph in place.  Nothing is returned.

    :param graph: Graph to supplement.  It is modified in place.
    :param built_version: Prefix of the resource file name to load.  The
      special value "none" disables loading and leaves graph untouched.

    Errors raised while reading the resource or parsing its content are not
    caught here and propagate to the caller.
    """
    if built_version == "none":
        # Caller opted out of ontology supplementation.
        return

    _logger.debug("Loading subclass hierarchy.")
    ttl_filename = built_version + "-subclasses.ttl"
    _logger.debug("ttl_filename = %r.", ttl_filename)
    ttl_data = importlib.resources.read_text(case_utils.ontology, ttl_filename)
    # The resource is always a Turtle (.ttl) file.  Name the format
    # explicitly so parsing of in-memory data does not depend on the
    # library's default or guessed serialization.
    graph.parse(data=ttl_data, format="turtle")
Loading