Skip to content

Commit 4186363

Browse files
committed
(Behavior change) Add --review-tbox flag
This is part of addressing what originally inspired pySHACL Issue 170. This patch modifies the part of `case_validate` that reviews OWL syntax and OWL-SHACL interactions. With this patch, that functionality is now **opt-in** at call time. Further work on separating the OWL review shapes from UCO into a general CDO repository (originally started for CDOTSC-34) is currently believed not to affect the user interface element where the user opts in to the more extensive review. References: * [CDOTSC-34] CDO should provide shapes for external ontologies and other RDF models, including OWL * RDFLib/pySHACL#170 Signed-off-by: Alex Nelson <alexander.nelson@nist.gov>
1 parent 63cb72b commit 4186363

File tree

3 files changed

+70
-31
lines changed

3 files changed

+70
-31
lines changed

case_utils/case_validate/__init__.py

Lines changed: 20 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
ValidationResult,
5151
)
5252
from case_utils.case_validate.validate_utils import (
53+
disable_tbox_review,
5354
get_invalid_cdo_concepts,
5455
get_ontology_graph,
5556
)
@@ -65,6 +66,7 @@ def validate(
6566
input_file: Union[List[str], str],
6667
*args: Any,
6768
case_version: Optional[str] = None,
69+
review_tbox: bool = False,
6870
supplemental_graphs: Optional[List[str]] = None,
6971
**kwargs: Any,
7072
) -> ValidationResult:
@@ -74,6 +76,7 @@ def validate(
7476
:param *args: The positional arguments to pass to the underlying pyshacl.validate function.
7577
:param input_file: The path to the file containing the data graph to validate. This can also be a list of paths to files containing data graphs to pool together.
7678
:param case_version: The version of the CASE ontology to use (e.g. 1.2.0). If None, the most recent version will be used.
79+
:param review_tbox: If True, SHACL shapes that review OWL Classes, OWL Properties, and SHACL shapes that constrain those classes and properties will be used in the review. Otherwise, those shapes will be deactivated before running validation. Be aware that these shapes are known to significantly increase the validation run time.
7780
:param supplemental_graphs: File paths to supplemental graphs to use. If None, no supplemental graphs will be used.
7881
:param allow_warnings: In addition to affecting the conformance of SHACL validation, this will affect conformance based on unrecognized CDO concepts (likely, misspelled or miscapitalized) in the data graph. If allow_warnings is not True, any unrecognized concept using a CDO IRI prefix will cause conformance to be False.
7982
:param inference: The type of inference to use. If "none" (type str), no inference will be used. If None (type NoneType), pyshacl defaults will be used. Note that at the time of this writing (pySHACL 0.23.0), pyshacl defaults are no inferencing for the data graph, and RDFS inferencing for the SHACL graph, which for case_utils.validate includes the SHACL and OWL graphs.
@@ -94,36 +97,16 @@ def validate(
9497
# Get the ontology graph from the case_version and supplemental_graphs arguments
9598
ontology_graph: Graph = get_ontology_graph(case_version, supplemental_graphs)
9699

97-
# Filter the graph pyshacl uses as its ontology mix-in to exclude
98-
# all SHACL-related triples.
99-
# This is done because, at the time of pyshacl 0.20.0, the entirety
100-
# of the ontology graph is mixed into the data graph. UCO 1.0.0
101-
# includes some mechanisms to cross-check SHACL PropertyShapes
102-
# versus OWL property definitions. Because of the mix-in, all of
103-
# the ontology graph (.validate ont_graph kwarg) is reviewed by the
104-
# SHACL graph (.validate shacl_graph kwarg), so for UCO 1.0.0 that
105-
# adds around 30 seconds to each case_validate call, redundantly
106-
# reviewing UCO.
107-
# The ontology graph (.validate ont_graph kwarg) is currently
108-
# believed to never need to know about SHACL concepts.
109-
ontology_graph_without_shacl = rdflib.Graph()
110-
SH_prefix = str(rdflib.SH)
111-
for triple in ontology_graph.triples((None, None, None)):
112-
skip_triple = False
113-
for triple_part in triple:
114-
if isinstance(triple_part, rdflib.URIRef):
115-
if str(triple_part).startswith(SH_prefix):
116-
skip_triple = True
117-
if skip_triple:
118-
break
119-
if skip_triple:
120-
continue
121-
ontology_graph_without_shacl.add(triple)
122-
# _logger.debug("len(ontology_graph) = %d.", len(ontology_graph))
123-
# _logger.debug("len(ontology_graph_without_shacl) = %d.", len(ontology_graph_without_shacl))
124-
# At the time of CASE 1.0.0, this was the debug output:
125-
# DEBUG:__init__.py:len(ontology_graph) = 13499.
126-
# DEBUG:__init__.py:len(ontology_graph_without_shacl) = 7639.
100+
if not review_tbox:
101+
# This is done because, at the time of pyshacl 0.20.0, the
102+
# entirety of the ontology graph is mixed into the data graph.
103+
# UCO 1.0.0 includes some mechanisms to cross-check SHACL
104+
# PropertyShapes versus OWL property definitions. Because of
105+
# the mix-in, all of the ontology graph (.validate ont_graph
106+
# kwarg) is reviewed by the SHACL graph (.validate shacl_graph
107+
# kwarg), so for UCO 1.0.0 that adds around 30 seconds to each
108+
# case_validate call, redundantly reviewing UCO.
109+
disable_tbox_review(ontology_graph)
127110

128111
# Get the undefined CDO concepts.
129112
undefined_cdo_concepts = get_invalid_cdo_concepts(data_graph, ontology_graph)
@@ -142,7 +125,7 @@ def validate(
142125
] = pyshacl.validate(
143126
data_graph,
144127
*args,
145-
ont_graph=ontology_graph_without_shacl,
128+
ont_graph=ontology_graph,
146129
shacl_graph=ontology_graph,
147130
**kwargs,
148131
)
@@ -256,6 +239,11 @@ def main() -> None:
256239
help='(ALMOST as with pyshacl CLI) Send output to a file. If absent, output will be written to stdout. Difference: If specified, file is expected not to exist. Clarification: Does NOT influence --format flag\'s default value of "human". (I.e., any machine-readable serialization format must be specified with --format.)',
257240
default=sys.stdout,
258241
)
242+
parser.add_argument(
243+
"--review-tbox",
244+
action="store_true",
245+
help='Enable rules for reviewing OWL Classes, Properties, and SHACL shapes that constrain them (i.e. the "TBox", or "Terminological box", of the data graph and ontology graph; in contrast, the "ABox", or "Assertional box", contains the declarations of members of those classes, and users of those properties). This should be used when adding extension classes or properties not adopted by UCO or its downstream ontologies, e.g. when using a drafting namespace. Be aware that these rules are known to significantly increase the validation run time.',
246+
)
259247

260248
parser.add_argument("in_graph", nargs="+")
261249

@@ -281,6 +269,7 @@ def main() -> None:
281269
do_owl_imports=True if args.imports else False,
282270
inference=args.inference,
283271
meta_shacl=args.metashacl,
272+
review_tbox=True if args.review_tbox else False,
284273
supplemental_graphs=args.ontology_graph,
285274
**validator_kwargs,
286275
)

case_utils/case_validate/validate_utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,22 @@ def get_ontology_graph(
182182
ontology_graph.parse(arg_ontology_graph)
183183

184184
return ontology_graph
185+
186+
187+
def disable_tbox_review(graph: rdflib.Graph) -> None:
    """
    Mark the UCO OWL-review ("TBox") shapes as deactivated in the given graph.

    For each shape named below, under the namespace
    https://ontology.unifiedcyberontology.org/owl/, one triple is added to
    the graph with predicate NS_SH.deactivated and a true literal as object.
    The graph is modified in place; nothing is returned.

    :param graph: The graph that receives the deactivation triples.
    """
    uco_owl_namespace = rdflib.Namespace(
        "https://ontology.unifiedcyberontology.org/owl/"
    )
    true_literal = rdflib.Literal(True)

    # Local names of the shapes to switch off.
    shape_basenames = (
        "DataOneOf-shape",
        "DatatypeProperty-shacl-constraints-shape",
        "Disjointedness-AP-DP-shape",
        "Disjointedness-AP-OP-shape",
        "Disjointedness-C-DT-shape",
        "Disjointedness-DP-OP-shape",
        "ObjectProperty-shacl-constraints-shape",
        "ontologyIRI-versionIRI-prerequisite-shape",
        "versionIRI-nodeKind-shape",
    )
    for shape_basename in shape_basenames:
        shape_node = uco_owl_namespace[shape_basename]
        graph.add((shape_node, NS_SH.deactivated, true_literal))

tests/case_utils/case_validate/uco_test_examples/Makefile

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,37 @@ all: \
9999
rm __$@
100100
mv _$@ $@
101101

102+
# NOTE - this more-specific recipe enables "tbox" review, but otherwise
103+
# matches the wildcarded recipe.
# The first recipe line succeeds only when the exit status captured in
# rc is 0 or 1.  The redirected output (__$@) must be non-empty; it is
# then passed through the jar named by $(RDF_TOOLKIT_JAR), written to
# _$@, and finally moved to $@.
104+
owl_properties_XFAIL_validation.ttl: \
105+
$(examples_srcdir)/owl_properties_XFAIL.json \
106+
$(tests_srcdir)/.venv.done.log \
107+
$(top_srcdir)/.ontology.done.log \
108+
$(top_srcdir)/case_utils/case_validate/__init__.py \
109+
$(top_srcdir)/case_utils/case_validate/validate_types.py \
110+
$(top_srcdir)/case_utils/case_validate/validate_utils.py \
111+
$(top_srcdir)/case_utils/ontology/__init__.py
112+
source $(tests_srcdir)/venv/bin/activate \
113+
&& case_validate \
114+
--allow-warnings \
115+
--debug \
116+
--format turtle \
117+
--review-tbox \
118+
$< \
119+
> __$@ \
120+
; rc=$$? ; test 0 -eq $$rc -o 1 -eq $$rc
121+
@#Fail if output is empty.
122+
@test -s __$@ \
123+
|| exit 1
124+
java -jar $(RDF_TOOLKIT_JAR) \
125+
--inline-blank-nodes \
126+
--source __$@ \
127+
--source-format turtle \
128+
--target _$@ \
129+
--target-format turtle
130+
rm __$@
131+
mv _$@ $@
132+
102133
check: \
103134
$(validation_ttls)
104135
source $(tests_srcdir)/venv/bin/activate \

0 commit comments

Comments
 (0)