Skip to content

Commit bded343

Browse files
authored
Merge pull request #118 from casework/library-usage
Library Usage
2 parents 85cf1d8 + 90f5c8c commit bded343

File tree

8 files changed

+360
-179
lines changed

8 files changed

+360
-179
lines changed

case_utils/case_validate/__init__.py

Lines changed: 94 additions & 164 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,16 @@
11
#!/usr/bin/env python3
22

3+
# Portions of this file contributed by NIST are governed by the following
4+
# statement:
5+
#
36
# This software was developed at the National Institute of Standards
47
# and Technology by employees of the Federal Government in the course
5-
# of their official duties. Pursuant to title 17 Section 105 of the
6-
# United States Code this software is not subject to copyright
7-
# protection and is in the public domain. NIST assumes no
8-
# responsibility whatsoever for its use by other parties, and makes
9-
# no guarantees, expressed or implied, about its quality,
10-
# reliability, or any other characteristic.
8+
# of their official duties. Pursuant to Title 17 Section 105 of the
9+
# United States Code, this software is not subject to copyright
10+
# protection within the United States. NIST assumes no responsibility
11+
# whatsoever for its use by other parties, and makes no guarantees,
12+
# expressed or implied, about its quality, reliability, or any other
13+
# characteristic.
1114
#
1215
# We would appreciate acknowledgement if the software is used.
1316

@@ -32,141 +35,105 @@
3235
__version__ = "0.3.0"
3336

3437
import argparse
35-
import importlib.resources
3638
import logging
3739
import os
3840
import sys
3941
import warnings
40-
from typing import Dict, Set, Tuple, Union
42+
from typing import Any, Dict, List, Optional, Tuple, Union
4143

4244
import pyshacl # type: ignore
4345
import rdflib
46+
from rdflib import Graph
4447

45-
import case_utils.ontology
48+
from case_utils.case_validate.validate_types import (
49+
NonExistentCDOConceptWarning,
50+
ValidationResult,
51+
)
52+
from case_utils.case_validate.validate_utils import (
53+
get_invalid_cdo_concepts,
54+
get_ontology_graph,
55+
)
4656
from case_utils.ontology.version_info import (
4757
CURRENT_CASE_VERSION,
4858
built_version_choices_list,
4959
)
5060

51-
NS_OWL = rdflib.OWL
52-
NS_RDF = rdflib.RDF
53-
NS_RDFS = rdflib.RDFS
54-
NS_SH = rdflib.SH
55-
5661
_logger = logging.getLogger(os.path.basename(__file__))
5762

5863

59-
class NonExistentCDOConceptWarning(UserWarning):
64+
def validate(
65+
input_file: Union[List[str], str],
66+
*args: Any,
67+
case_version: Optional[str] = None,
68+
supplemental_graphs: Optional[List[str]] = None,
69+
**kwargs: Any,
70+
) -> ValidationResult:
6071
"""
61-
This class is used when a concept is encountered in the data graph that is not part of CDO ontologies, according to the --built-version flags and --ontology-graph flags.
72+
Validate the given data graph against the given CASE ontology version and supplemental graphs.
73+
74+
:param *args: The positional arguments to pass to the underlying pyshacl.validate function.
75+
:param input_file: The path to the file containing the data graph to validate. This can also be a list of paths to files containing data graphs to pool together.
76+
:param case_version: The version of the CASE ontology to use (e.g. 1.2.0). If None, the most recent version will be used.
77+
:param supplemental_graphs: File paths to supplemental graphs to use. If None, no supplemental graphs will be used.
78+
:param allow_warnings: In addition to affecting the conformance of SHACL validation, this will affect conformance based on unrecognized CDO concepts (likely, misspelled or miscapitalized) in the data graph. If allow_warnings is not True, any unrecognized concept using a CDO IRI prefix will cause conformance to be False.
79+
:param inference: The type of inference to use. If "none" (type str), no inference will be used. If None (type NoneType), pyshacl defaults will be used. Note that at the time of this writing (pySHACL 0.23.0), pyshacl defaults are no inferencing for the data graph, and RDFS inferencing for the SHACL graph, which for case_utils.validate includes the SHACL and OWL graphs.
80+
:param **kwargs: The keyword arguments to pass to the underlying pyshacl.validate function.
81+
:return: The validation result object containing the defined properties.
6282
"""
83+
# Convert the data graph string to a rdflib.Graph object.
84+
data_graph = rdflib.Graph()
85+
if isinstance(input_file, str):
86+
data_graph.parse(input_file)
87+
elif isinstance(input_file, list):
88+
for _data_graph_file in input_file:
89+
_logger.debug("_data_graph_file = %r.", _data_graph_file)
90+
if not isinstance(_data_graph_file, str):
91+
raise TypeError("Expected str, received %s." % type(_data_graph_file))
92+
data_graph.parse(_data_graph_file)
93+
94+
# Get the ontology graph from the case_version and supplemental_graphs arguments
95+
ontology_graph: Graph = get_ontology_graph(case_version, supplemental_graphs)
96+
97+
# Get the undefined CDO concepts.
98+
undefined_cdo_concepts = get_invalid_cdo_concepts(data_graph, ontology_graph)
6399

64-
pass
100+
# Warn about typo'd concepts before performing SHACL review.
101+
for undefined_cdo_concept in sorted(undefined_cdo_concepts):
102+
warnings.warn(undefined_cdo_concept, NonExistentCDOConceptWarning)
103+
undefined_cdo_concepts_message = (
104+
"There were %d concepts with CDO IRIs in the data graph that are not in the ontology graph."
105+
% len(undefined_cdo_concepts)
106+
)
65107

108+
# Validate data graph against ontology graph.
109+
validate_result: Tuple[
110+
bool, Union[Exception, bytes, str, rdflib.Graph], str
111+
] = pyshacl.validate(
112+
data_graph,
113+
*args,
114+
ont_graph=ontology_graph,
115+
shacl_graph=ontology_graph,
116+
**kwargs,
117+
)
66118

67-
def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool:
68-
concept_iri = str(n_concept)
69-
return concept_iri.startswith(
70-
"https://ontology.unifiedcyberontology.org/"
71-
) or concept_iri.startswith("https://ontology.caseontology.org/")
119+
# Relieve RAM of the data graph after validation has run.
120+
del data_graph
72121

122+
conforms = validate_result[0]
73123

74-
def get_invalid_cdo_concepts(
75-
data_graph: rdflib.Graph, ontology_graph: rdflib.Graph
76-
) -> Set[rdflib.URIRef]:
77-
"""
78-
Get the set of concepts in the data graph that are not part of the CDO ontologies as specified with the ontology_graph argument.
79-
80-
:param data_graph: The data graph to validate.
81-
:param ontology_graph: The ontology graph to use for validation.
82-
:return: The list of concepts in the data graph that are not part of the CDO ontology.
83-
84-
>>> from case_utils.namespace import NS_RDF, NS_OWL, NS_UCO_CORE
85-
>>> from rdflib import Graph, Literal, Namespace, URIRef
86-
>>> # Define a namespace for a knowledge base, and a namespace for custom extensions.
87-
>>> ns_kb = Namespace("http://example.org/kb/")
88-
>>> ns_ex = Namespace("http://example.org/ontology/")
89-
>>> dg = Graph()
90-
>>> og = Graph()
91-
>>> # Use an ontology graph in review that includes only a single class and a single property excerpted from UCO, but also a single custom property.
92-
>>> _ = og.add((NS_UCO_CORE.UcoObject, NS_RDF.type, NS_OWL.Class))
93-
>>> _ = og.add((NS_UCO_CORE.name, NS_RDF.type, NS_OWL.DatatypeProperty))
94-
>>> _ = og.add((ns_ex.ourCustomProperty, NS_RDF.type, NS_OWL.DatatypeProperty))
95-
>>> # Define an individual.
96-
>>> n_uco_object = ns_kb["UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c"]
97-
>>> n_uco_object
98-
rdflib.term.URIRef('http://example.org/kb/UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c')
99-
>>> # Review a data graph that includes only the single individual, class typo'd (capitalized incorrectly), but property OK.
100-
>>> _ = dg.add((n_uco_object, NS_RDF.type, NS_UCO_CORE.UCOObject))
101-
>>> _ = dg.add((n_uco_object, NS_UCO_CORE.name, Literal("Test")))
102-
>>> _ = dg.add((n_uco_object, ns_ex.customProperty, Literal("Custom Value")))
103-
>>> invalid_cdo_concepts = get_invalid_cdo_concepts(dg, og)
104-
>>> invalid_cdo_concepts
105-
{rdflib.term.URIRef('https://ontology.unifiedcyberontology.org/uco/core/UCOObject')}
106-
>>> # Note that the property "ourCustomProperty" was typo'd in the data graph, but this was not reported.
107-
>>> assert ns_ex.ourCustomProperty not in invalid_cdo_concepts
108-
"""
109-
# Construct set of CDO concepts for data graph concept-existence review.
110-
cdo_concepts: Set[rdflib.URIRef] = set()
111-
112-
for n_structural_class in [
113-
NS_OWL.Class,
114-
NS_OWL.AnnotationProperty,
115-
NS_OWL.DatatypeProperty,
116-
NS_OWL.ObjectProperty,
117-
NS_RDFS.Datatype,
118-
NS_SH.NodeShape,
119-
NS_SH.PropertyShape,
120-
NS_SH.Shape,
121-
]:
122-
for ontology_triple in ontology_graph.triples(
123-
(None, NS_RDF.type, n_structural_class)
124-
):
125-
if not isinstance(ontology_triple[0], rdflib.URIRef):
126-
continue
127-
if concept_is_cdo_concept(ontology_triple[0]):
128-
cdo_concepts.add(ontology_triple[0])
129-
for n_ontology_predicate in [
130-
NS_OWL.backwardCompatibleWith,
131-
NS_OWL.imports,
132-
NS_OWL.incompatibleWith,
133-
NS_OWL.priorVersion,
134-
NS_OWL.versionIRI,
135-
]:
136-
for ontology_triple in ontology_graph.triples(
137-
(None, n_ontology_predicate, None)
138-
):
139-
assert isinstance(ontology_triple[0], rdflib.URIRef)
140-
assert isinstance(ontology_triple[2], rdflib.URIRef)
141-
cdo_concepts.add(ontology_triple[0])
142-
cdo_concepts.add(ontology_triple[2])
143-
for ontology_triple in ontology_graph.triples((None, NS_RDF.type, NS_OWL.Ontology)):
144-
if not isinstance(ontology_triple[0], rdflib.URIRef):
145-
continue
146-
cdo_concepts.add(ontology_triple[0])
147-
148-
# Also load historical ontology and version IRIs.
149-
ontology_and_version_iris_data = importlib.resources.read_text(
150-
case_utils.ontology, "ontology_and_version_iris.txt"
124+
if len(undefined_cdo_concepts) > 0:
125+
warnings.warn(undefined_cdo_concepts_message)
126+
if not kwargs.get("allow_warnings"):
127+
undefined_cdo_concepts_alleviation_message = "The data graph is SHACL-conformant with the CDO ontologies, but nonexistent-concept references raise Warnings with this tool. Please either correct the concept names in the data graph; use the --ontology-graph flag to pass a corrected CDO ontology file, also using --built-version none; or, use the --allow-warnings flag."
128+
warnings.warn(undefined_cdo_concepts_alleviation_message)
129+
conforms = False
130+
131+
return ValidationResult(
132+
conforms,
133+
validate_result[1],
134+
validate_result[2],
135+
undefined_cdo_concepts,
151136
)
152-
for line in ontology_and_version_iris_data.split("\n"):
153-
cleaned_line = line.strip()
154-
if cleaned_line == "":
155-
continue
156-
cdo_concepts.add(rdflib.URIRef(cleaned_line))
157-
158-
data_cdo_concepts: Set[rdflib.URIRef] = set()
159-
for data_triple in data_graph.triples((None, None, None)):
160-
for data_triple_member in data_triple:
161-
if isinstance(data_triple_member, rdflib.URIRef):
162-
if concept_is_cdo_concept(data_triple_member):
163-
data_cdo_concepts.add(data_triple_member)
164-
elif isinstance(data_triple_member, rdflib.Literal):
165-
if isinstance(data_triple_member.datatype, rdflib.URIRef):
166-
if concept_is_cdo_concept(data_triple_member.datatype):
167-
data_cdo_concepts.add(data_triple_member.datatype)
168-
169-
return data_cdo_concepts - cdo_concepts
170137

171138

172139
def main() -> None:
@@ -263,32 +230,6 @@ def main() -> None:
263230

264231
args = parser.parse_args()
265232

266-
data_graph = rdflib.Graph()
267-
for in_graph in args.in_graph:
268-
_logger.debug("in_graph = %r.", in_graph)
269-
data_graph.parse(in_graph)
270-
271-
ontology_graph = rdflib.Graph()
272-
if args.built_version != "none":
273-
ttl_filename = args.built_version + ".ttl"
274-
_logger.debug("ttl_filename = %r.", ttl_filename)
275-
ttl_data = importlib.resources.read_text(case_utils.ontology, ttl_filename)
276-
ontology_graph.parse(data=ttl_data, format="turtle")
277-
if args.ontology_graph:
278-
for arg_ontology_graph in args.ontology_graph:
279-
_logger.debug("arg_ontology_graph = %r.", arg_ontology_graph)
280-
ontology_graph.parse(arg_ontology_graph)
281-
282-
# Get the list of undefined CDO concepts in the graph
283-
undefined_cdo_concepts = get_invalid_cdo_concepts(data_graph, ontology_graph)
284-
285-
for undefined_cdo_concept in sorted(undefined_cdo_concepts):
286-
warnings.warn(undefined_cdo_concept, NonExistentCDOConceptWarning)
287-
undefined_cdo_concepts_message = (
288-
"There were %d concepts with CDO IRIs in the data graph that are not in the ontology graph."
289-
% len(undefined_cdo_concepts)
290-
)
291-
292233
# Determine output format.
293234
# pySHACL's determination of output formatting is handled solely
294235
# through the -f flag. Other CASE CLI tools handle format
@@ -299,27 +240,23 @@ def main() -> None:
299240
if args.format != "human":
300241
validator_kwargs["serialize_report_graph"] = args.format
301242

302-
validate_result: Tuple[bool, Union[Exception, bytes, str, rdflib.Graph], str]
303-
validate_result = pyshacl.validate(
304-
data_graph,
305-
shacl_graph=ontology_graph,
306-
ont_graph=ontology_graph,
307-
inference=args.inference,
308-
meta_shacl=args.metashacl,
243+
validation_result: ValidationResult = validate(
244+
args.in_graph,
309245
abort_on_first=args.abort,
310246
allow_infos=True if args.allow_infos else False,
311247
allow_warnings=True if args.allow_warnings else False,
248+
case_version=args.built_version,
312249
debug=True if args.debug else False,
313250
do_owl_imports=True if args.imports else False,
314-
**validator_kwargs
251+
inference=args.inference,
252+
meta_shacl=args.metashacl,
253+
supplemental_graphs=args.ontology_graph,
254+
**validator_kwargs,
315255
)
316256

317-
# Relieve RAM of the data graph after validation has run.
318-
del data_graph
319-
320-
conforms = validate_result[0]
321-
validation_graph = validate_result[1]
322-
validation_text = validate_result[2]
257+
conforms = validation_result.conforms
258+
validation_graph = validation_result.graph
259+
validation_text = validation_result.text
323260

324261
# NOTE: The output logistics code is adapted from pySHACL's file
325262
# pyshacl/cli.py. This section should be monitored for code drift.
@@ -341,13 +278,6 @@ def main() -> None:
341278
% type(validation_graph)
342279
)
343280

344-
if len(undefined_cdo_concepts) > 0:
345-
warnings.warn(undefined_cdo_concepts_message)
346-
if not args.allow_warnings:
347-
undefined_cdo_concepts_alleviation_message = "The data graph is SHACL-conformant with the CDO ontologies, but nonexistent-concept references raise Warnings with this tool. Please either correct the concept names in the data graph; use the --ontology-graph flag to pass a corrected CDO ontology file, also using --built-version none; or, use the --allow-warnings flag."
348-
warnings.warn(undefined_cdo_concepts_alleviation_message)
349-
conforms = False
350-
351281
sys.exit(0 if conforms else 1)
352282

353283

case_utils/case_validate/validate_types.py

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env python3
2+
3+
# Portions of this file contributed by NIST are governed by the following
4+
# statement:
5+
#
6+
# This software was developed at the National Institute of Standards
7+
# and Technology by employees of the Federal Government in the course
8+
# of their official duties. Pursuant to Title 17 Section 105 of the
9+
# United States Code, this software is not subject to copyright
10+
# protection within the United States. NIST assumes no responsibility
11+
# whatsoever for its use by other parties, and makes no guarantees,
12+
# expressed or implied, about its quality, reliability, or any other
13+
# characteristic.
14+
#
15+
# We would appreciate acknowledgement if the software is used.
16+
17+
from typing import Set, Union
18+
19+
import rdflib
20+
21+
22+
class ValidationResult:
23+
def __init__(
24+
self,
25+
conforms: bool,
26+
graph: Union[Exception, bytes, str, rdflib.Graph],
27+
text: str,
28+
undefined_concepts: Set[rdflib.URIRef],
29+
) -> None:
30+
self.conforms = conforms
31+
self.graph = graph
32+
self.text = text
33+
self.undefined_concepts = undefined_concepts
34+
35+
36+
class NonExistentCDOConceptWarning(UserWarning):
37+
"""
38+
This class is used when a concept is encountered in the data graph that is not part of CDO ontologies, according to the --built-version flags and --ontology-graph flags.
39+
"""
40+
41+
pass
42+
43+
44+
class NonExistentCASEVersionError(Exception):
45+
"""
46+
This class is used when an invalid CASE version is requested that is not supported by the library.
47+
"""
48+
49+
pass

0 commit comments

Comments
 (0)