Skip to content

Commit 39f8f11

Browse files
authored
Merge pull request #121 from casework/add_case_validate_get_invalid_cdo_concepts
case_validate: Expose CDO IRI typo-checker as function
2 parents e51dc87 + 1c636e0 commit 39f8f11

File tree

1 file changed

+100
-60
lines changed

1 file changed

+100
-60
lines changed

case_utils/case_validate/__init__.py

Lines changed: 100 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,104 @@ def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool:
7171
) or concept_iri.startswith("https://ontology.caseontology.org/")
7272

7373

74+
def get_invalid_cdo_concepts(
    data_graph: rdflib.Graph, ontology_graph: rdflib.Graph
) -> Set[rdflib.URIRef]:
    """
    Get the set of concepts in the data graph that are not part of the CDO ontologies as specified with the ontology_graph argument.

    Only IRIs accepted by concept_is_cdo_concept are reviewed; IRIs outside of the CDO namespaces are never reported, even if they are undefined in the ontology graph.

    :param data_graph: The data graph to validate.
    :param ontology_graph: The ontology graph to use for validation.
    :return: The set of CDO concepts used in the data graph (as a subject, predicate, object, or literal datatype) that are not defined in the ontology graph.

    >>> from case_utils.namespace import NS_RDF, NS_OWL, NS_UCO_CORE
    >>> from rdflib import Graph, Literal, Namespace, URIRef
    >>> # Define a namespace for a knowledge base, and a namespace for custom extensions.
    >>> ns_kb = Namespace("http://example.org/kb/")
    >>> ns_ex = Namespace("http://example.org/ontology/")
    >>> dg = Graph()
    >>> og = Graph()
    >>> # Use an ontology graph in review that includes only a single class and a single property excerpted from UCO, but also a single custom property.
    >>> _ = og.add((NS_UCO_CORE.UcoObject, NS_RDF.type, NS_OWL.Class))
    >>> _ = og.add((NS_UCO_CORE.name, NS_RDF.type, NS_OWL.DatatypeProperty))
    >>> _ = og.add((ns_ex.ourCustomProperty, NS_RDF.type, NS_OWL.DatatypeProperty))
    >>> # Define an individual.
    >>> n_uco_object = ns_kb["UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c"]
    >>> n_uco_object
    rdflib.term.URIRef('http://example.org/kb/UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c')
    >>> # Review a data graph that includes only the single individual, class typo'd (capitalized incorrectly), but property OK.
    >>> _ = dg.add((n_uco_object, NS_RDF.type, NS_UCO_CORE.UCOObject))
    >>> _ = dg.add((n_uco_object, NS_UCO_CORE.name, Literal("Test")))
    >>> _ = dg.add((n_uco_object, ns_ex.customProperty, Literal("Custom Value")))
    >>> invalid_cdo_concepts = get_invalid_cdo_concepts(dg, og)
    >>> invalid_cdo_concepts
    {rdflib.term.URIRef('https://ontology.unifiedcyberontology.org/uco/core/UCOObject')}
    >>> # Note that the property "ourCustomProperty" was typo'd in the data graph as "customProperty", but this was not reported because it is not a CDO concept.
    >>> assert ns_ex.customProperty not in invalid_cdo_concepts
    """
    # Construct set of CDO concepts for data graph concept-existence review.
    cdo_concepts: Set[rdflib.URIRef] = set()

    # Concepts declared in the ontology graph with one of the structural
    # OWL, RDFS, or SHACL types.
    for n_structural_class in [
        NS_OWL.Class,
        NS_OWL.AnnotationProperty,
        NS_OWL.DatatypeProperty,
        NS_OWL.ObjectProperty,
        NS_RDFS.Datatype,
        NS_SH.NodeShape,
        NS_SH.PropertyShape,
        NS_SH.Shape,
    ]:
        for ontology_triple in ontology_graph.triples(
            (None, NS_RDF.type, n_structural_class)
        ):
            # Blank nodes (e.g. anonymous shapes) have no IRI to review.
            if not isinstance(ontology_triple[0], rdflib.URIRef):
                continue
            if concept_is_cdo_concept(ontology_triple[0]):
                cdo_concepts.add(ontology_triple[0])

    # Ontology IRIs and version IRIs referenced by ontology-level predicates.
    for n_ontology_predicate in [
        NS_OWL.backwardCompatibleWith,
        NS_OWL.imports,
        NS_OWL.incompatibleWith,
        NS_OWL.priorVersion,
        NS_OWL.versionIRI,
    ]:
        for ontology_triple in ontology_graph.triples(
            (None, n_ontology_predicate, None)
        ):
            # Skip non-IRI members rather than asserting: an anonymous
            # (blank-node) ontology is skipped the same way by the
            # owl:Ontology loop below, and an assert would vanish under -O.
            for n_ontology_member in (ontology_triple[0], ontology_triple[2]):
                if isinstance(n_ontology_member, rdflib.URIRef):
                    cdo_concepts.add(n_ontology_member)

    # Ontology IRIs themselves.
    for ontology_triple in ontology_graph.triples((None, NS_RDF.type, NS_OWL.Ontology)):
        if not isinstance(ontology_triple[0], rdflib.URIRef):
            continue
        cdo_concepts.add(ontology_triple[0])

    # Also load historical ontology and version IRIs.
    ontology_and_version_iris_data = importlib.resources.read_text(
        case_utils.ontology, "ontology_and_version_iris.txt"
    )
    for line in ontology_and_version_iris_data.split("\n"):
        cleaned_line = line.strip()
        if cleaned_line == "":
            continue
        cdo_concepts.add(rdflib.URIRef(cleaned_line))

    # Collect every CDO concept the data graph uses, in any triple position,
    # including the datatype IRIs of literals.
    data_cdo_concepts: Set[rdflib.URIRef] = set()
    for data_triple in data_graph.triples((None, None, None)):
        for data_triple_member in data_triple:
            if isinstance(data_triple_member, rdflib.URIRef):
                if concept_is_cdo_concept(data_triple_member):
                    data_cdo_concepts.add(data_triple_member)
            elif isinstance(data_triple_member, rdflib.Literal):
                if isinstance(data_triple_member.datatype, rdflib.URIRef):
                    if concept_is_cdo_concept(data_triple_member.datatype):
                        data_cdo_concepts.add(data_triple_member.datatype)

    # Whatever the data graph uses that the ontology never defined.
    return data_cdo_concepts - cdo_concepts
170+
171+
74172
def main() -> None:
75173
parser = argparse.ArgumentParser(
76174
description="CASE wrapper to pySHACL command line tool."
@@ -181,67 +279,9 @@ def main() -> None:
181279
_logger.debug("arg_ontology_graph = %r.", arg_ontology_graph)
182280
ontology_graph.parse(arg_ontology_graph)
183281

184-
# Construct set of CDO concepts for data graph concept-existence review.
185-
cdo_concepts: Set[rdflib.URIRef] = set()
186-
187-
for n_structural_class in [
188-
NS_OWL.Class,
189-
NS_OWL.AnnotationProperty,
190-
NS_OWL.DatatypeProperty,
191-
NS_OWL.ObjectProperty,
192-
NS_RDFS.Datatype,
193-
NS_SH.NodeShape,
194-
NS_SH.PropertyShape,
195-
NS_SH.Shape,
196-
]:
197-
for ontology_triple in ontology_graph.triples(
198-
(None, NS_RDF.type, n_structural_class)
199-
):
200-
if not isinstance(ontology_triple[0], rdflib.URIRef):
201-
continue
202-
if concept_is_cdo_concept(ontology_triple[0]):
203-
cdo_concepts.add(ontology_triple[0])
204-
for n_ontology_predicate in [
205-
NS_OWL.backwardCompatibleWith,
206-
NS_OWL.imports,
207-
NS_OWL.incompatibleWith,
208-
NS_OWL.priorVersion,
209-
NS_OWL.versionIRI,
210-
]:
211-
for ontology_triple in ontology_graph.triples(
212-
(None, n_ontology_predicate, None)
213-
):
214-
assert isinstance(ontology_triple[0], rdflib.URIRef)
215-
assert isinstance(ontology_triple[2], rdflib.URIRef)
216-
cdo_concepts.add(ontology_triple[0])
217-
cdo_concepts.add(ontology_triple[2])
218-
for ontology_triple in ontology_graph.triples((None, NS_RDF.type, NS_OWL.Ontology)):
219-
if not isinstance(ontology_triple[0], rdflib.URIRef):
220-
continue
221-
cdo_concepts.add(ontology_triple[0])
222-
223-
# Also load historical ontology and version IRIs.
224-
ontology_and_version_iris_data = importlib.resources.read_text(
225-
case_utils.ontology, "ontology_and_version_iris.txt"
226-
)
227-
for line in ontology_and_version_iris_data.split("\n"):
228-
cleaned_line = line.strip()
229-
if cleaned_line == "":
230-
continue
231-
cdo_concepts.add(rdflib.URIRef(cleaned_line))
232-
233-
data_cdo_concepts: Set[rdflib.URIRef] = set()
234-
for data_triple in data_graph.triples((None, None, None)):
235-
for data_triple_member in data_triple:
236-
if isinstance(data_triple_member, rdflib.URIRef):
237-
if concept_is_cdo_concept(data_triple_member):
238-
data_cdo_concepts.add(data_triple_member)
239-
elif isinstance(data_triple_member, rdflib.Literal):
240-
if isinstance(data_triple_member.datatype, rdflib.URIRef):
241-
if concept_is_cdo_concept(data_triple_member.datatype):
242-
data_cdo_concepts.add(data_triple_member.datatype)
282+
# Get the list of undefined CDO concepts in the graph
283+
undefined_cdo_concepts = get_invalid_cdo_concepts(data_graph, ontology_graph)
243284

244-
undefined_cdo_concepts = data_cdo_concepts - cdo_concepts
245285
for undefined_cdo_concept in sorted(undefined_cdo_concepts):
246286
warnings.warn(undefined_cdo_concept, NonExistentCDOConceptWarning)
247287
undefined_cdo_concepts_message = (

0 commit comments

Comments
 (0)