@@ -71,6 +71,104 @@ def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool:
71
71
) or concept_iri .startswith ("https://ontology.caseontology.org/" )
72
72
73
73
74
def get_invalid_cdo_concepts(
    data_graph: rdflib.Graph, ontology_graph: rdflib.Graph
) -> Set[rdflib.URIRef]:
    """
    Get the set of concepts in the data graph that are not part of the CDO ontologies as specified with the ontology_graph argument.

    Only IRIs for which ``concept_is_cdo_concept`` returns true are reviewed.
    IRIs outside of the CDO namespaces (e.g. custom extension properties) are
    never reported, even when they are absent from the ontology graph.

    :param data_graph: The data graph to validate.
    :param ontology_graph: The ontology graph to use for validation.
    :return: The set of CDO concepts used in the data graph (as a subject, predicate, object, or literal datatype) that are neither defined in the ontology graph nor listed among the packaged historical ontology and version IRIs.

    >>> from case_utils.namespace import NS_RDF, NS_OWL, NS_UCO_CORE
    >>> from rdflib import Graph, Literal, Namespace, URIRef
    >>> # Define a namespace for a knowledge base, and a namespace for custom extensions.
    >>> ns_kb = Namespace("http://example.org/kb/")
    >>> ns_ex = Namespace("http://example.org/ontology/")
    >>> dg = Graph()
    >>> og = Graph()
    >>> # Use an ontology graph in review that includes only a single class and a single property excerpted from UCO, but also a single custom property.
    >>> _ = og.add((NS_UCO_CORE.UcoObject, NS_RDF.type, NS_OWL.Class))
    >>> _ = og.add((NS_UCO_CORE.name, NS_RDF.type, NS_OWL.DatatypeProperty))
    >>> _ = og.add((ns_ex.ourCustomProperty, NS_RDF.type, NS_OWL.DatatypeProperty))
    >>> # Define an individual.
    >>> n_uco_object = ns_kb["UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c"]
    >>> n_uco_object
    rdflib.term.URIRef('http://example.org/kb/UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c')
    >>> # Review a data graph that includes only the single individual, class typo'd (capitalized incorrectly), but property OK.
    >>> _ = dg.add((n_uco_object, NS_RDF.type, NS_UCO_CORE.UCOObject))
    >>> _ = dg.add((n_uco_object, NS_UCO_CORE.name, Literal("Test")))
    >>> _ = dg.add((n_uco_object, ns_ex.customProperty, Literal("Custom Value")))
    >>> invalid_cdo_concepts = get_invalid_cdo_concepts(dg, og)
    >>> invalid_cdo_concepts
    {rdflib.term.URIRef('https://ontology.unifiedcyberontology.org/uco/core/UCOObject')}
    >>> # Note that the property "ourCustomProperty" was typo'd in the data graph, but this was not reported, because its namespace is not a CDO namespace.
    >>> assert ns_ex.ourCustomProperty not in invalid_cdo_concepts
    """
    # Construct set of CDO concepts for data graph concept-existence review.
    cdo_concepts: Set[rdflib.URIRef] = set()

    # Concepts the ontology graph declares with a structural OWL, RDFS, or
    # SHACL type.  Only IRIs in a CDO namespace are retained.
    for n_structural_class in (
        NS_OWL.Class,
        NS_OWL.AnnotationProperty,
        NS_OWL.DatatypeProperty,
        NS_OWL.ObjectProperty,
        NS_RDFS.Datatype,
        NS_SH.NodeShape,
        NS_SH.PropertyShape,
        NS_SH.Shape,
    ):
        for n_subject, _, _ in ontology_graph.triples(
            (None, NS_RDF.type, n_structural_class)
        ):
            if not isinstance(n_subject, rdflib.URIRef):
                continue
            if concept_is_cdo_concept(n_subject):
                cdo_concepts.add(n_subject)

    # Ontology and version IRIs referenced through ontology-header
    # predicates.  Both ends of each statement are treated as known.
    for n_ontology_predicate in (
        NS_OWL.backwardCompatibleWith,
        NS_OWL.imports,
        NS_OWL.incompatibleWith,
        NS_OWL.priorVersion,
        NS_OWL.versionIRI,
    ):
        for n_subject, _, n_object in ontology_graph.triples(
            (None, n_ontology_predicate, None)
        ):
            # Skip non-IRI nodes (e.g. the blank-node subject of an anonymous
            # ontology) instead of asserting: they can never match an IRI in
            # the data graph, and an assert would abort the whole review (or
            # be silently stripped under ``python -O``).
            for n_member in (n_subject, n_object):
                if isinstance(n_member, rdflib.URIRef):
                    cdo_concepts.add(n_member)

    # IRIs of the ontologies themselves.
    for n_subject, _, _ in ontology_graph.triples(
        (None, NS_RDF.type, NS_OWL.Ontology)
    ):
        if isinstance(n_subject, rdflib.URIRef):
            cdo_concepts.add(n_subject)

    # Also load historical ontology and version IRIs.
    ontology_and_version_iris_data = importlib.resources.read_text(
        case_utils.ontology, "ontology_and_version_iris.txt"
    )
    # splitlines() splits on every line boundary; blank lines and surrounding
    # whitespace are discarded below.
    for line in ontology_and_version_iris_data.splitlines():
        cleaned_line = line.strip()
        if cleaned_line:
            cdo_concepts.add(rdflib.URIRef(cleaned_line))

    # Collect every CDO-namespaced IRI the data graph uses, in any triple
    # position or as the datatype of a literal.
    data_cdo_concepts: Set[rdflib.URIRef] = set()
    for data_triple in data_graph.triples((None, None, None)):
        for data_triple_member in data_triple:
            if isinstance(data_triple_member, rdflib.URIRef):
                if concept_is_cdo_concept(data_triple_member):
                    data_cdo_concepts.add(data_triple_member)
            elif isinstance(data_triple_member, rdflib.Literal):
                n_datatype = data_triple_member.datatype
                if isinstance(n_datatype, rdflib.URIRef):
                    if concept_is_cdo_concept(n_datatype):
                        data_cdo_concepts.add(n_datatype)

    # Whatever the data graph uses that the ontology does not define.
    return data_cdo_concepts - cdo_concepts
170
+
171
+
74
172
def main () -> None :
75
173
parser = argparse .ArgumentParser (
76
174
description = "CASE wrapper to pySHACL command line tool."
@@ -181,67 +279,9 @@ def main() -> None:
181
279
_logger .debug ("arg_ontology_graph = %r." , arg_ontology_graph )
182
280
ontology_graph .parse (arg_ontology_graph )
183
281
184
- # Construct set of CDO concepts for data graph concept-existence review.
185
- cdo_concepts : Set [rdflib .URIRef ] = set ()
186
-
187
- for n_structural_class in [
188
- NS_OWL .Class ,
189
- NS_OWL .AnnotationProperty ,
190
- NS_OWL .DatatypeProperty ,
191
- NS_OWL .ObjectProperty ,
192
- NS_RDFS .Datatype ,
193
- NS_SH .NodeShape ,
194
- NS_SH .PropertyShape ,
195
- NS_SH .Shape ,
196
- ]:
197
- for ontology_triple in ontology_graph .triples (
198
- (None , NS_RDF .type , n_structural_class )
199
- ):
200
- if not isinstance (ontology_triple [0 ], rdflib .URIRef ):
201
- continue
202
- if concept_is_cdo_concept (ontology_triple [0 ]):
203
- cdo_concepts .add (ontology_triple [0 ])
204
- for n_ontology_predicate in [
205
- NS_OWL .backwardCompatibleWith ,
206
- NS_OWL .imports ,
207
- NS_OWL .incompatibleWith ,
208
- NS_OWL .priorVersion ,
209
- NS_OWL .versionIRI ,
210
- ]:
211
- for ontology_triple in ontology_graph .triples (
212
- (None , n_ontology_predicate , None )
213
- ):
214
- assert isinstance (ontology_triple [0 ], rdflib .URIRef )
215
- assert isinstance (ontology_triple [2 ], rdflib .URIRef )
216
- cdo_concepts .add (ontology_triple [0 ])
217
- cdo_concepts .add (ontology_triple [2 ])
218
- for ontology_triple in ontology_graph .triples ((None , NS_RDF .type , NS_OWL .Ontology )):
219
- if not isinstance (ontology_triple [0 ], rdflib .URIRef ):
220
- continue
221
- cdo_concepts .add (ontology_triple [0 ])
222
-
223
- # Also load historical ontology and version IRIs.
224
- ontology_and_version_iris_data = importlib .resources .read_text (
225
- case_utils .ontology , "ontology_and_version_iris.txt"
226
- )
227
- for line in ontology_and_version_iris_data .split ("\n " ):
228
- cleaned_line = line .strip ()
229
- if cleaned_line == "" :
230
- continue
231
- cdo_concepts .add (rdflib .URIRef (cleaned_line ))
232
-
233
- data_cdo_concepts : Set [rdflib .URIRef ] = set ()
234
- for data_triple in data_graph .triples ((None , None , None )):
235
- for data_triple_member in data_triple :
236
- if isinstance (data_triple_member , rdflib .URIRef ):
237
- if concept_is_cdo_concept (data_triple_member ):
238
- data_cdo_concepts .add (data_triple_member )
239
- elif isinstance (data_triple_member , rdflib .Literal ):
240
- if isinstance (data_triple_member .datatype , rdflib .URIRef ):
241
- if concept_is_cdo_concept (data_triple_member .datatype ):
242
- data_cdo_concepts .add (data_triple_member .datatype )
282
+ # Get the list of undefined CDO concepts in the graph
283
+ undefined_cdo_concepts = get_invalid_cdo_concepts (data_graph , ontology_graph )
243
284
244
- undefined_cdo_concepts = data_cdo_concepts - cdo_concepts
245
285
for undefined_cdo_concept in sorted (undefined_cdo_concepts ):
246
286
warnings .warn (undefined_cdo_concept , NonExistentCDOConceptWarning )
247
287
undefined_cdo_concepts_message = (
0 commit comments