|
32 | 32 | __version__ = "0.3.0"
|
33 | 33 |
|
34 | 34 | import argparse
|
35 |
| -import importlib.resources |
36 | 35 | import logging
|
37 | 36 | import os
|
38 | 37 | import sys
|
39 | 38 | import warnings
|
40 |
| -from typing import Any, Dict, List, Optional, Set, Tuple, Union |
| 39 | +from typing import Any, Dict, List, Optional, Tuple, Union |
41 | 40 |
|
42 | 41 | import pyshacl # type: ignore
|
43 | 42 | import rdflib
|
44 | 43 | from rdflib import Graph
|
45 | 44 |
|
46 |
| -import case_utils.ontology |
| 45 | +from case_utils.case_validate.validate_types import ( |
| 46 | + NonExistentCDOConceptWarning, |
| 47 | + ValidationResult, |
| 48 | +) |
| 49 | +from case_utils.case_validate.validate_utils import ( |
| 50 | + get_invalid_cdo_concepts, |
| 51 | + get_ontology_graph, |
| 52 | +) |
47 | 53 | from case_utils.ontology.version_info import (
|
48 | 54 | CURRENT_CASE_VERSION,
|
49 | 55 | built_version_choices_list,
|
50 | 56 | )
|
51 | 57 |
|
52 |
| -NS_OWL = rdflib.OWL |
53 |
| -NS_RDF = rdflib.RDF |
54 |
| -NS_RDFS = rdflib.RDFS |
55 |
| -NS_SH = rdflib.SH |
56 |
| - |
57 | 58 | _logger = logging.getLogger(os.path.basename(__file__))
|
58 | 59 |
|
59 | 60 |
|
60 |
| -class NonExistentCDOConceptWarning(UserWarning): |
61 |
| - """ |
62 |
| - This class is used when a concept is encountered in the data graph that is not part of CDO ontologies, according to the --built-version flags and --ontology-graph flags. |
63 |
| - """ |
64 |
| - |
65 |
| - pass |
66 |
| - |
67 |
| - |
68 |
| -class NonExistentCASEVersionError(Exception): |
69 |
| - """ |
70 |
| - This class is used when an invalid CASE version is requested that is not supported by the library. |
71 |
| - """ |
72 |
| - |
73 |
| - pass |
74 |
| - |
75 |
| - |
76 |
| -class ValidationResult: |
77 |
| - def __init__( |
78 |
| - self, |
79 |
| - conforms: bool, |
80 |
| - graph: Union[Exception, bytes, str, rdflib.Graph], |
81 |
| - text: str, |
82 |
| - undefined_concepts: Set[rdflib.URIRef], |
83 |
| - ) -> None: |
84 |
| - self.conforms = conforms |
85 |
| - self.graph = graph |
86 |
| - self.text = text |
87 |
| - self.undefined_concepts = undefined_concepts |
88 |
| - |
89 |
| - |
90 |
| -def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool: |
91 |
| - """ |
92 |
| - Determine if a concept is part of the CDO ontology. |
93 |
| -
|
94 |
| - :param n_concept: The concept to check. |
95 |
| - :return: whether the concept is part of the CDO ontologies. |
96 |
| - """ |
97 |
| - concept_iri = str(n_concept) |
98 |
| - return concept_iri.startswith( |
99 |
| - "https://ontology.unifiedcyberontology.org/" |
100 |
| - ) or concept_iri.startswith("https://ontology.caseontology.org/") |
101 |
| - |
102 |
| - |
103 |
| -def get_invalid_cdo_concepts( |
104 |
| - data_graph: rdflib.Graph, ontology_graph: rdflib.Graph |
105 |
| -) -> Set[rdflib.URIRef]: |
106 |
| - """ |
107 |
| - Get the set of concepts in the data graph that are not part of the CDO ontologies as specified with the ontology_graph argument. |
108 |
| -
|
109 |
| - :param data_graph: The data graph to validate. |
110 |
| - :param ontology_graph: The ontology graph to use for validation. |
111 |
| - :return: The list of concepts in the data graph that are not part of the CDO ontology. |
112 |
| -
|
113 |
| - >>> from case_utils.namespace import NS_RDF, NS_OWL, NS_UCO_CORE |
114 |
| - >>> from rdflib import Graph, Literal, Namespace, URIRef |
115 |
| - >>> # Define a namespace for a knowledge base, and a namespace for custom extensions. |
116 |
| - >>> ns_kb = Namespace("http://example.org/kb/") |
117 |
| - >>> ns_ex = Namespace("http://example.org/ontology/") |
118 |
| - >>> dg = Graph() |
119 |
| - >>> og = Graph() |
120 |
| - >>> # Use an ontology graph in review that includes only a single class and a single property excerpted from UCO, but also a single custom property. |
121 |
| - >>> _ = og.add((NS_UCO_CORE.UcoObject, NS_RDF.type, NS_OWL.Class)) |
122 |
| - >>> _ = og.add((NS_UCO_CORE.name, NS_RDF.type, NS_OWL.DatatypeProperty)) |
123 |
| - >>> _ = og.add((ns_ex.ourCustomProperty, NS_RDF.type, NS_OWL.DatatypeProperty)) |
124 |
| - >>> # Define an individual. |
125 |
| - >>> n_uco_object = ns_kb["UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c"] |
126 |
| - >>> n_uco_object |
127 |
| - rdflib.term.URIRef('http://example.org/kb/UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c') |
128 |
| - >>> # Review a data graph that includes only the single individual, class typo'd (capitalized incorrectly), but property OK. |
129 |
| - >>> _ = dg.add((n_uco_object, NS_RDF.type, NS_UCO_CORE.UCOObject)) |
130 |
| - >>> _ = dg.add((n_uco_object, NS_UCO_CORE.name, Literal("Test"))) |
131 |
| - >>> _ = dg.add((n_uco_object, ns_ex.customProperty, Literal("Custom Value"))) |
132 |
| - >>> invalid_cdo_concepts = get_invalid_cdo_concepts(dg, og) |
133 |
| - >>> invalid_cdo_concepts |
134 |
| - {rdflib.term.URIRef('https://ontology.unifiedcyberontology.org/uco/core/UCOObject')} |
135 |
| - >>> # Note that the property "ourCustomProperty" was typo'd in the data graph, but this was not reported. |
136 |
| - >>> assert ns_ex.ourCustomProperty not in invalid_cdo_concepts |
137 |
| - """ |
138 |
| - # Construct set of CDO concepts for data graph concept-existence review. |
139 |
| - cdo_concepts: Set[rdflib.URIRef] = set() |
140 |
| - |
141 |
| - for n_structural_class in [ |
142 |
| - NS_OWL.Class, |
143 |
| - NS_OWL.AnnotationProperty, |
144 |
| - NS_OWL.DatatypeProperty, |
145 |
| - NS_OWL.ObjectProperty, |
146 |
| - NS_RDFS.Datatype, |
147 |
| - NS_SH.NodeShape, |
148 |
| - NS_SH.PropertyShape, |
149 |
| - NS_SH.Shape, |
150 |
| - ]: |
151 |
| - for ontology_triple in ontology_graph.triples( |
152 |
| - (None, NS_RDF.type, n_structural_class) |
153 |
| - ): |
154 |
| - if not isinstance(ontology_triple[0], rdflib.URIRef): |
155 |
| - continue |
156 |
| - if concept_is_cdo_concept(ontology_triple[0]): |
157 |
| - cdo_concepts.add(ontology_triple[0]) |
158 |
| - for n_ontology_predicate in [ |
159 |
| - NS_OWL.backwardCompatibleWith, |
160 |
| - NS_OWL.imports, |
161 |
| - NS_OWL.incompatibleWith, |
162 |
| - NS_OWL.priorVersion, |
163 |
| - NS_OWL.versionIRI, |
164 |
| - ]: |
165 |
| - for ontology_triple in ontology_graph.triples( |
166 |
| - (None, n_ontology_predicate, None) |
167 |
| - ): |
168 |
| - assert isinstance(ontology_triple[0], rdflib.URIRef) |
169 |
| - assert isinstance(ontology_triple[2], rdflib.URIRef) |
170 |
| - cdo_concepts.add(ontology_triple[0]) |
171 |
| - cdo_concepts.add(ontology_triple[2]) |
172 |
| - for ontology_triple in ontology_graph.triples((None, NS_RDF.type, NS_OWL.Ontology)): |
173 |
| - if not isinstance(ontology_triple[0], rdflib.URIRef): |
174 |
| - continue |
175 |
| - cdo_concepts.add(ontology_triple[0]) |
176 |
| - |
177 |
| - # Also load historical ontology and version IRIs. |
178 |
| - ontology_and_version_iris_data = importlib.resources.read_text( |
179 |
| - case_utils.ontology, "ontology_and_version_iris.txt" |
180 |
| - ) |
181 |
| - for line in ontology_and_version_iris_data.split("\n"): |
182 |
| - cleaned_line = line.strip() |
183 |
| - if cleaned_line == "": |
184 |
| - continue |
185 |
| - cdo_concepts.add(rdflib.URIRef(cleaned_line)) |
186 |
| - |
187 |
| - data_cdo_concepts: Set[rdflib.URIRef] = set() |
188 |
| - for data_triple in data_graph.triples((None, None, None)): |
189 |
| - for data_triple_member in data_triple: |
190 |
| - if isinstance(data_triple_member, rdflib.URIRef): |
191 |
| - if concept_is_cdo_concept(data_triple_member): |
192 |
| - data_cdo_concepts.add(data_triple_member) |
193 |
| - elif isinstance(data_triple_member, rdflib.Literal): |
194 |
| - if isinstance(data_triple_member.datatype, rdflib.URIRef): |
195 |
| - if concept_is_cdo_concept(data_triple_member.datatype): |
196 |
| - data_cdo_concepts.add(data_triple_member.datatype) |
197 |
| - |
198 |
| - return data_cdo_concepts - cdo_concepts |
199 |
| - |
200 |
| - |
201 |
| -def get_ontology_graph( |
202 |
| - case_version: Optional[str] = None, supplemental_graphs: Optional[List[str]] = None |
203 |
| -) -> rdflib.Graph: |
204 |
| - """ |
205 |
| - Get the ontology graph for the given case_version and any supplemental graphs. |
206 |
| -
|
207 |
| - :param case_version: the version of the CASE ontology to use. If None (i.e. null), the most recent version will be used. If "none" (the string), no pre-built version of CASE will be used. |
208 |
| - :param supplemental_graphs: a list of supplemental graphs to use. If None, no supplemental graphs will be used. |
209 |
| - :return: the ontology graph against which to validate the data graph. |
210 |
| - """ |
211 |
| - ontology_graph = rdflib.Graph() |
212 |
| - |
213 |
| - if case_version != "none": |
214 |
| - # Load bundled CASE ontology at requested version. |
215 |
| - if case_version is None or case_version == "": |
216 |
| - case_version = CURRENT_CASE_VERSION |
217 |
| - # If the first character case_version is numeric, prepend case- to it. This allows for the version to be passed |
218 |
| - # by the library as both case-1.2.0 and 1.2.0 |
219 |
| - if case_version[0].isdigit(): |
220 |
| - case_version = "case-" + case_version |
221 |
| - ttl_filename = case_version + ".ttl" |
222 |
| - _logger.debug("ttl_filename = %r.", ttl_filename) |
223 |
| - # Ensure the requested version of the CASE ontology is available and if not, throw an appropriate exception |
224 |
| - # that can be returned in a user-friendly message. |
225 |
| - if not importlib.resources.is_resource(case_utils.ontology, ttl_filename): |
226 |
| - raise NonExistentCASEVersionError( |
227 |
| - f"The requested version ({case_version}) of the CASE ontology is not available. Please choose a " |
228 |
| - f"different version. The latest supported version is: {CURRENT_CASE_VERSION}" |
229 |
| - ) |
230 |
| - ttl_data = importlib.resources.read_text(case_utils.ontology, ttl_filename) |
231 |
| - ontology_graph.parse(data=ttl_data, format="turtle") |
232 |
| - |
233 |
| - if supplemental_graphs: |
234 |
| - for arg_ontology_graph in supplemental_graphs: |
235 |
| - _logger.debug("arg_ontology_graph = %r.", arg_ontology_graph) |
236 |
| - ontology_graph.parse(arg_ontology_graph) |
237 |
| - |
238 |
| - return ontology_graph |
239 |
| - |
240 |
| - |
241 | 61 | def validate(
|
242 | 62 | input_file: str,
|
243 | 63 | *args: Any,
|
|
0 commit comments