Skip to content

Commit 73e4683

Browse files
committed
Separate types and utils into discrete files
1 parent 4208a99 commit 73e4683

File tree

3 files changed

+210
-189
lines changed

3 files changed

+210
-189
lines changed

case_utils/case_validate/__init__.py

Lines changed: 9 additions & 189 deletions
Original file line numberDiff line numberDiff line change
@@ -32,212 +32,32 @@
3232
__version__ = "0.3.0"
3333

3434
import argparse
35-
import importlib.resources
3635
import logging
3736
import os
3837
import sys
3938
import warnings
40-
from typing import Any, Dict, List, Optional, Set, Tuple, Union
39+
from typing import Any, Dict, List, Optional, Tuple, Union
4140

4241
import pyshacl # type: ignore
4342
import rdflib
4443
from rdflib import Graph
4544

46-
import case_utils.ontology
45+
from case_utils.case_validate.validate_types import (
46+
NonExistentCDOConceptWarning,
47+
ValidationResult,
48+
)
49+
from case_utils.case_validate.validate_utils import (
50+
get_invalid_cdo_concepts,
51+
get_ontology_graph,
52+
)
4753
from case_utils.ontology.version_info import (
4854
CURRENT_CASE_VERSION,
4955
built_version_choices_list,
5056
)
5157

52-
NS_OWL = rdflib.OWL
53-
NS_RDF = rdflib.RDF
54-
NS_RDFS = rdflib.RDFS
55-
NS_SH = rdflib.SH
56-
5758
_logger = logging.getLogger(os.path.basename(__file__))
5859

5960

60-
class NonExistentCDOConceptWarning(UserWarning):
61-
"""
62-
This class is used when a concept is encountered in the data graph that is not part of CDO ontologies, according to the --built-version flags and --ontology-graph flags.
63-
"""
64-
65-
pass
66-
67-
68-
class NonExistentCASEVersionError(Exception):
69-
"""
70-
This class is used when an invalid CASE version is requested that is not supported by the library.
71-
"""
72-
73-
pass
74-
75-
76-
class ValidationResult:
77-
def __init__(
78-
self,
79-
conforms: bool,
80-
graph: Union[Exception, bytes, str, rdflib.Graph],
81-
text: str,
82-
undefined_concepts: Set[rdflib.URIRef],
83-
) -> None:
84-
self.conforms = conforms
85-
self.graph = graph
86-
self.text = text
87-
self.undefined_concepts = undefined_concepts
88-
89-
90-
def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool:
91-
"""
92-
Determine if a concept is part of the CDO ontologies.
93-
94-
:param n_concept: The concept to check.
95-
:return: whether the concept is part of the CDO ontologies.
96-
"""
97-
concept_iri = str(n_concept)
98-
return concept_iri.startswith(
99-
"https://ontology.unifiedcyberontology.org/"
100-
) or concept_iri.startswith("https://ontology.caseontology.org/")
101-
102-
103-
def get_invalid_cdo_concepts(
104-
data_graph: rdflib.Graph, ontology_graph: rdflib.Graph
105-
) -> Set[rdflib.URIRef]:
106-
"""
107-
Get the set of concepts in the data graph that are not part of the CDO ontologies as specified with the ontology_graph argument.
108-
109-
:param data_graph: The data graph to validate.
110-
:param ontology_graph: The ontology graph to use for validation.
111-
:return: The set of concepts in the data graph that are not part of the CDO ontologies.
112-
113-
>>> from case_utils.namespace import NS_RDF, NS_OWL, NS_UCO_CORE
114-
>>> from rdflib import Graph, Literal, Namespace, URIRef
115-
>>> # Define a namespace for a knowledge base, and a namespace for custom extensions.
116-
>>> ns_kb = Namespace("http://example.org/kb/")
117-
>>> ns_ex = Namespace("http://example.org/ontology/")
118-
>>> dg = Graph()
119-
>>> og = Graph()
120-
>>> # Use an ontology graph in review that includes only a single class and a single property excerpted from UCO, but also a single custom property.
121-
>>> _ = og.add((NS_UCO_CORE.UcoObject, NS_RDF.type, NS_OWL.Class))
122-
>>> _ = og.add((NS_UCO_CORE.name, NS_RDF.type, NS_OWL.DatatypeProperty))
123-
>>> _ = og.add((ns_ex.ourCustomProperty, NS_RDF.type, NS_OWL.DatatypeProperty))
124-
>>> # Define an individual.
125-
>>> n_uco_object = ns_kb["UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c"]
126-
>>> n_uco_object
127-
rdflib.term.URIRef('http://example.org/kb/UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c')
128-
>>> # Review a data graph that includes only the single individual, class typo'd (capitalized incorrectly), but property OK.
129-
>>> _ = dg.add((n_uco_object, NS_RDF.type, NS_UCO_CORE.UCOObject))
130-
>>> _ = dg.add((n_uco_object, NS_UCO_CORE.name, Literal("Test")))
131-
>>> _ = dg.add((n_uco_object, ns_ex.customProperty, Literal("Custom Value")))
132-
>>> invalid_cdo_concepts = get_invalid_cdo_concepts(dg, og)
133-
>>> invalid_cdo_concepts
134-
{rdflib.term.URIRef('https://ontology.unifiedcyberontology.org/uco/core/UCOObject')}
135-
>>> # Note that the property "ourCustomProperty" was typo'd in the data graph, but this was not reported.
136-
>>> assert ns_ex.ourCustomProperty not in invalid_cdo_concepts
137-
"""
138-
# Construct set of CDO concepts for data graph concept-existence review.
139-
cdo_concepts: Set[rdflib.URIRef] = set()
140-
141-
for n_structural_class in [
142-
NS_OWL.Class,
143-
NS_OWL.AnnotationProperty,
144-
NS_OWL.DatatypeProperty,
145-
NS_OWL.ObjectProperty,
146-
NS_RDFS.Datatype,
147-
NS_SH.NodeShape,
148-
NS_SH.PropertyShape,
149-
NS_SH.Shape,
150-
]:
151-
for ontology_triple in ontology_graph.triples(
152-
(None, NS_RDF.type, n_structural_class)
153-
):
154-
if not isinstance(ontology_triple[0], rdflib.URIRef):
155-
continue
156-
if concept_is_cdo_concept(ontology_triple[0]):
157-
cdo_concepts.add(ontology_triple[0])
158-
for n_ontology_predicate in [
159-
NS_OWL.backwardCompatibleWith,
160-
NS_OWL.imports,
161-
NS_OWL.incompatibleWith,
162-
NS_OWL.priorVersion,
163-
NS_OWL.versionIRI,
164-
]:
165-
for ontology_triple in ontology_graph.triples(
166-
(None, n_ontology_predicate, None)
167-
):
168-
assert isinstance(ontology_triple[0], rdflib.URIRef)
169-
assert isinstance(ontology_triple[2], rdflib.URIRef)
170-
cdo_concepts.add(ontology_triple[0])
171-
cdo_concepts.add(ontology_triple[2])
172-
for ontology_triple in ontology_graph.triples((None, NS_RDF.type, NS_OWL.Ontology)):
173-
if not isinstance(ontology_triple[0], rdflib.URIRef):
174-
continue
175-
cdo_concepts.add(ontology_triple[0])
176-
177-
# Also load historical ontology and version IRIs.
178-
ontology_and_version_iris_data = importlib.resources.read_text(
179-
case_utils.ontology, "ontology_and_version_iris.txt"
180-
)
181-
for line in ontology_and_version_iris_data.split("\n"):
182-
cleaned_line = line.strip()
183-
if cleaned_line == "":
184-
continue
185-
cdo_concepts.add(rdflib.URIRef(cleaned_line))
186-
187-
data_cdo_concepts: Set[rdflib.URIRef] = set()
188-
for data_triple in data_graph.triples((None, None, None)):
189-
for data_triple_member in data_triple:
190-
if isinstance(data_triple_member, rdflib.URIRef):
191-
if concept_is_cdo_concept(data_triple_member):
192-
data_cdo_concepts.add(data_triple_member)
193-
elif isinstance(data_triple_member, rdflib.Literal):
194-
if isinstance(data_triple_member.datatype, rdflib.URIRef):
195-
if concept_is_cdo_concept(data_triple_member.datatype):
196-
data_cdo_concepts.add(data_triple_member.datatype)
197-
198-
return data_cdo_concepts - cdo_concepts
199-
200-
201-
def get_ontology_graph(
202-
case_version: Optional[str] = None, supplemental_graphs: Optional[List[str]] = None
203-
) -> rdflib.Graph:
204-
"""
205-
Get the ontology graph for the given case_version and any supplemental graphs.
206-
207-
:param case_version: the version of the CASE ontology to use. If None (i.e. null) or an empty string, the current version (CURRENT_CASE_VERSION) will be used. If "none" (the string), no pre-built version of CASE will be used.
208-
:param supplemental_graphs: a list of supplemental graphs to use. If None, no supplemental graphs will be used.
209-
:return: the ontology graph against which to validate the data graph.
210-
"""
211-
ontology_graph = rdflib.Graph()
212-
213-
if case_version != "none":
214-
# Load bundled CASE ontology at requested version.
215-
if case_version is None or case_version == "":
216-
case_version = CURRENT_CASE_VERSION
217-
# If the first character of case_version is numeric, prepend "case-" to it. This allows for the version to be passed
218-
# by the library as both case-1.2.0 and 1.2.0
219-
if case_version[0].isdigit():
220-
case_version = "case-" + case_version
221-
ttl_filename = case_version + ".ttl"
222-
_logger.debug("ttl_filename = %r.", ttl_filename)
223-
# Ensure the requested version of the CASE ontology is available and if not, throw an appropriate exception
224-
# that can be returned in a user-friendly message.
225-
if not importlib.resources.is_resource(case_utils.ontology, ttl_filename):
226-
raise NonExistentCASEVersionError(
227-
f"The requested version ({case_version}) of the CASE ontology is not available. Please choose a "
228-
f"different version. The latest supported version is: {CURRENT_CASE_VERSION}"
229-
)
230-
ttl_data = importlib.resources.read_text(case_utils.ontology, ttl_filename)
231-
ontology_graph.parse(data=ttl_data, format="turtle")
232-
233-
if supplemental_graphs:
234-
for arg_ontology_graph in supplemental_graphs:
235-
_logger.debug("arg_ontology_graph = %r.", arg_ontology_graph)
236-
ontology_graph.parse(arg_ontology_graph)
237-
238-
return ontology_graph
239-
240-
24161
def validate(
24262
input_file: str,
24363
*args: Any,
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from typing import Set, Union
2+
3+
import rdflib
4+
5+
6+
class ValidationResult:
7+
def __init__(
8+
self,
9+
conforms: bool,
10+
graph: Union[Exception, bytes, str, rdflib.Graph],
11+
text: str,
12+
undefined_concepts: Set[rdflib.URIRef],
13+
) -> None:
14+
self.conforms = conforms
15+
self.graph = graph
16+
self.text = text
17+
self.undefined_concepts = undefined_concepts
18+
19+
20+
class NonExistentCDOConceptWarning(UserWarning):
21+
"""
22+
This class is used when a concept is encountered in the data graph that is not part of CDO ontologies, according to the --built-version flags and --ontology-graph flags.
23+
"""
24+
25+
pass
26+
27+
28+
class NonExistentCASEVersionError(Exception):
29+
"""
30+
This class is used when an invalid CASE version is requested that is not supported by the library.
31+
"""
32+
33+
pass

0 commit comments

Comments
 (0)