Skip to content

Commit 2f32021

Browse files
authored
Merge pull request #77 from casework/add_concept_typo_review
Add CDO concept typo-checker based on set-differencing URIRefs using CDO prefixes
2 parents aaa00c6 + 9bd90a1 commit 2f32021

13 files changed

+388
-4
lines changed

CONTRIBUTE.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,12 @@ pushd case_utils/ontology
2727
git add case-0.6.0.ttl # Assuming CASE 0.6.0 was just released.
2828
# and/or
2929
git add uco-0.8.0.ttl # Assuming UCO 0.8.0 was adopted in CASE 0.6.0.
30+
31+
git add ontology_and_version_iris.txt
3032
popd
3133
make check
3234
# Assuming `make check` passes:
33-
git commit -m "Build CASE 0.6.0 monolithic .ttl files" case_utils/ontology/case-0.6.0-subclasses.ttl case_utils/ontology/case-0.6.0.ttl
35+
git commit -m "Build CASE 0.6.0 monolithic .ttl files" case_utils/ontology/case-0.6.0-subclasses.ttl case_utils/ontology/case-0.6.0.ttl case_utils/ontology/ontology_and_version_iris.txt
3436
git commit -m "Update CASE ontology pointer to version 0.6.0" dependencies/CASE case_utils/ontology/version_info.py
3537
```
3638

@@ -43,4 +45,4 @@ pre-commit --version
4345
The `pre-commit` tool hooks into Git's commit machinery to run a set of linters and static analyzers over each change. To install `pre-commit` into Git's hooks, run:
4446
```bash
4547
pre-commit install
46-
```
48+
```

case_utils/case_validate/__init__.py

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,39 @@
3737
import os
3838
import sys
3939
import typing
40+
import warnings
4041

4142
import pyshacl # type: ignore
42-
import rdflib.util
43+
import rdflib
4344

4445
import case_utils.ontology
4546
from case_utils.ontology.version_info import (
4647
CURRENT_CASE_VERSION,
4748
built_version_choices_list,
4849
)
4950

51+
NS_OWL = rdflib.OWL
52+
NS_RDF = rdflib.RDF
53+
NS_RDFS = rdflib.RDFS
54+
5055
_logger = logging.getLogger(os.path.basename(__file__))
5156

5257

58+
class NonExistentCDOConceptWarning(UserWarning):
    """
    Warning category for a concept found in the data graph that is not part
    of the CDO ontologies, as determined by the --built-version and
    --ontology-graph flags.
    """
64+
65+
66+
def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool:
    """
    Report whether a node's IRI starts with one of the two CDO ontology IRI
    prefixes (UCO or CASE).

    :param n_concept: The node to test.  Only its string form is inspected.
    :returns: True if the IRI begins with either prefix; False otherwise.
    """
    # str.startswith accepts a tuple of prefixes, so a single call covers
    # both namespaces.
    return str(n_concept).startswith(
        (
            "https://ontology.unifiedcyberontology.org/",
            "https://ontology.caseontology.org/",
        )
    )
71+
72+
5373
def main() -> None:
5474
parser = argparse.ArgumentParser(
5575
description="CASE wrapper to pySHACL command line tool."
@@ -160,6 +180,71 @@ def main() -> None:
160180
_logger.debug("arg_ontology_graph = %r.", arg_ontology_graph)
161181
ontology_graph.parse(arg_ontology_graph)
162182

183+
# Construct set of CDO concepts for data graph concept-existence review.
184+
cdo_concepts: typing.Set[rdflib.URIRef] = set()
185+
186+
for n_structural_class in [
187+
NS_OWL.Class,
188+
NS_OWL.AnnotationProperty,
189+
NS_OWL.DatatypeProperty,
190+
NS_OWL.ObjectProperty,
191+
NS_RDFS.Datatype,
192+
]:
193+
for ontology_triple in ontology_graph.triples(
194+
(None, NS_RDF.type, n_structural_class)
195+
):
196+
if not isinstance(ontology_triple[0], rdflib.URIRef):
197+
continue
198+
if concept_is_cdo_concept(ontology_triple[0]):
199+
cdo_concepts.add(ontology_triple[0])
200+
for n_ontology_predicate in [
201+
NS_OWL.backwardCompatibleWith,
202+
NS_OWL.imports,
203+
NS_OWL.incompatibleWith,
204+
NS_OWL.priorVersion,
205+
NS_OWL.versionIRI,
206+
]:
207+
for ontology_triple in ontology_graph.triples(
208+
(None, n_ontology_predicate, None)
209+
):
210+
assert isinstance(ontology_triple[0], rdflib.URIRef)
211+
assert isinstance(ontology_triple[2], rdflib.URIRef)
212+
cdo_concepts.add(ontology_triple[0])
213+
cdo_concepts.add(ontology_triple[2])
214+
for ontology_triple in ontology_graph.triples((None, NS_RDF.type, NS_OWL.Ontology)):
215+
if not isinstance(ontology_triple[0], rdflib.URIRef):
216+
continue
217+
cdo_concepts.add(ontology_triple[0])
218+
219+
# Also load historical ontology and version IRIs.
220+
ontology_and_version_iris_data = importlib.resources.read_text(
221+
case_utils.ontology, "ontology_and_version_iris.txt"
222+
)
223+
for line in ontology_and_version_iris_data.split("\n"):
224+
cleaned_line = line.strip()
225+
if cleaned_line == "":
226+
continue
227+
cdo_concepts.add(rdflib.URIRef(cleaned_line))
228+
229+
data_cdo_concepts: typing.Set[rdflib.URIRef] = set()
230+
for data_triple in data_graph.triples((None, None, None)):
231+
for data_triple_member in data_triple:
232+
if isinstance(data_triple_member, rdflib.URIRef):
233+
if concept_is_cdo_concept(data_triple_member):
234+
data_cdo_concepts.add(data_triple_member)
235+
elif isinstance(data_triple_member, rdflib.Literal):
236+
if isinstance(data_triple_member.datatype, rdflib.URIRef):
237+
if concept_is_cdo_concept(data_triple_member.datatype):
238+
data_cdo_concepts.add(data_triple_member.datatype)
239+
240+
undefined_cdo_concepts = data_cdo_concepts - cdo_concepts
241+
for undefined_cdo_concept in sorted(undefined_cdo_concepts):
242+
warnings.warn(undefined_cdo_concept, NonExistentCDOConceptWarning)
243+
undefined_cdo_concepts_message = (
244+
"There were %d concepts with CDO IRIs in the data graph that are not in the ontology graph."
245+
% len(undefined_cdo_concepts)
246+
)
247+
163248
# Determine output format.
164249
# pySHACL's determination of output formatting is handled solely
165250
# through the -f flag. Other CASE CLI tools handle format
@@ -214,6 +299,13 @@ def main() -> None:
214299
% type(validation_graph)
215300
)
216301

302+
if len(undefined_cdo_concepts) > 0:
303+
warnings.warn(undefined_cdo_concepts_message)
304+
if not args.allow_warnings:
305+
undefined_cdo_concepts_alleviation_message = "The data graph is SHACL-conformant with the CDO ontologies, but nonexistent-concept references raise Warnings with this tool. Please either correct the concept names in the data graph; use the --ontology-graph flag to pass a corrected CDO ontology file, also using --built-version none; or, use the --allow-warnings flag."
306+
warnings.warn(undefined_cdo_concepts_alleviation_message)
307+
conforms = False
308+
217309
sys.exit(0 if conforms else 1)
218310

219311

case_utils/ontology/Makefile

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ RDF_TOOLKIT_JAR := $(uco_srcdir)/lib/rdf-toolkit.jar
2424
case_version := $(shell python3 version_info.py)
2525

2626
all: \
27-
case-$(case_version)-subclasses.ttl
27+
ontology_and_version_iris.txt
2828

2929
.PRECIOUS: \
3030
case-$(case_version).ttl
@@ -79,3 +79,16 @@ case-$(case_version)-subclasses.ttl: \
7979
clean:
8080
@rm -f \
8181
case-$(case_version)*.ttl
82+
83+
ontology_and_version_iris.txt: \
84+
src/ontology_and_version_iris.py \
85+
case-$(case_version)-subclasses.ttl
86+
# Guarantee venv is built. (Same rationale as in the subclasses.ttl recipe.)
87+
$(MAKE) \
88+
--directory $(case_srcdir)/tests \
89+
.venv.done.log
90+
source $(case_srcdir)/tests/venv/bin/activate \
91+
&& python3 src/ontology_and_version_iris.py \
92+
_$@ \
93+
case-*.ttl
94+
mv _$@ $@
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
http://case.example.org/core
2+
https://ontology.caseontology.org/case/case
3+
https://ontology.caseontology.org/case/case/0.7.1
4+
https://ontology.caseontology.org/case/case/1.0.0
5+
https://ontology.caseontology.org/case/investigation
6+
https://ontology.caseontology.org/case/investigation/0.7.1
7+
https://ontology.caseontology.org/case/investigation/1.0.0
8+
https://ontology.caseontology.org/case/vocabulary
9+
https://ontology.caseontology.org/case/vocabulary/0.7.1
10+
https://ontology.caseontology.org/case/vocabulary/1.0.0
11+
https://ontology.unifiedcyberontology.org/co
12+
https://ontology.unifiedcyberontology.org/co/1.0.0
13+
https://ontology.unifiedcyberontology.org/owl
14+
https://ontology.unifiedcyberontology.org/owl/1.0.0
15+
https://ontology.unifiedcyberontology.org/uco/action
16+
https://ontology.unifiedcyberontology.org/uco/action/0.9.1
17+
https://ontology.unifiedcyberontology.org/uco/action/1.0.0
18+
https://ontology.unifiedcyberontology.org/uco/configuration
19+
https://ontology.unifiedcyberontology.org/uco/configuration/1.0.0
20+
https://ontology.unifiedcyberontology.org/uco/core
21+
https://ontology.unifiedcyberontology.org/uco/core/0.9.1
22+
https://ontology.unifiedcyberontology.org/uco/core/1.0.0
23+
https://ontology.unifiedcyberontology.org/uco/identity
24+
https://ontology.unifiedcyberontology.org/uco/identity/0.9.1
25+
https://ontology.unifiedcyberontology.org/uco/identity/1.0.0
26+
https://ontology.unifiedcyberontology.org/uco/location
27+
https://ontology.unifiedcyberontology.org/uco/location/0.9.1
28+
https://ontology.unifiedcyberontology.org/uco/location/1.0.0
29+
https://ontology.unifiedcyberontology.org/uco/marking
30+
https://ontology.unifiedcyberontology.org/uco/marking/0.9.1
31+
https://ontology.unifiedcyberontology.org/uco/marking/1.0.0
32+
https://ontology.unifiedcyberontology.org/uco/observable
33+
https://ontology.unifiedcyberontology.org/uco/observable/0.9.1
34+
https://ontology.unifiedcyberontology.org/uco/observable/1.0.0
35+
https://ontology.unifiedcyberontology.org/uco/pattern
36+
https://ontology.unifiedcyberontology.org/uco/pattern/0.9.1
37+
https://ontology.unifiedcyberontology.org/uco/pattern/1.0.0
38+
https://ontology.unifiedcyberontology.org/uco/role
39+
https://ontology.unifiedcyberontology.org/uco/role/0.9.1
40+
https://ontology.unifiedcyberontology.org/uco/role/1.0.0
41+
https://ontology.unifiedcyberontology.org/uco/time
42+
https://ontology.unifiedcyberontology.org/uco/time/0.9.1
43+
https://ontology.unifiedcyberontology.org/uco/time/1.0.0
44+
https://ontology.unifiedcyberontology.org/uco/tool
45+
https://ontology.unifiedcyberontology.org/uco/tool/0.9.1
46+
https://ontology.unifiedcyberontology.org/uco/tool/1.0.0
47+
https://ontology.unifiedcyberontology.org/uco/types
48+
https://ontology.unifiedcyberontology.org/uco/types/0.9.1
49+
https://ontology.unifiedcyberontology.org/uco/types/1.0.0
50+
https://ontology.unifiedcyberontology.org/uco/uco
51+
https://ontology.unifiedcyberontology.org/uco/uco/0.9.1
52+
https://ontology.unifiedcyberontology.org/uco/uco/1.0.0
53+
https://ontology.unifiedcyberontology.org/uco/victim
54+
https://ontology.unifiedcyberontology.org/uco/victim/0.9.1
55+
https://ontology.unifiedcyberontology.org/uco/victim/1.0.0
56+
https://ontology.unifiedcyberontology.org/uco/vocabulary
57+
https://ontology.unifiedcyberontology.org/uco/vocabulary/0.9.1
58+
https://ontology.unifiedcyberontology.org/uco/vocabulary/1.0.0
59+
https://unifiedcyberontology.org/ontology/uco/action
60+
https://unifiedcyberontology.org/ontology/uco/core
61+
https://unifiedcyberontology.org/ontology/uco/identity
62+
https://unifiedcyberontology.org/ontology/uco/location
63+
https://unifiedcyberontology.org/ontology/uco/marking
64+
https://unifiedcyberontology.org/ontology/uco/observable
65+
https://unifiedcyberontology.org/ontology/uco/pattern
66+
https://unifiedcyberontology.org/ontology/uco/role
67+
https://unifiedcyberontology.org/ontology/uco/time
68+
https://unifiedcyberontology.org/ontology/uco/tool
69+
https://unifiedcyberontology.org/ontology/uco/types
70+
https://unifiedcyberontology.org/ontology/uco/uco
71+
https://unifiedcyberontology.org/ontology/uco/victim
72+
https://unifiedcyberontology.org/ontology/uco/vocabulary
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#!/usr/bin/env python3
2+
3+
# This software was developed at the National Institute of Standards
4+
# and Technology by employees of the Federal Government in the course
5+
# of their official duties. Pursuant to title 17 Section 105 of the
6+
# United States Code this software is not subject to copyright
7+
# protection and is in the public domain. NIST assumes no
8+
# responsibility whatsoever for its use by other parties, and makes
9+
# no guarantees, expressed or implied, about its quality,
10+
# reliability, or any other characteristic.
11+
#
12+
# We would appreciate acknowledgement if the software is used.
13+
14+
"""
15+
This script creates a list of all ontology and version IRIs that have ever existed in a CDO ontology to describe a CDO ontology.  I.e., the subjects of triples asserting rdf:type owl:Ontology are included, as are the subjects and objects of version-referencing triples (owl:versionIRI, owl:incompatibleWith, etc.).
16+
"""
17+
18+
__version__ = "0.1.0"
19+
20+
import argparse
21+
import typing
22+
23+
import rdflib
24+
25+
NS_OWL = rdflib.OWL
26+
NS_RDF = rdflib.RDF
27+
28+
29+
def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool:
    """
    Report whether a node's IRI is recognized by this script as a CASE or UCO
    IRI.

    This function is purposefully distinct from the function used in
    case_validate.  Within this script, the publishing history of CASE and
    UCO is reviewed, so further IRI prefixes and one exact IRI are accepted
    in addition to the two "https://ontology." prefixes.

    :param n_concept: The node to test.  Only its string form is inspected.
    :returns: True if the IRI begins with any listed prefix, or is exactly
      "http://case.example.org/core"; False otherwise.
    """
    concept_iri = str(n_concept)
    # str.startswith accepts a tuple of prefixes, so a single call covers
    # every namespace.
    return (
        concept_iri.startswith(
            (
                "https://ontology.unifiedcyberontology.org/",
                "https://ontology.caseontology.org/",
                "https://unifiedcyberontology.org/ontology/",
                "https://caseontology.org/ontology/",
            )
        )
        or concept_iri == "http://case.example.org/core"
    )
40+
41+
42+
def extract_ontology_iris(ontology_graph: rdflib.Graph) -> typing.Set[rdflib.URIRef]:
    """
    Gather the IRIs that describe the OWL Ontology in the input graph.

    Classes, properties, etc. defined within the ontology are not returned;
    only the ontology IRI and the IRIs appearing in ontology-level,
    version-referencing annotations are.

    :param ontology_graph: The graph to inspect.
    :returns: The set of collected IRIs.
    """
    found_iris: typing.Set[rdflib.URIRef] = set()

    version_predicates = (
        NS_OWL.backwardCompatibleWith,
        NS_OWL.imports,
        NS_OWL.incompatibleWith,
        NS_OWL.priorVersion,
        NS_OWL.versionIRI,
    )
    # Both ends of every version-referencing triple are collected.  They are
    # not filtered by concept_is_cdo_concept at this point.
    for n_predicate in version_predicates:
        for n_subject, _, n_object in ontology_graph.triples(
            (None, n_predicate, None)
        ):
            assert isinstance(n_subject, rdflib.URIRef)
            assert isinstance(n_object, rdflib.URIRef)
            found_iris.update((n_subject, n_object))

    # Subjects typed as owl:Ontology are kept only when they are IRI nodes
    # that concept_is_cdo_concept accepts.
    for n_subject, _, _ in ontology_graph.triples(
        (None, NS_RDF.type, NS_OWL.Ontology)
    ):
        if isinstance(n_subject, rdflib.URIRef) and concept_is_cdo_concept(
            n_subject
        ):
            found_iris.add(n_subject)

    return found_iris
67+
68+
69+
def main() -> None:
    """
    Write the CDO ontology and version IRIs found in the input graph files to
    a text file, one IRI per line, sorted and de-duplicated.

    Command-line arguments:
      out_txt: Path of the text file to write.
      in_ttl: One or more graph files to parse with rdflib.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("out_txt")
    parser.add_argument("in_ttl", nargs="+")
    args = parser.parse_args()

    cdo_concepts: typing.Set[rdflib.URIRef] = set()
    for in_ttl in args.in_ttl:
        ontology_graph = rdflib.Graph()
        ontology_graph.parse(in_ttl)
        # extract_ontology_iris returns version-triple nodes unfiltered, so
        # the CDO filter is applied to everything here.
        cdo_concepts.update(
            ontology_concept
            for ontology_concept in extract_ontology_iris(ontology_graph)
            if concept_is_cdo_concept(ontology_concept)
        )

    # case_validate reads this file with importlib.resources.read_text, which
    # decodes as UTF-8 by default.  Write UTF-8 explicitly rather than relying
    # on the locale's default encoding.
    with open(args.out_txt, "w", encoding="utf-8") as out_fh:
        for cdo_concept in sorted(cdo_concepts):
            print(cdo_concept, file=out_fh)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)