Skip to content

Commit 15f00c9

Browse files
committed
Consolidate case_validate CLI validation logic into case_validate.validate
This patch separates implementation points between functionality distinct to `case_utils.validate` and functionality distinct to `pyshacl.validate`. The `allow_warnings` and `inference` parameters carry CASE-specific documentation that augments `pyshacl.validate`'s documentation; documentation of the other `pyshacl.validate` keyword arguments is delegated to the upstream function. This patch removes some hardcoded parameter values from the call to `pyshacl.validate`, letting the `case_validate` CLI or caller provide any runtime-requested values. Also, without functional impact, this patch sorts keyword parameters alphabetically. Signed-off-by: Alex Nelson <alexander.nelson@nist.gov>
1 parent 00f1360 commit 15f00c9

File tree

1 file changed

+43
-59
lines changed

1 file changed

+43
-59
lines changed

case_utils/case_validate/__init__.py

Lines changed: 43 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -59,59 +59,74 @@
5959

6060

6161
def validate(
62-
input_file: str,
62+
input_file: Union[List[str], str],
6363
*args: Any,
6464
case_version: Optional[str] = None,
6565
supplemental_graphs: Optional[List[str]] = None,
66-
abort_on_first: bool = False,
67-
inference: Optional[str] = None,
6866
**kwargs: Any,
6967
) -> ValidationResult:
7068
"""
7169
Validate the given data graph against the given CASE ontology version and supplemental graphs.
70+
7271
:param *args: The positional arguments to pass to the underlying pyshacl.validate function.
73-
:param input_file: The path to the file containing the data graph to validate.
74-
:param case_version: The version of the CASE ontology to use (e.g. 1.2.0). If None, the most recent version will
75-
be used.
76-
:param supplemental_graphs: The supplemental graphs to use. If None, no supplemental graphs will be used.
77-
:param abort_on_first: Whether to abort on the first validation error.
72+
:param input_file: The path to the file containing the data graph to validate. This can also be a list of paths to files containing data graphs to pool together.
73+
:param case_version: The version of the CASE ontology to use (e.g. 1.2.0). If None, the most recent version will be used.
74+
:param supplemental_graphs: File paths to supplemental graphs to use. If None, no supplemental graphs will be used.
75+
:param allow_warnings: In addition to affecting the conformance of SHACL validation, this will affect conformance based on unrecognized CDO concepts (likely, misspelled or miscapitalized) in the data graph. If allow_warnings is not True, any unrecognized concept using a CDO IRI prefix will cause conformance to be False.
7876
:param inference: The type of inference to use. If "none" (type str), no inference will be used. If None (type NoneType), pyshacl defaults will be used. Note that at the time of this writing (pySHACL 0.23.0), pyshacl defaults are no inferencing for the data graph, and RDFS inferencing for the SHACL graph, which for case_utils.validate includes the SHACL and OWL graphs.
7977
:param **kwargs: The keyword arguments to pass to the underlying pyshacl.validate function.
8078
:return: The validation result object containing the defined properties.
8179
"""
8280
# Convert the data graph string to a rdflib.Graph object.
8381
data_graph = rdflib.Graph()
84-
data_graph.parse(input_file)
82+
if isinstance(input_file, str):
83+
data_graph.parse(input_file)
84+
elif isinstance(input_file, list):
85+
for _data_graph_file in input_file:
86+
_logger.debug("_data_graph_file = %r.", _data_graph_file)
87+
if not isinstance(_data_graph_file, str):
88+
raise TypeError("Expected str, received %s." % type(_data_graph_file))
89+
data_graph.parse(_data_graph_file)
8590

8691
# Get the ontology graph from the case_version and supplemental_graphs arguments
8792
ontology_graph: Graph = get_ontology_graph(case_version, supplemental_graphs)
8893

89-
# Get the undefined CDO concepts
94+
# Get the undefined CDO concepts.
9095
undefined_cdo_concepts = get_invalid_cdo_concepts(data_graph, ontology_graph)
9196

97+
# Warn about typo'd concepts before performing SHACL review.
98+
for undefined_cdo_concept in sorted(undefined_cdo_concepts):
99+
warnings.warn(undefined_cdo_concept, NonExistentCDOConceptWarning)
100+
undefined_cdo_concepts_message = (
101+
"There were %d concepts with CDO IRIs in the data graph that are not in the ontology graph."
102+
% len(undefined_cdo_concepts)
103+
)
104+
92105
# Validate data graph against ontology graph.
93106
validate_result: Tuple[
94107
bool, Union[Exception, bytes, str, rdflib.Graph], str
95108
] = pyshacl.validate(
96109
data_graph,
97110
*args,
98-
shacl_graph=ontology_graph,
99111
ont_graph=ontology_graph,
100-
inference=inference,
101-
meta_shacl=False,
102-
abort_on_first=abort_on_first,
103-
allow_infos=False,
104-
allow_warnings=False,
105-
debug=False,
106-
do_owl_imports=False,
112+
shacl_graph=ontology_graph,
107113
**kwargs,
108114
)
109115

110116
# Relieve RAM of the data graph after validation has run.
111117
del data_graph
112118

119+
conforms = validate_result[0]
120+
121+
if len(undefined_cdo_concepts) > 0:
122+
warnings.warn(undefined_cdo_concepts_message)
123+
if not kwargs.get("allow_warnings"):
124+
undefined_cdo_concepts_alleviation_message = "The data graph is SHACL-conformant with the CDO ontologies, but nonexistent-concept references raise Warnings with this tool. Please either correct the concept names in the data graph; use the --ontology-graph flag to pass a corrected CDO ontology file, also using --built-version none; or, use the --allow-warnings flag."
125+
warnings.warn(undefined_cdo_concepts_alleviation_message)
126+
conforms = False
127+
113128
return ValidationResult(
114-
validate_result[0],
129+
conforms,
115130
validate_result[1],
116131
validate_result[2],
117132
undefined_cdo_concepts,
@@ -212,25 +227,6 @@ def main() -> None:
212227

213228
args = parser.parse_args()
214229

215-
data_graph = rdflib.Graph()
216-
for in_graph in args.in_graph:
217-
_logger.debug("in_graph = %r.", in_graph)
218-
data_graph.parse(in_graph)
219-
220-
# Get the ontology graph based on the CASE version and supplemental graphs specified by the CLI
221-
ontology_graph = get_ontology_graph(
222-
case_version=args.built_version, supplemental_graphs=args.ontology_graph
223-
)
224-
225-
# Get the list of undefined CDO concepts in the graph
226-
undefined_cdo_concepts = get_invalid_cdo_concepts(data_graph, ontology_graph)
227-
for undefined_cdo_concept in sorted(undefined_cdo_concepts):
228-
warnings.warn(undefined_cdo_concept, NonExistentCDOConceptWarning)
229-
undefined_cdo_concepts_message = (
230-
"There were %d concepts with CDO IRIs in the data graph that are not in the ontology graph."
231-
% len(undefined_cdo_concepts)
232-
)
233-
234230
# Determine output format.
235231
# pySHACL's determination of output formatting is handled solely
236232
# through the -f flag. Other CASE CLI tools handle format
@@ -241,28 +237,23 @@ def main() -> None:
241237
if args.format != "human":
242238
validator_kwargs["serialize_report_graph"] = args.format
243239

244-
validate_result: Tuple[
245-
bool, Union[Exception, bytes, str, rdflib.Graph], str
246-
] = pyshacl.validate(
247-
data_graph,
248-
shacl_graph=ontology_graph,
249-
ont_graph=ontology_graph,
250-
inference=args.inference,
251-
meta_shacl=args.metashacl,
240+
validation_result: ValidationResult = validate(
241+
args.in_graph,
252242
abort_on_first=args.abort,
253243
allow_infos=True if args.allow_infos else False,
254244
allow_warnings=True if args.allow_warnings else False,
245+
case_version=args.built_version,
255246
debug=True if args.debug else False,
256247
do_owl_imports=True if args.imports else False,
248+
inference=args.inference,
249+
meta_shacl=args.metashacl,
250+
supplemental_graphs=args.ontology_graph,
257251
**validator_kwargs,
258252
)
259253

260-
# Relieve RAM of the data graph after validation has run.
261-
del data_graph
262-
263-
conforms = validate_result[0]
264-
validation_graph = validate_result[1]
265-
validation_text = validate_result[2]
254+
conforms = validation_result.conforms
255+
validation_graph = validation_result.graph
256+
validation_text = validation_result.text
266257

267258
# NOTE: The output logistics code is adapted from pySHACL's file
268259
# pyshacl/cli.py. This section should be monitored for code drift.
@@ -284,13 +275,6 @@ def main() -> None:
284275
% type(validation_graph)
285276
)
286277

287-
if len(undefined_cdo_concepts) > 0:
288-
warnings.warn(undefined_cdo_concepts_message)
289-
if not args.allow_warnings:
290-
undefined_cdo_concepts_alleviation_message = "The data graph is SHACL-conformant with the CDO ontologies, but nonexistent-concept references raise Warnings with this tool. Please either correct the concept names in the data graph; use the --ontology-graph flag to pass a corrected CDO ontology file, also using --built-version none; or, use the --allow-warnings flag."
291-
warnings.warn(undefined_cdo_concepts_alleviation_message)
292-
conforms = False
293-
294278
sys.exit(0 if conforms else 1)
295279

296280

0 commit comments

Comments
 (0)