diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index 316ed3f..e494514 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -17,8 +17,10 @@ __version__ = "0.3.0" +import argparse import datetime import hashlib +import logging import os import typing import warnings @@ -207,10 +209,9 @@ def create_file_node( def main() -> None: - import argparse - parser = argparse.ArgumentParser() parser.add_argument("--base-prefix", default=DEFAULT_PREFIX) + parser.add_argument("--debug", action="store_true") parser.add_argument("--disable-hashes", action="store_true") parser.add_argument("--disable-mtime", action="store_true") parser.add_argument( @@ -220,6 +221,8 @@ def main() -> None: parser.add_argument("in_file") args = parser.parse_args() + logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) + case_utils.local_uuid.configure() NS_BASE = rdflib.Namespace(args.base_prefix) diff --git a/case_utils/local_uuid.py b/case_utils/local_uuid.py index 7e526c0..b3b77ff 100644 --- a/case_utils/local_uuid.py +++ b/case_utils/local_uuid.py @@ -17,20 +17,91 @@ __version__ = "0.2.0" +import logging import os +import pathlib import sys +import typing +import warnings import uuid -USE_DEMO_UUID: bool = False +DEMO_UUID_BASE: typing.Optional[str] = None DEMO_UUID_COUNTER: int = 0 +_logger = logging.getLogger(pathlib.Path(__file__).name) + def configure() -> None: - global USE_DEMO_UUID + global DEMO_UUID_BASE if os.getenv("DEMO_UUID_REQUESTING_NONRANDOM") == "NONRANDOM_REQUESTED": - USE_DEMO_UUID = True + warnings.warn( + "Environment variable DEMO_UUID_REQUESTING_NONRANDOM is deprecated. See case_utils.local_uuid.demo_uuid for usage notes on its replacement, CASE_DEMO_NONRANDOM_UUID_BASE. Proceeding with random UUIDs.", + DeprecationWarning, + ) + return + + env_base_dir_name = os.getenv("CASE_DEMO_NONRANDOM_UUID_BASE") + if env_base_dir_name is None: + return + + base_dir_original_path = pathlib.Path(env_base_dir_name) + if not base_dir_original_path.exists(): + warnings.warn( + "Environment variable CASE_DEMO_NONRANDOM_UUID_BASE is expected to refer to an existing directory. Proceeding with random UUIDs." + ) + return + if not base_dir_original_path.is_dir(): + warnings.warn( + "Environment variable CASE_DEMO_NONRANDOM_UUID_BASE is expected to refer to a directory. Proceeding with random UUIDs." + ) + return + + # Component: An emphasis this is an example. + demo_uuid_base_parts = ["example.org"] + + # Component: Present working directory, relative to CASE_DEMO_NONRANDOM_UUID_BASE if that environment variable is an ancestor of pwd. + base_dir_resolved_path = base_dir_original_path.resolve() + srcdir_original_path = pathlib.Path(os.getcwd()) + srcdir_resolved_path = srcdir_original_path.resolve() + # _logger.debug("base_dir_resolved_path = %r.", base_dir_resolved_path) + # _logger.debug("srcdir_resolved_path = %r.", srcdir_resolved_path) + try: + srcdir_relative_path = srcdir_resolved_path.relative_to(base_dir_resolved_path) + # _logger.debug("srcdir_relative_path = %r.", srcdir_relative_path) + demo_uuid_base_parts.append(str(srcdir_relative_path)) + except ValueError: + # If base_dir is not an ancestor directory of srcdir, default to srcdir. + # _logger.debug("PWD is not relative to base path.") + demo_uuid_base_parts.append(str(srcdir_resolved_path)) + + # Component: Command of argument vector. + env_venv_name = os.getenv("VIRTUAL_ENV") + if env_venv_name is None: + demo_uuid_base_parts.append(sys.argv[0]) + else: + command_original_path = pathlib.Path(sys.argv[0]) + command_resolved_path = command_original_path.resolve() + venv_original_path = pathlib.Path(env_venv_name) + venv_resolved_path = venv_original_path.resolve() + try: + command_relative_path = command_resolved_path.relative_to( + venv_resolved_path + ) + # _logger.debug("command_relative_path = %r.", command_relative_path) + demo_uuid_base_parts.append(str(command_relative_path)) + except ValueError: + # _logger.debug("Command path is not relative to virtual environment path.") + demo_uuid_base_parts.append(str(command_resolved_path)) + + if len(sys.argv) > 1: + # Component: Arguments of argument vector. + demo_uuid_base_parts.extend(sys.argv[1:]) + + # _logger.debug("demo_uuid_base_parts = %r.", demo_uuid_base_parts) + + DEMO_UUID_BASE = "/".join(demo_uuid_base_parts) def demo_uuid() -> str: @@ -39,29 +110,25 @@ def demo_uuid() -> str: WARNING: This function was developed for use ONLY for reducing (but not eliminating) version-control edits to identifiers in sample data. It creates UUIDs that are decidedly NOT random, and should remain consistent on repeated calls to the importing script. - To prevent accidental non-random UUID usage, an environment variable must be set to an uncommon string, hard-coded in this function. + To prevent accidental non-random UUID usage, an environment variable must be set to a string provided by the caller. The variable's required value is the path to some directory. The variable's recommended value is the equivalent of the Make variable "top_srcdir" - that is, the root directory of the containing Git repository, some parent of the current process's current working directory. """ + global DEMO_UUID_BASE global DEMO_UUID_COUNTER - if os.getenv("DEMO_UUID_REQUESTING_NONRANDOM") != "NONRANDOM_REQUESTED": - raise EnvironmentError( - "demo_uuid() called without DEMO_UUID_REQUESTING_NONRANDOM in environment." + if os.getenv("CASE_DEMO_NONRANDOM_UUID_BASE") is None: + raise ValueError( + "demo_uuid() called without CASE_DEMO_NONRANDOM_UUID_BASE in environment." ) - # Component: An emphasis this is an example. - parts = ["example.org"] + if DEMO_UUID_BASE is None: + raise ValueError("demo_uuid() called with DEMO_UUID_BASE unset.") + + parts = [DEMO_UUID_BASE] # Component: Incrementing counter. DEMO_UUID_COUNTER += 1 parts.append(str(DEMO_UUID_COUNTER)) - # Component: Present working directory, replacing $HOME with '~'. - env_HOME: str = os.getenv("HOME", "/nonexistent") - parts.append(os.getcwd().replace(env_HOME, "~")) - - # Component: Argument vector. - parts.extend(sys.argv) - return str(uuid.uuid5(uuid.NAMESPACE_URL, "/".join(parts))) @@ -69,8 +136,8 @@ def local_uuid() -> str: """ Generate either a UUID4, or if requested via environment configuration, a non-random demo UUID. """ - global USE_DEMO_UUID - if USE_DEMO_UUID: - return demo_uuid() - else: + global DEMO_UUID_BASE + if DEMO_UUID_BASE is None: return str(uuid.uuid4()) + else: + return demo_uuid() diff --git a/tests/case_utils/case_file/Makefile b/tests/case_utils/case_file/Makefile index 28d7e62..0ce744a 100644 --- a/tests/case_utils/case_file/Makefile +++ b/tests/case_utils/case_file/Makefile @@ -114,9 +114,10 @@ sample.txt.json: \ $(top_srcdir)/case_utils/namespace.py \ sample.txt-nocompact.json rm -f $@ _$@ __$@ - export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \ + export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \ && source $(tests_srcdir)/venv/bin/activate \ && case_file \ + --debug \ __$@ \ sample.txt source $(tests_srcdir)/venv/bin/activate \ @@ -146,9 +147,10 @@ sample.txt.ttl: \ $(top_srcdir)/case_utils/namespace.py \ sample.txt.done.log rm -f _$@ __$@ - export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \ + export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \ && source $(tests_srcdir)/venv/bin/activate \ && case_file \ + --debug \ __$@ \ sample.txt java -jar $(RDF_TOOLKIT_JAR) \ @@ -169,9 +171,10 @@ sample.txt-disable_hashes.ttl: \ $(top_srcdir)/case_utils/namespace.py \ sample.txt.done.log rm -f _$@ __$@ - export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \ + export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \ && source $(tests_srcdir)/venv/bin/activate \ && case_file \ + --debug \ --disable-hashes \ __$@ \ sample.txt @@ -194,9 +197,10 @@ sample.txt-nocompact.json: \ $(top_srcdir)/case_utils/namespace.py \ sample.txt.done.log rm -f _$@ - export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \ + export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \ && source $(tests_srcdir)/venv/bin/activate \ && case_file \ + --debug \ _$@ \ sample.txt # To avoid making noisy, uninformative updates from blank node identifiers, only move the new file into place if it is not isomorphic with the Git-tracked version of the target. diff --git a/tests/case_utils/case_file/kb.json b/tests/case_utils/case_file/kb.json index 5b7100d..b968377 100644 --- a/tests/case_utils/case_file/kb.json +++ b/tests/case_utils/case_file/kb.json @@ -9,7 +9,23 @@ }, "@graph": [ { - "@id": "kb:file-1080c4cc-7886-5a52-bac1-f6a2b16c0ddb", + "@id": "kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9", + "@type": "uco-observable:File", + "uco-core:hasFacet": { + "@type": "uco-observable:FileFacet", + "uco-observable:fileName": "sample.txt", + "uco-observable:modifiedTime": { + "@type": "xsd:dateTime", + "@value": "2010-01-02T03:04:56+00:00" + }, + "uco-observable:sizeInBytes": { + "@type": "xsd:integer", + "@value": "4" + } + } + }, + { + "@id": "kb:file-ace6460a-92a9-58b9-83ea-a18ae87f6e04", "@type": "uco-observable:File", "uco-core:hasFacet": [ { @@ -78,22 +94,6 @@ } } ] - }, - { - "@id": "kb:file-b5e8a943-c556-5964-a618-8f0d000822af", - "@type": "uco-observable:File", - "uco-core:hasFacet": { - "@type": "uco-observable:FileFacet", - "uco-observable:fileName": "sample.txt", - "uco-observable:modifiedTime": { - "@type": "xsd:dateTime", - "@value": "2010-01-02T03:04:56+00:00" - }, - "uco-observable:sizeInBytes": { - "@type": "xsd:integer", - "@value": "4" - } - } } ] } \ No newline at end of file diff --git a/tests/case_utils/case_file/kb.ttl b/tests/case_utils/case_file/kb.ttl index 6241217..942249b 100644 --- a/tests/case_utils/case_file/kb.ttl +++ b/tests/case_utils/case_file/kb.ttl @@ -8,7 +8,17 @@ @prefix uco-vocabulary: . @prefix xsd: . -kb:file-1080c4cc-7886-5a52-bac1-f6a2b16c0ddb +kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9 + a uco-observable:File ; + uco-core:hasFacet [ + a uco-observable:FileFacet ; + uco-observable:fileName "sample.txt" ; + uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; + uco-observable:sizeInBytes "4"^^xsd:integer ; + ] ; + . + +kb:file-ace6460a-92a9-58b9-83ea-a18ae87f6e04 a uco-observable:File ; uco-core:hasFacet [ @@ -46,13 +56,3 @@ kb:file-1080c4cc-7886-5a52-bac1-f6a2b16c0ddb ; . -kb:file-b5e8a943-c556-5964-a618-8f0d000822af - a uco-observable:File ; - uco-core:hasFacet [ - a uco-observable:FileFacet ; - uco-observable:fileName "sample.txt" ; - uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; - uco-observable:sizeInBytes "4"^^xsd:integer ; - ] ; - . - diff --git a/tests/case_utils/case_file/sample.txt-disable_hashes.ttl b/tests/case_utils/case_file/sample.txt-disable_hashes.ttl index 669aa51..edea5af 100644 --- a/tests/case_utils/case_file/sample.txt-disable_hashes.ttl +++ b/tests/case_utils/case_file/sample.txt-disable_hashes.ttl @@ -6,7 +6,7 @@ @prefix uco-observable: . @prefix xsd: . -kb:file-b5e8a943-c556-5964-a618-8f0d000822af +kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9 a uco-observable:File ; uco-core:hasFacet [ a uco-observable:FileFacet ; diff --git a/tests/case_utils/case_file/sample.txt-nocompact.json b/tests/case_utils/case_file/sample.txt-nocompact.json index e71f43a..e848ebf 100644 --- a/tests/case_utils/case_file/sample.txt-nocompact.json +++ b/tests/case_utils/case_file/sample.txt-nocompact.json @@ -35,19 +35,19 @@ }, "@graph": [ { - "@id": "http://example.org/kb/file-2999db3b-5e66-53b2-8470-8bfd838c444e", + "@id": "http://example.org/kb/file-23f45d80-7b16-5e7f-ba34-40392fa4f8fc", "@type": "https://ontology.unifiedcyberontology.org/uco/observable/File", "https://ontology.unifiedcyberontology.org/uco/core/hasFacet": [ { - "@id": "_:Nf16b91a8337843bfa9e99cbcceca4fea" + "@id": "_:N7b4f54f0f233497a8981441323dfdd27" }, { - "@id": "_:Ne9cc5032d4804fa19a3fe75c4966febf" + "@id": "_:Nf3c8a77cca9e4807a8b016f8b94eae64" } ] }, { - "@id": "_:Nf16b91a8337843bfa9e99cbcceca4fea", + "@id": "_:N7b4f54f0f233497a8981441323dfdd27", "@type": "https://ontology.unifiedcyberontology.org/uco/observable/FileFacet", "https://ontology.unifiedcyberontology.org/uco/observable/fileName": "sample.txt", "https://ontology.unifiedcyberontology.org/uco/observable/modifiedTime": { @@ -57,26 +57,26 @@ "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 }, { - "@id": "_:Ne9cc5032d4804fa19a3fe75c4966febf", + "@id": "_:Nf3c8a77cca9e4807a8b016f8b94eae64", "@type": "https://ontology.unifiedcyberontology.org/uco/observable/ContentDataFacet", "https://ontology.unifiedcyberontology.org/uco/observable/hash": [ { - "@id": "_:N4074d88aa360403285b8cda26cfc74f2" + "@id": "_:N32345c071afb41eb8e4ac36497356af8" }, { - "@id": "_:N801158ea2716484791001371f60d5b60" + "@id": "_:N8f2e8439e67c4ae4b5d6dcf7947cf57a" }, { - "@id": "_:N550b15454d234888abb56ef4a1eb62a6" + "@id": "_:N94114772034d4c7f9ea8a7ce77a3b848" }, { - "@id": "_:N4a33b783148c43bd9bda1c04c581ea7c" + "@id": "_:Ndc9c78f6e42e4c96a9015ba58f3e8b3b" } ], "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 }, { - "@id": "_:N4074d88aa360403285b8cda26cfc74f2", + "@id": "_:N32345c071afb41eb8e4ac36497356af8", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", @@ -88,7 +88,7 @@ } }, { - "@id": "_:N801158ea2716484791001371f60d5b60", + "@id": "_:N8f2e8439e67c4ae4b5d6dcf7947cf57a", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", @@ -100,7 +100,7 @@ } }, { - "@id": "_:N550b15454d234888abb56ef4a1eb62a6", + "@id": "_:N94114772034d4c7f9ea8a7ce77a3b848", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", @@ -112,7 +112,7 @@ } }, { - "@id": "_:N4a33b783148c43bd9bda1c04c581ea7c", + "@id": "_:Ndc9c78f6e42e4c96a9015ba58f3e8b3b", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", diff --git a/tests/case_utils/case_file/sample.txt.json b/tests/case_utils/case_file/sample.txt.json index b656d00..2539324 100644 --- a/tests/case_utils/case_file/sample.txt.json +++ b/tests/case_utils/case_file/sample.txt.json @@ -35,19 +35,19 @@ }, "@graph": [ { - "@id": "kb:file-44aa82fe-a733-5892-8daa-7a28378d6afc", + "@id": "kb:file-789a91ef-6446-548c-9911-dcc5168f25ea", "@type": "uco-observable:File", "uco-core:hasFacet": [ { - "@id": "_:N86dc036c736c420684afd0e2cdea35e8" + "@id": "_:N9cb3cc2ab59546a3a8409b0a43beb4c3" }, { - "@id": "_:N30cc9a3cb8d741c3896d6ff81ae053f6" + "@id": "_:N34067c06b9364c50870f642861cac333" } ] }, { - "@id": "_:N86dc036c736c420684afd0e2cdea35e8", + "@id": "_:N9cb3cc2ab59546a3a8409b0a43beb4c3", "@type": "uco-observable:FileFacet", "uco-observable:fileName": "sample.txt", "uco-observable:modifiedTime": { @@ -57,26 +57,26 @@ "uco-observable:sizeInBytes": 4 }, { - "@id": "_:N30cc9a3cb8d741c3896d6ff81ae053f6", + "@id": "_:N34067c06b9364c50870f642861cac333", "@type": "uco-observable:ContentDataFacet", "uco-observable:hash": [ { - "@id": "_:Ncdaad817bed0440cb8df78ea13bca979" + "@id": "_:N8536d3196ade4a33898077091ffe7b37" }, { - "@id": "_:Nb29e43af29e74e30af823ba2ac7fc826" + "@id": "_:Nee102c2e670c450b9c7bfe532916631a" }, { - "@id": "_:Nad4c18c5ea3a45589ee33d98134f4f6e" + "@id": "_:N874bad97d6e94943a23d04234517307f" }, { - "@id": "_:N3a34ed3719864b4fbd9e6da88ae97d54" + "@id": "_:Nb1f22efd766f43768fc83a2183df2396" } ], "uco-observable:sizeInBytes": 4 }, { - "@id": "_:Ncdaad817bed0440cb8df78ea13bca979", + "@id": "_:N8536d3196ade4a33898077091ffe7b37", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", @@ -88,7 +88,7 @@ } }, { - "@id": "_:Nb29e43af29e74e30af823ba2ac7fc826", + "@id": "_:Nee102c2e670c450b9c7bfe532916631a", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", @@ -100,7 +100,7 @@ } }, { - "@id": "_:Nad4c18c5ea3a45589ee33d98134f4f6e", + "@id": "_:N874bad97d6e94943a23d04234517307f", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", @@ -112,7 +112,7 @@ } }, { - "@id": "_:N3a34ed3719864b4fbd9e6da88ae97d54", + "@id": "_:Nb1f22efd766f43768fc83a2183df2396", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", diff --git a/tests/case_utils/case_file/sample.txt.ttl b/tests/case_utils/case_file/sample.txt.ttl index 6327640..3bcac8f 100644 --- a/tests/case_utils/case_file/sample.txt.ttl +++ b/tests/case_utils/case_file/sample.txt.ttl @@ -8,7 +8,7 @@ @prefix uco-vocabulary: . @prefix xsd: . -kb:file-1080c4cc-7886-5a52-bac1-f6a2b16c0ddb +kb:file-ace6460a-92a9-58b9-83ea-a18ae87f6e04 a uco-observable:File ; uco-core:hasFacet [ diff --git a/tests/case_utils/test_local_uuid.py b/tests/case_utils/test_local_uuid.py new file mode 100644 index 0000000..c60ed09 --- /dev/null +++ b/tests/case_utils/test_local_uuid.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +import os + +import pytest + +import case_utils.local_uuid + + +def test_local_uuid_deprecation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("DEMO_UUID_REQUESTING_NONRANDOM", "NONRANDOM_REQUESTED") + with pytest.warns(DeprecationWarning): + case_utils.local_uuid.configure() + + +def test_local_uuid_nondirectory(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("CASE_DEMO_NONRANDOM_UUID_BASE", "/dev/null") + with pytest.warns(UserWarning): + case_utils.local_uuid.configure() + + +def test_local_uuid_nonexistent(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("CASE_DEMO_NONRANDOM_UUID_BASE", "/dev/nonexistent") + with pytest.warns(UserWarning): + case_utils.local_uuid.configure()