diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py
index 316ed3f..e494514 100644
--- a/case_utils/case_file/__init__.py
+++ b/case_utils/case_file/__init__.py
@@ -17,8 +17,10 @@
__version__ = "0.3.0"
+import argparse
import datetime
import hashlib
+import logging
import os
import typing
import warnings
@@ -207,10 +209,9 @@ def create_file_node(
def main() -> None:
- import argparse
-
parser = argparse.ArgumentParser()
parser.add_argument("--base-prefix", default=DEFAULT_PREFIX)
+ parser.add_argument("--debug", action="store_true")
parser.add_argument("--disable-hashes", action="store_true")
parser.add_argument("--disable-mtime", action="store_true")
parser.add_argument(
@@ -220,6 +221,8 @@ def main() -> None:
parser.add_argument("in_file")
args = parser.parse_args()
+ logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
+
case_utils.local_uuid.configure()
NS_BASE = rdflib.Namespace(args.base_prefix)
diff --git a/case_utils/local_uuid.py b/case_utils/local_uuid.py
index 7e526c0..b3b77ff 100644
--- a/case_utils/local_uuid.py
+++ b/case_utils/local_uuid.py
@@ -17,20 +17,91 @@
__version__ = "0.2.0"
+import logging
import os
+import pathlib
import sys
+import typing
+import warnings
import uuid
-USE_DEMO_UUID: bool = False
+DEMO_UUID_BASE: typing.Optional[str] = None
DEMO_UUID_COUNTER: int = 0
+_logger = logging.getLogger(pathlib.Path(__file__).name)
+
def configure() -> None:
- global USE_DEMO_UUID
+ global DEMO_UUID_BASE
if os.getenv("DEMO_UUID_REQUESTING_NONRANDOM") == "NONRANDOM_REQUESTED":
- USE_DEMO_UUID = True
+ warnings.warn(
+ "Environment variable DEMO_UUID_REQUESTING_NONRANDOM is deprecated. See case_utils.local_uuid.demo_uuid for usage notes on its replacement, CASE_DEMO_NONRANDOM_UUID_BASE. Proceeding with random UUIDs.",
+ DeprecationWarning,
+ )
+ return
+
+ env_base_dir_name = os.getenv("CASE_DEMO_NONRANDOM_UUID_BASE")
+ if env_base_dir_name is None:
+ return
+
+ base_dir_original_path = pathlib.Path(env_base_dir_name)
+ if not base_dir_original_path.exists():
+ warnings.warn(
+ "Environment variable CASE_DEMO_NONRANDOM_UUID_BASE is expected to refer to an existing directory. Proceeding with random UUIDs."
+ )
+ return
+ if not base_dir_original_path.is_dir():
+ warnings.warn(
+ "Environment variable CASE_DEMO_NONRANDOM_UUID_BASE is expected to refer to a directory. Proceeding with random UUIDs."
+ )
+ return
+
+ # Component: An emphasis that this is an example.
+ demo_uuid_base_parts = ["example.org"]
+
+ # Component: Present working directory, relative to the directory named by CASE_DEMO_NONRANDOM_UUID_BASE if that directory is an ancestor of pwd.
+ base_dir_resolved_path = base_dir_original_path.resolve()
+ srcdir_original_path = pathlib.Path(os.getcwd())
+ srcdir_resolved_path = srcdir_original_path.resolve()
+ # _logger.debug("base_dir_resolved_path = %r.", base_dir_resolved_path)
+ # _logger.debug("srcdir_resolved_path = %r.", srcdir_resolved_path)
+ try:
+ srcdir_relative_path = srcdir_resolved_path.relative_to(base_dir_resolved_path)
+ # _logger.debug("srcdir_relative_path = %r.", srcdir_relative_path)
+ demo_uuid_base_parts.append(str(srcdir_relative_path))
+ except ValueError:
+ # If base_dir is not an ancestor directory of srcdir, default to srcdir.
+ # _logger.debug("PWD is not relative to base path.")
+ demo_uuid_base_parts.append(str(srcdir_resolved_path))
+
+ # Component: Command of argument vector.
+ env_venv_name = os.getenv("VIRTUAL_ENV")
+ if env_venv_name is None:
+ demo_uuid_base_parts.append(sys.argv[0])
+ else:
+ command_original_path = pathlib.Path(sys.argv[0])
+ command_resolved_path = command_original_path.resolve()
+ venv_original_path = pathlib.Path(env_venv_name)
+ venv_resolved_path = venv_original_path.resolve()
+ try:
+ command_relative_path = command_resolved_path.relative_to(
+ venv_resolved_path
+ )
+ # _logger.debug("command_relative_path = %r.", command_relative_path)
+ demo_uuid_base_parts.append(str(command_relative_path))
+ except ValueError:
+ # _logger.debug("Command path is not relative to virtual environment path.")
+ demo_uuid_base_parts.append(str(command_resolved_path))
+
+ if len(sys.argv) > 1:
+ # Component: Arguments of argument vector.
+ demo_uuid_base_parts.extend(sys.argv[1:])
+
+ # _logger.debug("demo_uuid_base_parts = %r.", demo_uuid_base_parts)
+
+ DEMO_UUID_BASE = "/".join(demo_uuid_base_parts)
def demo_uuid() -> str:
@@ -39,29 +110,25 @@ def demo_uuid() -> str:
WARNING: This function was developed for use ONLY for reducing (but not eliminating) version-control edits to identifiers in sample data. It creates UUIDs that are decidedly NOT random, and should remain consistent on repeated calls to the importing script.
- To prevent accidental non-random UUID usage, an environment variable must be set to an uncommon string, hard-coded in this function.
+ To prevent accidental non-random UUID usage, the environment variable CASE_DEMO_NONRANDOM_UUID_BASE must be set to a string provided by the caller. The variable's required value is the path to some existing directory. The variable's recommended value is the equivalent of the Make variable "top_srcdir" - that is, the root directory of the containing Git repository, some ancestor directory of the current process's current working directory.
"""
+ global DEMO_UUID_BASE
global DEMO_UUID_COUNTER
- if os.getenv("DEMO_UUID_REQUESTING_NONRANDOM") != "NONRANDOM_REQUESTED":
- raise EnvironmentError(
- "demo_uuid() called without DEMO_UUID_REQUESTING_NONRANDOM in environment."
+ if os.getenv("CASE_DEMO_NONRANDOM_UUID_BASE") is None:
+ raise ValueError(
+ "demo_uuid() called without CASE_DEMO_NONRANDOM_UUID_BASE in environment."
)
- # Component: An emphasis this is an example.
- parts = ["example.org"]
+ if DEMO_UUID_BASE is None:
+ raise ValueError("demo_uuid() called with DEMO_UUID_BASE unset.")
+
+ parts = [DEMO_UUID_BASE]
# Component: Incrementing counter.
DEMO_UUID_COUNTER += 1
parts.append(str(DEMO_UUID_COUNTER))
- # Component: Present working directory, replacing $HOME with '~'.
- env_HOME: str = os.getenv("HOME", "/nonexistent")
- parts.append(os.getcwd().replace(env_HOME, "~"))
-
- # Component: Argument vector.
- parts.extend(sys.argv)
-
return str(uuid.uuid5(uuid.NAMESPACE_URL, "/".join(parts)))
@@ -69,8 +136,8 @@ def local_uuid() -> str:
"""
Generate either a UUID4, or if requested via environment configuration, a non-random demo UUID.
"""
- global USE_DEMO_UUID
- if USE_DEMO_UUID:
- return demo_uuid()
- else:
+ global DEMO_UUID_BASE
+ if DEMO_UUID_BASE is None:
return str(uuid.uuid4())
+ else:
+ return demo_uuid()
diff --git a/tests/case_utils/case_file/Makefile b/tests/case_utils/case_file/Makefile
index 28d7e62..0ce744a 100644
--- a/tests/case_utils/case_file/Makefile
+++ b/tests/case_utils/case_file/Makefile
@@ -114,9 +114,10 @@ sample.txt.json: \
$(top_srcdir)/case_utils/namespace.py \
sample.txt-nocompact.json
rm -f $@ _$@ __$@
- export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
+ export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
&& source $(tests_srcdir)/venv/bin/activate \
&& case_file \
+ --debug \
__$@ \
sample.txt
source $(tests_srcdir)/venv/bin/activate \
@@ -146,9 +147,10 @@ sample.txt.ttl: \
$(top_srcdir)/case_utils/namespace.py \
sample.txt.done.log
rm -f _$@ __$@
- export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
+ export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
&& source $(tests_srcdir)/venv/bin/activate \
&& case_file \
+ --debug \
__$@ \
sample.txt
java -jar $(RDF_TOOLKIT_JAR) \
@@ -169,9 +171,10 @@ sample.txt-disable_hashes.ttl: \
$(top_srcdir)/case_utils/namespace.py \
sample.txt.done.log
rm -f _$@ __$@
- export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
+ export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
&& source $(tests_srcdir)/venv/bin/activate \
&& case_file \
+ --debug \
--disable-hashes \
__$@ \
sample.txt
@@ -194,9 +197,10 @@ sample.txt-nocompact.json: \
$(top_srcdir)/case_utils/namespace.py \
sample.txt.done.log
rm -f _$@
- export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
+ export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
&& source $(tests_srcdir)/venv/bin/activate \
&& case_file \
+ --debug \
_$@ \
sample.txt
# To avoid making noisy, uninformative updates from blank node identifiers, only move the new file into place if it is not isomorphic with the Git-tracked version of the target.
diff --git a/tests/case_utils/case_file/kb.json b/tests/case_utils/case_file/kb.json
index 5b7100d..b968377 100644
--- a/tests/case_utils/case_file/kb.json
+++ b/tests/case_utils/case_file/kb.json
@@ -9,7 +9,23 @@
},
"@graph": [
{
- "@id": "kb:file-1080c4cc-7886-5a52-bac1-f6a2b16c0ddb",
+ "@id": "kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9",
+ "@type": "uco-observable:File",
+ "uco-core:hasFacet": {
+ "@type": "uco-observable:FileFacet",
+ "uco-observable:fileName": "sample.txt",
+ "uco-observable:modifiedTime": {
+ "@type": "xsd:dateTime",
+ "@value": "2010-01-02T03:04:56+00:00"
+ },
+ "uco-observable:sizeInBytes": {
+ "@type": "xsd:integer",
+ "@value": "4"
+ }
+ }
+ },
+ {
+ "@id": "kb:file-ace6460a-92a9-58b9-83ea-a18ae87f6e04",
"@type": "uco-observable:File",
"uco-core:hasFacet": [
{
@@ -78,22 +94,6 @@
}
}
]
- },
- {
- "@id": "kb:file-b5e8a943-c556-5964-a618-8f0d000822af",
- "@type": "uco-observable:File",
- "uco-core:hasFacet": {
- "@type": "uco-observable:FileFacet",
- "uco-observable:fileName": "sample.txt",
- "uco-observable:modifiedTime": {
- "@type": "xsd:dateTime",
- "@value": "2010-01-02T03:04:56+00:00"
- },
- "uco-observable:sizeInBytes": {
- "@type": "xsd:integer",
- "@value": "4"
- }
- }
}
]
}
\ No newline at end of file
diff --git a/tests/case_utils/case_file/kb.ttl b/tests/case_utils/case_file/kb.ttl
index 6241217..942249b 100644
--- a/tests/case_utils/case_file/kb.ttl
+++ b/tests/case_utils/case_file/kb.ttl
@@ -8,7 +8,17 @@
@prefix uco-vocabulary: .
@prefix xsd: .
-kb:file-1080c4cc-7886-5a52-bac1-f6a2b16c0ddb
+kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9
+ a uco-observable:File ;
+ uco-core:hasFacet [
+ a uco-observable:FileFacet ;
+ uco-observable:fileName "sample.txt" ;
+ uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ;
+ uco-observable:sizeInBytes "4"^^xsd:integer ;
+ ] ;
+ .
+
+kb:file-ace6460a-92a9-58b9-83ea-a18ae87f6e04
a uco-observable:File ;
uco-core:hasFacet
[
@@ -46,13 +56,3 @@ kb:file-1080c4cc-7886-5a52-bac1-f6a2b16c0ddb
;
.
-kb:file-b5e8a943-c556-5964-a618-8f0d000822af
- a uco-observable:File ;
- uco-core:hasFacet [
- a uco-observable:FileFacet ;
- uco-observable:fileName "sample.txt" ;
- uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ;
- uco-observable:sizeInBytes "4"^^xsd:integer ;
- ] ;
- .
-
diff --git a/tests/case_utils/case_file/sample.txt-disable_hashes.ttl b/tests/case_utils/case_file/sample.txt-disable_hashes.ttl
index 669aa51..edea5af 100644
--- a/tests/case_utils/case_file/sample.txt-disable_hashes.ttl
+++ b/tests/case_utils/case_file/sample.txt-disable_hashes.ttl
@@ -6,7 +6,7 @@
@prefix uco-observable: .
@prefix xsd: .
-kb:file-b5e8a943-c556-5964-a618-8f0d000822af
+kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9
a uco-observable:File ;
uco-core:hasFacet [
a uco-observable:FileFacet ;
diff --git a/tests/case_utils/case_file/sample.txt-nocompact.json b/tests/case_utils/case_file/sample.txt-nocompact.json
index e71f43a..e848ebf 100644
--- a/tests/case_utils/case_file/sample.txt-nocompact.json
+++ b/tests/case_utils/case_file/sample.txt-nocompact.json
@@ -35,19 +35,19 @@
},
"@graph": [
{
- "@id": "http://example.org/kb/file-2999db3b-5e66-53b2-8470-8bfd838c444e",
+ "@id": "http://example.org/kb/file-23f45d80-7b16-5e7f-ba34-40392fa4f8fc",
"@type": "https://ontology.unifiedcyberontology.org/uco/observable/File",
"https://ontology.unifiedcyberontology.org/uco/core/hasFacet": [
{
- "@id": "_:Nf16b91a8337843bfa9e99cbcceca4fea"
+ "@id": "_:N7b4f54f0f233497a8981441323dfdd27"
},
{
- "@id": "_:Ne9cc5032d4804fa19a3fe75c4966febf"
+ "@id": "_:Nf3c8a77cca9e4807a8b016f8b94eae64"
}
]
},
{
- "@id": "_:Nf16b91a8337843bfa9e99cbcceca4fea",
+ "@id": "_:N7b4f54f0f233497a8981441323dfdd27",
"@type": "https://ontology.unifiedcyberontology.org/uco/observable/FileFacet",
"https://ontology.unifiedcyberontology.org/uco/observable/fileName": "sample.txt",
"https://ontology.unifiedcyberontology.org/uco/observable/modifiedTime": {
@@ -57,26 +57,26 @@
"https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4
},
{
- "@id": "_:Ne9cc5032d4804fa19a3fe75c4966febf",
+ "@id": "_:Nf3c8a77cca9e4807a8b016f8b94eae64",
"@type": "https://ontology.unifiedcyberontology.org/uco/observable/ContentDataFacet",
"https://ontology.unifiedcyberontology.org/uco/observable/hash": [
{
- "@id": "_:N4074d88aa360403285b8cda26cfc74f2"
+ "@id": "_:N32345c071afb41eb8e4ac36497356af8"
},
{
- "@id": "_:N801158ea2716484791001371f60d5b60"
+ "@id": "_:N8f2e8439e67c4ae4b5d6dcf7947cf57a"
},
{
- "@id": "_:N550b15454d234888abb56ef4a1eb62a6"
+ "@id": "_:N94114772034d4c7f9ea8a7ce77a3b848"
},
{
- "@id": "_:N4a33b783148c43bd9bda1c04c581ea7c"
+ "@id": "_:Ndc9c78f6e42e4c96a9015ba58f3e8b3b"
}
],
"https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4
},
{
- "@id": "_:N4074d88aa360403285b8cda26cfc74f2",
+ "@id": "_:N32345c071afb41eb8e4ac36497356af8",
"@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash",
"https://ontology.unifiedcyberontology.org/uco/types/hashMethod": {
"@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab",
@@ -88,7 +88,7 @@
}
},
{
- "@id": "_:N801158ea2716484791001371f60d5b60",
+ "@id": "_:N8f2e8439e67c4ae4b5d6dcf7947cf57a",
"@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash",
"https://ontology.unifiedcyberontology.org/uco/types/hashMethod": {
"@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab",
@@ -100,7 +100,7 @@
}
},
{
- "@id": "_:N550b15454d234888abb56ef4a1eb62a6",
+ "@id": "_:N94114772034d4c7f9ea8a7ce77a3b848",
"@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash",
"https://ontology.unifiedcyberontology.org/uco/types/hashMethod": {
"@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab",
@@ -112,7 +112,7 @@
}
},
{
- "@id": "_:N4a33b783148c43bd9bda1c04c581ea7c",
+ "@id": "_:Ndc9c78f6e42e4c96a9015ba58f3e8b3b",
"@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash",
"https://ontology.unifiedcyberontology.org/uco/types/hashMethod": {
"@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab",
diff --git a/tests/case_utils/case_file/sample.txt.json b/tests/case_utils/case_file/sample.txt.json
index b656d00..2539324 100644
--- a/tests/case_utils/case_file/sample.txt.json
+++ b/tests/case_utils/case_file/sample.txt.json
@@ -35,19 +35,19 @@
},
"@graph": [
{
- "@id": "kb:file-44aa82fe-a733-5892-8daa-7a28378d6afc",
+ "@id": "kb:file-789a91ef-6446-548c-9911-dcc5168f25ea",
"@type": "uco-observable:File",
"uco-core:hasFacet": [
{
- "@id": "_:N86dc036c736c420684afd0e2cdea35e8"
+ "@id": "_:N9cb3cc2ab59546a3a8409b0a43beb4c3"
},
{
- "@id": "_:N30cc9a3cb8d741c3896d6ff81ae053f6"
+ "@id": "_:N34067c06b9364c50870f642861cac333"
}
]
},
{
- "@id": "_:N86dc036c736c420684afd0e2cdea35e8",
+ "@id": "_:N9cb3cc2ab59546a3a8409b0a43beb4c3",
"@type": "uco-observable:FileFacet",
"uco-observable:fileName": "sample.txt",
"uco-observable:modifiedTime": {
@@ -57,26 +57,26 @@
"uco-observable:sizeInBytes": 4
},
{
- "@id": "_:N30cc9a3cb8d741c3896d6ff81ae053f6",
+ "@id": "_:N34067c06b9364c50870f642861cac333",
"@type": "uco-observable:ContentDataFacet",
"uco-observable:hash": [
{
- "@id": "_:Ncdaad817bed0440cb8df78ea13bca979"
+ "@id": "_:N8536d3196ade4a33898077091ffe7b37"
},
{
- "@id": "_:Nb29e43af29e74e30af823ba2ac7fc826"
+ "@id": "_:Nee102c2e670c450b9c7bfe532916631a"
},
{
- "@id": "_:Nad4c18c5ea3a45589ee33d98134f4f6e"
+ "@id": "_:N874bad97d6e94943a23d04234517307f"
},
{
- "@id": "_:N3a34ed3719864b4fbd9e6da88ae97d54"
+ "@id": "_:Nb1f22efd766f43768fc83a2183df2396"
}
],
"uco-observable:sizeInBytes": 4
},
{
- "@id": "_:Ncdaad817bed0440cb8df78ea13bca979",
+ "@id": "_:N8536d3196ade4a33898077091ffe7b37",
"@type": "uco-types:Hash",
"uco-types:hashMethod": {
"@type": "uco-vocabulary:HashNameVocab",
@@ -88,7 +88,7 @@
}
},
{
- "@id": "_:Nb29e43af29e74e30af823ba2ac7fc826",
+ "@id": "_:Nee102c2e670c450b9c7bfe532916631a",
"@type": "uco-types:Hash",
"uco-types:hashMethod": {
"@type": "uco-vocabulary:HashNameVocab",
@@ -100,7 +100,7 @@
}
},
{
- "@id": "_:Nad4c18c5ea3a45589ee33d98134f4f6e",
+ "@id": "_:N874bad97d6e94943a23d04234517307f",
"@type": "uco-types:Hash",
"uco-types:hashMethod": {
"@type": "uco-vocabulary:HashNameVocab",
@@ -112,7 +112,7 @@
}
},
{
- "@id": "_:N3a34ed3719864b4fbd9e6da88ae97d54",
+ "@id": "_:Nb1f22efd766f43768fc83a2183df2396",
"@type": "uco-types:Hash",
"uco-types:hashMethod": {
"@type": "uco-vocabulary:HashNameVocab",
diff --git a/tests/case_utils/case_file/sample.txt.ttl b/tests/case_utils/case_file/sample.txt.ttl
index 6327640..3bcac8f 100644
--- a/tests/case_utils/case_file/sample.txt.ttl
+++ b/tests/case_utils/case_file/sample.txt.ttl
@@ -8,7 +8,7 @@
@prefix uco-vocabulary: .
@prefix xsd: .
-kb:file-1080c4cc-7886-5a52-bac1-f6a2b16c0ddb
+kb:file-ace6460a-92a9-58b9-83ea-a18ae87f6e04
a uco-observable:File ;
uco-core:hasFacet
[
diff --git a/tests/case_utils/test_local_uuid.py b/tests/case_utils/test_local_uuid.py
new file mode 100644
index 0000000..c60ed09
--- /dev/null
+++ b/tests/case_utils/test_local_uuid.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+# This software was developed at the National Institute of Standards
+# and Technology by employees of the Federal Government in the course
+# of their official duties. Pursuant to title 17 Section 105 of the
+# United States Code this software is not subject to copyright
+# protection and is in the public domain. NIST assumes no
+# responsibility whatsoever for its use by other parties, and makes
+# no guarantees, expressed or implied, about its quality,
+# reliability, or any other characteristic.
+#
+# We would appreciate acknowledgement if the software is used.
+
+import os
+
+import pytest
+
+import case_utils.local_uuid
+
+
+def test_local_uuid_deprecation(monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.setenv("DEMO_UUID_REQUESTING_NONRANDOM", "NONRANDOM_REQUESTED")
+ with pytest.warns(DeprecationWarning):
+ case_utils.local_uuid.configure()
+
+
+def test_local_uuid_nondirectory(monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.setenv("CASE_DEMO_NONRANDOM_UUID_BASE", "/dev/null")
+ with pytest.warns(UserWarning):
+ case_utils.local_uuid.configure()
+
+
+def test_local_uuid_nonexistent(monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.setenv("CASE_DEMO_NONRANDOM_UUID_BASE", "/dev/nonexistent")
+ with pytest.warns(UserWarning):
+ case_utils.local_uuid.configure()