Skip to content

Change local_uuid setup parameter #49

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions case_utils/case_file/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@

__version__ = "0.3.0"

import argparse
import datetime
import hashlib
import logging
import os
import typing
import warnings
Expand Down Expand Up @@ -207,10 +209,9 @@ def create_file_node(


def main() -> None:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--base-prefix", default=DEFAULT_PREFIX)
parser.add_argument("--debug", action="store_true")
parser.add_argument("--disable-hashes", action="store_true")
parser.add_argument("--disable-mtime", action="store_true")
parser.add_argument(
Expand All @@ -220,6 +221,8 @@ def main() -> None:
parser.add_argument("in_file")
args = parser.parse_args()

logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

case_utils.local_uuid.configure()

NS_BASE = rdflib.Namespace(args.base_prefix)
Expand Down
107 changes: 87 additions & 20 deletions case_utils/local_uuid.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,91 @@

__version__ = "0.2.0"

import logging
import os
import pathlib
import sys
import typing
import warnings
import uuid

# Legacy switch associated with the deprecated DEMO_UUID_REQUESTING_NONRANDOM
# environment variable.
USE_DEMO_UUID: bool = False
# Prefix string for non-random demo UUIDs.  None until configure() derives a
# value from CASE_DEMO_NONRANDOM_UUID_BASE; None means random UUIDs are used.
DEMO_UUID_BASE: typing.Optional[str] = None

# Number of demo UUIDs issued so far; incremented by demo_uuid() on each call.
DEMO_UUID_COUNTER: int = 0

# Logger named after this source file's base name.
_logger = logging.getLogger(pathlib.Path(__file__).name)


def configure() -> None:
    """
    Decide from the process environment whether non-random demo UUIDs are enabled.

    If the environment variable CASE_DEMO_NONRANDOM_UUID_BASE names an existing directory, the module-level DEMO_UUID_BASE is set to a "/"-joined string made of:

    * the literal "example.org";
    * the current working directory, relative to the base directory when the base directory is an ancestor of it, otherwise the resolved absolute path;
    * the command (sys.argv[0]), relative to VIRTUAL_ENV when that variable is set and is an ancestor of the command, otherwise as given (no VIRTUAL_ENV) or resolved (VIRTUAL_ENV set but unrelated);
    * the remaining command-line arguments.

    In every other case DEMO_UUID_BASE is left untouched.  A warning is issued when CASE_DEMO_NONRANDOM_UUID_BASE is set but does not name an existing directory.

    The deprecated variable DEMO_UUID_REQUESTING_NONRANDOM only triggers a DeprecationWarning.  It does not enable demo UUIDs, so that behavior matches the warning's statement that random UUIDs are used.
    """
    global DEMO_UUID_BASE

    if os.getenv("DEMO_UUID_REQUESTING_NONRANDOM") == "NONRANDOM_REQUESTED":
        # Deliberately do not enable the demo path here: the warning promises
        # random UUIDs, and the replacement variable has not been inspected.
        warnings.warn(
            "Environment variable DEMO_UUID_REQUESTING_NONRANDOM is deprecated. See case_utils.local_uuid.demo_uuid for usage notes on its replacement, CASE_DEMO_NONRANDOM_UUID_BASE. Proceeding with random UUIDs.",
            DeprecationWarning,
        )
        return

    env_base_dir_name = os.getenv("CASE_DEMO_NONRANDOM_UUID_BASE")
    if env_base_dir_name is None:
        return

    base_dir_original_path = pathlib.Path(env_base_dir_name)
    if not base_dir_original_path.exists():
        warnings.warn(
            "Environment variable CASE_DEMO_NONRANDOM_UUID_BASE is expected to refer to an existing directory. Proceeding with random UUIDs."
        )
        return
    if not base_dir_original_path.is_dir():
        warnings.warn(
            "Environment variable CASE_DEMO_NONRANDOM_UUID_BASE is expected to refer to a directory. Proceeding with random UUIDs."
        )
        return

    # Component: A marker emphasizing that the result is example data.
    demo_uuid_base_parts = ["example.org"]

    # Component: Present working directory, relative to the base directory
    # when the base directory is an ancestor of it.
    base_dir_resolved_path = base_dir_original_path.resolve()
    srcdir_resolved_path = pathlib.Path(os.getcwd()).resolve()
    try:
        srcdir_relative_path = srcdir_resolved_path.relative_to(base_dir_resolved_path)
    except ValueError:
        # The base directory is not an ancestor of the working directory, so
        # fall back to the working directory's resolved path.
        demo_uuid_base_parts.append(str(srcdir_resolved_path))
    else:
        demo_uuid_base_parts.append(str(srcdir_relative_path))

    # Component: Command of argument vector.
    env_venv_name = os.getenv("VIRTUAL_ENV")
    if env_venv_name is None:
        demo_uuid_base_parts.append(sys.argv[0])
    else:
        # Express the command relative to the virtual environment so the
        # result does not depend on where the environment was created.
        command_resolved_path = pathlib.Path(sys.argv[0]).resolve()
        venv_resolved_path = pathlib.Path(env_venv_name).resolve()
        try:
            command_relative_path = command_resolved_path.relative_to(
                venv_resolved_path
            )
        except ValueError:
            # The command does not live inside the virtual environment.
            demo_uuid_base_parts.append(str(command_resolved_path))
        else:
            demo_uuid_base_parts.append(str(command_relative_path))

    # Component: Arguments of argument vector (a no-op when there are none).
    demo_uuid_base_parts.extend(sys.argv[1:])

    DEMO_UUID_BASE = "/".join(demo_uuid_base_parts)


def demo_uuid() -> str:
Expand All @@ -39,38 +110,34 @@ def demo_uuid() -> str:

WARNING: This function was developed for use ONLY for reducing (but not eliminating) version-control edits to identifiers in sample data. It creates UUIDs that are decidedly NOT random, and should remain consistent on repeated calls to the importing script.

To prevent accidental non-random UUID usage, an environment variable must be set to an uncommon string, hard-coded in this function.
To prevent accidental non-random UUID usage, an environment variable must be set to a string provided by the caller. The variable's required value is the path to some directory. The variable's recommended value is the equivalent of the Make variable "top_srcdir" - that is, the root directory of the containing Git repository, some parent of the current process's current working directory.
"""
global DEMO_UUID_BASE
global DEMO_UUID_COUNTER

if os.getenv("DEMO_UUID_REQUESTING_NONRANDOM") != "NONRANDOM_REQUESTED":
raise EnvironmentError(
"demo_uuid() called without DEMO_UUID_REQUESTING_NONRANDOM in environment."
if os.getenv("CASE_DEMO_NONRANDOM_UUID_BASE") is None:
raise ValueError(
"demo_uuid() called without CASE_DEMO_NONRANDOM_UUID_BASE in environment."
)

# Component: An emphasis this is an example.
parts = ["example.org"]
if DEMO_UUID_BASE is None:
raise ValueError("demo_uuid() called with DEMO_UUID_BASE unset.")

parts = [DEMO_UUID_BASE]

# Component: Incrementing counter.
DEMO_UUID_COUNTER += 1
parts.append(str(DEMO_UUID_COUNTER))

# Component: Present working directory, replacing $HOME with '~'.
env_HOME: str = os.getenv("HOME", "/nonexistent")
parts.append(os.getcwd().replace(env_HOME, "~"))

# Component: Argument vector.
parts.extend(sys.argv)

return str(uuid.uuid5(uuid.NAMESPACE_URL, "/".join(parts)))


def local_uuid() -> str:
    """
    Generate either a UUID4, or if requested via environment configuration, a non-random demo UUID.

    The choice depends only on DEMO_UUID_BASE: while it is None a random UUID4 string is returned; otherwise the result of demo_uuid() is returned.  The legacy USE_DEMO_UUID flag is intentionally not consulted, so the deprecated DEMO_UUID_REQUESTING_NONRANDOM variable cannot force the demo path.
    """
    # Reading a module-level name needs no `global` declaration.
    if DEMO_UUID_BASE is None:
        return str(uuid.uuid4())
    return demo_uuid()
12 changes: 8 additions & 4 deletions tests/case_utils/case_file/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,10 @@ sample.txt.json: \
$(top_srcdir)/case_utils/namespace.py \
sample.txt-nocompact.json
rm -f $@ _$@ __$@
export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
&& source $(tests_srcdir)/venv/bin/activate \
&& case_file \
--debug \
__$@ \
sample.txt
source $(tests_srcdir)/venv/bin/activate \
Expand Down Expand Up @@ -146,9 +147,10 @@ sample.txt.ttl: \
$(top_srcdir)/case_utils/namespace.py \
sample.txt.done.log
rm -f _$@ __$@
export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
&& source $(tests_srcdir)/venv/bin/activate \
&& case_file \
--debug \
__$@ \
sample.txt
java -jar $(RDF_TOOLKIT_JAR) \
Expand All @@ -169,9 +171,10 @@ sample.txt-disable_hashes.ttl: \
$(top_srcdir)/case_utils/namespace.py \
sample.txt.done.log
rm -f _$@ __$@
export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
&& source $(tests_srcdir)/venv/bin/activate \
&& case_file \
--debug \
--disable-hashes \
__$@ \
sample.txt
Expand All @@ -194,9 +197,10 @@ sample.txt-nocompact.json: \
$(top_srcdir)/case_utils/namespace.py \
sample.txt.done.log
rm -f _$@
export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
&& source $(tests_srcdir)/venv/bin/activate \
&& case_file \
--debug \
_$@ \
sample.txt
# To avoid making noisy, uninformative updates from blank node identifiers, only move the new file into place if it is not isomorphic with the Git-tracked version of the target.
Expand Down
34 changes: 17 additions & 17 deletions tests/case_utils/case_file/kb.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,23 @@
},
"@graph": [
{
"@id": "kb:file-1080c4cc-7886-5a52-bac1-f6a2b16c0ddb",
"@id": "kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9",
"@type": "uco-observable:File",
"uco-core:hasFacet": {
"@type": "uco-observable:FileFacet",
"uco-observable:fileName": "sample.txt",
"uco-observable:modifiedTime": {
"@type": "xsd:dateTime",
"@value": "2010-01-02T03:04:56+00:00"
},
"uco-observable:sizeInBytes": {
"@type": "xsd:integer",
"@value": "4"
}
}
},
{
"@id": "kb:file-ace6460a-92a9-58b9-83ea-a18ae87f6e04",
"@type": "uco-observable:File",
"uco-core:hasFacet": [
{
Expand Down Expand Up @@ -78,22 +94,6 @@
}
}
]
},
{
"@id": "kb:file-b5e8a943-c556-5964-a618-8f0d000822af",
"@type": "uco-observable:File",
"uco-core:hasFacet": {
"@type": "uco-observable:FileFacet",
"uco-observable:fileName": "sample.txt",
"uco-observable:modifiedTime": {
"@type": "xsd:dateTime",
"@value": "2010-01-02T03:04:56+00:00"
},
"uco-observable:sizeInBytes": {
"@type": "xsd:integer",
"@value": "4"
}
}
}
]
}
22 changes: 11 additions & 11 deletions tests/case_utils/case_file/kb.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,17 @@
@prefix uco-vocabulary: <https://ontology.unifiedcyberontology.org/uco/vocabulary/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

kb:file-1080c4cc-7886-5a52-bac1-f6a2b16c0ddb
kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9
a uco-observable:File ;
uco-core:hasFacet [
a uco-observable:FileFacet ;
uco-observable:fileName "sample.txt" ;
uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ;
uco-observable:sizeInBytes "4"^^xsd:integer ;
] ;
.

kb:file-ace6460a-92a9-58b9-83ea-a18ae87f6e04
a uco-observable:File ;
uco-core:hasFacet
[
Expand Down Expand Up @@ -46,13 +56,3 @@ kb:file-1080c4cc-7886-5a52-bac1-f6a2b16c0ddb
;
.

kb:file-b5e8a943-c556-5964-a618-8f0d000822af
a uco-observable:File ;
uco-core:hasFacet [
a uco-observable:FileFacet ;
uco-observable:fileName "sample.txt" ;
uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ;
uco-observable:sizeInBytes "4"^^xsd:integer ;
] ;
.

2 changes: 1 addition & 1 deletion tests/case_utils/case_file/sample.txt-disable_hashes.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
@prefix uco-observable: <https://ontology.unifiedcyberontology.org/uco/observable/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

kb:file-b5e8a943-c556-5964-a618-8f0d000822af
kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9
a uco-observable:File ;
uco-core:hasFacet [
a uco-observable:FileFacet ;
Expand Down
Loading