Skip to content

Commit 68687c2

Browse files
committed
Update local_uuid configuration, deprecating prior variable
`case_utils.local_uuid` provides a deterministic UUIDv5 generator based on the execution environment. Unfortunately, its prior implementation was not behaving correctly when changing environments - a different operating system, or working in a differently rooted directory, would cause UUID churn. The issue turned out to be absolute paths being embedded in the UUID seeding data in two ways: 1. The prior implementation assumed work was happening somewhere under a user-home directory. This is not always true. 2. The prior implementation did not catch that for a program `pip`-installed into a virtual environment in editable mode (or, possibly more generally than that), `sys.argv[0]` is the absolute path to the installed command in the virtual environment root. Hence, changing operating systems from macOS to Linux would change `/Users/me` to `/home/me` within the UUID seeding data. This patch revises the practice from functionally operating in a Boolean manner ("Was this special string passed?") to passing an anchoring directory, preferably the repository's Git-root directory. Usage of the prior variable `DEMO_UUID_REQUESTING_NONRANDOM` is now deprecated, raising a warning. A test is also added to confirm new warnings are raised. A follow-on patch will regenerate Make-managed files. Signed-off-by: Alex Nelson <alexander.nelson@nist.gov>
1 parent b929496 commit 68687c2

File tree

3 files changed

+127
-24
lines changed

3 files changed

+127
-24
lines changed

case_utils/local_uuid.py

Lines changed: 87 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,91 @@
1717

1818
__version__ = "0.2.0"
1919

20+
import logging
2021
import os
22+
import pathlib
2123
import sys
24+
import typing
25+
import warnings
2226
import uuid
2327

24-
USE_DEMO_UUID: bool = False
28+
DEMO_UUID_BASE: typing.Optional[str] = None
2529

2630
DEMO_UUID_COUNTER: int = 0
2731

32+
_logger = logging.getLogger(pathlib.Path(__file__).name)
33+
2834

2935
def configure() -> None:
30-
global USE_DEMO_UUID
36+
global DEMO_UUID_BASE
3137

3238
if os.getenv("DEMO_UUID_REQUESTING_NONRANDOM") == "NONRANDOM_REQUESTED":
33-
USE_DEMO_UUID = True
39+
warnings.warn(
40+
"Environment variable DEMO_UUID_REQUESTING_NONRANDOM is deprecated. See case_utils.local_uuid.demo_uuid for usage notes on its replacement, CASE_DEMO_NONRANDOM_UUID_BASE. Proceeding with random UUIDs.",
41+
DeprecationWarning,
42+
)
43+
return
44+
45+
env_base_dir_name = os.getenv("CASE_DEMO_NONRANDOM_UUID_BASE")
46+
if env_base_dir_name is None:
47+
return
48+
49+
base_dir_original_path = pathlib.Path(env_base_dir_name)
50+
if not base_dir_original_path.exists():
51+
warnings.warn(
52+
"Environment variable CASE_DEMO_NONRANDOM_UUID_BASE is expected to refer to an existing directory. Proceeding with random UUIDs."
53+
)
54+
return
55+
if not base_dir_original_path.is_dir():
56+
warnings.warn(
57+
"Environment variable CASE_DEMO_NONRANDOM_UUID_BASE is expected to refer to a directory. Proceeding with random UUIDs."
58+
)
59+
return
60+
61+
# Component: An emphasis that this is an example.
62+
demo_uuid_base_parts = ["example.org"]
63+
64+
# Component: Present working directory, relative to CASE_DEMO_NONRANDOM_UUID_BASE if the directory named by that environment variable is an ancestor of pwd.
65+
base_dir_resolved_path = base_dir_original_path.resolve()
66+
srcdir_original_path = pathlib.Path(os.getcwd())
67+
srcdir_resolved_path = srcdir_original_path.resolve()
68+
# _logger.debug("base_dir_resolved_path = %r.", base_dir_resolved_path)
69+
# _logger.debug("srcdir_resolved_path = %r.", srcdir_resolved_path)
70+
try:
71+
srcdir_relative_path = srcdir_resolved_path.relative_to(base_dir_resolved_path)
72+
# _logger.debug("srcdir_relative_path = %r.", srcdir_relative_path)
73+
demo_uuid_base_parts.append(str(srcdir_relative_path))
74+
except ValueError:
75+
# If base_dir is not an ancestor directory of srcdir, default to srcdir.
76+
# _logger.debug("PWD is not relative to base path.")
77+
demo_uuid_base_parts.append(str(srcdir_resolved_path))
78+
79+
# Component: Command of argument vector.
80+
env_venv_name = os.getenv("VIRTUAL_ENV")
81+
if env_venv_name is None:
82+
demo_uuid_base_parts.append(sys.argv[0])
83+
else:
84+
command_original_path = pathlib.Path(sys.argv[0])
85+
command_resolved_path = command_original_path.resolve()
86+
venv_original_path = pathlib.Path(env_venv_name)
87+
venv_resolved_path = venv_original_path.resolve()
88+
try:
89+
command_relative_path = command_resolved_path.relative_to(
90+
venv_resolved_path
91+
)
92+
# _logger.debug("command_relative_path = %r.", command_relative_path)
93+
demo_uuid_base_parts.append(str(command_relative_path))
94+
except ValueError:
95+
# _logger.debug("Command path is not relative to virtual environment path.")
96+
demo_uuid_base_parts.append(str(command_resolved_path))
97+
98+
if len(sys.argv) > 1:
99+
# Component: Arguments of argument vector.
100+
demo_uuid_base_parts.extend(sys.argv[1:])
101+
102+
# _logger.debug("demo_uuid_base_parts = %r.", demo_uuid_base_parts)
103+
104+
DEMO_UUID_BASE = "/".join(demo_uuid_base_parts)
34105

35106

36107
def demo_uuid() -> str:
@@ -39,38 +110,34 @@ def demo_uuid() -> str:
39110
40111
WARNING: This function was developed for use ONLY for reducing (but not eliminating) version-control edits to identifiers in sample data. It creates UUIDs that are decidedly NOT random, and should remain consistent on repeated calls to the importing script.
41112
42-
To prevent accidental non-random UUID usage, an environment variable must be set to an uncommon string, hard-coded in this function.
113+
To prevent accidental non-random UUID usage, an environment variable must be set to a string provided by the caller. The variable's required value is the path to some directory. The variable's recommended value is the equivalent of the Make variable "top_srcdir" - that is, the root directory of the containing Git repository, some parent of the current process's current working directory.
43114
"""
115+
global DEMO_UUID_BASE
44116
global DEMO_UUID_COUNTER
45117

46-
if os.getenv("DEMO_UUID_REQUESTING_NONRANDOM") != "NONRANDOM_REQUESTED":
47-
raise EnvironmentError(
48-
"demo_uuid() called without DEMO_UUID_REQUESTING_NONRANDOM in environment."
118+
if os.getenv("CASE_DEMO_NONRANDOM_UUID_BASE") is None:
119+
raise ValueError(
120+
"demo_uuid() called without CASE_DEMO_NONRANDOM_UUID_BASE in environment."
49121
)
50122

51-
# Component: An emphasis this is an example.
52-
parts = ["example.org"]
123+
if DEMO_UUID_BASE is None:
124+
raise ValueError("demo_uuid() called with DEMO_UUID_BASE unset.")
125+
126+
parts = [DEMO_UUID_BASE]
53127

54128
# Component: Incrementing counter.
55129
DEMO_UUID_COUNTER += 1
56130
parts.append(str(DEMO_UUID_COUNTER))
57131

58-
# Component: Present working directory, replacing $HOME with '~'.
59-
env_HOME: str = os.getenv("HOME", "/nonexistent")
60-
parts.append(os.getcwd().replace(env_HOME, "~"))
61-
62-
# Component: Argument vector.
63-
parts.extend(sys.argv)
64-
65132
return str(uuid.uuid5(uuid.NAMESPACE_URL, "/".join(parts)))
66133

67134

68135
def local_uuid() -> str:
69136
"""
70137
Generate either a UUID4, or if requested via environment configuration, a non-random demo UUID.
71138
"""
72-
global USE_DEMO_UUID
73-
if USE_DEMO_UUID:
74-
return demo_uuid()
75-
else:
139+
global DEMO_UUID_BASE
140+
if DEMO_UUID_BASE is None:
76141
return str(uuid.uuid4())
142+
else:
143+
return demo_uuid()

tests/case_utils/case_file/Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ sample.txt.json: \
114114
$(top_srcdir)/case_utils/namespace.py \
115115
sample.txt-nocompact.json
116116
rm -f $@ _$@ __$@
117-
export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
117+
export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
118118
&& source $(tests_srcdir)/venv/bin/activate \
119119
&& case_file \
120120
--debug \
@@ -147,7 +147,7 @@ sample.txt.ttl: \
147147
$(top_srcdir)/case_utils/namespace.py \
148148
sample.txt.done.log
149149
rm -f _$@ __$@
150-
export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
150+
export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
151151
&& source $(tests_srcdir)/venv/bin/activate \
152152
&& case_file \
153153
--debug \
@@ -171,7 +171,7 @@ sample.txt-disable_hashes.ttl: \
171171
$(top_srcdir)/case_utils/namespace.py \
172172
sample.txt.done.log
173173
rm -f _$@ __$@
174-
export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
174+
export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
175175
&& source $(tests_srcdir)/venv/bin/activate \
176176
&& case_file \
177177
--debug \
@@ -197,7 +197,7 @@ sample.txt-nocompact.json: \
197197
$(top_srcdir)/case_utils/namespace.py \
198198
sample.txt.done.log
199199
rm -f _$@
200-
export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \
200+
export CASE_DEMO_NONRANDOM_UUID_BASE="$(top_srcdir)" \
201201
&& source $(tests_srcdir)/venv/bin/activate \
202202
&& case_file \
203203
--debug \

tests/case_utils/test_local_uuid.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env python3
2+
3+
# This software was developed at the National Institute of Standards
4+
# and Technology by employees of the Federal Government in the course
5+
# of their official duties. Pursuant to title 17 Section 105 of the
6+
# United States Code this software is not subject to copyright
7+
# protection and is in the public domain. NIST assumes no
8+
# responsibility whatsoever for its use by other parties, and makes
9+
# no guarantees, expressed or implied, about its quality,
10+
# reliability, or any other characteristic.
11+
#
12+
# We would appreciate acknowledgement if the software is used.
13+
14+
import os
15+
16+
import pytest
17+
18+
import case_utils.local_uuid
19+
20+
21+
def test_local_uuid_deprecation(monkeypatch: pytest.MonkeyPatch) -> None:
22+
monkeypatch.setenv("DEMO_UUID_REQUESTING_NONRANDOM", "NONRANDOM_REQUESTED")
23+
with pytest.warns(DeprecationWarning):
24+
case_utils.local_uuid.configure()
25+
26+
27+
def test_local_uuid_nondirectory(monkeypatch: pytest.MonkeyPatch) -> None:
28+
monkeypatch.setenv("CASE_DEMO_NONRANDOM_UUID_BASE", "/dev/null")
29+
with pytest.warns(UserWarning):
30+
case_utils.local_uuid.configure()
31+
32+
33+
def test_local_uuid_nonexistent(monkeypatch: pytest.MonkeyPatch) -> None:
34+
monkeypatch.setenv("CASE_DEMO_NONRANDOM_UUID_BASE", "/dev/nonexistent")
35+
with pytest.warns(UserWarning):
36+
case_utils.local_uuid.configure()

0 commit comments

Comments
 (0)