Skip to content

Commit ccf99b2

Browse files
authored
Add env_nested_max_split setting (#534)
1 parent 65929cd commit ccf99b2

File tree

5 files changed

+117
-4
lines changed

5 files changed

+117
-4
lines changed

docs/index.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,58 @@ print(Settings().model_dump())
324324
`env_nested_delimiter` can be configured via the `model_config` as shown above, or via the
325325
`_env_nested_delimiter` keyword argument on instantiation.
326326

327+
By default environment variables are split by `env_nested_delimiter` into arbitrarily deep nested fields. You can limit
328+
the depth of the nested fields with the `env_nested_max_split` config setting. A common use case where this is particularly useful
329+
is for two-level deep settings, where the `env_nested_delimiter` (usually a single `_`) may be a substring of model
330+
field names. For example:
331+
332+
```bash
333+
# your environment
334+
export GENERATION_LLM_PROVIDER='anthropic'
335+
export GENERATION_LLM_API_KEY='your-api-key'
336+
export GENERATION_LLM_API_VERSION='2024-03-15'
337+
```
338+
339+
You could load them into the following settings model:
340+
341+
```py
342+
from pydantic import BaseModel
343+
344+
from pydantic_settings import BaseSettings, SettingsConfigDict
345+
346+
347+
class LLMConfig(BaseModel):
348+
provider: str = 'openai'
349+
api_key: str
350+
api_type: str = 'azure'
351+
api_version: str = '2023-03-15-preview'
352+
353+
354+
class GenerationConfig(BaseSettings):
355+
model_config = SettingsConfigDict(
356+
env_nested_delimiter='_', env_nested_max_split=1, env_prefix='GENERATION_'
357+
)
358+
359+
llm: LLMConfig
360+
...
361+
362+
363+
print(GenerationConfig().model_dump())
364+
"""
365+
{
366+
'llm': {
367+
'provider': 'anthropic',
368+
'api_key': 'your-api-key',
369+
'api_type': 'azure',
370+
'api_version': '2024-03-15',
371+
}
372+
}
373+
"""
374+
```
375+
376+
Without `env_nested_max_split=1` set, `GENERATION_LLM_API_KEY` would be parsed as `llm.api.key` instead of `llm.api_key`
377+
and it would raise a `ValidationError`.
378+
327379
Nested environment variables take precedence over the top-level environment variable JSON
328380
(e.g. in the example above, `SUB_MODEL__V2` trumps `SUB_MODEL`).
329381

pydantic_settings/main.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class SettingsConfigDict(ConfigDict, total=False):
3838
env_file_encoding: str | None
3939
env_ignore_empty: bool
4040
env_nested_delimiter: str | None
41+
env_nested_max_split: int | None
4142
env_parse_none_str: str | None
4243
env_parse_enums: bool | None
4344
cli_prog_name: str | None
@@ -112,6 +113,7 @@ class BaseSettings(BaseModel):
112113
_env_file_encoding: The env file encoding, e.g. `'latin-1'`. Defaults to `None`.
113114
_env_ignore_empty: Ignore environment variables where the value is an empty string. Defaults to `False`.
114115
_env_nested_delimiter: The nested env values delimiter. Defaults to `None`.
116+
_env_nested_max_split: The maximum nesting depth when splitting nested env values. Defaults to `None`, which means no limit.
115117
_env_parse_none_str: The env string value that should be parsed (e.g. "null", "void", "None", etc.)
116118
into `None` type(None). Defaults to `None` type(None), which means no parsing should occur.
117119
_env_parse_enums: Parse enum field names to values. Defaults to `None`, which means no parsing should occur.
@@ -148,6 +150,7 @@ def __init__(
148150
_env_file_encoding: str | None = None,
149151
_env_ignore_empty: bool | None = None,
150152
_env_nested_delimiter: str | None = None,
153+
_env_nested_max_split: int | None = None,
151154
_env_parse_none_str: str | None = None,
152155
_env_parse_enums: bool | None = None,
153156
_cli_prog_name: str | None = None,
@@ -178,6 +181,7 @@ def __init__(
178181
_env_file_encoding=_env_file_encoding,
179182
_env_ignore_empty=_env_ignore_empty,
180183
_env_nested_delimiter=_env_nested_delimiter,
184+
_env_nested_max_split=_env_nested_max_split,
181185
_env_parse_none_str=_env_parse_none_str,
182186
_env_parse_enums=_env_parse_enums,
183187
_cli_prog_name=_cli_prog_name,
@@ -232,6 +236,7 @@ def _settings_build_values(
232236
_env_file_encoding: str | None = None,
233237
_env_ignore_empty: bool | None = None,
234238
_env_nested_delimiter: str | None = None,
239+
_env_nested_max_split: int | None = None,
235240
_env_parse_none_str: str | None = None,
236241
_env_parse_enums: bool | None = None,
237242
_cli_prog_name: str | None = None,
@@ -270,6 +275,11 @@ def _settings_build_values(
270275
if _env_nested_delimiter is not None
271276
else self.model_config.get('env_nested_delimiter')
272277
)
278+
env_nested_max_split = (
279+
_env_nested_max_split
280+
if _env_nested_max_split is not None
281+
else self.model_config.get('env_nested_max_split')
282+
)
273283
env_parse_none_str = (
274284
_env_parse_none_str if _env_parse_none_str is not None else self.model_config.get('env_parse_none_str')
275285
)
@@ -333,6 +343,7 @@ def _settings_build_values(
333343
case_sensitive=case_sensitive,
334344
env_prefix=env_prefix,
335345
env_nested_delimiter=env_nested_delimiter,
346+
env_nested_max_split=env_nested_max_split,
336347
env_ignore_empty=env_ignore_empty,
337348
env_parse_none_str=env_parse_none_str,
338349
env_parse_enums=env_parse_enums,
@@ -344,6 +355,7 @@ def _settings_build_values(
344355
case_sensitive=case_sensitive,
345356
env_prefix=env_prefix,
346357
env_nested_delimiter=env_nested_delimiter,
358+
env_nested_max_split=env_nested_max_split,
347359
env_ignore_empty=env_ignore_empty,
348360
env_parse_none_str=env_parse_none_str,
349361
env_parse_enums=env_parse_enums,
@@ -412,6 +424,7 @@ def _settings_build_values(
412424
env_file_encoding=None,
413425
env_ignore_empty=False,
414426
env_nested_delimiter=None,
427+
env_nested_max_split=None,
415428
env_parse_none_str=None,
416429
env_parse_enums=None,
417430
cli_prog_name=None,

pydantic_settings/sources.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,7 @@ def __init__(
735735
case_sensitive: bool | None = None,
736736
env_prefix: str | None = None,
737737
env_nested_delimiter: str | None = None,
738+
env_nested_max_split: int | None = None,
738739
env_ignore_empty: bool | None = None,
739740
env_parse_none_str: str | None = None,
740741
env_parse_enums: bool | None = None,
@@ -745,6 +746,10 @@ def __init__(
745746
self.env_nested_delimiter = (
746747
env_nested_delimiter if env_nested_delimiter is not None else self.config.get('env_nested_delimiter')
747748
)
749+
self.env_nested_max_split = (
750+
env_nested_max_split if env_nested_max_split is not None else self.config.get('env_nested_max_split')
751+
)
752+
self.maxsplit = (self.env_nested_max_split or 0) - 1
748753
self.env_prefix_len = len(self.env_prefix)
749754

750755
self.env_vars = self._load_env_vars()
@@ -910,11 +915,13 @@ def explode_env_vars(self, field_name: str, field: FieldInfo, env_vars: Mapping[
910915
]
911916
result: dict[str, Any] = {}
912917
for env_name, env_val in env_vars.items():
913-
if not any(env_name.startswith(prefix) for prefix in prefixes):
918+
try:
919+
prefix = next(prefix for prefix in prefixes if env_name.startswith(prefix))
920+
except StopIteration:
914921
continue
915922
# we remove the prefix before splitting in case the prefix has characters in common with the delimiter
916-
env_name_without_prefix = env_name[self.env_prefix_len :]
917-
_, *keys, last_key = env_name_without_prefix.split(self.env_nested_delimiter)
923+
env_name_without_prefix = env_name[len(prefix) :]
924+
*keys, last_key = env_name_without_prefix.split(self.env_nested_delimiter, self.maxsplit)
918925
env_var = result
919926
target_field: FieldInfo | None = field
920927
for key in keys:
@@ -964,6 +971,7 @@ def __init__(
964971
case_sensitive: bool | None = None,
965972
env_prefix: str | None = None,
966973
env_nested_delimiter: str | None = None,
974+
env_nested_max_split: int | None = None,
967975
env_ignore_empty: bool | None = None,
968976
env_parse_none_str: str | None = None,
969977
env_parse_enums: bool | None = None,
@@ -977,6 +985,7 @@ def __init__(
977985
case_sensitive,
978986
env_prefix,
979987
env_nested_delimiter,
988+
env_nested_max_split,
980989
env_ignore_empty,
981990
env_parse_none_str,
982991
env_parse_enums,

tests/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ def docs_test_env():
7979
setenv.set('SUB_MODEL__V3', '3')
8080
setenv.set('SUB_MODEL__DEEP__V4', 'v4')
8181

82+
# envs for parsing environment variable values example with env_nested_max_split=1
83+
setenv.set('GENERATION_LLM_PROVIDER', 'anthropic')
84+
setenv.set('GENERATION_LLM_API_KEY', 'your-api-key')
85+
setenv.set('GENERATION_LLM_API_VERSION', '2024-03-15')
86+
8287
yield setenv
8388

8489
setenv.clear()

tests/test_settings.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pathlib
55
import sys
66
import uuid
7-
from datetime import datetime, timezone
7+
from datetime import date, datetime, timezone
88
from enum import IntEnum
99
from pathlib import Path
1010
from typing import Any, Callable, Dict, Generic, Hashable, List, Optional, Set, Tuple, Type, TypeVar, Union
@@ -398,6 +398,40 @@ class Cfg(BaseSettings):
398398
assert Cfg().model_dump() == {'sub_model': {'v1': '-1-', 'v2': '-2-'}}
399399

400400

401+
@pytest.mark.parametrize('env_prefix', [None, 'prefix_', 'prefix__'])
402+
def test_nested_env_max_split(env, env_prefix):
403+
class Person(BaseModel):
404+
sex: Literal['M', 'F']
405+
first_name: str
406+
date_of_birth: date
407+
408+
class Cfg(BaseSettings):
409+
caregiver: Person
410+
significant_other: Optional[Person] = None
411+
next_of_kin: Optional[Person] = None
412+
413+
model_config = SettingsConfigDict(env_nested_delimiter='_', env_nested_max_split=1)
414+
if env_prefix is not None:
415+
model_config['env_prefix'] = env_prefix
416+
417+
env_prefix = env_prefix or ''
418+
env.set(env_prefix + 'caregiver_sex', 'M')
419+
env.set(env_prefix + 'caregiver_first_name', 'Joe')
420+
env.set(env_prefix + 'caregiver_date_of_birth', '1975-09-12')
421+
env.set(env_prefix + 'significant_other_sex', 'F')
422+
env.set(env_prefix + 'significant_other_first_name', 'Jill')
423+
env.set(env_prefix + 'significant_other_date_of_birth', '1998-04-19')
424+
env.set(env_prefix + 'next_of_kin_sex', 'M')
425+
env.set(env_prefix + 'next_of_kin_first_name', 'Jack')
426+
env.set(env_prefix + 'next_of_kin_date_of_birth', '1999-04-19')
427+
428+
assert Cfg().model_dump() == {
429+
'caregiver': {'sex': 'M', 'first_name': 'Joe', 'date_of_birth': date(1975, 9, 12)},
430+
'significant_other': {'sex': 'F', 'first_name': 'Jill', 'date_of_birth': date(1998, 4, 19)},
431+
'next_of_kin': {'sex': 'M', 'first_name': 'Jack', 'date_of_birth': date(1999, 4, 19)},
432+
}
433+
434+
401435
class DateModel(BaseModel):
402436
pips: bool = False
403437

0 commit comments

Comments
 (0)