Skip to content

Commit b1d59f7

Browse files
autogenerate field classes from schema
1 parent b875a0b commit b1d59f7

File tree

2 files changed

+557
-0
lines changed

2 files changed

+557
-0
lines changed

utils/dsl-generator.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
trim_blocks=True,
3232
lstrip_blocks=True,
3333
)
34+
field_py = jinja_env.get_template("field.py.tpl")
3435
query_py = jinja_env.get_template("query.py.tpl")
3536
aggs_py = jinja_env.get_template("aggs.py.tpl")
3637
response_init_py = jinja_env.get_template("response.__init__.py.tpl")
@@ -738,6 +739,35 @@ def interface_to_python_class(
738739
}
739740
)
740741
k["buckets_as_dict"] = generic_type
742+
elif namespace == '_types.mapping':
743+
if arg['name'] in ['fields', 'properties']:
744+
# Python DSL provides a high level representation for the
745+
# "fields" and 'properties' properties that many types support
746+
k['args'].append({
747+
'name': arg['name'],
748+
"type": 'Union[Mapping[str, Field], "DefaultType"]',
749+
"doc": [f":arg {arg['name']}:"],
750+
"required": False,
751+
})
752+
if 'params' not in k:
753+
k['params'] = []
754+
k['params'].append({'name': arg['name'], 'param': {'type': 'field', 'hash': True}})
755+
756+
else:
757+
# also the Python DSL provides implementations of analyzers
758+
# and normalizers, so here we make sure these are noted as
759+
# params.
760+
self.add_attribute(
761+
k, arg, for_types_py=for_types_py, for_response=for_response
762+
)
763+
if arg['name'].endswith('analyzer'):
764+
if 'params' not in k:
765+
k['params'] = []
766+
k['params'].append({'name': arg['name'], 'param': {'type': 'analyzer'}})
767+
elif arg['name'].endswith('normalizer'):
768+
if 'params' not in k:
769+
k['params'] = []
770+
k['params'].append({'name': arg['name'], 'param': {'type': 'normalizer'}})
741771
else:
742772
if interface == "Hit" and arg["name"].startswith("_"):
743773
# Python DSL removes the underscore prefix from all the
@@ -766,6 +796,77 @@ def interface_to_python_class(
766796
return k
767797

768798

799+
def generate_field_py(schema, filename):
    """Generate field.py with all the Elasticsearch fields as Python classes.

    Every variant of the ``Property`` union found in the ``_types.mapping``
    namespace, except ``DynamicProperty``, is converted into a class
    description by ``schema.interface_to_python_class()``. The descriptions
    are then rendered with the module-level ``field_py`` template and written
    to ``filename``.

    :arg schema: schema object providing ``find_type()`` and
        ``interface_to_python_class()``.
    :arg filename: path of the file to (over)write.
    :raises RuntimeError: if a field's ``type`` property is not a literal
        value, or if a field does not declare a ``type`` property at all.
    """
    # Field name -> name of the DSL class the generated class derives from.
    # Anything not listed here derives directly from ``Field``.
    parent_by_field = {
        **dict.fromkeys(
            ["half_float", "scaled_float", "double", "rank_feature"], "Float"
        ),
        **dict.fromkeys(["byte", "short", "long"], "Integer"),
        **dict.fromkeys(
            [
                "integer_range",
                "float_range",
                "long_range",
                "double_range",
                "date_range",
            ],
            "RangeField",
        ),
        "nested": "Object",
    }
    coerced_fields = {
        "boolean",
        "date",
        "float",
        "object",
        "dense_vector",
        "integer",
        "ip",
        "binary",
        "percolator",
    }
    # Generated classes that other generated classes inherit from; they have
    # to be emitted before their subclasses.
    generated_parents = {"Float", "Integer", "Object"}

    classes = []
    property_union = schema.find_type("Property", "_types.mapping")
    for variant in property_union["type"]["items"]:
        type_name = variant["type"]["name"]
        if type_name == "DynamicProperty":
            # no support for dynamic properties
            continue
        type_namespace = variant["type"]["namespace"]
        field = schema.find_type(type_name, type_namespace)

        # The Elasticsearch field name is the literal value of the "type"
        # property declared by the interface.
        name = ""
        for prop in field["properties"]:
            if prop["name"] != "type":
                continue
            if prop["type"]["kind"] != "literal_value":
                raise RuntimeError(f"Unexpected property type {prop}")
            name = prop["type"]["value"]
        if not name:
            # Without a name the template would render a nameless class, so
            # fail here instead of writing a broken module.
            raise RuntimeError(f"No literal type found for field {type_name}")
        class_name = "".join(part.title() for part in name.split("_"))

        k = schema.interface_to_python_class(
            type_name, type_namespace, for_types_py=False, for_response=False
        )
        k["name"] = class_name
        k["field"] = name
        k["coerced"] = name in coerced_fields
        k["parent"] = parent_by_field.get(name, "Field")
        # "type" is implied by the class itself, so it is not an argument
        k["args"] = [arg for arg in k["args"] if arg["name"] != "type"]
        if name == "object":
            # the DSL's object field has a doc_class argument
            k["args"] = [
                {
                    "name": "doc_class",
                    "type": 'Union[Type["InnerDoc"], "DefaultType"]',
                    "doc": [
                        ":arg doc_class: base doc class that handles mapping.",
                        " If no `doc_class` is provided, new instance of `InnerDoc` will be created,",
                        " populated with `properties` and used. Can not be provided together with `properties`",
                    ],
                    "required": False,
                }
            ] + k["args"]
        elif name == "date":
            # the DSL's date field has a default_timezone argument
            k["args"] = [
                {
                    "name": "default_timezone",
                    "type": 'Union[str, "tzinfo", "DefaultType"]',
                    "doc": [
                        ":arg default_timezone: timezone that will be automatically used for tz-naive values",
                        " May be instance of `datetime.tzinfo` or string containing TZ offset",
                    ],
                    "required": False,
                }
            ] + k["args"]
        classes.append(k)

    # make sure parent classes appear first, everything else alphabetically
    classes.sort(key=lambda k: (k["name"] not in generated_parents, k["name"]))

    # The output is Python source, so write it as UTF-8 regardless of the
    # platform's default locale encoding.
    with open(filename, "wt", encoding="utf-8") as f:
        f.write(field_py.render(classes=classes))
    print(f"Generated {filename}.")
868+
869+
769870
def generate_query_py(schema, filename):
770871
"""Generate query.py with all the properties of `QueryContainer` as Python
771872
classes.
@@ -849,6 +950,7 @@ def generate_types_py(schema, filename):
849950

850951
if __name__ == "__main__":
851952
schema = ElasticsearchSchema()
953+
generate_field_py(schema, "elasticsearch/dsl/field.py")
852954
generate_query_py(schema, "elasticsearch/dsl/query.py")
853955
generate_aggs_py(schema, "elasticsearch/dsl/aggs.py")
854956
generate_response_init_py(schema, "elasticsearch/dsl/response/__init__.py")

0 commit comments

Comments
 (0)