|
| 1 | +# Licensed to Elasticsearch B.V. under one or more contributor |
| 2 | +# license agreements. See the NOTICE file distributed with |
| 3 | +# this work for additional information regarding copyright |
| 4 | +# ownership. Elasticsearch B.V. licenses this file to you under |
| 5 | +# the Apache License, Version 2.0 (the "License"); you may |
| 6 | +# not use this file except in compliance with the License. |
| 7 | +# You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, |
| 12 | +# software distributed under the License is distributed on an |
| 13 | +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +# KIND, either express or implied. See the License for the |
| 15 | +# specific language governing permissions and limitations |
| 16 | +# under the License. |
| 17 | + |
| 18 | +""" |
| 19 | +Simple example with a single Document demonstrating how schema can be managed, |
| 20 | +including upgrading with reindexing. |
| 21 | +
|
| 22 | +Key concepts: |
| 23 | +
|
| 24 | + * setup() function to first initialize the schema (as index template) in |
| 25 | + elasticsearch. Can be called any time (recommended with every deploy of |
| 26 | + your app). |
| 27 | +
|
| 28 | + * migrate() function to be called any time when the schema changes - it |
| 29 | + will create a new index (named with the current timestamp) and update the |
| 30 | + alias. By default it will also (before flipping the alias) move the data |
| 31 | + from the previous index to the new one. |
| 32 | +
|
| 33 | + * BlogPost._matches() class method is required for this code to work since |
| 34 | + otherwise BlogPost will not be used to deserialize the documents as those |
| 35 | + will have index set to the concrete index whereas the class refers to the |
| 36 | + alias. |
| 37 | +""" |
| 38 | +import asyncio |
| 39 | +from datetime import datetime |
| 40 | +from fnmatch import fnmatch |
| 41 | + |
| 42 | +from elasticsearch_dsl import AsyncDocument, Date, Keyword, Text, async_connections |
| 43 | + |
# alias name used as the document's index name
ALIAS = "test-blog"
# wildcard pattern used for the index template and for matching hit indices
PATTERN = f"{ALIAS}-*"
| 46 | + |
| 47 | + |
class BlogPost(AsyncDocument):
    """
    Blog post document whose index name is the ``ALIAS`` alias.
    """

    title = Text()
    published = Date()
    tags = Keyword(multi=True)
    content = Text()

    def is_published(self):
        """
        Tell whether the post has a publication date that lies in the past.

        When ``published`` is falsy that falsy value is returned unchanged,
        otherwise the result of comparing it with the current time.
        """
        publish_time = self.published
        if not publish_time:
            return publish_time
        return datetime.now() > publish_time

    @classmethod
    def _matches(cls, hit):
        """
        Decide whether ``hit`` should be deserialized with this class.

        ``hit`` is the raw dict as returned by elasticsearch. Its ``_index``
        is compared against ``PATTERN`` rather than only against ``ALIAS``.
        """
        index_name = hit["_index"]
        return fnmatch(index_name, PATTERN)

    class Index:
        # the document is addressed through an alias, not an index name
        name = ALIAS
        # index settings; other index attributes such as analyzers could be
        # configured here as well
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }
| 69 | + |
| 70 | + |
async def setup():
    """
    Save the index template (mappings and settings) built from ``BlogPost``
    and make sure an initial index exists.

    Can be run at any time, ideally with every new code deploy; saving the
    template potentially overrides the one already stored in elasticsearch.
    """
    # build the template from the document's index definition and upload it
    template = BlogPost._index.as_template(ALIAS, PATTERN)
    await template.save()

    # nothing more to do when the alias already resolves to something
    already_present = await BlogPost._index.exists()
    if already_present:
        return

    # first run: create the initial index, there is no data to move yet
    await migrate(move_data=False)
| 86 | + |
| 87 | + |
async def migrate(move_data=True, update_alias=True):
    """
    Create a new, timestamp-named index and optionally switch over to it.

    With ``move_data=True`` (the default) the documents reachable through the
    alias are reindexed into the new index, which is then refreshed. With
    ``update_alias=True`` (the default) the alias is removed from the indices
    matching ``PATTERN`` and added to the new index.

    Note that while this function is running the application can still perform
    any and all searches without any loss of functionality. It should, however,
    not perform any writes at this time as those might be lost.
    """
    # the new index name is PATTERN with "*" replaced by the current timestamp
    stamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    next_index = PATTERN.replace("*", stamp)

    # low level client used for the index management calls below
    es = async_connections.get_connection()

    # create the new index, it will use the settings from the template
    await es.indices.create(index=next_index)

    if move_data:
        # copy everything behind the alias into the new index; the request
        # is issued with a one hour timeout
        reindex_body = {"source": {"index": ALIAS}, "dest": {"index": next_index}}
        patient_client = es.options(request_timeout=3600)
        await patient_client.reindex(body=reindex_body)
        # refresh the index to make the changes visible
        await es.indices.refresh(index=next_index)

    if not update_alias:
        return

    # repoint the alias to the newly created index in a single request
    alias_actions = [
        {"remove": {"alias": ALIAS, "index": PATTERN}},
        {"add": {"alias": ALIAS, "index": next_index}},
    ]
    await es.indices.update_aliases(body={"actions": alias_actions})
| 126 | + |
| 127 | + |
async def main():
    """
    Run the example end to end against a local elasticsearch node.

    Opens the default connection, runs ``setup()``, saves one ``BlogPost``
    whose content is this file's own source, then runs ``migrate()``. The
    connection is closed even when one of those steps raises.
    """
    # initiate the default connection to elasticsearch
    async_connections.create_connection(hosts=["http://localhost:9200"])

    try:
        # create the empty index
        await setup()

        # read this file's source; the context manager closes the file
        # handle instead of leaving it to the garbage collector
        with open(__file__, encoding="utf-8") as source_file:
            source_text = source_file.read()

        # create a new document
        bp = BlogPost(
            _id=0,
            title="Hello World!",
            tags=["testing", "dummy"],
            content=source_text,
        )
        await bp.save(refresh=True)

        # create new index
        await migrate()
    finally:
        # close the connection, also on failure
        await async_connections.get_connection().close()


if __name__ == "__main__":
    asyncio.run(main())
0 commit comments