diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 77f70226..b1512306 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,57 +1,53 @@ repos: - - repo: https://github.com/PyCQA/isort - rev: 5.8.0 - hooks: - - id: isort - language_version: python3.8 - - - repo: https://github.com/psf/black - rev: 20.8b1 - hooks: - - id: black - args: ['--safe'] - language_version: python3.8 - - - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.0 - hooks: - - id: flake8 - language_version: python3.8 - args: [ - # E501 let black handle all line length decisions - # W503 black conflicts with "line break before operator" rule - # E203 black conflicts with "whitespace before ':'" rule - '--ignore=E501,W503,E203,C901'] - - - repo: https://github.com/chewse/pre-commit-mirrors-pydocstyle - # 2.1.1 - rev: v2.1.1 - hooks: - - id: pydocstyle - language_version: python3.8 - exclude: '.*(test|alembic|scripts).*' - args: [ - # Check for docstring presence only - '--select=D1', + - repo: https://github.com/PyCQA/isort + rev: 5.8.0 + hooks: + - id: isort + language_version: python3.8 + - repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black + args: [ '--safe' ] + language_version: python3.8 + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.9.0 + hooks: + - id: flake8 + language_version: python3.8 + args: [ + # E501 let black handle all line length decisions + # W503 black conflicts with "line break before operator" rule + # E203 black conflicts with "whitespace before ':'" rule + '--ignore=E501,W503,E203,C901' ] + - repo: https://github.com/chewse/pre-commit-mirrors-pydocstyle + # 2.1.1 + rev: v2.1.1 + hooks: + - id: pydocstyle + language_version: python3.8 + exclude: '.*(test|alembic|scripts).*' + args: [ + # Check for docstring presence only + '--select=D1', - ] - # Don't require docstrings for tests - # '--match=(?!test).*\.py'] -# - -# repo: https://github.com/pre-commit/mirrors-mypy -# rev: v0.770 -# hooks: -# - id: mypy -# language_version: python3.8 -# args: [--no-strict-optional, --ignore-missing-imports] - - - repo: https://github.com/PyCQA/pydocstyle - rev: 6.0.0 - hooks: - - id: pydocstyle - language_version: python3.8 - exclude: '.*(test|alembic|scripts).*' - #args: [ - # Don't require docstrings for tests - #'--match=(?!test|alembic|scripts).*\.py', - #] \ No newline at end of file + ] + # Don't require docstrings for tests + # '--match=(?!test).*\.py'] + # - + # repo: https://github.com/pre-commit/mirrors-mypy + # rev: v0.770 + # hooks: + # - id: mypy + # language_version: python3.8 + # args: [--no-strict-optional, --ignore-missing-imports] + - repo: https://github.com/PyCQA/pydocstyle + rev: 6.0.0 + hooks: + - id: pydocstyle + language_version: python3.8 + exclude: '.*(test|alembic|scripts).*' + #args: [ + # Don't require docstrings for tests + #'--match=(?!test|alembic|scripts).*\.py', + #] \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index c43f980f..f03ba0e9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,10 @@ FROM python:3.8-slim as base FROM base as builder # Any python libraries that require system libraries to be installed will likely # need the following packages in order to build -RUN apt-get update && apt-get install -y build-essential git +RUN apt-get update && \ + apt-get install -y build-essential git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt @@ -11,10 +14,9 @@ ARG install_dev_dependencies=true WORKDIR /app -# Install stac_fastapi.types COPY . /app ENV PATH=$PATH:/install/bin -RUN mkdir -p /install && \ - pip install -e ./stac_fastapi/elasticsearch[dev,server] +RUN mkdir -p /install +RUN pip install --no-cache-dir -e ./stac_fastapi/elasticsearch[dev,server] diff --git a/README.md b/README.md index fa248c89..457de3ae 100644 --- a/README.md +++ b/README.md @@ -10,30 +10,51 @@ Install [pre-commit](https://pre-commit.com/#install). Prior to commit, run: -``` +```shell pre-commit run --all-files` ``` +```shell +cd stac_fastapi/elasticsearch +pip install .[dev] +``` + ## Building -``` +```shell docker-compose build ``` ## Running API on localhost:8083 -``` +```shell docker-compose up ``` -## Testing +To create a new Collection: +```shell +curl -X "POST" "http://localhost:8083/collections" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "id": "my_collection" +}' ``` + +Note: this "Collections Transaction" behavior is not part of the STAC API, but may be soon. + +## Testing + +```shell make test ``` ## Ingest sample data -``` +```shell make ingest ``` + +## Elasticsearch Mappings + +Mappings apply to search index, not source. \ No newline at end of file diff --git a/stac_fastapi/elasticsearch/setup.py b/stac_fastapi/elasticsearch/setup.py index e78bad36..3088351a 100644 --- a/stac_fastapi/elasticsearch/setup.py +++ b/stac_fastapi/elasticsearch/setup.py @@ -27,6 +27,7 @@ "pre-commit", "requests", "ciso8601", + "overrides", ], "docs": ["mkdocs", "mkdocs-material", "pdocs"], "server": ["uvicorn[standard]>=0.12.0,<0.14.0"], diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py index 8c3468fe..5db2007e 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py @@ -227,7 +227,8 @@ def get_search( return resp - def bbox2poly(self, b0, b1, b2, b3): + @staticmethod + def bbox2poly(b0, b1, b2, b3): """Transform bbox to polygon.""" poly = [[[b0, b1], [b2, b1], [b2, b3], [b0, b3], [b0, b1]]] return poly diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/serializers.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/serializers.py index a8c19413..f196ff7e 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/serializers.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/serializers.py @@ -24,7 +24,7 @@ class ItemSerializer(Serializer): """Serialization methods for STAC items.""" @classmethod - def stac_to_db(cls, stac_data: TypedDict, base_url: str) -> stac_types.Item: + def stac_to_db(cls, stac_data: stac_types.Item, base_url: str) -> stac_types.Item: """Transform STAC Item to database-ready STAC Item.""" item_links = ItemLinks( collection_id=stac_data["collection"], diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/transactions.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/transactions.py index 1746b8f9..52ed2e00 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/transactions.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/transactions.py @@ -2,10 +2,12 @@ import logging from datetime import datetime, timezone +from typing import Optional import attr import elasticsearch from elasticsearch import helpers +from overrides import overrides from stac_fastapi.elasticsearch.config import ElasticsearchSettings from stac_fastapi.elasticsearch.serializers import CollectionSerializer, ItemSerializer @@ -30,37 +32,37 @@ class TransactionsClient(BaseTransactionsClient): settings = ElasticsearchSettings() client = settings.create_client - def _create_item_index(self): - mapping = { - "mappings": { - "properties": { - "geometry": {"type": "geo_shape"}, - "id": {"type": "text", "fields": {"keyword": {"type": "keyword"}}}, - "properties__datetime": { - "type": "text", - "fields": {"keyword": {"type": "keyword"}}, - }, - } - } + mappings = { + "properties": { + "geometry": {"type": "geo_shape"}, + "id": {"type": "text", "fields": {"keyword": {"type": "keyword"}}}, + "properties__datetime": { + "type": "text", + "fields": {"keyword": {"type": "keyword"}}, + }, } + } - _ = self.client.indices.create( + def create_item_index(self): + """Create the index for Items.""" + self.client.indices.create( index="stac_items", - body=mapping, + body={"mappings": self.mappings}, ignore=400, # ignore 400 already exists code ) - def create_item(self, model: stac_types.Item, **kwargs): + @overrides + def create_item(self, item: stac_types.Item, **kwargs) -> stac_types.Item: """Create item.""" base_url = str(kwargs["request"].base_url) - self._create_item_index() + self.create_item_index() # If a feature collection is posted - if model["type"] == "FeatureCollection": + if item["type"] == "FeatureCollection": bulk_client = BulkTransactionsClient() processed_items = [ bulk_client._preprocess_item(item, base_url) - for item in model["features"] + for item in item["features"] ] return_msg = f"Successfully added {len(processed_items)} items." bulk_client.bulk_sync(processed_items) @@ -68,69 +70,44 @@ def create_item(self, model: stac_types.Item, **kwargs): return return_msg # If a single item is posted - if not self.client.exists(index="stac_collections", id=model["collection"]): - raise ForeignKeyError(f"Collection {model['collection']} does not exist") + if not self.client.exists(index="stac_collections", id=item["collection"]): + raise ForeignKeyError(f"Collection {item['collection']} does not exist") - if self.client.exists(index="stac_items", id=model["id"]): + if self.client.exists(index="stac_items", id=item["id"]): raise ConflictError( - f"Item {model['id']} in collection {model['collection']} already exists" + f"Item {item['id']} in collection {item['collection']} already exists" ) - data = ItemSerializer.stac_to_db(model, base_url) - - self.client.index( - index="stac_items", doc_type="_doc", id=model["id"], document=data - ) - return ItemSerializer.db_to_stac(model, base_url) - - def create_collection(self, model: stac_types.Collection, **kwargs): - """Create collection.""" - base_url = str(kwargs["request"].base_url) - collection_links = CollectionLinks( - collection_id=model["id"], base_url=base_url - ).create_links() - model["links"] = collection_links - - self._create_item_index() + data = ItemSerializer.stac_to_db(item, base_url) - if self.client.exists(index="stac_collections", id=model["id"]): - raise ConflictError(f"Collection {model['id']} already exists") self.client.index( - index="stac_collections", doc_type="_doc", id=model["id"], document=model + index="stac_items", doc_type="_doc", id=item["id"], document=data ) - return CollectionSerializer.db_to_stac(model, base_url) + return ItemSerializer.db_to_stac(item, base_url) - def update_item(self, model: stac_types.Item, **kwargs): + @overrides + def update_item(self, item: stac_types.Item, **kwargs) -> stac_types.Item: """Update item.""" base_url = str(kwargs["request"].base_url) now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") - model["properties"]["updated"] = str(now) + item["properties"]["updated"] = str(now) - if not self.client.exists(index="stac_collections", id=model["collection"]): - raise ForeignKeyError(f"Collection {model['collection']} does not exist") - if not self.client.exists(index="stac_items", id=model["id"]): + if not self.client.exists(index="stac_collections", id=item["collection"]): + raise ForeignKeyError(f"Collection {item['collection']} does not exist") + if not self.client.exists(index="stac_items", id=item["id"]): raise NotFoundError( - f"Item {model['id']} in collection {model['collection']} doesn't exist" + f"Item {item['id']} in collection {item['collection']} doesn't exist" ) - self.delete_item(model["id"], model["collection"]) - self.create_item(model, **kwargs) + self.delete_item(item["id"], item["collection"]) + self.create_item(item, **kwargs) # self.client.update(index="stac_items",doc_type='_doc',id=model["id"], # body=model) - return ItemSerializer.db_to_stac(model, base_url) - - def update_collection(self, model: stac_types.Collection, **kwargs): - """Update collection.""" - base_url = str(kwargs["request"].base_url) - try: - _ = self.client.get(index="stac_collections", id=model["id"]) - except elasticsearch.exceptions.NotFoundError: - raise NotFoundError(f"Collection {model['id']} not found") - self.delete_collection(model["id"]) - self.create_collection(model, **kwargs) - - return CollectionSerializer.db_to_stac(model, base_url) + return ItemSerializer.db_to_stac(item, base_url) - def delete_item(self, item_id: str, collection_id: str, **kwargs): + @overrides + def delete_item( + self, item_id: str, collection_id: str, **kwargs + ) -> stac_types.Item: """Delete item.""" try: _ = self.client.get(index="stac_items", id=item_id) @@ -138,7 +115,46 @@ def delete_item(self, item_id: str, collection_id: str, **kwargs): raise NotFoundError(f"Item {item_id} not found") self.client.delete(index="stac_items", doc_type="_doc", id=item_id) - def delete_collection(self, collection_id: str, **kwargs): + @overrides + def create_collection( + self, collection: stac_types.Collection, **kwargs + ) -> stac_types.Collection: + """Create collection.""" + base_url = str(kwargs["request"].base_url) + collection_links = CollectionLinks( + collection_id=collection["id"], base_url=base_url + ).create_links() + collection["links"] = collection_links + + self.create_item_index() + + if self.client.exists(index="stac_collections", id=collection["id"]): + raise ConflictError(f"Collection {collection['id']} already exists") + self.client.index( + index="stac_collections", + doc_type="_doc", + id=collection["id"], + document=collection, + ) + return CollectionSerializer.db_to_stac(collection, base_url) + + @overrides + def update_collection( + self, collection: stac_types.Collection, **kwargs + ) -> stac_types.Collection: + """Update collection.""" + base_url = str(kwargs["request"].base_url) + try: + _ = self.client.get(index="stac_collections", id=collection["id"]) + except elasticsearch.exceptions.NotFoundError: + raise NotFoundError(f"Collection {collection['id']} not found") + self.delete_collection(collection["id"]) + self.create_collection(collection, **kwargs) + + return CollectionSerializer.db_to_stac(collection, base_url) + + @overrides + def delete_collection(self, collection_id: str, **kwargs) -> stac_types.Collection: """Delete collection.""" try: _ = self.client.get(index="stac_collections", id=collection_id) @@ -178,10 +194,13 @@ def bulk_sync(self, processed_items): ] helpers.bulk(self.client, actions) - def bulk_item_insert(self, items: Items, **kwargs) -> str: + @overrides + def bulk_item_insert( + self, items: Items, chunk_size: Optional[int] = None, **kwargs + ) -> str: """Bulk item insertion using es.""" transactions_client = TransactionsClient() - transactions_client._create_item_index() + transactions_client.create_item_index() try: base_url = str(kwargs["request"].base_url) except Exception: