Skip to content

PYTHON-2371 Add Azure and GCP support for CSFLE #506

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions .evergreen/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -357,9 +357,14 @@ functions:
working_dir: "src"
script: |
if [ -n "${test_encryption}" ]; then
cat <<EOT > fle_aws_creds.sh
cat <<EOT > fle_creds.sh
export FLE_AWS_KEY="${fle_aws_key}"
export FLE_AWS_SECRET="${fle_aws_secret}"
export FLE_AZURE_CLIENTID="${fle_azure_clientid}"
export FLE_AZURE_TENANTID="${fle_azure_tenantid}"
export FLE_AZURE_CLIENTSECRET="${fle_azure_clientsecret}"
export FLE_GCP_EMAIL="${fle_gcp_email}"
export FLE_GCP_PRIVATEKEY="${fle_gcp_privatekey}"
EOT
fi
- command: shell.exec
Expand All @@ -381,8 +386,8 @@ functions:
if [ -n "${test_encryption}" ]; then
# Disable xtrace (just in case it was accidentally set).
set +x
. ./fle_aws_creds.sh
rm -f ./fle_aws_creds.sh
. ./fle_creds.sh
rm -f ./fle_creds.sh
export LIBMONGOCRYPT_URL="${libmongocrypt_url}"
export TEST_ENCRYPTION=1
fi
Expand Down
44 changes: 39 additions & 5 deletions pymongo/encryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,9 +358,21 @@ def __init__(self, kms_providers, key_vault_namespace, key_vault_client,
- `aws`: Map with "accessKeyId" and "secretAccessKey" as strings.
These are the AWS access key ID and AWS secret access key used
to generate KMS messages.
- `local`: Map with "key" as a 96-byte array or string. "key"
is the master key used to encrypt/decrypt data keys. This key
should be generated and stored as securely as possible.
- `azure`: Map with "tenantId", "clientId", and "clientSecret" as
strings. Additionally, "identityPlatformEndpoint" may also be
specified as a string (defaults to 'login.microsoftonline.com').
These are the Azure Active Directory credentials used to
generate Azure Key Vault messages.
- `gcp`: Map with "email" as a string and "privateKey"
as `bytes` or a base64 encoded string (unicode on Python 2).
Additionally, "endpoint" may also be specified as a string
(defaults to 'oauth2.googleapis.com'). These are the
credentials used to generate Google Cloud KMS messages.
- `local`: Map with "key" as `bytes` (96 bytes in length) or
a base64 encoded string (unicode on Python 2) which decodes
to 96 bytes. "key" is the master key used to encrypt/decrypt
data keys. This key should be generated and stored as securely
as possible.

- `key_vault_namespace`: The namespace for the key vault collection.
The key vault collection contains all data keys used for encryption
Expand Down Expand Up @@ -409,8 +421,10 @@ def create_data_key(self, kms_provider, master_key=None,
"aws" and "local".
- `master_key`: Identifies a KMS-specific key used to encrypt the
new data key. If the kmsProvider is "local" the `master_key` is
not applicable and may be omitted. If the `kms_provider` is "aws"
it is required and has the following fields::
not applicable and may be omitted.

If the `kms_provider` is "aws" it is required and has the
following fields::

- `region` (string): Required. The AWS region, e.g. "us-east-1".
- `key` (string): Required. The Amazon Resource Name (ARN) to
Expand All @@ -419,6 +433,26 @@ def create_data_key(self, kms_provider, master_key=None,
requests to. May include port number, e.g.
"kms.us-east-1.amazonaws.com:443".

If the `kms_provider` is "azure" it is required and has the
following fields::

- `keyVaultEndpoint` (string): Required. Host with optional
port, e.g. "example.vault.azure.net".
- `keyName` (string): Required. Key name in the key vault.
- `keyVersion` (string): Optional. Version of the key to use.

If the `kms_provider` is "gcp" it is required and has the
following fields::

- `projectId` (string): Required. The Google Cloud project ID.
- `location` (string): Required. The GCP location, e.g. "us-east1".
- `keyRing` (string): Required. Name of the key ring that contains
the key to use.
- `keyName` (string): Required. Name of the key to use.
- `keyVersion` (string): Optional. Version of the key to use.
- `endpoint` (string): Optional. Host with optional port.
Defaults to "cloudkms.googleapis.com".

- `key_alt_names` (optional): An optional list of string alternate
names used to reference a key. If a key is created with alternate
names, then encryption may refer to the key by the unique alternate
Expand Down
18 changes: 15 additions & 3 deletions pymongo/encryption_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,21 @@ def __init__(self, kms_providers, key_vault_namespace,
- `aws`: Map with "accessKeyId" and "secretAccessKey" as strings.
These are the AWS access key ID and AWS secret access key used
to generate KMS messages.
- `local`: Map with "key" as a 96-byte array or string. "key"
is the master key used to encrypt/decrypt data keys. This key
should be generated and stored as securely as possible.
- `azure`: Map with "tenantId", "clientId", and "clientSecret" as
strings. Additionally, "identityPlatformEndpoint" may also be
specified as a string (defaults to 'login.microsoftonline.com').
These are the Azure Active Directory credentials used to
generate Azure Key Vault messages.
- `gcp`: Map with "email" as a string and "privateKey"
as `bytes` or a base64 encoded string (unicode on Python 2).
Additionally, "endpoint" may also be specified as a string
(defaults to 'oauth2.googleapis.com'). These are the
credentials used to generate Google Cloud KMS messages.
- `local`: Map with "key" as `bytes` (96 bytes in length) or
a base64 encoded string (unicode on Python 2) which decodes
to 96 bytes. "key" is the master key used to encrypt/decrypt
data keys. This key should be generated and stored as securely
as possible.

- `key_vault_namespace`: The namespace for the key vault collection.
The key vault collection contains all data keys used for encryption
Expand Down
33 changes: 33 additions & 0 deletions test/client-side-encryption/custom/azure-dek.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"_id": {
"$binary": {
"base64": "As3URE1jRcyHOPjaLWHOXA==",
"subType": "04"
}
},
"keyMaterial": {
"$binary": {
"base64": "df6fFLZqBsZSnQz2SnTYWNBtznIHktVSDMaidAdL7yVVgxBJQ0DyPZUR2HDQB4hdYym3w4C+VGqzcyTZNJOXn6nJzpGrGlIQMcjv93HE4sP2d245ShQCi1nTkLmMaXN63E2fzltOY3jW7ojf5Z4+r8kxmzyfymmSRgo0w8AF7lUWvFhnBYoE4tE322L31vtAK3Zj8pTPvw8/TcUdMSI9Y669IIzxbMy5yMPmdzpnb8nceUv6/CJoeiLhbt5GgaHqIAv7tHFOY8ZX8ztowMLa3GeAjd9clvzraDTqrfMFYco/kDKAW5iPQQ+Xuy1fP8tyFp0ZwaL/7Ed2sc819j8FTQ==",
"subType": "00"
}
},
"creationDate": {
"$date": {
"$numberLong": "1601573901680"
}
},
"updateDate": {
"$date": {
"$numberLong": "1601573901680"
}
},
"status": {
"$numberInt": "0"
},
"masterKey": {
"provider": "azure",
"keyVaultEndpoint": "key-vault-kevinalbs.vault.azure.net",
"keyName": "test-key"
}
}

32 changes: 32 additions & 0 deletions test/client-side-encryption/custom/azure-gcp-schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"db.coll": {
"bsonType": "object",
"properties": {
"secret_azure": {
"encrypt": {
"keyId": [{
"$binary": {
"base64": "As3URE1jRcyHOPjaLWHOXA==",
"subType": "04"
}
}],
"algorithm": "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic",
"bsonType": "string"
}
},
"secret_gcp": {
"encrypt": {
"keyId": [{
"$binary": {
"base64": "osU8SLxJRHONbl8Oh5o+eg==",
"subType": "04"
}
}],
"algorithm": "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic",
"bsonType": "string"
}
}
}
}
}

35 changes: 35 additions & 0 deletions test/client-side-encryption/custom/gcp-dek.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"_id": {
"$binary": {
"base64": "osU8SLxJRHONbl8Oh5o+eg==",
"subType": "04"
}
},
"keyMaterial": {
"$binary": {
"base64": "CiQAg4LDql74hjYPZ957Z7YpCrD6yTVVXKegflJDstQ/xngTyx0SiQEAkWNo/fjPj6jMNSvEop07/29Fu72QHFDRYM3e/KFHfnMQjKzfxb1yX1dC6MbO5FZG/UNBkXlJgPqbHNVuizea3QC24kV5iOiEb4nTM7+RW+8TfVb6QerWWe6MjC+kNpj4LMVcc1lFfVDeGgpJLyMLNGitrjR16qH8qQTNbGNy0toTL69JUmgS8Q==",
"subType": "00"
}
},
"creationDate": {
"$date": {
"$numberLong": "1601574333107"
}
},
"updateDate": {
"$date": {
"$numberLong": "1601574333107"
}
},
"status": {
"$numberInt": "0"
},
"masterKey": {
"provider": "gcp",
"projectId": "csfle-poc",
"location": "global",
"keyRing": "test",
"keyName": "quickstart"
}
}

130 changes: 130 additions & 0 deletions test/test_encryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import traceback
import socket
import sys
import textwrap
import uuid

sys.path[0:0] = [""]
Expand All @@ -30,6 +31,7 @@
STANDARD,
UUID_SUBTYPE)
from bson.codec_options import CodecOptions
from bson.py3compat import _unicode
from bson.errors import BSONError
from bson.json_util import JSONOptions
from bson.son import SON
Expand All @@ -52,6 +54,7 @@
from test.utils import (TestCreator,
camel_to_snake_args,
OvertCommandListener,
WhiteListEventListener,
rs_or_single_client,
wait_until)
from test.utils_spec_runner import SpecRunner
Expand Down Expand Up @@ -1105,5 +1108,132 @@ def test_05_endpoint_invalid_host(self):
'aws', master_key=master_key)


class AzureGCPEncryptionTestMixin(object):
    """Shared explicit and automatic encryption checks for Azure and GCP.

    Intended to be mixed into a test case that provides ``self.client``.
    Concrete subclasses must set `DEK`, `KMS_PROVIDER_MAP` and `SCHEMA_MAP`
    (for example in ``setUpClass``) before any test method runs.
    """
    # Data key document handed to create_key_vault() before every test.
    DEK = None
    # ``kms_providers`` mapping passed to ClientEncryption/AutoEncryptionOpts.
    KMS_PROVIDER_MAP = None
    # Schema map passed to AutoEncryptionOpts by _test_automatic(). Declared
    # here, like the other subclass-provided attributes, so that the full
    # contract of the mixin is visible in one place.
    SCHEMA_MAP = None
    KEYVAULT_DB = 'keyvault'
    KEYVAULT_COLL = 'datakeys'

    def setUp(self):
        """Pass the key vault collection and `DEK` to create_key_vault()."""
        keyvault = self.client.get_database(
            self.KEYVAULT_DB).get_collection(
                self.KEYVAULT_COLL)
        create_key_vault(keyvault, self.DEK)

    def _test_explicit(self, expectation):
        """Explicitly encrypt 'test' and compare with a known ciphertext.

        :Parameters:
          - `expectation`: Base64 encoded ciphertext expected from
            deterministic encryption of 'test' with the key in `DEK`.
        """
        client_encryption = ClientEncryption(
            self.KMS_PROVIDER_MAP,
            '.'.join([self.KEYVAULT_DB, self.KEYVAULT_COLL]),
            client_context.client,
            OPTS)
        self.addCleanup(client_encryption.close)

        ciphertext = client_encryption.encrypt(
            'test',
            algorithm=Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic,
            key_id=Binary.from_uuid(self.DEK['_id'], STANDARD))

        self.assertEqual(bytes(ciphertext), base64.b64decode(expectation))
        # Round trip: decrypting must give back the original plaintext.
        self.assertEqual(client_encryption.decrypt(ciphertext), 'test')

    def _test_automatic(self, expectation_extjson, payload):
        """Insert `payload` with auto encryption and check both directions.

        :Parameters:
          - `expectation_extjson`: Extended JSON string holding the fields
            (and their values) expected in the document of the captured
            insert command.
          - `payload`: The plaintext document to insert; the document read
            back through the same client must contain these values.
        """
        encrypted_db = "db"
        encrypted_coll = "coll"
        keyvault_namespace = '.'.join([self.KEYVAULT_DB, self.KEYVAULT_COLL])

        encryption_opts = AutoEncryptionOpts(
            self.KMS_PROVIDER_MAP,
            keyvault_namespace,
            schema_map=self.SCHEMA_MAP)

        # Only 'insert' commands are recorded, so the first started event
        # below is the insert issued by this test.
        insert_listener = WhiteListEventListener('insert')
        client = rs_or_single_client(
            auto_encryption_opts=encryption_opts,
            event_listeners=[insert_listener])
        self.addCleanup(client.close)

        coll = client.get_database(encrypted_db).get_collection(
            encrypted_coll, codec_options=OPTS,
            write_concern=WriteConcern("majority"))
        coll.drop()

        expected_document = json_util.loads(
            expectation_extjson, json_options=JSON_OPTS)

        coll.insert_one(payload)
        event = insert_listener.results['started'][0]
        inserted_doc = event.command['documents'][0]

        # The document in the captured command must carry the expected
        # (encrypted) values.
        for key, value in expected_document.items():
            self.assertEqual(value, inserted_doc[key])

        # Reading back through the encrypting client must yield the payload.
        output_doc = coll.find_one({})
        for key, value in payload.items():
            self.assertEqual(output_doc[key], value)


# Azure credentials for the Azure encryption tests, read from the
# environment. Each value falls back to '' when its variable is unset.
AZURE_CREDS = {
    'tenantId': os.environ.get('FLE_AZURE_TENANTID', ''),
    'clientId': os.environ.get('FLE_AZURE_CLIENTID', ''),
    'clientSecret': os.environ.get('FLE_AZURE_CLIENTSECRET', '')}


class TestAzureEncryption(AzureGCPEncryptionTestMixin,
                          EncryptionIntegrationTest):
    """Run the shared Azure/GCP encryption checks with the Azure provider."""

    @classmethod
    @unittest.skipUnless(any(AZURE_CREDS.values()),
                         'Azure environment credentials are not set')
    def setUpClass(cls):
        """Load the Azure fixtures before the base class set-up runs."""
        cls.KMS_PROVIDER_MAP = dict(azure=AZURE_CREDS)
        cls.DEK = json_data(BASE, 'custom', 'azure-dek.json')
        cls.SCHEMA_MAP = json_data(BASE, 'custom', 'azure-gcp-schema.json')
        super(TestAzureEncryption, cls).setUpClass()

    def test_explicit(self):
        """Explicit encryption must reproduce the known Azure ciphertext."""
        expected_ciphertext = (
            'AQLN1ERNY0XMhzj42i1hzlwC8/OSU9bHfaQRmmRF5l7d5ZpqJX13qF5zSyExo8N9c1b6uS/LoKrHNzcEMKNrkpi3jf2HiShTFRF0xi8AOD9yfw==')
        return self._test_explicit(expected_ciphertext)

    def test_automatic(self):
        """Automatic encryption must encrypt the ``secret_azure`` field."""
        plaintext_doc = {"secret_azure": "test"}
        encrypted_extjson = textwrap.dedent("""
        {"secret_azure": {
            "$binary": {
                "base64": "AQLN1ERNY0XMhzj42i1hzlwC8/OSU9bHfaQRmmRF5l7d5ZpqJX13qF5zSyExo8N9c1b6uS/LoKrHNzcEMKNrkpi3jf2HiShTFRF0xi8AOD9yfw==",
                "subType": "06"}
        }}""")
        return self._test_automatic(encrypted_extjson, plaintext_doc)


# GCP credentials for the GCP encryption tests, read from the environment.
# Each variable falls back to '' when unset. The private key is passed
# through _unicode() -- presumably because a base64 encoded "privateKey"
# string must be unicode on Python 2; confirm against the kms_providers docs.
GCP_CREDS = {
    'email': os.environ.get('FLE_GCP_EMAIL', ''),
    'privateKey': _unicode(os.environ.get('FLE_GCP_PRIVATEKEY', ''))}


class TestGCPEncryption(AzureGCPEncryptionTestMixin,
                        EncryptionIntegrationTest):
    """Run the shared Azure/GCP encryption checks with the GCP provider."""

    @classmethod
    @unittest.skipUnless(any(GCP_CREDS.values()),
                         'GCP environment credentials are not set')
    def setUpClass(cls):
        """Load the GCP fixtures before the base class set-up runs."""
        cls.KMS_PROVIDER_MAP = dict(gcp=GCP_CREDS)
        cls.DEK = json_data(BASE, 'custom', 'gcp-dek.json')
        cls.SCHEMA_MAP = json_data(BASE, 'custom', 'azure-gcp-schema.json')
        super(TestGCPEncryption, cls).setUpClass()

    def test_explicit(self):
        """Explicit encryption must reproduce the known GCP ciphertext."""
        expected_ciphertext = (
            'AaLFPEi8SURzjW5fDoeaPnoCGcOFAmFOPpn5584VPJJ8iXIgml3YDxMRZD9IWv5otyoft8fBzL1LsDEp0lTeB32cV1gOj0IYeAKHhGIleuHZtA==')
        return self._test_explicit(expected_ciphertext)

    def test_automatic(self):
        """Automatic encryption must encrypt the ``secret_gcp`` field."""
        plaintext_doc = {"secret_gcp": "test"}
        encrypted_extjson = textwrap.dedent("""
        {"secret_gcp": {
            "$binary": {
                "base64": "AaLFPEi8SURzjW5fDoeaPnoCGcOFAmFOPpn5584VPJJ8iXIgml3YDxMRZD9IWv5otyoft8fBzL1LsDEp0lTeB32cV1gOj0IYeAKHhGIleuHZtA==",
                "subType": "06"}
        }}""")
        return self._test_automatic(encrypted_extjson, plaintext_doc)


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()