diff --git a/bson/binary.py b/bson/binary.py index cb89c69da2..d1f5aae7d2 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -69,6 +69,8 @@ class UuidRepresentation: code. When decoding a BSON binary field with a UUID subtype, a :class:`~bson.binary.Binary` instance will be returned instead of a :class:`uuid.UUID` instance. + + See :ref:`unspecified-representation-details` for details. .. versionadded:: 3.11 """ @@ -79,6 +81,8 @@ class UuidRepresentation: :class:`uuid.UUID` instances will automatically be encoded to and decoded from BSON binary, using RFC-4122 byte order with binary subtype :data:`UUID_SUBTYPE`. + + See :ref:`standard-representation-details` for details. .. versionadded:: 3.11 """ @@ -89,6 +93,8 @@ class UuidRepresentation: :class:`uuid.UUID` instances will automatically be encoded to and decoded from BSON binary, using RFC-4122 byte order with binary subtype :data:`OLD_UUID_SUBTYPE`. + + See :ref:`python-legacy-representation-details` for details. .. versionadded:: 3.11 """ @@ -99,6 +105,8 @@ class UuidRepresentation: :class:`uuid.UUID` instances will automatically be encoded to and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`, using the Java driver's legacy byte order. + + See :ref:`java-legacy-representation-details` for details. .. versionadded:: 3.11 """ @@ -109,6 +117,8 @@ class UuidRepresentation: :class:`uuid.UUID` instances will automatically be encoded to and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`, using the C# driver's legacy byte order. + + See :ref:`csharp-legacy-representation-details` for details. .. versionadded:: 3.11 """ @@ -220,6 +230,7 @@ def from_uuid(cls, uuid, uuid_representation=UuidRepresentation.STANDARD): - `uuid_representation`: A member of :class:`~bson.binary.UuidRepresentation`. Default: :const:`~bson.binary.UuidRepresentation.STANDARD`. + See :ref:`handling-uuid-data-example` for details. .. 
versionadded:: 3.11 """ @@ -236,7 +247,8 @@ def from_uuid(cls, uuid, uuid_representation=UuidRepresentation.STANDARD): "UuidRepresentation.UNSPECIFIED. UUIDs can be manually " "converted to bson.Binary instances using " "bson.Binary.from_uuid() or a different UuidRepresentation " - "can be configured.") + "can be configured. See the documentation for " + "UuidRepresentation for more information.") subtype = OLD_UUID_SUBTYPE if uuid_representation == UuidRepresentation.PYTHON_LEGACY: @@ -266,6 +278,7 @@ def as_uuid(self, uuid_representation=UuidRepresentation.STANDARD): - `uuid_representation`: A member of :class:`~bson.binary.UuidRepresentation`. Default: :const:`~bson.binary.UuidRepresentation.STANDARD`. + See :ref:`handling-uuid-data-example` for details. .. versionadded:: 3.11 """ diff --git a/bson/codec_options.py b/bson/codec_options.py index a514cc92d0..4ffcdb0a59 100644 --- a/bson/codec_options.py +++ b/bson/codec_options.py @@ -218,7 +218,10 @@ class CodecOptions(_options_base): naive. Defaults to ``False``. - `uuid_representation`: The BSON representation to use when encoding and decoding instances of :class:`~uuid.UUID`. Defaults to - :data:`~bson.binary.PYTHON_LEGACY`. + :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`. New + applications should consider setting this to + :data:`~bson.binary.UuidRepresentation.STANDARD` for cross language + compatibility. See :ref:`handling-uuid-data-example` for details. - `unicode_decode_error_handler`: The error handler to apply when a Unicode-related error occurs during BSON decoding that would otherwise raise :exc:`UnicodeDecodeError`. 
Valid options include diff --git a/doc/examples/index.rst b/doc/examples/index.rst index baadd74464..f8828cdfd7 100644 --- a/doc/examples/index.rst +++ b/doc/examples/index.rst @@ -32,3 +32,4 @@ MongoDB, you can start it like so: tailable tls encryption + uuid diff --git a/doc/examples/uuid.rst b/doc/examples/uuid.rst new file mode 100644 index 0000000000..9b6762dc88 --- /dev/null +++ b/doc/examples/uuid.rst @@ -0,0 +1,509 @@ +.. _handling-uuid-data-example: + +Handling UUID Data +================== + +PyMongo ships with built-in support for dealing with UUID types. +It is straightforward to store native :class:`uuid.UUID` objects +to MongoDB and retrieve them as native :class:`uuid.UUID` objects:: + + from pymongo import MongoClient + from bson.binary import UuidRepresentation + from uuid import uuid4 + + # use the 'standard' representation for cross-language compatibility. + client = MongoClient(uuid_representation=UuidRepresentation.STANDARD) + collection = client.get_database('uuid_db').get_collection('uuid_coll') + + # remove all documents from collection + collection.delete_many({}) + + # create a native uuid object + uuid_obj = uuid4() + + # save the native uuid object to MongoDB + collection.insert_one({'uuid': uuid_obj}) + + # retrieve the stored uuid object from MongoDB + document = collection.find_one({}) + + # check that the retrieved UUID matches the inserted UUID + assert document['uuid'] == uuid_obj + +Native :class:`uuid.UUID` objects can also be used as part of MongoDB +queries:: + + document = collection.find_one({'uuid': uuid_obj}) + assert document['uuid'] == uuid_obj + +The above examples illustrate the simplest of use-cases - one where the +UUID is generated by, and used in, the same application.
However, +the situation can be significantly more complex when dealing with a MongoDB +deployment that contains UUIDs created by other drivers as the Java and CSharp +drivers have historically encoded UUIDs using a byte-order that is different +from the one used by PyMongo. Applications that require interoperability across +these drivers must specify the appropriate +:class:`~bson.binary.UuidRepresentation`. + +In the following sections, we describe how drivers have historically differed +in their encoding of UUIDs, and how applications can use the +:class:`~bson.binary.UuidRepresentation` configuration option to maintain +cross-language compatibility. + +.. attention:: New applications that do not share a MongoDB deployment with + any other application and that have never stored UUIDs in MongoDB + should use the ``standard`` UUID representation for cross-language + compatibility. See :ref:`configuring-uuid-representation` for details + on how to configure the :class:`~bson.binary.UuidRepresentation`. + +.. _example-legacy-uuid: + +Legacy Handling of UUID Data +---------------------------- + +Historically, MongoDB Drivers have used different byte-ordering +while serializing UUID types to :class:`~bson.binary.Binary`. +Consider, for instance, a UUID with the following canonical textual +representation:: + + 00112233-4455-6677-8899-aabbccddeeff + +This UUID would historically be serialized by the Python driver as:: + + 00112233-4455-6677-8899-aabbccddeeff + +The same UUID would historically be serialized by the C# driver as:: + + 33221100-5544-7766-8899-aabbccddeeff + +Finally, the same UUID would historically be serialized by the Java driver as:: + + 77665544-3322-1100-ffee-ddccbbaa9988 + +.. note:: For in-depth information about the byte-order historically + used by different drivers, see the `Handling of Native UUID Types + Specification + <https://github.com/mongodb/specifications/blob/master/source/uuid.rst>`_.
+ +This difference in the byte-order of UUIDs encoded by different drivers can +result in highly unintuitive behavior in some scenarios. We detail two such +scenarios in the next sections. + +Scenario 1: Applications Share a MongoDB Deployment +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Consider the following situation: + +* Application ``C`` written in C# generates a UUID and uses it as the ``_id`` + of a document that it proceeds to insert into the ``uuid_test`` collection of + the ``example_db`` database. Let's assume that the canonical textual + representation of the generated UUID is:: + + 00112233-4455-6677-8899-aabbccddeeff + +* Application ``P`` written in Python attempts to ``find`` the document + written by application ``C`` in the following manner:: + + from uuid import UUID + collection = client.example_db.uuid_test + result = collection.find_one({'_id': UUID('00112233-4455-6677-8899-aabbccddeeff')}) + + In this instance, ``result`` will never be the document that + was inserted by application ``C`` in the previous step. This is because of + the different byte-order used by the C# driver for representing UUIDs as + BSON Binary. The following query, on the other hand, will successfully find + this document:: + + result = collection.find_one({'_id': UUID('33221100-5544-7766-8899-aabbccddeeff')}) + +This example demonstrates how the differing byte-order used by different +drivers can hamper interoperability. To workaround this problem, users should +configure their ``MongoClient`` with the appropriate +:class:`~bson.binary.UuidRepresentation` (in this case, ``client`` in application +``P`` can be configured to use the +:data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY` representation to +avoid the unintuitive behavior) as described in +:ref:`configuring-uuid-representation`. 
+ +Scenario 2: Round-Tripping UUIDs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the following examples, we see how using a misconfigured +:class:`~bson.binary.UuidRepresentation` can cause an application +to inadvertently change the :class:`~bson.binary.Binary` subtype, and in some +cases, the bytes of the :class:`~bson.binary.Binary` field itself when +round-tripping documents containing UUIDs. + +Consider the following situation:: + + from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS + from bson.binary import Binary, UuidRepresentation + from uuid import uuid4 + + # Using UuidRepresentation.PYTHON_LEGACY stores a Binary subtype-3 UUID + python_opts = CodecOptions(uuid_representation=UuidRepresentation.PYTHON_LEGACY) + input_uuid = uuid4() + collection = client.testdb.get_collection('test', codec_options=python_opts) + collection.insert_one({'_id': 'foo', 'uuid': input_uuid}) + assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)})['_id'] == 'foo' + + # Retrieving this document using UuidRepresentation.STANDARD returns a native UUID + std_opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD) + std_collection = client.testdb.get_collection('test', codec_options=std_opts) + doc = std_collection.find_one({'_id': 'foo'}) + assert doc['uuid'] == input_uuid + + # Round-tripping the retrieved document silently changes the Binary subtype to 4 + std_collection.replace_one({'_id': 'foo'}, doc) + assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)}) is None + round_tripped_doc = collection.find_one({'uuid': Binary(input_uuid.bytes, 4)}) + assert doc == round_tripped_doc + + +In this example, round-tripping the document using the incorrect +:class:`~bson.binary.UuidRepresentation` (``STANDARD`` instead of +``PYTHON_LEGACY``) changes the :class:`~bson.binary.Binary` subtype as a +side-effect. **Note that this can also happen when the situation is reversed - +i.e. 
when the original document is written using ``STANDARD`` representation +and then round-tripped using the ``PYTHON_LEGACY`` representation.** + +In the next example, we see the consequences of incorrectly using a +representation that modifies byte-order (``CSHARP_LEGACY`` or ``JAVA_LEGACY``) +when round-tripping documents:: + + from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS + from bson.binary import Binary, UuidRepresentation + from uuid import uuid4 + + # Using UuidRepresentation.STANDARD stores a Binary subtype-4 UUID + std_opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD) + input_uuid = uuid4() + collection = client.testdb.get_collection('test', codec_options=std_opts) + collection.insert_one({'_id': 'baz', 'uuid': input_uuid}) + assert collection.find_one({'uuid': Binary(input_uuid.bytes, 4)})['_id'] == 'baz' + + # Retrieving this document using UuidRepresentation.JAVA_LEGACY returns a native UUID + # without modifying the UUID byte-order + java_opts = CodecOptions(uuid_representation=UuidRepresentation.JAVA_LEGACY) + java_collection = client.testdb.get_collection('test', codec_options=java_opts) + doc = java_collection.find_one({'_id': 'baz'}) + assert doc['uuid'] == input_uuid + + # Round-tripping the retrieved document silently changes the Binary bytes and subtype + java_collection.replace_one({'_id': 'baz'}, doc) + assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)}) is None + assert collection.find_one({'uuid': Binary(input_uuid.bytes, 4)}) is None + round_tripped_doc = collection.find_one({'_id': 'baz'}) + assert round_tripped_doc['uuid'] == Binary(input_uuid.bytes, 3).as_uuid(UuidRepresentation.JAVA_LEGACY) + + +In this case, using the incorrect :class:`~bson.binary.UuidRepresentation` +(``JAVA_LEGACY`` instead of ``STANDARD``) changes the +:class:`~bson.binary.Binary` bytes and subtype as a side-effect. 
+**Note that this happens when any representation that +manipulates byte-order (``CSHARP_LEGACY`` or ``JAVA_LEGACY``) is incorrectly +used to round-trip UUIDs written with ``STANDARD``. When the situation is +reversed - i.e. when the original document is written using ``CSHARP_LEGACY`` +or ``JAVA_LEGACY`` and then round-tripped using ``STANDARD`` - +only the :class:`~bson.binary.Binary` subtype is changed.** + +.. note:: Starting in PyMongo 4.0, these issues will be resolved as + the ``STANDARD`` representation will decode Binary subtype 3 fields as + :class:`~bson.binary.Binary` objects of subtype 3 (instead of + :class:`uuid.UUID`), and each of the ``*_LEGACY`` representations will + decode Binary subtype 4 fields to :class:`~bson.binary.Binary` objects of + subtype 4 (instead of :class:`uuid.UUID`). + +.. _configuring-uuid-representation: + +Configuring a UUID Representation +--------------------------------- + +Users can work around the problems described above by configuring their +applications with the appropriate :class:`~bson.binary.UuidRepresentation`. +Configuring the representation modifies PyMongo's behavior while +encoding :class:`uuid.UUID` objects to BSON and decoding +Binary subtype 3 and 4 fields from BSON. + +Applications can set the UUID representation in one of the following ways: + +#. At the ``MongoClient`` level using the ``uuidRepresentation`` URI option, + e.g.:: + + client = MongoClient("mongodb://a:27107/?uuidRepresentation=javaLegacy") + + Valid values are: + + .. list-table:: + :header-rows: 1 + + * - Value + - UUID Representation + + * - ``pythonLegacy`` + - :ref:`python-legacy-representation-details` + + * - ``javaLegacy`` + - :ref:`java-legacy-representation-details` + + * - ``csharpLegacy`` + - :ref:`csharp-legacy-representation-details` + + * - ``standard`` + - :ref:`standard-representation-details` + + * - ``unspecified`` + - :ref:`unspecified-representation-details` + +#.
Using the ``uuid_representation`` kwarg option, e.g.:: + + from bson.binary import UuidRepresentation + client = MongoClient(uuid_representation=UuidRepresentation.PYTHON_LEGACY) + +#. By supplying a suitable :class:`~bson.codec_options.CodecOptions` + instance, e.g.:: + + from bson.codec_options import CodecOptions + csharp_opts = CodecOptions(uuid_representation=UuidRepresentation.CSHARP_LEGACY) + csharp_database = client.get_database('csharp_db', codec_options=csharp_opts) + csharp_collection = client.testdb.get_collection('csharp_coll', codec_options=csharp_opts) + +Supported UUID Representations +------------------------------ + +.. list-table:: + :header-rows: 1 + + * - UUID Representation + - Default? + - Encode :class:`uuid.UUID` to + - Decode :class:`~bson.binary.Binary` subtype 4 to + - Decode :class:`~bson.binary.Binary` subtype 3 to + + * - :ref:`python-legacy-representation-details` + - Yes, in PyMongo>=2.9,<4 + - :class:`~bson.binary.Binary` subtype 3 with standard byte-order + - :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 4 in PyMongo>=4 + - :class:`uuid.UUID` + + * - :ref:`java-legacy-representation-details` + - No + - :class:`~bson.binary.Binary` subtype 3 with Java legacy byte-order + - :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 4 in PyMongo>=4 + - :class:`uuid.UUID` + + * - :ref:`csharp-legacy-representation-details` + - No + - :class:`~bson.binary.Binary` subtype 3 with C# legacy byte-order + - :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 4 in PyMongo>=4 + - :class:`uuid.UUID` + + * - :ref:`standard-representation-details` + - No + - :class:`~bson.binary.Binary` subtype 4 + - :class:`uuid.UUID` + - :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 3 in PyMongo>=4 + + * - :ref:`unspecified-representation-details` + - Yes, in PyMongo>=4 + - Raise :exc:`ValueError` + - :class:`~bson.binary.Binary` subtype 4 + -
:class:`~bson.binary.Binary` subtype 3 + +We now detail the behavior and use-case for each supported UUID +representation. + +.. _python-legacy-representation-details: + +``PYTHON_LEGACY`` +^^^^^^^^^^^^^^^^^ + +.. attention:: This uuid representation should be used when reading UUIDs + generated by existing applications that use the Python driver + but **don't** explicitly set a UUID representation. + +.. attention:: :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` + has been the default uuid representation since PyMongo 2.9. + +The :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` representation +corresponds to the legacy representation of UUIDs used by PyMongo. This +representation conforms with +`RFC 4122 Section 4.1.2 <https://tools.ietf.org/html/rfc4122#section-4.1.2>`_. + +The following example illustrates the use of this representation:: + + from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS + from bson.binary import UuidRepresentation + from uuid import uuid4 + + # No configured UUID representation + collection = client.python_legacy.get_collection('test', codec_options=DEFAULT_CODEC_OPTIONS) + + # Using UuidRepresentation.PYTHON_LEGACY + pylegacy_opts = CodecOptions(uuid_representation=UuidRepresentation.PYTHON_LEGACY) + pylegacy_collection = client.python_legacy.get_collection('test', codec_options=pylegacy_opts) + + # UUIDs written by PyMongo with no UuidRepresentation configured can be queried using PYTHON_LEGACY + uuid_1 = uuid4() + collection.insert_one({'uuid': uuid_1}) + document = pylegacy_collection.find_one({'uuid': uuid_1}) + + # UUIDs written using PYTHON_LEGACY can be read by PyMongo with no UuidRepresentation configured + uuid_2 = uuid4() + pylegacy_collection.insert_one({'uuid': uuid_2}) + document = collection.find_one({'uuid': uuid_2}) + +``PYTHON_LEGACY`` encodes native :class:`uuid.UUID` objects to +:class:`~bson.binary.Binary` subtype 3 objects, preserving the same +byte-order as :attr:`~uuid.UUID.bytes`:: + + from bson.binary import Binary + + document =
collection.find_one({'uuid': Binary(uuid_2.bytes, subtype=3)}) + assert document['uuid'] == uuid_2 + +.. _java-legacy-representation-details: + +``JAVA_LEGACY`` +^^^^^^^^^^^^^^^ + +.. attention:: This UUID representation should be used when reading UUIDs + written to MongoDB by the legacy applications (i.e. applications that don't + use the ``STANDARD`` representation) using the Java driver. + +The :data:`~bson.binary.UuidRepresentation.JAVA_LEGACY` representation +corresponds to the legacy representation of UUIDs used by the MongoDB Java +Driver. + +.. note:: The ``JAVA_LEGACY`` representation reverses the order of bytes 0-7, + and bytes 8-15. + +As an example, consider the same UUID described in :ref:`example-legacy-uuid`. +Let us assume that an application used the Java driver without an explicitly +specified UUID representation to insert the example UUID +``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. If we try to read this +value using PyMongo with no UUID representation specified, we end up with an +entirely different UUID:: + + UUID('77665544-3322-1100-ffee-ddccbbaa9988') + +However, if we explicitly set the representation to +:data:`~bson.binary.UuidRepresentation.JAVA_LEGACY`, we get the correct result:: + + UUID('00112233-4455-6677-8899-aabbccddeeff') + +PyMongo uses the specified UUID representation to reorder the BSON bytes and +load them correctly. ``JAVA_LEGACY`` encodes native :class:`uuid.UUID` objects +to :class:`~bson.binary.Binary` subtype 3 objects, while performing the same +byte-reordering as the legacy Java driver's UUID to BSON encoder. + +.. _csharp-legacy-representation-details: + +``CSHARP_LEGACY`` +^^^^^^^^^^^^^^^^^ + +.. attention:: This UUID representation should be used when reading UUIDs + written to MongoDB by the legacy applications (i.e. applications that don't + use the ``STANDARD`` representation) using the C# driver. 
+ +The :data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY` representation +corresponds to the legacy representation of UUIDs used by the MongoDB C# +Driver. + +.. note:: The ``CSHARP_LEGACY`` representation reverses the order of bytes 0-3, + bytes 4-5, and bytes 6-7. + +As an example, consider the same UUID described in :ref:`example-legacy-uuid`. +Let us assume that an application used the C# driver without an explicitly +specified UUID representation to insert the example UUID +``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. If we try to read this +value using PyMongo with no UUID representation specified, we end up with an +entirely different UUID:: + + UUID('33221100-5544-7766-8899-aabbccddeeff') + +However, if we explicitly set the representation to +:data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY`, we get the correct result:: + + UUID('00112233-4455-6677-8899-aabbccddeeff') + +PyMongo uses the specified UUID representation to reorder the BSON bytes and +load them correctly. ``CSHARP_LEGACY`` encodes native :class:`uuid.UUID` +objects to :class:`~bson.binary.Binary` subtype 3 objects, while performing +the same byte-reordering as the legacy C# driver's UUID to BSON encoder. + +.. _standard-representation-details: + +``STANDARD`` +^^^^^^^^^^^^ + +.. attention:: This UUID representation should be used by new applications + that have never stored UUIDs in MongoDB. + +The :data:`~bson.binary.UuidRepresentation.STANDARD` representation +enables cross-language compatibility by ensuring the same byte-ordering +when encoding UUIDs from all drivers. UUIDs written by a driver with this +representation configured will be handled correctly by every other driver provided +it is also configured with the ``STANDARD`` representation. + +``STANDARD`` encodes native :class:`uuid.UUID` objects to +:class:`~bson.binary.Binary` subtype 4 objects. + +.. _unspecified-representation-details: + +``UNSPECIFIED`` +^^^^^^^^^^^^^^^ + +..
attention:: Starting in PyMongo 4.0, + :data:`~bson.binary.UuidRepresentation.UNSPECIFIED` will be the default + UUID representation used by PyMongo. + +The :data:`~bson.binary.UuidRepresentation.UNSPECIFIED` representation +prevents the incorrect interpretation of UUID bytes by stopping short of +automatically converting UUID fields in BSON to native UUID types. Loading +a UUID when using this representation returns a :class:`~bson.binary.Binary` +object instead. If required, users can coerce the decoded +:class:`~bson.binary.Binary` objects into native UUIDs using the +:meth:`~bson.binary.Binary.as_uuid` method and specifying the appropriate +representation format. The following example shows +what this might look like for a UUID stored by the C# driver:: + + from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS + from bson.binary import Binary, UuidRepresentation + from uuid import uuid4 + + # Using UuidRepresentation.CSHARP_LEGACY + csharp_opts = CodecOptions(uuid_representation=UuidRepresentation.CSHARP_LEGACY) + + # Store a legacy C#-formatted UUID + input_uuid = uuid4() + collection = client.testdb.get_collection('test', codec_options=csharp_opts) + collection.insert_one({'_id': 'foo', 'uuid': input_uuid}) + + # Using UuidRepresentation.UNSPECIFIED + unspec_opts = CodecOptions(uuid_representation=UuidRepresentation.UNSPECIFIED) + unspec_collection = client.testdb.get_collection('test', codec_options=unspec_opts) + + # UUID fields are decoded as Binary when UuidRepresentation.UNSPECIFIED is configured + document = unspec_collection.find_one({'_id': 'foo'}) + decoded_field = document['uuid'] + assert isinstance(decoded_field, Binary) + + # Binary.as_uuid() can be used to coerce the decoded value to a native UUID + decoded_uuid = decoded_field.as_uuid(UuidRepresentation.CSHARP_LEGACY) + assert decoded_uuid == input_uuid + +Native :class:`uuid.UUID` objects cannot directly be encoded to +:class:`~bson.binary.Binary` when the UUID representation is 
``UNSPECIFIED`` +and attempting to do so will result in an exception:: + + unspec_collection.insert_one({'_id': 'bar', 'uuid': uuid4()}) + Traceback (most recent call last): + ... + ValueError: cannot encode native uuid.UUID with UuidRepresentation.UNSPECIFIED. UUIDs can be manually converted to bson.Binary instances using bson.Binary.from_uuid() or a different UuidRepresentation can be configured. See the documentation for UuidRepresentation for more information. + +Instead, applications using :data:`~bson.binary.UuidRepresentation.UNSPECIFIED` +must explicitly coerce a native UUID using the +:meth:`~bson.binary.Binary.from_uuid` method:: + + explicit_binary = Binary.from_uuid(uuid4(), UuidRepresentation.PYTHON_LEGACY) + unspec_collection.insert_one({'_id': 'bar', 'uuid': explicit_binary}) diff --git a/pymongo/mongo_client.py b/pymongo/mongo_client.py index 2a6e9d180f..c625ccbede 100644 --- a/pymongo/mongo_client.py +++ b/pymongo/mongo_client.py @@ -339,8 +339,9 @@ def __init__( - `uuidRepresentation`: The BSON representation to use when encoding from and decoding to instances of :class:`~uuid.UUID`. Valid values are `pythonLegacy` (the default), `javaLegacy`, - `csharpLegacy` and `standard`. New applications should consider - setting this to `standard` for cross language compatibility. + `csharpLegacy`, `standard` and `unspecified`. New applications + should consider setting this to `standard` for cross language + compatibility. See :ref:`handling-uuid-data-example` for details. | **Write Concern options:** | (Only set if passed. No default values.)