From 78b269839a42b05cb45d78c2b4f4daea5566af40 Mon Sep 17 00:00:00 2001 From: William Zhou Date: Mon, 15 Mar 2021 17:10:13 -0700 Subject: [PATCH 1/6] added example for raw bson usage --- bson/raw_bson.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/bson/raw_bson.py b/bson/raw_bson.py index 31b0d1b66c..bc707663aa 100644 --- a/bson/raw_bson.py +++ b/bson/raw_bson.py @@ -13,6 +13,46 @@ # limitations under the License. """Tools for representing raw BSON documents. + +Inserting and Retrieving RawBSONDocuments +========================================= + +Example: Moving a document between different databases/collections +.. code-block:: python + + import bsonjs + from pymongo import MongoClient + from bson.raw_bson import RawBSONDocument + + client = MongoClient("Localhost", 27017, document_class=RawBSONDocument) + db = client.db + doc = {"_id": 1, "test": "1"} + doc_bson = bsonjs.loads('{"_id": 1, "test": "1"}') + + # add original document to collection + result = db.collection.insert_one(doc) + assert result.acknowledged + + # retrieve doc from collection + retrieved_doc = db.collection.find_one({"test":"1"}) + assert retrieved_doc.raw == doc_bson + + # insert raw BSON into replica db/collection + replica_db = client.replica_db + result = replica_db.collection.insert_one(retrieved_doc) + assert result.acknowledged + + # retrieve doc from replica db/collection + retrieved_replica_doc = replica_db.collection.find_one({"test":"1"}) + assert retrieved_replica_doc.raw == doc_bson + assert bsonjs.dumps(retrieved_replica_doc.raw) == '{ "_id" : 1, "test" : "1" }' + +For use cases like moving documents across different databases or writing binary +blobs to disk, using raw BSON documents provides better speed and avoids the +overhead of decoding BSON to JSON. + +.. versionadded:: 3.12 + """ from collections.abc import Mapping as _Mapping From 905b92304292f134140c03c48bee54298def6ee0 Mon Sep 17 00:00:00 2001 From: William Zhou Date: Tue, 16 Mar 2021 16:58:10 -0700 Subject: [PATCH 2/6] update test to do multiple inserts/copies --- bson/raw_bson.py | 72 +++++++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/bson/raw_bson.py b/bson/raw_bson.py index bc707663aa..c8f7bbb19f 100644 --- a/bson/raw_bson.py +++ b/bson/raw_bson.py @@ -18,41 +18,57 @@ ========================================= Example: Moving a document between different databases/collections -.. code-block:: python - import bsonjs +.. testsetup:: from pymongo import MongoClient - from bson.raw_bson import RawBSONDocument - - client = MongoClient("Localhost", 27017, document_class=RawBSONDocument) - db = client.db - doc = {"_id": 1, "test": "1"} - doc_bson = bsonjs.loads('{"_id": 1, "test": "1"}') - - # add original document to collection - result = db.collection.insert_one(doc) - assert result.acknowledged - - # retrieve doc from collection - retrieved_doc = db.collection.find_one({"test":"1"}) - assert retrieved_doc.raw == doc_bson - - # insert raw BSON into replica db/collection - replica_db = client.replica_db - result = replica_db.collection.insert_one(retrieved_doc) - assert result.acknowledged - - # retrieve doc from replica db/collection - retrieved_replica_doc = replica_db.collection.find_one({"test":"1"}) - assert retrieved_replica_doc.raw == doc_bson - assert bsonjs.dumps(retrieved_replica_doc.raw) == '{ "_id" : 1, "test" : "1" }' + client = MongoClient("localhost", 27017, document_class=RawBSONDocument) + client.drop_database('db') + client.drop_database('replica_db') + +.. doctest:: + + >>> import bson + >>> from pymongo import MongoClient + >>> from bson.raw_bson import RawBSONDocument + >>> client = MongoClient("localhost", 27017, document_class=bson.) + >>> db = client.db + >>> docs = [ + ... {'a': 1}, + ... {'b': 1}, + ... {'c': 1}, + ... {'d': 1}] + >>> result = db.test.insert_many(docs) + >>> assert result.acknowledged + >>> replica_db = client.replica_db + >>> for doc in db.test.find(): + ... print(f"raw document: {doc.raw}") + ... print(f"decoded document: {bson.decode(doc.raw)}") + ... replica_db.test.insert_one(doc) + raw document: b'...' + decoded document: {'_id': ObjectId('...'), 'a': 1} + raw document: b'...' + decoded document: {'_id': ObjectId('...'), 'b': 1} + raw document: b'...' + decoded document: {'_id': ObjectId('...'), 'c': 1} + raw document: b'...' + decoded document: {'_id': ObjectId('...'), 'd': 1} + >>> for doc in replica_db.test.find(): + ... print(f"raw document: {doc.raw}") + ... print(f"decoded document: {bson.decode(doc.raw)}") + raw document: b'...' + decoded document: {'_id': ObjectId('...'), 'a': 1} + raw document: b'...' + decoded document: {'_id': ObjectId('...'), 'b': 1} + raw document: b'...' + decoded document: {'_id': ObjectId('...'), 'c': 1} + raw document: b'...' + decoded document: {'_id': ObjectId('...'), 'd': 1} For use cases like moving documents across different databases or writing binary blobs to disk, using raw BSON documents provides better speed and avoids the -overhead of decoding BSON to JSON. +overhead of decoding or encoding BSON. .. versionadded:: 3.12 - """ from collections.abc import Mapping as _Mapping From 1bea157142595b2f7b1472bb42035d958a3875ae Mon Sep 17 00:00:00 2001 From: William Zhou Date: Tue, 16 Mar 2021 17:01:49 -0700 Subject: [PATCH 3/6] added assertion for each copy add --- bson/raw_bson.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bson/raw_bson.py b/bson/raw_bson.py index c8f7bbb19f..60ec1a2eed 100644 --- a/bson/raw_bson.py +++ b/bson/raw_bson.py @@ -32,18 +32,18 @@ >>> from bson.raw_bson import RawBSONDocument >>> client = MongoClient("localhost", 27017, document_class=bson.) >>> db = client.db - >>> docs = [ + >>> result = db.test.insert_many([ ... {'a': 1}, ... {'b': 1}, ... {'c': 1}, - ... {'d': 1}] - >>> result = db.test.insert_many(docs) + ... {'d': 1}]) >>> assert result.acknowledged >>> replica_db = client.replica_db >>> for doc in db.test.find(): ... print(f"raw document: {doc.raw}") ... print(f"decoded document: {bson.decode(doc.raw)}") - ... replica_db.test.insert_one(doc) + ... result = replica_db.test.insert_one(doc) + ... assert result.acknowledged raw document: b'...' decoded document: {'_id': ObjectId('...'), 'a': 1} raw document: b'...' @@ -54,7 +54,7 @@ decoded document: {'_id': ObjectId('...'), 'd': 1} >>> for doc in replica_db.test.find(): ... print(f"raw document: {doc.raw}") - ... print(f"decoded document: {bson.decode(doc.raw)}") + ... print(f"decoded document: {bson.decode(doc.raw)}") raw document: b'...' decoded document: {'_id': ObjectId('...'), 'a': 1} raw document: b'...' From 5697eca2e32c1fdd4808c765f8f7361c5405fe63 Mon Sep 17 00:00:00 2001 From: William Zhou Date: Wed, 17 Mar 2021 11:33:51 -0700 Subject: [PATCH 4/6] update ticket --- bson/raw_bson.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bson/raw_bson.py b/bson/raw_bson.py index 60ec1a2eed..15b5b61d6c 100644 --- a/bson/raw_bson.py +++ b/bson/raw_bson.py @@ -30,7 +30,7 @@ >>> import bson >>> from pymongo import MongoClient >>> from bson.raw_bson import RawBSONDocument - >>> client = MongoClient("localhost", 27017, document_class=bson.) + >>> client = MongoClient("localhost", 27017, document_class=RawBSONDocument) >>> db = client.db >>> result = db.test.insert_many([ ... {'a': 1}, @@ -42,8 +42,7 @@ >>> for doc in db.test.find(): ... print(f"raw document: {doc.raw}") ... print(f"decoded document: {bson.decode(doc.raw)}") - ... result = replica_db.test.insert_one(doc) - ... assert result.acknowledged + ... replica_db.test.insert_one(doc) raw document: b'...' decoded document: {'_id': ObjectId('...'), 'a': 1} raw document: b'...' @@ -67,8 +66,6 @@ For use cases like moving documents across different databases or writing binary blobs to disk, using raw BSON documents provides better speed and avoids the overhead of decoding or encoding BSON. - -.. versionadded:: 3.12 """ from collections.abc import Mapping as _Mapping From 1d8557897c70668690dc2187386458122156baae Mon Sep 17 00:00:00 2001 From: William Zhou Date: Wed, 17 Mar 2021 12:47:56 -0700 Subject: [PATCH 5/6] remove lines, fix indentation --- bson/raw_bson.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/bson/raw_bson.py b/bson/raw_bson.py index 15b5b61d6c..60165d3e05 100644 --- a/bson/raw_bson.py +++ b/bson/raw_bson.py @@ -21,7 +21,7 @@ .. testsetup:: from pymongo import MongoClient - client = MongoClient("localhost", 27017, document_class=RawBSONDocument) + client = MongoClient(document_class=RawBSONDocument) client.drop_database('db') client.drop_database('replica_db') @@ -30,14 +30,12 @@ >>> import bson >>> from pymongo import MongoClient >>> from bson.raw_bson import RawBSONDocument - >>> client = MongoClient("localhost", 27017, document_class=RawBSONDocument) + >>> client = MongoClient(document_class=RawBSONDocument) >>> db = client.db - >>> result = db.test.insert_many([ - ... {'a': 1}, - ... {'b': 1}, - ... {'c': 1}, - ... {'d': 1}]) - >>> assert result.acknowledged + >>> result = db.test.insert_many([{'a': 1}, + ... {'b': 1}, + ... {'c': 1}, + ... {'d': 1}]) >>> replica_db = client.replica_db >>> for doc in db.test.find(): ... print(f"raw document: {doc.raw}") @@ -51,17 +49,6 @@ decoded document: {'_id': ObjectId('...'), 'c': 1} raw document: b'...' decoded document: {'_id': ObjectId('...'), 'd': 1} - >>> for doc in replica_db.test.find(): - ... print(f"raw document: {doc.raw}") - ... print(f"decoded document: {bson.decode(doc.raw)}") - raw document: b'...' - decoded document: {'_id': ObjectId('...'), 'a': 1} - raw document: b'...' - decoded document: {'_id': ObjectId('...'), 'b': 1} - raw document: b'...' - decoded document: {'_id': ObjectId('...'), 'c': 1} - raw document: b'...' - decoded document: {'_id': ObjectId('...'), 'd': 1} For use cases like moving documents across different databases or writing binary blobs to disk, using raw BSON documents provides better speed and avoids the From f00fdb9575831cced3033d76676b6efa231d2b93 Mon Sep 17 00:00:00 2001 From: William Zhou Date: Wed, 17 Mar 2021 13:20:07 -0700 Subject: [PATCH 6/6] remove lines, fix indentation --- bson/raw_bson.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bson/raw_bson.py b/bson/raw_bson.py index 60165d3e05..4ee0394ad4 100644 --- a/bson/raw_bson.py +++ b/bson/raw_bson.py @@ -40,7 +40,7 @@ >>> for doc in db.test.find(): ... print(f"raw document: {doc.raw}") ... print(f"decoded document: {bson.decode(doc.raw)}") - ... replica_db.test.insert_one(doc) + ... result = replica_db.test.insert_one(doc) raw document: b'...' decoded document: {'_id': ObjectId('...'), 'a': 1} raw document: b'...'