20
20
from typing import Dict , Union , Optional , Callable
21
21
22
22
from fastavro import schemaless_reader , schemaless_writer
23
-
24
- from confluent_kafka .schema_registry .common .avro import AvroSchema , _schema_loads , get_inline_tags , parse_schema_with_repo , transform
23
+ from confluent_kafka .schema_registry .common import asyncinit
24
+ from confluent_kafka .schema_registry .common .avro import AvroSchema , _schema_loads , \
25
+ get_inline_tags , parse_schema_with_repo , transform , _ContextStringIO , AVRO_TYPE
25
26
26
27
from confluent_kafka .schema_registry import (_MAGIC_BYTE ,
27
- Schema ,
28
- topic_subject_name_strategy ,
29
- RuleMode ,
30
- AsyncSchemaRegistryClient )
28
+ Schema ,
29
+ topic_subject_name_strategy ,
30
+ RuleMode ,
31
+ AsyncSchemaRegistryClient ,
32
+ prefix_schema_id_serializer ,
33
+ dual_schema_id_deserializer )
31
34
from confluent_kafka .serialization import (SerializationError ,
32
35
SerializationContext )
33
- from confluent_kafka .schema_registry .common import asyncinit
34
- from confluent_kafka .schema_registry .common import _ContextStringIO
35
36
from confluent_kafka .schema_registry .rule_registry import RuleRegistry
36
- from confluent_kafka .schema_registry .serde import AsyncBaseSerializer , AsyncBaseDeserializer , ParsedSchemaCache
37
+ from confluent_kafka .schema_registry .serde import AsyncBaseSerializer , AsyncBaseDeserializer , ParsedSchemaCache , SchemaId
38
+
37
39
38
40
__all__ = [
39
41
'_resolve_named_schema' ,
40
42
'AsyncAvroSerializer' ,
41
43
'AsyncAvroDeserializer' ,
42
44
]
43
45
46
+
44
47
async def _resolve_named_schema (
45
48
schema : Schema , schema_registry_client : AsyncSchemaRegistryClient
46
49
) -> Dict [str , AvroSchema ]:
@@ -113,6 +116,12 @@ class AsyncAvroSerializer(AsyncBaseSerializer):
113
116
| | | |
114
117
| | | Defaults to topic_subject_name_strategy. |
115
118
+-----------------------------+----------+--------------------------------------------------+
119
+ | | | Callable(bytes, SerializationContext, schema_id) |
120
+ | | | -> bytes |
121
+ | | | |
122
+ | ``schema.id.serializer`` | callable | Defines how the schema id/guid is serialized. |
123
+ | | | Defaults to prefix_schema_id_serializer. |
124
+ +-----------------------------+----------+--------------------------------------------------+
116
125
117
126
Schemas are registered against subject names in Confluent Schema Registry that
118
127
define a scope in which the schemas can be evolved. By default, the subject name
@@ -172,7 +181,8 @@ class AsyncAvroSerializer(AsyncBaseSerializer):
172
181
'use.schema.id' : None ,
173
182
'use.latest.version' : False ,
174
183
'use.latest.with.metadata' : None ,
175
- 'subject.name.strategy' : topic_subject_name_strategy }
184
+ 'subject.name.strategy' : topic_subject_name_strategy ,
185
+ 'schema.id.serializer' : prefix_schema_id_serializer }
176
186
177
187
async def __init__ (
178
188
self ,
@@ -234,6 +244,10 @@ async def __init__(
234
244
self ._subject_name_func = conf_copy .pop ('subject.name.strategy' )
235
245
if not callable (self ._subject_name_func ):
236
246
raise ValueError ("subject.name.strategy must be callable" )
247
+
248
+ self ._schema_id_deserializer = conf_copy .pop ('schema.id.deserializer' )
249
+ if not callable (self ._schema_id_deserializer ):
250
+ raise ValueError ("schema.id.deserializer must be callable" )
237
251
238
252
if len (conf_copy ) > 0 :
239
253
raise ValueError ("Unrecognized properties: {}"
@@ -297,19 +311,20 @@ async def __serialize(self, obj: object, ctx: Optional[SerializationContext] = N
297
311
subject = self ._subject_name_func (ctx , self ._schema_name )
298
312
latest_schema = await self ._get_reader_schema (subject )
299
313
if latest_schema is not None :
300
- self ._schema_id = latest_schema .schema_id
314
+ self ._schema_id = SchemaId ( AVRO_TYPE , latest_schema .schema_id , latest_schema . guid )
301
315
elif subject not in self ._known_subjects :
302
316
# Check to ensure this schema has been registered under subject_name.
303
317
if self ._auto_register :
304
318
# The schema name will always be the same. We can't however register
305
319
# a schema without a subject so we set the schema_id here to handle
306
320
# the initial registration.
307
- self . _schema_id = await self ._registry .register_schema (
321
+ registered_schema = await self ._registry .register_schema_full_response (
308
322
subject , self ._schema , self ._normalize_schemas )
323
+ self ._schema_id = SchemaId (AVRO_TYPE , registered_schema .schema_id , registered_schema .guid )
309
324
else :
310
325
registered_schema = await self ._registry .lookup_schema (
311
326
subject , self ._schema , self ._normalize_schemas )
312
- self ._schema_id = registered_schema .schema_id
327
+ self ._schema_id = SchemaId ( AVRO_TYPE , registered_schema .schema_id , registered_schema . guid )
313
328
314
329
self ._known_subjects .add (subject )
315
330
@@ -320,7 +335,7 @@ async def __serialize(self, obj: object, ctx: Optional[SerializationContext] = N
320
335
321
336
if latest_schema is not None :
322
337
parsed_schema = await self ._get_parsed_schema (latest_schema .schema )
323
- field_transformer = lambda rule_ctx , field_transform , msg : ( # noqa: E731
338
+ def field_transformer ( rule_ctx , field_transform , msg ): return ( # noqa: E731
324
339
transform (rule_ctx , parsed_schema , msg , field_transform ))
325
340
value = self ._execute_rules (ctx , subject , RuleMode .WRITE , None ,
326
341
latest_schema .schema , value , get_inline_tags (parsed_schema ),
@@ -334,7 +349,7 @@ async def __serialize(self, obj: object, ctx: Optional[SerializationContext] = N
334
349
# write the record to the rest of the buffer
335
350
schemaless_writer (fo , parsed_schema , value )
336
351
337
- return fo .getvalue ()
352
+ return self . _schema_id_serializer ( fo .getvalue (), ctx , self . _schema_id )
338
353
339
354
async def _get_parsed_schema (self , schema : Schema ) -> AvroSchema :
340
355
parsed_schema = self ._parsed_schemas .get_parsed_schema (schema )
@@ -378,7 +393,12 @@ class AsyncAvroDeserializer(AsyncBaseDeserializer):
378
393
| | | |
379
394
| | | Defaults to topic_subject_name_strategy. |
380
395
+-----------------------------+----------+--------------------------------------------------+
381
-
396
+ | | | Callable(bytes, SerializationContext, schema_id) |
397
+ | | | -> io.BytesIO |
398
+ | | | |
399
+ | ``schema.id.deserializer`` | callable | Defines how the schema id/guid is deserialized. |
400
+ | | | Defaults to dual_schema_id_deserializer. |
401
+ +-----------------------------+----------+--------------------------------------------------+
382
402
Note:
383
403
By default, Avro complex types are returned as dicts. This behavior can
384
404
be overridden by registering a callable ``from_dict`` with the deserializer to
@@ -415,7 +435,8 @@ class AsyncAvroDeserializer(AsyncBaseDeserializer):
415
435
416
436
_default_conf = {'use.latest.version' : False ,
417
437
'use.latest.with.metadata' : None ,
418
- 'subject.name.strategy' : topic_subject_name_strategy }
438
+ 'subject.name.strategy' : topic_subject_name_strategy ,
439
+ 'schema.id.deserializer' : dual_schema_id_deserializer }
419
440
420
441
async def __init__ (
421
442
self ,
@@ -460,6 +481,11 @@ async def __init__(
460
481
if not callable (self ._subject_name_func ):
461
482
raise ValueError ("subject.name.strategy must be callable" )
462
483
484
+ self ._schema_id_serializer = conf_copy .pop ('schema.id.serializer' )
485
+ if not callable (self ._schema_id_serializer ):
486
+ raise ValueError ("schema.id.serializer must be callable" )
487
+
488
+
463
489
if len (conf_copy ) > 0 :
464
490
raise ValueError ("Unrecognized properties: {}"
465
491
.format (", " .join (conf_copy .keys ())))
@@ -513,61 +539,61 @@ async def __deserialize(self, data: bytes, ctx: Optional[SerializationContext] =
513
539
"message was not produced with a Confluent "
514
540
"Schema Registry serializer" .format (len (data )))
515
541
516
- subject = self ._subject_name_func (ctx , None )
542
+ subject = self ._subject_name_func (ctx , None ) if ctx else None
517
543
latest_schema = None
518
544
if subject is not None :
519
545
latest_schema = await self ._get_reader_schema (subject )
520
546
521
- with _ContextStringIO ( data ) as payload :
522
- magic , schema_id = unpack ( '>bI' , payload . read ( 5 ) )
523
- if magic != _MAGIC_BYTE :
524
- raise SerializationError ( "Unexpected magic byte {}. This message "
525
- "was not produced with a Confluent "
526
- "Schema Registry serializer" . format ( magic ))
527
-
528
- writer_schema_raw = await self ._registry . get_schema ( schema_id )
529
- writer_schema = await self . _get_parsed_schema ( writer_schema_raw )
530
-
531
- if subject is None :
532
- subject = self . _subject_name_func ( ctx , writer_schema . get ( "name" ))
533
- if subject is not None :
534
- latest_schema = await self . _get_reader_schema ( subject )
535
-
536
- if latest_schema is not None :
537
- migrations = self . _get_migrations ( subject , writer_schema_raw , latest_schema , None )
538
- reader_schema_raw = latest_schema . schema
539
- reader_schema = await self ._get_parsed_schema ( latest_schema . schema )
540
- elif self . _schema is not None :
541
- migrations = None
542
- reader_schema_raw = self . _schema
543
- reader_schema = self . _reader_schema
544
- else :
545
- migrations = None
546
- reader_schema_raw = writer_schema_raw
547
- reader_schema = writer_schema
548
-
549
- if migrations :
550
- obj_dict = schemaless_reader ( payload ,
551
- writer_schema ,
552
- None ,
553
- self . _return_record_name )
554
- obj_dict = self . _execute_migrations ( ctx , subject , migrations , obj_dict )
555
- else :
556
- obj_dict = schemaless_reader ( payload ,
557
- writer_schema ,
558
- reader_schema ,
559
- self . _return_record_name )
547
+ schema_id = SchemaId ( AVRO_TYPE )
548
+ payload = self . _schema_id_deserializer ( data , ctx , schema_id )
549
+
550
+ writer_schema_raw = self . _get_writer_schema ( schema_id , subject )
551
+ writer_schema = self . _get_parsed_schema ( writer_schema_raw )
552
+
553
+ if subject is None :
554
+ subject = self ._subject_name_func ( ctx , writer_schema . get ( "name" )) if ctx else None
555
+ if subject is not None :
556
+ latest_schema = await self . _get_reader_schema ( subject )
557
+
558
+ if latest_schema is not None :
559
+ migrations = self . _get_migrations ( subject , writer_schema_raw , latest_schema , None )
560
+ reader_schema_raw = latest_schema . schema
561
+ reader_schema = await self . _get_parsed_schema ( latest_schema . schema )
562
+ elif self . _schema is not None :
563
+ migrations = None
564
+ reader_schema_raw = self . _schema
565
+ reader_schema = self ._reader_schema
566
+ else :
567
+ migrations = None
568
+ reader_schema_raw = writer_schema_raw
569
+ reader_schema = writer_schema
570
+
571
+ if migrations :
572
+ obj_dict = schemaless_reader ( payload ,
573
+ writer_schema ,
574
+ None ,
575
+ self . _return_record_name )
576
+ obj_dict = self . _execute_migrations ( ctx , subject , migrations , obj_dict )
577
+ else :
578
+ obj_dict = schemaless_reader ( payload ,
579
+ writer_schema ,
580
+ reader_schema ,
581
+ self . _return_record_name )
582
+
583
+
584
+
585
+
560
586
561
- field_transformer = lambda rule_ctx , field_transform , message : ( # noqa: E731
562
- transform (rule_ctx , reader_schema , message , field_transform ))
563
- obj_dict = self ._execute_rules (ctx , subject , RuleMode .READ , None ,
564
- reader_schema_raw , obj_dict , get_inline_tags (reader_schema ),
565
- field_transformer )
587
+ field_transformer = lambda rule_ctx , field_transform , message : ( # noqa: E731
588
+ transform (rule_ctx , reader_schema , message , field_transform ))
589
+ obj_dict = self ._execute_rules (ctx , subject , RuleMode .READ , None ,
590
+ reader_schema_raw , obj_dict , get_inline_tags (reader_schema ),
591
+ field_transformer )
566
592
567
- if self ._from_dict is not None :
568
- return self ._from_dict (obj_dict , ctx )
593
+ if self ._from_dict is not None :
594
+ return self ._from_dict (obj_dict , ctx )
569
595
570
- return obj_dict
596
+ return obj_dict
571
597
572
598
async def _get_parsed_schema (self , schema : Schema ) -> AvroSchema :
573
599
parsed_schema = self ._parsed_schemas .get_parsed_schema (schema )
0 commit comments