Skip to content

Commit a0b700a

Browse files
Merge pull request #48 from networktocode/gfm-progress-bar
Add progress callback and example
2 parents 269df51 + 97aff5f commit a0b700a

File tree

12 files changed

+371
-42
lines changed

12 files changed

+371
-42
lines changed

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@ A.sync_from(B)
2222
A.sync_to(B)
2323
```
2424

25+
You may wish to peruse the [`diffsync` GitHub topic](https://github.com/topics/diffsync) for examples of projects using this library.
26+
2527
# Getting started
2628

27-
To be able to properly compare different datasets, DiffSync relies on a shared datamodel that both systems must use.
29+
To be able to properly compare different datasets, DiffSync relies on a shared data model that both systems must use.
2830
Specifically, each system or dataset must provide a `DiffSync` "adapter" subclass, which in turn represents its dataset as instances of one or more `DiffSyncModel` data model classes.
2931

3032
When comparing two systems, DiffSync detects the intersection between the two systems (which data models they have in common, and which attributes are shared between each pair of data models) and uses this intersection to compare and/or synchronize the data.
@@ -39,9 +41,9 @@ Each `DiffSyncModel` subclass supports the following class-level attributes:
3941
- `_attributes` - List of non-identifier instance field names for this object; used to identify the fields in common between data models for different systems (Optional)
4042
- `_children` - Dict of `{<model_name>: <field_name>}` indicating which fields store references to child data model instances. (Optional)
4143

42-
> DiffSyncModel instances must be uniquely identified by their unique id, composed of all fields defined in `_identifiers`. The unique id must be globally meaningful (such as an unique instance name or slug), as it is used to identify object correspondence between differing systems or data sets. It **must not** be a value that is only locally meaningful, such as a database primary key integer value.
44+
> DiffSyncModel instances must be uniquely identified by their unique ID (or, in database terminology, [natural key](https://en.wikipedia.org/wiki/Natural_key)), which is composed of the union of all fields defined in `_identifiers`. The unique ID must be globally meaningful (such as a unique instance name or slug), as it is used to identify object correspondence between differing systems or data sets. It **must not** be a value that is only locally meaningful to a specific data set, such as a database primary key value.
4345
44-
> Only fields listed in `_identifiers`, `_attributes`, or `_children` will be potentially included in comparison and synchronization between systems or data sets. Any other fields will be ignored; this allows for a model to additionally contain fields that are only locally relevant (such as database primary key values) and therefore are irrelevant to comparisons.
46+
> Only fields listed in `_identifiers`, `_attributes`, or `_children` will be potentially included in comparison and synchronization between systems or data sets. Any other fields will be ignored; this allows for a model to additionally contain fields that are only locally relevant (such as database primary key values) and therefore are irrelevant to comparison and synchronization.
4547
4648
```python
4749
from typing import List, Optional

diffsync/__init__.py

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""DiffSync front-end classes and logic.
22
3-
Copyright (c) 2020 Network To Code, LLC <info@networktocode.com>
3+
Copyright (c) 2020-2021 Network To Code, LLC <info@networktocode.com>
44
55
Licensed under the Apache License, Version 2.0 (the "License");
66
you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
1616
"""
1717
from collections import defaultdict
1818
from inspect import isclass
19-
from typing import ClassVar, Dict, List, Mapping, MutableMapping, Optional, Text, Tuple, Type, Union
19+
from typing import Callable, ClassVar, Dict, List, Mapping, MutableMapping, Optional, Text, Tuple, Type, Union
2020

2121
from pydantic import BaseModel, PrivateAttr
2222
import structlog # type: ignore
@@ -359,7 +359,7 @@ def remove_child(self, child: "DiffSyncModel"):
359359
class DiffSync:
360360
"""Class for storing a group of DiffSyncModel instances and diffing/synchronizing to another DiffSync instance."""
361361

362-
# Add mapping of names to specific model classes here:
362+
# In any subclass, you would add mappings of names to specific model classes here:
363363
# modelname1 = MyModelClass1
364364
# modelname2 = MyModelClass2
365365

@@ -418,6 +418,10 @@ def __str__(self):
418418
def __repr__(self):
419419
return f"<{str(self)}>"
420420

421+
def __len__(self):
422+
"""Total number of elements stored in self._data."""
423+
return sum(len(entries) for entries in self._data.values())
424+
421425
def load(self):
422426
"""Load all desired data from whatever backend data source into this instance."""
423427
# No-op in this generic class
@@ -451,29 +455,45 @@ def str(self, indent: int = 0) -> str:
451455
# Synchronization between DiffSync instances
452456
# ------------------------------------------------------------------------------
453457

454-
def sync_from(self, source: "DiffSync", diff_class: Type[Diff] = Diff, flags: DiffSyncFlags = DiffSyncFlags.NONE):
458+
def sync_from(
459+
self,
460+
source: "DiffSync",
461+
diff_class: Type[Diff] = Diff,
462+
flags: DiffSyncFlags = DiffSyncFlags.NONE,
463+
callback: Optional[Callable[[Text, int, int], None]] = None,
464+
):
455465
"""Synchronize data from the given source DiffSync object into the current DiffSync object.
456466
457467
Args:
458468
source (DiffSync): object to sync data from into this one
459469
diff_class (class): Diff or subclass thereof to use to calculate the diffs to use for synchronization
460470
flags (DiffSyncFlags): Flags influencing the behavior of this sync.
471+
callback (function): Function with parameters (stage, current, total), to be called at intervals as the
472+
calculation of the diff and subsequent sync proceed.
461473
"""
462-
diff = self.diff_from(source, diff_class=diff_class, flags=flags)
463-
syncer = DiffSyncSyncer(diff=diff, src_diffsync=source, dst_diffsync=self, flags=flags)
474+
diff = self.diff_from(source, diff_class=diff_class, flags=flags, callback=callback)
475+
syncer = DiffSyncSyncer(diff=diff, src_diffsync=source, dst_diffsync=self, flags=flags, callback=callback)
464476
result = syncer.perform_sync()
465477
if result:
466478
self.sync_complete(source, diff, flags, syncer.base_logger)
467479

468-
def sync_to(self, target: "DiffSync", diff_class: Type[Diff] = Diff, flags: DiffSyncFlags = DiffSyncFlags.NONE):
480+
def sync_to(
481+
self,
482+
target: "DiffSync",
483+
diff_class: Type[Diff] = Diff,
484+
flags: DiffSyncFlags = DiffSyncFlags.NONE,
485+
callback: Optional[Callable[[Text, int, int], None]] = None,
486+
):
469487
"""Synchronize data from the current DiffSync object into the given target DiffSync object.
470488
471489
Args:
472490
target (DiffSync): object to sync data into from this one.
473491
diff_class (class): Diff or subclass thereof to use to calculate the diffs to use for synchronization
474492
flags (DiffSyncFlags): Flags influencing the behavior of this sync.
493+
callback (function): Function with parameters (stage, current, total), to be called at intervals as the
494+
calculation of the diff and subsequent sync proceed.
475495
"""
476-
target.sync_from(self, diff_class=diff_class, flags=flags)
496+
target.sync_from(self, diff_class=diff_class, flags=flags, callback=callback)
477497

478498
def sync_complete(
479499
self,
@@ -502,29 +522,43 @@ def sync_complete(
502522
# ------------------------------------------------------------------------------
503523

504524
def diff_from(
505-
self, source: "DiffSync", diff_class: Type[Diff] = Diff, flags: DiffSyncFlags = DiffSyncFlags.NONE
525+
self,
526+
source: "DiffSync",
527+
diff_class: Type[Diff] = Diff,
528+
flags: DiffSyncFlags = DiffSyncFlags.NONE,
529+
callback: Optional[Callable[[Text, int, int], None]] = None,
506530
) -> Diff:
507531
"""Generate a Diff describing the difference from the other DiffSync to this one.
508532
509533
Args:
510534
source (DiffSync): Object to diff against.
511535
diff_class (class): Diff or subclass thereof to use for diff calculation and storage.
512536
flags (DiffSyncFlags): Flags influencing the behavior of this diff operation.
537+
callback (function): Function with parameters (stage, current, total), to be called at intervals as the
538+
calculation of the diff proceeds.
513539
"""
514-
differ = DiffSyncDiffer(src_diffsync=source, dst_diffsync=self, flags=flags, diff_class=diff_class)
540+
differ = DiffSyncDiffer(
541+
src_diffsync=source, dst_diffsync=self, flags=flags, diff_class=diff_class, callback=callback
542+
)
515543
return differ.calculate_diffs()
516544

517545
def diff_to(
518-
self, target: "DiffSync", diff_class: Type[Diff] = Diff, flags: DiffSyncFlags = DiffSyncFlags.NONE
546+
self,
547+
target: "DiffSync",
548+
diff_class: Type[Diff] = Diff,
549+
flags: DiffSyncFlags = DiffSyncFlags.NONE,
550+
callback: Optional[Callable[[Text, int, int], None]] = None,
519551
) -> Diff:
520552
"""Generate a Diff describing the difference from this DiffSync to another one.
521553
522554
Args:
523555
target (DiffSync): Object to diff against.
524556
diff_class (class): Diff or subclass thereof to use for diff calculation and storage.
525557
flags (DiffSyncFlags): Flags influencing the behavior of this diff operation.
558+
callback (function): Function with parameters (stage, current, total), to be called at intervals as the
559+
calculation of the diff proceeds.
526560
"""
527-
return target.diff_from(self, diff_class=diff_class, flags=flags)
561+
return target.diff_from(self, diff_class=diff_class, flags=flags, callback=callback)
528562

529563
# ------------------------------------------------------------------------------
530564
# Object Storage Management
@@ -567,21 +601,21 @@ def get(
567601
raise ObjectNotFound(f"{modelname} {uid} not present in {self.name}")
568602
return self._data[modelname][uid]
569603

570-
def get_all(self, obj: Union[Text, DiffSyncModel, Type[DiffSyncModel]]):
604+
def get_all(self, obj: Union[Text, DiffSyncModel, Type[DiffSyncModel]]) -> List[DiffSyncModel]:
571605
"""Get all objects of a given type.
572606
573607
Args:
574608
obj: DiffSyncModel class or instance, or modelname string, that defines the type of the objects to retrieve
575609
576610
Returns:
577-
ValuesList[DiffSyncModel]: List of Object
611+
List[DiffSyncModel]: List of all stored objects of the given type
578612
"""
579613
if isinstance(obj, str):
580614
modelname = obj
581615
else:
582616
modelname = obj.get_type()
583617

584-
return self._data[modelname].values()
618+
return list(self._data[modelname].values())
585619

586620
def get_by_uids(
587621
self, uids: List[Text], obj: Union[Text, DiffSyncModel, Type[DiffSyncModel]]

diffsync/diff.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Diff and DiffElement classes for DiffSync.
22
3-
Copyright (c) 2020 Network To Code, LLC <info@networktocode.com>
3+
Copyright (c) 2020-2021 Network To Code, LLC <info@networktocode.com>
44
55
Licensed under the Apache License, Version 2.0 (the "License");
66
you may not use this file except in compliance with the License.
@@ -33,6 +33,13 @@ def __init__(self):
3333
`self.children[group][unique_id] == DiffElement(...)`
3434
"""
3535

36+
def __len__(self):
37+
"""Total number of DiffElements stored herein."""
38+
total = 0
39+
for child in self.get_children():
40+
total += len(child)
41+
return total
42+
3643
def complete(self):
3744
"""Method to call when this Diff has been fully populated with data and is "complete".
3845
@@ -205,6 +212,13 @@ def __str__(self):
205212
f"{self.source_name}{self.dest_name} : {self.get_attrs_diffs()}"
206213
)
207214

215+
def __len__(self):
216+
"""Total number of DiffElements in this one, including itself."""
217+
total = 1 # self
218+
for child in self.get_children():
219+
total += len(child)
220+
return total
221+
208222
@property
209223
def action(self) -> Optional[Text]:
210224
"""Action, if any, that should be taken to remediate the diffs described by this element.

0 commit comments

Comments
 (0)