draft: add delete batches #11

Open · wants to merge 2 commits into develop
47 changes: 35 additions & 12 deletions dbcleanup/management/commands/dbcleanup.py
@@ -1,14 +1,15 @@
 from django.core.management import CommandError, BaseCommand
 from django.conf import settings
 from django.db import connection, transaction
-from django.db.models import ManyToManyField
+from django.db.models import ManyToManyField, Max, Min, Q
 from django.utils import timezone
 from django.contrib.contenttypes.models import ContentType
 from django.db.migrations.loader import MigrationLoader
 
 from dbcleanup import utils, models
 
 REQUIRED_TABLES = {'django_migrations'}
+BATCH_SIZE = 5000
 
 
 class Command(BaseCommand):
@@ -139,21 +140,43 @@ def _clean_history(self, options):
             ct = ContentType.objects.get_by_natural_key(*model_tuple)
             # normalize model name to match against .delete() return labels (and for capitalized printing!)
             model = ct.model_class()._meta.label
-            q = ct.get_all_objects_for_this_type(**{f'{field}__lt': timezone.now() - timezone.timedelta(days=log_size)})
+            q = ct.get_all_objects_for_this_type()
+            filtered = q.filter(**{f"{field}__lt": timezone.now() - timezone.timedelta(days=log_size)}).aggregate(
+                Min("id"), Max("id")
+            )
+            min_id = filtered["id__min"]
+            max_id = filtered["id__max"]
+            rows_deleted = {}
 
-            try:
-                deleted, rows_deleted = self._clean_history_intention(model, q, options)
-            except CascadeException as e:
-                _exit = 1
-                self.stderr.write(f'{model} cleanup aborted as it would cascade to:\n')
-                self._clean_history_print(e.args[2].items(), err=True)
-                continue
+            while True:
+                batch = q.filter(
+                    Q(id__lte=min_id + BATCH_SIZE),
+                    Q(id__gte=min_id),
+                    Q(**{f"{field}__lt": timezone.now() - timezone.timedelta(days=log_size)}),
+                )
+                if batch:
+                    try:
+                        deleted, batch_rows_deleted = self._clean_history_intention(model, batch, options)
+                        for k, v in batch_rows_deleted.items():
+                            if rows_deleted.get(k):
+                                rows_deleted[k] = rows_deleted[k] + v
+                            else:
+                                rows_deleted.update(batch_rows_deleted)
+                        break
+
+                    except CascadeException as e:
+                        _exit = 1
+                        self.stderr.write(f"{model} cleanup aborted as it would cascade to:\n")
+                        self._clean_history_print(e.args[2].items(), err=True)
+                        continue
+                min_id += BATCH_SIZE
+                if min_id > max_id:
+                    break
             if deleted:
-                if options['force'] or options['interactive']:
-                    self.stdout.write(f'{model} cleanup deleted:\n')
+                if options["force"] or options["interactive"]:
+                    self.stdout.write(f"{model} cleanup deleted:\n")
                 else:
-                    self.stdout.write(f'{model} cleanup would delete:\n')
+                    self.stdout.write(f"{model} cleanup would delete:\n")
                 self._clean_history_print(rows_deleted.items())
         return _exit

Expand Down