Skip to content

Robust auto-cleaning functionality #222

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Feb 3, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
2a031df
Quick draft of auto-cleaning functionality
Archmonger Jan 31, 2024
f8b99fc
remove auto_clean from config name
Archmonger Feb 1, 2024
685db22
Add type hinting to _cached_static_contents function
Archmonger Feb 1, 2024
eba80d3
ignore_config
Archmonger Feb 1, 2024
2e089de
docs cleanup
Archmonger Feb 1, 2024
4ea41f5
CLEAN_NEEDED_BY caching
Archmonger Feb 1, 2024
836d719
fix link
Archmonger Feb 1, 2024
dbf7e12
fix tests
Archmonger Feb 1, 2024
e1bd185
minor docs wordsmithing
Archmonger Feb 1, 2024
e2c088f
Merge remote-tracking branch 'upstream/main' into robust-cleanup-config
Archmonger Feb 1, 2024
43685b8
Add management command
Archmonger Feb 1, 2024
6c1ecf0
add changelog entries
Archmonger Feb 1, 2024
ef90460
Add checks for new config values
Archmonger Feb 1, 2024
5a57807
better interface for management command
Archmonger Feb 1, 2024
75670a8
ignore settings.py attributes for management command calls
Archmonger Feb 2, 2024
3594cee
add a check for common misspelling
Archmonger Feb 2, 2024
da855dd
Add comment for a bugfix
Archmonger Feb 2, 2024
107b64c
Auto-Clean Settings docs
Archmonger Feb 2, 2024
c6b8e13
fix a few stale docs lines
Archmonger Feb 2, 2024
43ee1ac
remove dead LOC
Archmonger Feb 2, 2024
2799ed0
Django Query Postprocessor docs cleanup
Archmonger Feb 2, 2024
e8fe1da
allow REACTPY_CLEAN_INTERVAL to be None
Archmonger Feb 2, 2024
08e1025
prepare for management command docs
Archmonger Feb 3, 2024
32ceea6
management command docs
Archmonger Feb 3, 2024
3385887
minor docs styling change
Archmonger Feb 3, 2024
e0c771d
avoid django timezone module
Archmonger Feb 3, 2024
05554dc
fix tests
Archmonger Feb 3, 2024
ad62a68
fix tests in a different way
Archmonger Feb 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions src/reactpy_django/clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import logging
from datetime import datetime, timedelta
from typing import TYPE_CHECKING

from django.contrib.auth import get_user_model
from django.utils import timezone

from reactpy_django.utils import get_pk

_logger = logging.getLogger(__name__)

if TYPE_CHECKING:
from reactpy_django.models import Config


def clean_all(immediate: bool = False, manual_clean=True):
from reactpy_django.config import (
REACTPY_AUTO_CLEAN_SESSIONS,
REACTPY_AUTO_CLEAN_USER_DATA,
)
from reactpy_django.models import Config

config = Config.load()
if immediate or is_clean_needed(config):
if manual_clean or REACTPY_AUTO_CLEAN_SESSIONS:
clean_sessions()
if manual_clean or REACTPY_AUTO_CLEAN_USER_DATA:
clean_user_data()
config.cleaned_at = timezone.now()
config.save()


def clean_sessions():
"""Deletes expired component sessions from the database.
As a performance optimization, this is only run once every REACTPY_SESSION_MAX_AGE seconds.
"""
from reactpy_django.config import REACTPY_DEBUG_MODE, REACTPY_SESSION_MAX_AGE
from reactpy_django.models import ComponentSession

start_time = timezone.now()
expiration_date = timezone.now() - timedelta(seconds=REACTPY_SESSION_MAX_AGE)
ComponentSession.objects.filter(last_accessed__lte=expiration_date).delete()

if REACTPY_DEBUG_MODE:
inspect_clean_duration(start_time, "component sessions")


def clean_user_data():
"""Delete any user data that is not associated with an existing `User`.
This is a safety measure to ensure that we don't have any orphaned data in the database.

Our `UserDataModel` is supposed to automatically get deleted on every `User` delete signal.
However, we can't use Django to enforce this relationship since ReactPy can be configured to
use any database.
"""
from reactpy_django.config import REACTPY_DEBUG_MODE
from reactpy_django.models import UserDataModel

start_time = timezone.now()
user_model = get_user_model()
all_users = user_model.objects.all()
all_user_pks = all_users.values_list(user_model._meta.pk.name, flat=True) # type: ignore
UserDataModel.objects.exclude(user_pk__in=all_user_pks).delete()

if REACTPY_DEBUG_MODE:
inspect_clean_duration(start_time, "user data")


def is_clean_needed(config: Config | None = None) -> bool:
from reactpy_django.config import REACTPY_SESSION_MAX_AGE
from reactpy_django.models import Config

config = config or Config.load()
cleaned_at = config.cleaned_at
clean_needed_by = cleaned_at + timedelta(seconds=REACTPY_SESSION_MAX_AGE)

return timezone.now() >= clean_needed_by


def inspect_clean_duration(start_time: datetime, task_name: str):
clean_duration = timezone.now() - start_time
if clean_duration.total_seconds() > 1:
_logger.warning(
"ReactPy has taken %s seconds to clean %s. "
"This may indicate a performance issue with your system, cache, or database.",
clean_duration.total_seconds(),
task_name,
)
15 changes: 15 additions & 0 deletions src/reactpy_django/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,18 @@
"REACTPY_AUTO_RELOGIN",
False,
)
REACTPY_AUTO_CLEAN_INTERVAL: int = getattr(
settings,
"REACTPY_AUTO_CLEAN_INTERVAL",
604800, # Default to 7 days
)
REACTPY_AUTO_CLEAN_SESSIONS: bool = getattr(
settings,
"REACTPY_AUTO_CLEAN_SESSIONS",
True,
)
REACTPY_AUTO_CLEAN_USER_DATA: bool = getattr(
settings,
"REACTPY_AUTO_CLEAN_USER_DATA",
True,
)
6 changes: 3 additions & 3 deletions src/reactpy_django/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
QueryOptions,
UserData,
)
from reactpy_django.utils import generate_obj_name, get_user_pk
from reactpy_django.utils import generate_obj_name, get_pk

if TYPE_CHECKING:
from django.contrib.auth.models import AbstractUser
Expand Down Expand Up @@ -344,7 +344,7 @@ async def _set_user_data(data: dict):
if user.is_anonymous:
raise ValueError("AnonymousUser cannot have user data.")

pk = get_user_pk(user)
pk = get_pk(user)
model, _ = await UserDataModel.objects.aget_or_create(user_pk=pk)
model.data = pickle.dumps(data)
await model.asave()
Expand Down Expand Up @@ -386,7 +386,7 @@ async def _get_user_data(
if not user or user.is_anonymous:
return None

pk = get_user_pk(user)
pk = get_pk(user)
model, _ = await UserDataModel.objects.aget_or_create(user_pk=pk)
data = pickle.loads(model.data) if model.data else {}

Expand Down
40 changes: 3 additions & 37 deletions src/reactpy_django/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,17 @@
import os
import re
from asyncio import iscoroutinefunction
from datetime import timedelta
from fnmatch import fnmatch
from importlib import import_module
from typing import Any, Callable, Sequence

import orjson as pickle
from asgiref.sync import async_to_sync
from channels.db import database_sync_to_async
from django.db.models import ManyToManyField, ManyToOneRel, prefetch_related_objects
from django.db.models.base import Model
from django.db.models.query import QuerySet
from django.http import HttpRequest, HttpResponse
from django.template import engines
from django.utils import timezone
from django.utils.encoding import smart_str
from django.views import View
from reactpy.core.layout import Layout
Expand Down Expand Up @@ -351,36 +348,6 @@ def create_cache_key(*args):
return f"reactpy_django:{':'.join(str(arg) for arg in args)}"


def delete_expired_sessions(immediate: bool = False):
"""Deletes expired component sessions from the database.
As a performance optimization, this is only run once every REACTPY_SESSION_MAX_AGE seconds.
"""
from .config import REACTPY_DEBUG_MODE, REACTPY_SESSION_MAX_AGE
from .models import ComponentSession, Config

config = Config.load()
start_time = timezone.now()
cleaned_at = config.cleaned_at
clean_needed_by = cleaned_at + timedelta(seconds=REACTPY_SESSION_MAX_AGE)

# Delete expired component parameters
if immediate or timezone.now() >= clean_needed_by:
expiration_date = timezone.now() - timedelta(seconds=REACTPY_SESSION_MAX_AGE)
ComponentSession.objects.filter(last_accessed__lte=expiration_date).delete()
config.cleaned_at = timezone.now()
config.save()

# Check if cleaning took abnormally long
if REACTPY_DEBUG_MODE:
clean_duration = timezone.now() - start_time
if clean_duration.total_seconds() > 1:
_logger.warning(
"ReactPy has taken %s seconds to clean up expired component sessions. "
"This may indicate a performance issue with your system, cache, or database.",
clean_duration.total_seconds(),
)


class SyncLayout(Layout):
"""Sync adapter for ReactPy's `Layout`. Allows it to be used in Django template tags.
This can be removed when Django supports async template tags.
Expand All @@ -397,7 +364,6 @@ def render(self):
return async_to_sync(super().render)()


def get_user_pk(user, serialize=False):
"""Returns the primary key value for a user model instance."""
pk = getattr(user, user._meta.pk.name)
return pickle.dumps(pk) if serialize else pk
def get_pk(model):
"""Returns the value of the primary key for a Django model."""
return getattr(model, model._meta.pk.name)
18 changes: 10 additions & 8 deletions src/reactpy_django/websocket/consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
from reactpy.core.layout import Layout
from reactpy.core.serve import serve_layout

from reactpy_django.clean import clean_all
from reactpy_django.types import ComponentParams
from reactpy_django.utils import delete_expired_sessions

if TYPE_CHECKING:
from django.contrib.auth.models import AbstractUser
Expand Down Expand Up @@ -96,27 +96,29 @@ async def connect(self) -> None:

async def disconnect(self, code: int) -> None:
"""The browser has disconnected."""
from reactpy_django.config import REACTPY_AUTO_CLEAN_INTERVAL

self.dispatcher.cancel()

# Update the component's last_accessed timestamp
if self.component_session:
# Clean up expired component sessions
try:
await database_sync_to_async(delete_expired_sessions)()
await self.component_session.asave()
except Exception:
await asyncio.to_thread(
_logger.error,
"ReactPy has failed to delete expired component sessions!\n"
"ReactPy has failed to save component session!\n"
f"{traceback.format_exc()}",
)

# Update the last_accessed timestamp
# Queue a cleanup, if needed
if REACTPY_AUTO_CLEAN_INTERVAL > -1:
try:
await self.component_session.asave()
await database_sync_to_async(clean_all)()
except Exception:
await asyncio.to_thread(
_logger.error,
"ReactPy has failed to save component session!\n"
f"{traceback.format_exc()}",
"ReactPy cleaning failed!\n" f"{traceback.format_exc()}",
)

await super().disconnect(code)
Expand Down
41 changes: 35 additions & 6 deletions tests/test_app/tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

import dill as pickle
from django.test import TransactionTestCase
from reactpy_django import utils
from reactpy_django.models import ComponentSession
from reactpy_django import clean
from reactpy_django.models import ComponentSession, UserDataModel
from reactpy_django.types import ComponentParams


Expand All @@ -17,7 +17,7 @@ class RoutedDatabaseTests(TransactionTestCase):
@classmethod
def setUpClass(cls):
super().setUpClass()
utils.delete_expired_sessions(immediate=True)
clean.clean_sessions(immediate=True)

def test_component_params(self):
# Make sure the ComponentParams table is empty
Expand All @@ -27,7 +27,7 @@ def test_component_params(self):
# Check if a component params are in the database
self.assertEqual(ComponentSession.objects.count(), 1)
self.assertEqual(
pickle.loads(ComponentSession.objects.first().params), params_1 # type: ignore
pickle.loads(ComponentSession.objects.first().params), params_1
)

# Force `params_1` to expire
Expand All @@ -41,14 +41,43 @@ def test_component_params(self):
self.assertEqual(ComponentSession.objects.count(), 2)

# Delete the first component params based on expiration time
utils.delete_expired_sessions() # Don't use `immediate` to test timestamping logic
clean.clean_sessions() # Don't use `immediate` to test timestamping logic

# Make sure `params_1` has expired
self.assertEqual(ComponentSession.objects.count(), 1)
self.assertEqual(
pickle.loads(ComponentSession.objects.first().params), params_2 # type: ignore
pickle.loads(ComponentSession.objects.first().params), params_2
)

def test_user_data_cleanup(self):
from django.contrib.auth.models import User

# Create UserData for real user #1
user = User.objects.create_user(username=uuid4().hex, password=uuid4().hex)
user_data = UserDataModel(user_pk=user.pk)
user_data.save()

# Create UserData for real user #2
user = User.objects.create_user(username=uuid4().hex, password=uuid4().hex)
user_data = UserDataModel(user_pk=user.pk)
user_data.save()

# Keep track of the count of UserData objects
original_count = UserDataModel.objects.count()

# Create UserData for a fake user (orphaned)
user_data = UserDataModel(user_pk=uuid4().hex)
user_data.save()

# Make sure the orphaned user data object is deleted
self.assertNotEqual(UserDataModel.objects.count(), original_count)
clean.clean_user_data()
self.assertEqual(UserDataModel.objects.count(), original_count)

# Check if deleting a user deletes the associated UserData
user.delete()
self.assertEqual(UserDataModel.objects.count(), original_count - 1)

def _save_params_to_db(self, value: Any) -> ComponentParams:
db = list(self.databases)[0]
param_data = ComponentParams((value,), {"test_value": value})
Expand Down