@http_basic_auth.get_password
def get_password(username: str) -> typing.Optional[str]:
    """Return the ``password`` attribute of the user named *username*.

    Returns ``None`` when no user with that username is found.
    """
    matching_user = models.User.get_or_none(models.User.username == username)
    if not matching_user:
        return None
    return matching_user.password


@http_basic_auth.verify_password
def verify_password(username: str, client_password: str):
    """Check HTTP basic-auth credentials.

    Returns the matching user when *client_password* passes the user's
    ``is_password_valid`` check, and ``False`` when the user does not exist
    or the password is rejected.
    """
    candidate = models.User.get_or_none(models.User.username == username)
    if candidate is not None and candidate.is_password_valid(client_password):
        return candidate
    return False
class _GitOperation(typing.NamedTuple):
    """Description of one git smart-HTTP request and how to answer it."""

    # Value sent back in the response's Content-Type header.
    response_content_type: str
    # argv of the git service process to run.
    service_command: typing.List[str]
    # False when the requested service is not one of the allowed commands.
    supported: bool
    # Optional callable applied to the service's stdout before responding.
    format_response: typing.Optional[typing.Callable]
    # True when a solution should be uploaded after the operation ran.
    contain_new_commits: bool


class GitService:
    """Run git smart-HTTP operations against a per-user bare repository.

    The repository lives under ``base_repository_folder`` in a folder named
    ``<course_id>-<exercise_number>-<user id>`` and is created on first use.
    """

    # Seconds a spawned git process may run before it is killed.
    _GIT_PROCESS_TIMEOUT = 20
    _GIT_VALID_EXIT_CODE = 0
    _STATELESS_RPC = '--stateless-rpc'
    _ADVERTISE_REFS = '--advertise-refs'
    _UPLOAD_COMMAND = 'git-upload-pack'
    _RECEIVE_COMMAND = 'git-receive-pack'
    _REFS_COMMAND = '/info/refs'

    def __init__(
        self,
        user: 'models.User',
        exercise_number: int,
        course_id: int,
        request: 'flask.Request',
        base_repository_folder: str,
    ):
        self._base_repository_folder = base_repository_folder
        self._user = user
        self._exercise_number = exercise_number
        self._course_id = course_id
        self._request = request

    @property
    def project_name(self) -> str:
        """Folder name of the repository, unique per course/exercise/user."""
        return f'{self._course_id}-{self._exercise_number}-{self._user.id}'

    @property
    def repository_folder(self) -> pathlib.Path:
        """Path of the bare repository inside the base folder."""
        return pathlib.Path(self._base_repository_folder) / self.project_name

    def handle_operation(self) -> 'flask.Response':
        """Execute the git operation selected by the request path.

        After an operation flagged with ``contain_new_commits`` the files of
        the repository are uploaded as a solution.

        Raises:
            NotImplementedError: the request path matches no git command.
            OSError: the requested service is not supported, or a git
                process exited with a non-zero code.
            subprocess.TimeoutExpired: a git process exceeded the timeout.
        """
        git_operation = self._extract_git_operation()

        # Reject unsupported services before touching the disk, so a bad
        # request never creates a repository as a side effect.
        if not git_operation.supported:
            raise OSError('Unsupported git service requested')

        # `git init --bare` writes a `config` file at the repository root;
        # its absence means the repository was not initialized yet.
        if not (self.repository_folder / 'config').exists():
            self._initialize_bare_repository()

        data_out = self._execute_git_operation(git_operation)

        if git_operation.format_response:
            data_out = git_operation.format_response(data_out)

        if git_operation.contain_new_commits:
            files = self._load_files_from_repository()
            solution_hash = hashing.by_content(str(files))
            upload.upload_solution(
                course_id=self._course_id,
                exercise_number=self._exercise_number,
                files=files,
                solution_hash=solution_hash,
                user_id=self._user.id,
            )

        return self.build_response(data_out, git_operation)

    def _execute_command(
        self,
        args: typing.List[str],
        cwd: typing.Union[str, pathlib.Path],
        proc_input: typing.Optional[bytes] = None,
    ):
        """Run *args* in *cwd*, feed it *proc_input* and return its stdout.

        Raises:
            OSError: the process exited with a non-zero code.
            subprocess.TimeoutExpired: the process ran longer than
                ``_GIT_PROCESS_TIMEOUT`` seconds; ``subprocess.run`` kills
                and reaps the child before re-raising.
        """
        completed = subprocess.run(  # noqa: S603
            args,
            # Always give the child a pipe for stdin (closed after the
            # input is written) instead of inheriting the server's stdin.
            input=b'' if proc_input is None else proc_input,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=cwd,
            timeout=self._GIT_PROCESS_TIMEOUT,
            check=False,
        )
        if completed.returncode != self._GIT_VALID_EXIT_CODE:
            # Log the captured output: the process pipes are already closed
            # at this point and cannot be read again.
            log.error(
                'Failed to execute command %s. stdout=%s\nstderr=%s',
                args, completed.stdout, completed.stderr,
            )
            raise OSError(
                f'Command {args} exited with code {completed.returncode}',
            )
        return completed.stdout

    def _execute_git_operation(self, git_operation: _GitOperation) -> bytes:
        """Run the operation's service command with the request body as stdin.

        The command runs from the base folder because the service command
        names the repository by its relative ``project_name``.
        """
        return self._execute_command(
            args=git_operation.service_command,
            cwd=self._base_repository_folder,
            proc_input=self._request.data,
        )

    def _initialize_bare_repository(self) -> None:
        """Create the repository folder and run ``git init --bare`` in it."""
        self.repository_folder.mkdir(parents=True, exist_ok=True)
        self._execute_command(
            args=['git', 'init', '--bare'],
            cwd=self.repository_folder,
        )

    @staticmethod
    def build_response(
        data_out: bytes,
        git_operation: _GitOperation,
    ) -> 'flask.Response':
        """Wrap *data_out* in a non-cacheable response for the git client."""
        res = flask.make_response(data_out)
        res.headers['Pragma'] = 'no-cache'
        res.headers['Cache-Control'] = 'no-cache, max-age=0, must-revalidate'
        res.headers['Content-Type'] = git_operation.response_content_type
        return res

    def _extract_git_operation(self) -> _GitOperation:
        """Map the request path suffix to the matching git operation.

        Raises:
            NotImplementedError: the path ends with no known git command.
        """
        path = self._request.path
        if path.endswith(self._UPLOAD_COMMAND):
            return self._build_upload_operation()
        if path.endswith(self._RECEIVE_COMMAND):
            return self._build_receive_operation()
        if path.endswith(self._REFS_COMMAND):
            return self._build_refs_operation()

        log.error('Failed to find the git command for route %s', path)
        raise NotImplementedError

    def _build_refs_operation(self) -> _GitOperation:
        """Build the ref-advertisement operation for the ``service`` query arg.

        The operation is marked unsupported unless the requested service is
        one of the upload/receive commands; callers must check ``supported``
        before executing ``service_command``.
        """
        allowed_commands = [self._UPLOAD_COMMAND, self._RECEIVE_COMMAND]
        service_name = self._request.args.get('service')
        content_type = f'application/x-{service_name}-advertisement'
        supported = service_name in allowed_commands

        def format_response_callback(response_bytes: bytes) -> bytes:
            # Prefix the service output with a "# service=..." packet whose
            # 4-hex-digit length counts the packet plus the 4 length digits,
            # followed by a "0000" marker.
            packet = f'# service={service_name}\n'
            length = len(packet) + 4
            prefix = f'{length & 0xFFFF:04x}'
            return (prefix + packet + '0000').encode() + response_bytes

        return _GitOperation(
            response_content_type=content_type,
            service_command=[
                service_name,
                self._STATELESS_RPC,
                self._ADVERTISE_REFS,
                self.project_name,
            ],
            supported=supported,
            format_response=format_response_callback,
            contain_new_commits=False,
        )

    def _build_receive_operation(self) -> _GitOperation:
        """Build the ``git-receive-pack`` operation (flagged as adding commits)."""
        return _GitOperation(
            response_content_type='application/x-git-receive-pack-result',
            service_command=[
                self._RECEIVE_COMMAND,
                self._STATELESS_RPC,
                self.project_name,
            ],
            supported=True,
            format_response=None,
            contain_new_commits=True,
        )

    def _build_upload_operation(self) -> _GitOperation:
        """Build the ``git-upload-pack`` operation (no new commits)."""
        return _GitOperation(
            response_content_type='application/x-git-upload-pack-result',
            service_command=[
                self._UPLOAD_COMMAND,
                self._STATELESS_RPC,
                self.project_name,
            ],
            supported=True,
            format_response=None,
            contain_new_commits=False,
        )

    def _load_files_from_repository(self) -> 'typing.List[upload.File]':
        """Return the files of the repository as upload files.

        Since the remote server is a git bare repository we need to
        'clone' it into a temporary folder to resolve the files. The clone
        is never pushed back, so its ``.git`` folder is simply removed.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            self._execute_command(
                args=['git', 'clone', str(self.repository_folder), '.'],
                cwd=tempdir,
            )
            # remove git internal files
            shutil.rmtree(pathlib.Path(tempdir) / '.git')
            to_return = []
            for root, _, files in os.walk(tempdir):
                for file in files:
                    to_return.append(self._load_file(file, root, tempdir))
            return to_return

    @staticmethod
    def _load_file(file_name: str, root: str, tempdir: str) -> 'upload.File':
        """Read one file; its upload path is relative to *tempdir*."""
        directory = pathlib.Path(root)
        file_path = str(directory.relative_to(tempdir) / file_name)
        # Explicit encoding keeps the result independent of the server locale.
        # NOTE(review): non-UTF-8 (e.g. binary) files still raise
        # UnicodeDecodeError here — confirm the desired handling.
        with open(directory / file_name, encoding='utf-8') as f:
            return upload.File(path=file_path, code=f.read())
# lms/lmsweb/views.py
@webapp.route(f'{routes.GIT}/info/refs')
@webapp.route(f'{routes.GIT}/git-receive-pack', methods=['POST'])
@webapp.route(f'{routes.GIT}/git-upload-pack', methods=['POST'])
@http_basic_auth.login_required
def git_handler(course_id: int, exercise_number: int) -> Response:
    """Serve a git request for the authenticated user's exercise repository.

    Builds a ``GitService`` for the basic-auth user, the current request and
    ``REPOSITORY_FOLDER``, and returns the response of its
    ``handle_operation``.
    """
    service = GitService(
        user=http_basic_auth.current_user(),
        exercise_number=exercise_number,
        course_id=course_id,
        request=request,
        base_repository_folder=REPOSITORY_FOLDER,
    )
    response = service.handle_operation()
    return response


# lms/models/upload.py
def upload_solution(
    course_id: int,
    exercise_number: int,
    files: List[File],
    solution_hash: str,
    user_id: int,
):
    """Store a solution via ``_upload_to_db`` and pass it to ``_run_auto_checks``."""
    _run_auto_checks(
        _upload_to_db(
            exercise_number=exercise_number,
            course_id=course_id,
            user_id=user_id,
            files=files,
            solution_hash=solution_hash,
        ),
    )
a/requirements.txt b/requirements.txt index b8951f64..404ff576 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,6 +31,7 @@ flake8-todo==0.7 Flask==2.0.1 Flask-Admin==1.5.8 Flask-Babel==2.0.0 +Flask-HTTPAuth==4.4.0 Flask-Limiter==1.4 Flask-Login==0.5.0 Flask-Mail==0.9.1 diff --git a/tests/conftest.py b/tests/conftest.py index 2b73bb3f..067ec0ec 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,6 +25,9 @@ from lms.models import notifications +FAKE_PASSWORD = 'fake pass' + + @pytest.fixture(autouse=True, scope='session') def db_in_memory(): """Binds all models to in-memory SQLite and creates all tables`""" @@ -126,7 +129,7 @@ def get_logged_user(username: str) -> FlaskClient: client = webapp.test_client() client.post('/login', data={ # noqa: S106 'username': username, - 'password': 'fake pass', + 'password': FAKE_PASSWORD, }, follow_redirects=True) return client @@ -186,12 +189,11 @@ def create_user( role_name: str = RoleOptions.STUDENT.value, index: int = 1, ) -> User: username = f'{role_name}-{index}' - password = 'fake pass' return User.create( # NOQA: S106 username=username, fullname=f'A{role_name}', mail_address=f'so-{role_name}-{index}@mail.com', - password=password, + password=FAKE_PASSWORD, api_key='fake key', role=Role.by_name(role_name), ) @@ -215,7 +217,7 @@ def create_staff_user(index: int = 0) -> User: @pytest.fixture() def staff_password(): - return 'fake pass' + return FAKE_PASSWORD @pytest.fixture @@ -242,12 +244,11 @@ def student_user(): def admin_user(): admin_role = Role.get(Role.name == RoleOptions.ADMINISTRATOR.value) username = 'Yam' - password = 'fake pass' return User.create( # NOQA: B106, S106 username=username, fullname='Buya', mail_address='mymail@mail.com', - password=password, + password=FAKE_PASSWORD, api_key='fake key', role=admin_role, ) diff --git a/tests/test_git_solution.py b/tests/test_git_solution.py new file mode 100644 index 00000000..3ff439f0 --- /dev/null +++ b/tests/test_git_solution.py @@ -0,0 +1,140 @@ 
import base64
import os.path
import shutil
import tempfile
from unittest import mock

from flask.testing import FlaskClient

from lms.lmsdb import models
from lms.lmsweb import webapp
from tests import conftest


# Raw request body posted to git-receive-pack by ``test_push_exercise``.
POST_NEW_REPOSITORY_BUFFER = \
    b'00ab0000000000000000000000000000000000000000 ' \
    b'c1d42352fc88ae88fde7713c23232d7d0703849a refs/heads/master\x00 ' \
    b'report-status-v2 side-band-64k object-format=sha1 ' \
    b'agent=git/2.30.10000PACK\x00\x00\x00\x02\x00\x00\x00\x03\x9d\nx' \
    b'\x9c\x95\xccA\n\xc3 \x10@\xd1\xbd\xa7p_(3\x8e\x9a\x04J\xe8\xae' \
    b'\x07\xe8\t\xa6\x99\xd1\n\x9a\x80\xd8\xfb7\xd0\x13t\xfb\xe1\xfd' \
    b'\xd1U\xed$@\xc2\x92\x92\xdf\xd2\x1c\xf1\x15@\x84=\x12\xba\xa4' \
    b'\xea\xe6e\x89\x88\x12\x12\x1a\xfe\x8c\xf7\xd1\xed\x83\xab}\x96=k' \
    b'\xb7\xb7\xcc\xd5\x93\xbb\xe7\xc6\xa5^\xb7\xa3\xad\x16#\x91\x9b' \
    b'\xc0\x07\xb2\x17 \x00s\xd6V\xc6\xd0\xbf\xa1){)\xe34\xbf\x83\xf9' \
    b'\x02\xa5\x1f3_\xa0\x02x\x9c340031Q(\xc8,Id(M^\xc86;\xe0\xd1\x1d' \
    b'\xefZ\x8bP\x17\x8eU\xd2\x17\xcb\xb6\xc6\x01\x00\xab:\x0b\xe64x' \
    b'\x9c+O\xcc\xe6\x02\x00\x03\xe3\x01NvHX\x85>M\xf7I\xd6\x7fGZ' \
    b'\x0e^\xc8\x82Q\xe3\xcb\xd9'


# Raw request body posted to git-upload-pack by ``test_get_exercise``.
POST_CLONE_REPOSITORY_BUFFER = \
    b'0098want c1d42352fc88ae88fde7713c23232d7d0703849a multi_ack_detailed' \
    b' no-done side-band-64k thin-pack ofs-delta deepen-since deepen-not' \
    b' agent=git/2.30.1\n00000009done\n'


class TestSendSolutionFromGit:
    """Exercise the git HTTP routes with basic-auth requests.

    Every test gets a fresh temporary folder that is patched in as the
    repository folder used by the views.
    """

    INFO_URL = 'info/refs'
    GET_METHOD = FlaskClient.get.__name__
    POST_METHOD = FlaskClient.post.__name__

    temp_folder = ''

    def setup_method(self, _method: str) -> None:
        """Create the per-test repository folder."""
        self.temp_folder = tempfile.mkdtemp()

    def teardown_method(self, _method: str) -> None:
        """Remove the per-test repository folder if it still exists."""
        if not self.temp_folder:
            return
        if os.path.exists(self.temp_folder):
            shutil.rmtree(self.temp_folder)

    @staticmethod
    def _get_formatted_git_url(exercise: models.Exercise, rel_path: str) -> str:
        """Build the git URL of *exercise* followed by *rel_path*."""
        course_id = exercise.course.id
        return f'/git/{course_id}/{exercise.number}.git/{rel_path}'

    def _send_git_request(
        self,
        username: str,
        method_name: str,
        url: str,
        data=None,
        service=None,
        password=conftest.FAKE_PASSWORD,
    ):
        """Send a basic-auth request with a new test client and return the response.

        *method_name* is the name of the client method to call; *service*,
        when given, is sent as the ``service`` query argument.
        """
        raw_credentials = f'{username}:{password}'.encode()
        token = base64.b64encode(raw_credentials).decode()
        headers = (
            ('Authorization', f'Basic {token}'),
        )
        query_string = None if service is None else {'service': service}
        send = getattr(webapp.test_client(), method_name)

        # patch the REPOSITORY_FOLDER to make new repository every test
        with mock.patch('lms.lmsweb.views.REPOSITORY_FOLDER', self.temp_folder):
            return send(url, query_string=query_string, headers=headers, data=data)

    def test_not_authorized_access(self, exercise: models.Exercise, student_user: models.User):
        # A client logged in through /login sends no basic-auth header.
        logged_client = conftest.get_logged_user(student_user.username)
        info_url = self._get_formatted_git_url(exercise, self.INFO_URL)
        response = logged_client.get(info_url)
        assert response.status_code == 401

    def test_not_existing_user(self, exercise: models.Exercise):
        info_url = self._get_formatted_git_url(exercise, self.INFO_URL)
        response = self._send_git_request(
            username='not-exists',
            method_name=self.GET_METHOD,
            url=info_url,
        )
        assert response.status_code == 401

    def test_invalid_user_password(self, exercise: models.Exercise, student_user: models.User):
        info_url = self._get_formatted_git_url(exercise, self.INFO_URL)
        response = self._send_git_request(
            username=student_user.username,
            method_name=self.GET_METHOD,
            url=info_url,
            password='not real password',
        )
        assert response.status_code == 401

    def test_push_exercise(self, exercise: models.Exercise, student_user: models.User):
        git_receive_pack = 'git-receive-pack'
        conftest.create_usercourse(student_user, exercise.course)

        # Ref advertisement for the receive-pack service.
        response = self._send_git_request(
            username=student_user.username,
            method_name=self.GET_METHOD,
            url=self._get_formatted_git_url(exercise, self.INFO_URL),
            service=git_receive_pack,
        )

        assert response.status_code == 200
        assert response.data.startswith(b'001f#')

        # The push itself.
        response = self._send_git_request(
            username=student_user.username,
            method_name=self.POST_METHOD,
            url=self._get_formatted_git_url(exercise, git_receive_pack),
            data=POST_NEW_REPOSITORY_BUFFER,
        )
        assert response.status_code == 200
        assert response.data.startswith(b'0030\x01000eunpack ok\n0019ok refs/heads/master\n00000000')

    def test_get_exercise(self, exercise: models.Exercise, student_user: models.User):
        git_upload_pack = 'git-upload-pack'
        conftest.create_usercourse(student_user, exercise.course)
        # Push first so the repository has something to send back.
        self.test_push_exercise(exercise, student_user)

        # Ref advertisement for the upload-pack service.
        response = self._send_git_request(
            username=student_user.username,
            method_name=self.GET_METHOD,
            url=self._get_formatted_git_url(exercise, self.INFO_URL),
            service=git_upload_pack,
        )
        assert response.status_code == 200
        assert response.data.startswith(b'001e# service=git-upload-pack')

        # The upload-pack request itself.
        response = self._send_git_request(
            username=student_user.username,
            method_name=self.POST_METHOD,
            url=self._get_formatted_git_url(exercise, git_upload_pack),
            data=POST_CLONE_REPOSITORY_BUFFER,
        )
        assert response.status_code == 200
        assert response.data.startswith(b'0008NAK\n0023\x02Enumerating objects: 3, done.')