diff --git a/pygit2/__init__.py b/pygit2/__init__.py index 42988c3b9..2ed82db3a 100644 --- a/pygit2/__init__.py +++ b/pygit2/__init__.py @@ -40,6 +40,7 @@ from .index import Index, IndexEntry from .remote import Remote from .repository import Repository +from .packbuilder import PackBuilder from .settings import Settings from .submodule import Submodule from .utils import to_bytes, to_str diff --git a/pygit2/_run.py b/pygit2/_run.py index 5fdaf77a7..974d5ca37 100644 --- a/pygit2/_run.py +++ b/pygit2/_run.py @@ -61,10 +61,10 @@ 'strarray.h', 'diff.h', 'checkout.h', - 'pack.h', 'transport.h', 'proxy.h', 'indexer.h', + 'pack.h', 'remote.h', 'clone.h', 'common.h', diff --git a/pygit2/decl/pack.h b/pygit2/decl/pack.h index 314a272ac..0e6973045 100644 --- a/pygit2/decl/pack.h +++ b/pygit2/decl/pack.h @@ -3,3 +3,16 @@ typedef int (*git_packbuilder_progress)( uint32_t current, uint32_t total, void *payload); + +int git_packbuilder_new(git_packbuilder **out, git_repository *repo); +void git_packbuilder_free(git_packbuilder *pb); + +int git_packbuilder_insert(git_packbuilder *pb, const git_oid *id, const char *name); +int git_packbuilder_insert_recur(git_packbuilder *pb, const git_oid *id, const char *name); + +size_t git_packbuilder_object_count(git_packbuilder *pb); + +int git_packbuilder_write(git_packbuilder *pb, const char *path, unsigned int mode, git_indexer_progress_cb progress_cb, void *progress_cb_payload); +uint32_t git_packbuilder_written(git_packbuilder *pb); + +unsigned int git_packbuilder_set_threads(git_packbuilder *pb, unsigned int n); \ No newline at end of file diff --git a/pygit2/decl/types.h b/pygit2/decl/types.h index 085270849..8770da7a5 100644 --- a/pygit2/decl/types.h +++ b/pygit2/decl/types.h @@ -10,6 +10,7 @@ typedef struct git_repository git_repository; typedef struct git_submodule git_submodule; typedef struct git_transport git_transport; typedef struct git_tree git_tree; +typedef struct git_packbuilder git_packbuilder; typedef int64_t git_off_t; 
typedef int64_t git_time_t; diff --git a/pygit2/packbuilder.py b/pygit2/packbuilder.py new file mode 100644 index 000000000..b3f54e6be --- /dev/null +++ b/pygit2/packbuilder.py @@ -0,0 +1,81 @@ +# Copyright 2010-2020 The pygit2 contributors +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# In addition to the permissions in the GNU General Public License, +# the authors give you unlimited permission to link the compiled +# version of this file into combinations with other programs, +# and to distribute those combinations without any restriction +# coming from the use of this file. (The General Public License +# restrictions do apply in other respects; for example, they cover +# modification of the file, and distribution when not linked into +# a combined executable.) +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING. If not, write to +# the Free Software Foundation, 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. 
# --- pygit2/packbuilder.py ---------------------------------------------------


class PackBuilder:
    """Wrapper around a libgit2 ``git_packbuilder``.

    Objects are queued with :meth:`add` or :meth:`add_recur` and then written
    out as ``.pack`` / ``.idx`` files with :meth:`write`.
    """

    def __init__(self, repo):
        """Create a pack builder for *repo*.

        Parameters:

        repo
            The repository whose objects will be packed; its ``_repo``
            handle is passed to ``git_packbuilder_new``.

        Raises whatever ``check_error`` raises when libgit2 reports a failure.
        """
        cpackbuilder = ffi.new('git_packbuilder **')
        err = C.git_packbuilder_new(cpackbuilder, repo._repo)
        check_error(err)

        self._repo = repo
        self._packbuilder = cpackbuilder[0]
        # The ``git_packbuilder **`` holder owns the memory the pointer value
        # was written into, so it is kept alongside the pointer itself.
        self._cpackbuilder = cpackbuilder

    @property
    def _pointer(self):
        """Return the ``git_packbuilder *`` pointer value as raw bytes."""
        # ``git_packbuilder`` is only forward-declared, so ffi.buffer() cannot
        # size a buffer over the struct itself.  Read the pointer out of the
        # ``git_packbuilder **`` holder instead.
        return bytes(ffi.buffer(self._cpackbuilder)[:])

    def __del__(self):
        # __init__ raises before ``_packbuilder`` is assigned when
        # git_packbuilder_new fails; there is nothing to free in that case.
        packbuilder = getattr(self, '_packbuilder', None)
        if packbuilder is not None:
            C.git_packbuilder_free(packbuilder)

    def __len__(self):
        """Return ``git_packbuilder_object_count`` for this builder."""
        return C.git_packbuilder_object_count(self._packbuilder)

    @staticmethod
    def convert_object_to_oid(oid):
        """Copy ``oid.raw`` into a newly allocated cffi ``git_oid``."""
        git_oid = ffi.new('git_oid *')
        ffi.buffer(git_oid)[:] = oid.raw[:]
        return git_oid

    def add(self, oid):
        """Insert the single object identified by *oid* into the pack.

        Raises whatever ``check_error`` raises when libgit2 reports a failure.
        """
        git_oid = self.convert_object_to_oid(oid)
        # The optional ``name`` argument of git_packbuilder_insert is unused.
        err = C.git_packbuilder_insert(self._packbuilder, git_oid, ffi.NULL)
        check_error(err)

    def add_recur(self, oid):
        """Insert the object identified by *oid* via
        ``git_packbuilder_insert_recur``, which also adds the objects it
        references.

        Raises whatever ``check_error`` raises when libgit2 reports a failure.
        """
        git_oid = self.convert_object_to_oid(oid)
        err = C.git_packbuilder_insert_recur(self._packbuilder, git_oid, ffi.NULL)
        check_error(err)

    def set_threads(self, n_threads):
        """Pass *n_threads* to ``git_packbuilder_set_threads`` and return
        the value libgit2 hands back."""
        return C.git_packbuilder_set_threads(self._packbuilder, n_threads)

    def write(self, path=None):
        """Write the pack to *path*.

        ``None`` is handed to libgit2 as ``NULL``.  The call uses mode ``0``
        and no progress callback.

        Raises whatever ``check_error`` raises when libgit2 reports a failure.
        """
        path = ffi.NULL if path is None else to_bytes(path)
        err = C.git_packbuilder_write(self._packbuilder, path, 0, ffi.NULL, ffi.NULL)
        check_error(err)

    @property
    def written_objects_count(self):
        """Return ``git_packbuilder_written`` for this builder."""
        return C.git_packbuilder_written(self._packbuilder)


# --- pygit2/repository.py ----------------------------------------------------
# ``pack`` is a method of ``BaseRepository``.  The enclosing ``class``
# statement lies outside this span, so the method is shown on its own here.


def pack(self, path=None, pack_delegate=None, n_threads=None):
    """Pack the objects in the odb chosen by the pack_delegate function
    and write `.pack` and `.idx` files for them.

    Returns: the number of objects written to the pack

    Parameters:

    path
        The path to which the `.pack` and `.idx` files should be written.
        `None` will write to the default location.

    pack_delegate
        A callable that receives the PackBuilder and adds the objects to
        be packed to it. Defaults to adding all objects.

    n_threads
        The number of threads the `PackBuilder` will spawn. If set to 0,
        libgit2 will autodetect the number of CPUs.
    """

    def pack_all_objects(pack_builder):
        # Default strategy: every object in the object database.
        for obj in self.odb:
            pack_builder.add(obj)

    pack_delegate = pack_delegate or pack_all_objects

    builder = PackBuilder(self)
    if n_threads is not None:
        builder.set_threads(n_threads)
    pack_delegate(builder)
    builder.write(path=path)

    return builder.written_objects_count
# Tests for PackBuilder and Repository.pack().


def test_create_packbuilder(testrepo):
    # A freshly created PackBuilder holds no objects.
    packbuilder = PackBuilder(testrepo)
    assert len(packbuilder) == 0


def test_add(testrepo):
    # Add a few objects and confirm that the count is correct.
    packbuilder = PackBuilder(testrepo)
    objects_to_add = list(testrepo)
    packbuilder.add(objects_to_add[0])
    assert len(packbuilder) == 1
    packbuilder.add(objects_to_add[1])
    assert len(packbuilder) == 2


def test_add_recursively(testrepo):
    # Add the head object and the objects it references recursively, then
    # confirm that the count is correct.
    packbuilder = PackBuilder(testrepo)
    packbuilder.add_recur(testrepo.head.target)

    # Expect a count of 4, made up of the following referenced objects:
    #   Commit
    #   Tree
    #   Blob: hello.txt
    #   Blob: .gitignore
    assert len(packbuilder) == 4


def test_repo_pack(testrepo, tmp_path):
    # Pack the repo with the default strategy.
    confirm_same_repo_after_packing(testrepo, tmp_path, None)


def test_pack_with_delegate(testrepo, tmp_path):
    # Loop through all branches and add each commit to the packbuilder.
    def pack_delegate(pb):
        for branch in pb._repo.branches:
            br = pb._repo.branches.get(branch)
            for commit in br.log():
                pb.add_recur(commit.oid_new)

    confirm_same_repo_after_packing(testrepo, tmp_path, pack_delegate)


def setup_second_repo(tmp_path):
    # Helper: set up a second repo (under tmp_path/test_repo2) for comparison.
    tmp_path_2 = os.path.join(tmp_path, 'test_repo2')
    with utils.TemporaryRepository('testrepo.tar', tmp_path_2) as path:
        return pygit2.Repository(path)


def confirm_same_repo_after_packing(testrepo, tmp_path, pack_delegate):
    # Helper: pack `testrepo` into the emptied object store of a second repo
    # and confirm that both repos expose the same objects afterwards.
    pack_repo = setup_second_repo(tmp_path)

    # Wipe the second repo's objects so that everything it can read
    # afterwards must come from the pack written below.
    objects_dir = os.path.join(pack_repo.path, 'objects')
    rmtree(objects_dir)
    pack_path = os.path.join(objects_dir, 'pack')
    os.makedirs(pack_path)

    # The number of written objects equals the number of objects in the repo.
    written_objects = testrepo.pack(pack_path, pack_delegate=pack_delegate)
    assert written_objects == len(list(testrepo))

    # The packed repo holds as many objects as the original repo.
    orig_objects = list(testrepo.odb)
    packed_objects = list(pack_repo.odb)
    assert len(packed_objects) == len(orig_objects)

    # The objects in the packed repo are the same objects as in the original.
    for obj in orig_objects:
        assert pack_repo[obj].type == testrepo[obj].type
        assert pack_repo[obj].read_raw() == testrepo[obj].read_raw()