Skip to content

Commit 6a29083

Browse files
committed
Merge branch 'master' into develop/personal
* master: (773 commits) server : add `/detokenize` endpoint (ggml-org#2802) convert.py : advanced option (ggml-org#2753) llama : use Unicode Escape Sequence to replace encoded characters (ggml-org#2814) flake.nix : add rocm support and cleanup (ggml-org#2808) llama : move #includes out of _GNU_SOURCE conditional (ggml-org#2817) main : fix bug (penalize_nl=false doesn't work) + suppress warning on mingw (ggml-org#1528) llama : use std::abs in llama_sample_tail_free (ggml-org#2800) k-quants : remove unnecessary tensor shape restrictions (ggml-org#2811) Better perplexity for 2- and 3-bit quantization for LLaMA-v2-70B (ggml-org#2807) Fix HellaSwag (ggml-org#2805) flake : build llama.cpp on Intel with nix (ggml-org#2795) Handle null rope scaling value (ggml-org#2793) Fix spm whitespaces (ggml-org#2806) examples : skip unnecessary external lib in server README.md how-to (ggml-org#2804) llama : fix struct decl (ggml-org#2790) Faster perplexity computation (ggml-org#2786) llama : add llama_beam_search() (ggml-org#2267) convert.py : Get rope scale from HuggingFace models (ggml-org#2772) llama-bench : add model sizes (ggml-org#2771) convert.py : export rope freq_base when converting CodeLlama from an HF model (ggml-org#2773) ...
2 parents a939558 + c1ac54b commit 6a29083

File tree

210 files changed

+112015
-12343
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

210 files changed

+112015
-12343
lines changed

.clang-tidy

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
---
2+
Checks: >
3+
bugprone-*,
4+
-bugprone-easily-swappable-parameters,
5+
-bugprone-implicit-widening-of-multiplication-result,
6+
-bugprone-narrowing-conversions,
7+
readability-*,
8+
-readability-avoid-unconditional-preprocessor-if,
9+
-readability-function-cognitive-complexity,
10+
-readability-identifier-length,
11+
-readability-implicit-bool-conversion,
12+
-readability-magic-numbers,
13+
-readability-uppercase-literal-suffix,
14+
clang-analyzer-*,
15+
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
16+
performance-*,
17+
portability-*,
18+
FormatStyle: none

.devops/full-cuda.Dockerfile

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
ARG UBUNTU_VERSION=22.04
2+
3+
# This needs to generally match the container host's environment.
4+
ARG CUDA_VERSION=11.7.1
5+
6+
# Target the CUDA build image
7+
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
8+
9+
FROM ${BASE_CUDA_DEV_CONTAINER} as build
10+
11+
# Unless otherwise specified, we make a fat build.
12+
ARG CUDA_DOCKER_ARCH=all
13+
14+
RUN apt-get update && \
15+
apt-get install -y build-essential python3 python3-pip
16+
17+
COPY requirements.txt requirements.txt
18+
19+
RUN pip install --upgrade pip setuptools wheel \
20+
&& pip install -r requirements.txt
21+
22+
WORKDIR /app
23+
24+
COPY . .
25+
26+
# Set nvcc architecture
27+
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
28+
# Enable cuBLAS
29+
ENV LLAMA_CUBLAS=1
30+
31+
RUN make
32+
33+
ENTRYPOINT ["/app/.devops/tools.sh"]

.devops/full-rocm.Dockerfile

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
ARG UBUNTU_VERSION=22.04
2+
3+
# This needs to generally match the container host's environment.
4+
ARG ROCM_VERSION=5.6
5+
6+
# Target the ROCm build image
7+
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
8+
9+
FROM ${BASE_ROCM_DEV_CONTAINER} as build
10+
11+
# Unless otherwise specified, we make a fat build.
12+
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
13+
# This is mostly tied to rocBLAS supported archs.
14+
ARG ROCM_DOCKER_ARCH=\
15+
gfx803 \
16+
gfx900 \
17+
gfx906 \
18+
gfx908 \
19+
gfx90a \
20+
gfx1010 \
21+
gfx1030 \
22+
gfx1100 \
23+
gfx1101 \
24+
gfx1102
25+
26+
COPY requirements.txt requirements.txt
27+
28+
RUN pip install --upgrade pip setuptools wheel \
29+
&& pip install -r requirements.txt
30+
31+
WORKDIR /app
32+
33+
COPY . .
34+
35+
# Set ROCm GPU target architectures
36+
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
37+
# Enable ROCm
38+
ENV LLAMA_HIPBLAS=1
39+
ENV CC=/opt/rocm/llvm/bin/clang
40+
ENV CXX=/opt/rocm/llvm/bin/clang++
41+
42+
RUN make
43+
44+
ENTRYPOINT ["/app/.devops/tools.sh"]

.devops/full.Dockerfile

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,19 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION as build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential python3 python3-pip
6+
apt-get install -y build-essential python3 python3-pip git
7+
8+
COPY requirements.txt requirements.txt
79

810
RUN pip install --upgrade pip setuptools wheel \
9-
&& pip install numpy requests sentencepiece tqdm \
10-
&& pip install torch --index-url https://download.pytorch.org/whl/cpu
11+
&& pip install -r requirements.txt
1112

1213
WORKDIR /app
1314

1415
COPY . .
1516

1617
RUN make
1718

19+
ENV LC_ALL=C.utf8
20+
1821
ENTRYPOINT ["/app/.devops/tools.sh"]

.devops/lamma-cpp-clblast.srpm.spec

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# SRPM for building from source and packaging an RPM for RPM-based distros.
2+
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
3+
# Built and maintained by John Boero - boeroboy@gmail.com
4+
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5+
6+
# Notes for llama.cpp:
7+
# 1. Tags are currently based on hash - which will not sort asciibetically.
8+
# We need to declare standard versioning if people want to sort latest releases.
9+
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
10+
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
11+
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
12+
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
13+
# It is up to the user to install the correct vendor-specific support.
14+
15+
Name: llama.cpp-clblast
16+
Version: master
17+
Release: 1%{?dist}
18+
Summary: OpenCL Inference of LLaMA model in pure C/C++
19+
License: MIT
20+
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
21+
BuildRequires: coreutils make gcc-c++ git mesa-libOpenCL-devel
22+
URL: https://github.com/ggerganov/llama.cpp
23+
24+
%define debug_package %{nil}
25+
%define source_date_epoch_from_changelog 0
26+
27+
%description
28+
CPU inference for Meta's Llama2 models using default options.
29+
30+
%prep
31+
%setup -n llama.cpp-master
32+
33+
%build
34+
make -j LLAMA_CLBLAST=1
35+
36+
%install
37+
mkdir -p %{buildroot}%{_bindir}/
38+
cp -p main %{buildroot}%{_bindir}/llamacppclblast
39+
cp -p server %{buildroot}%{_bindir}/llamacppclblastserver
40+
cp -p simple %{buildroot}%{_bindir}/llamacppclblastsimple
41+
42+
%clean
43+
rm -rf %{buildroot}
44+
rm -rf %{_builddir}/*
45+
46+
%files
47+
%{_bindir}/llamacppclblast
48+
%{_bindir}/llamacppclblastserver
49+
%{_bindir}/llamacppclblastsimple
50+
51+
%pre
52+
53+
%post
54+
55+
%preun
56+
%postun
57+
58+
%changelog

.devops/lamma-cpp-cublas.srpm.spec

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# SRPM for building from source and packaging an RPM for RPM-based distros.
2+
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
3+
# Built and maintained by John Boero - boeroboy@gmail.com
4+
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5+
6+
# Notes for llama.cpp:
7+
# 1. Tags are currently based on hash - which will not sort asciibetically.
8+
# We need to declare standard versioning if people want to sort latest releases.
9+
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
10+
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
11+
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
12+
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
13+
# It is up to the user to install the correct vendor-specific support.
14+
15+
Name: llama.cpp-cublas
16+
Version: master
17+
Release: 1%{?dist}
18+
Summary:        CUDA Inference of LLaMA model in pure C/C++ (with cuBLAS)
19+
License: MIT
20+
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
21+
BuildRequires: coreutils make gcc-c++ git cuda-toolkit
22+
Requires: cuda-toolkit
23+
URL: https://github.com/ggerganov/llama.cpp
24+
25+
%define debug_package %{nil}
26+
%define source_date_epoch_from_changelog 0
27+
28+
%description
29+
CPU inference for Meta's Llama2 models using default options.
30+
31+
%prep
32+
%setup -n llama.cpp-master
33+
34+
%build
35+
make -j LLAMA_CUBLAS=1
36+
37+
%install
38+
mkdir -p %{buildroot}%{_bindir}/
39+
cp -p main %{buildroot}%{_bindir}/llamacppcublas
40+
cp -p server %{buildroot}%{_bindir}/llamacppcublasserver
41+
cp -p simple %{buildroot}%{_bindir}/llamacppcublassimple
42+
43+
%clean
44+
rm -rf %{buildroot}
45+
rm -rf %{_builddir}/*
46+
47+
%files
48+
%{_bindir}/llamacppcublas
49+
%{_bindir}/llamacppcublasserver
50+
%{_bindir}/llamacppcublassimple
51+
52+
%pre
53+
54+
%post
55+
56+
%preun
57+
%postun
58+
59+
%changelog

.devops/llama-cpp.srpm.spec

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# SRPM for building from source and packaging an RPM for RPM-based distros.
2+
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
3+
# Built and maintained by John Boero - boeroboy@gmail.com
4+
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5+
6+
# Notes for llama.cpp:
7+
# 1. Tags are currently based on hash - which will not sort asciibetically.
8+
# We need to declare standard versioning if people want to sort latest releases.
9+
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
10+
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
11+
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
12+
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
13+
# It is up to the user to install the correct vendor-specific support.
14+
15+
Name: llama.cpp
16+
Version: master
17+
Release: 1%{?dist}
18+
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
19+
License: MIT
20+
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
21+
BuildRequires: coreutils make gcc-c++ git
22+
URL: https://github.com/ggerganov/llama.cpp
23+
24+
%define debug_package %{nil}
25+
%define source_date_epoch_from_changelog 0
26+
27+
%description
28+
CPU inference for Meta's Llama2 models using default options.
29+
30+
%prep
31+
%autosetup
32+
33+
%build
34+
make -j
35+
36+
%install
37+
mkdir -p %{buildroot}%{_bindir}/
38+
cp -p main %{buildroot}%{_bindir}/llamacpp
39+
cp -p server %{buildroot}%{_bindir}/llamacppserver
40+
cp -p simple %{buildroot}%{_bindir}/llamacppsimple
41+
42+
%clean
43+
rm -rf %{buildroot}
44+
rm -rf %{_builddir}/*
45+
46+
%files
47+
%{_bindir}/llamacpp
48+
%{_bindir}/llamacppserver
49+
%{_bindir}/llamacppsimple
50+
51+
%pre
52+
53+
%post
54+
55+
%preun
56+
%postun
57+
58+
%changelog

.devops/main-cuda.Dockerfile

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
ARG UBUNTU_VERSION=22.04
2+
# This needs to generally match the container host's environment.
3+
ARG CUDA_VERSION=11.7.1
4+
# Target the CUDA build image
5+
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6+
# Target the CUDA runtime image
7+
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8+
9+
FROM ${BASE_CUDA_DEV_CONTAINER} as build
10+
11+
# Unless otherwise specified, we make a fat build.
12+
ARG CUDA_DOCKER_ARCH=all
13+
14+
RUN apt-get update && \
15+
apt-get install -y build-essential
16+
17+
WORKDIR /app
18+
19+
COPY . .
20+
21+
# Set nvcc architecture
22+
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
23+
# Enable cuBLAS
24+
ENV LLAMA_CUBLAS=1
25+
26+
RUN make
27+
28+
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
29+
30+
COPY --from=build /app/main /main
31+
32+
ENTRYPOINT [ "/main" ]

.devops/main-rocm.Dockerfile

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
ARG UBUNTU_VERSION=22.04
2+
3+
# This needs to generally match the container host's environment.
4+
ARG ROCM_VERSION=5.6
5+
6+
# Target the ROCm build image
7+
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
8+
9+
FROM ${BASE_ROCM_DEV_CONTAINER} as build
10+
11+
# Unless otherwise specified, we make a fat build.
12+
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
13+
# This is mostly tied to rocBLAS supported archs.
14+
ARG ROCM_DOCKER_ARCH=\
15+
gfx803 \
16+
gfx900 \
17+
gfx906 \
18+
gfx908 \
19+
gfx90a \
20+
gfx1010 \
21+
gfx1030 \
22+
gfx1100 \
23+
gfx1101 \
24+
gfx1102
25+
26+
COPY requirements.txt requirements.txt
27+
28+
RUN pip install --upgrade pip setuptools wheel \
29+
&& pip install -r requirements.txt
30+
31+
WORKDIR /app
32+
33+
COPY . .
34+
35+
# Set ROCm GPU target architectures
36+
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
37+
# Enable ROCm
38+
ENV LLAMA_HIPBLAS=1
39+
ENV CC=/opt/rocm/llvm/bin/clang
40+
ENV CXX=/opt/rocm/llvm/bin/clang++
41+
42+
RUN make
43+
44+
ENTRYPOINT [ "/app/main" ]

.devops/main.Dockerfile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION as build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential
6+
apt-get install -y build-essential git
77

88
WORKDIR /app
99

@@ -15,4 +15,6 @@ FROM ubuntu:$UBUNTU_VERSION as runtime
1515

1616
COPY --from=build /app/main /main
1717

18-
ENTRYPOINT [ "/main" ]
18+
ENV LC_ALL=C.utf8
19+
20+
ENTRYPOINT [ "/main" ]

0 commit comments

Comments
 (0)