Skip to content

Commit 911a871

Browse files
authored
Merge branch 'ggerganov:master' into master
2 parents 9d3ba0b + 23b5e12 commit 911a871

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+2833
-389
lines changed

.devops/tools.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
1313
./quantize "$@"
1414
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
1515
./main "$@"
16+
elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
17+
./finetune "$@"
1618
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
1719
echo "Converting PTH to GGML..."
1820
for i in `ls $1/$2/ggml-model-f16.bin*`; do
@@ -34,6 +36,8 @@ else
3436
echo " ex: --outtype f16 \"/models/7B/\" "
3537
echo " --quantize (-q): Optimize with quantization process ggml"
3638
echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
39+
echo " --finetune (-f): Run finetune command to create a lora finetune of the model"
40+
echo " See documentation for finetune for command-line parameters"
3741
echo " --all-in-one (-a): Execute --convert & --quantize"
3842
echo " ex: \"/models/\" 7B"
3943
echo " --server (-s): Run a model on the server"

.github/workflows/build.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,17 @@ jobs:
498498
path: |
499499
cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
500500
501+
ios-xcode-build:
502+
runs-on: macos-latest
503+
504+
steps:
505+
- name: Checkout code
506+
uses: actions/checkout@v3
507+
508+
- name: Build Xcode project
509+
run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
510+
511+
501512
# freeBSD-latest:
502513
# runs-on: macos-12
503514
# steps:

.gitignore

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ models-mnt
4747
/libllama.so
4848
/llama-bench
4949
/llava-cli
50+
/lookahead
5051
/main
5152
/metal
5253
/perplexity
@@ -87,15 +88,16 @@ poetry.lock
8788
poetry.toml
8889

8990
# Test binaries
90-
tests/test-grammar-parser
91-
tests/test-llama-grammar
92-
tests/test-double-float
93-
tests/test-grad0
94-
tests/test-opt
95-
tests/test-quantize-fns
96-
tests/test-quantize-perf
97-
tests/test-sampling
98-
tests/test-tokenizer-0-llama
99-
tests/test-tokenizer-0-falcon
100-
tests/test-tokenizer-1-llama
101-
tests/test-tokenizer-1-bpe
91+
/tests/test-grammar-parser
92+
/tests/test-llama-grammar
93+
/tests/test-double-float
94+
/tests/test-grad0
95+
/tests/test-opt
96+
/tests/test-quantize-fns
97+
/tests/test-quantize-perf
98+
/tests/test-sampling
99+
/tests/test-tokenizer-0-llama
100+
/tests/test-tokenizer-0-falcon
101+
/tests/test-tokenizer-1-llama
102+
/tests/test-tokenizer-1-bpe
103+
/tests/test-rope

CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ else()
4343
endif()
4444

4545
# general
46+
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
4647
option(LLAMA_STATIC "llama: static link libraries" OFF)
4748
option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
4849
option(LLAMA_LTO "llama: enable link time optimization" OFF)
@@ -100,6 +101,9 @@ option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALO
100101
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
101102
option(LLAMA_BUILD_SERVER "llama: build server example" ON)
102103

104+
# Required for relocatable CMake package
105+
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
106+
103107
#
104108
# Compile flags
105109
#
@@ -112,6 +116,11 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
112116
find_package(Threads REQUIRED)
113117
include(CheckCXXCompilerFlag)
114118

119+
# enable libstdc++ assertions for debug builds
120+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
121+
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
122+
endif()
123+
115124
if (NOT MSVC)
116125
if (LLAMA_SANITIZE_THREAD)
117126
add_compile_options(-fsanitize=thread)
@@ -161,7 +170,7 @@ if (LLAMA_METAL)
161170
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
162171

163172
# copy ggml-metal.metal to bin directory
164-
configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY)
173+
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
165174

166175
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
167176
${FOUNDATION_LIBRARY}

Makefile

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22
BUILD_TARGETS = \
33
main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
44
simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \
5-
speculative infill tokenize benchmark-matmult parallel finetune export-lora tests/test-c.o
5+
speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead tests/test-c.o
66

77
# Binaries only useful for tests
88
TEST_TARGETS = \
99
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
1010
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
11-
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
11+
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope
1212

1313
# Code coverage output files
1414
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -30,7 +30,7 @@ ifeq '' '$(findstring clang,$(shell $(CC) --version))'
3030
CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
3131
else
3232
CC_IS_CLANG=1
33-
ifeq '' '$(findstring Apple LLVM,$(shell $(CC) --version))'
33+
ifeq '' '$(findstring Apple,$(shell $(CC) --version))'
3434
CC_IS_LLVM_CLANG=1
3535
else
3636
CC_IS_APPLE_CLANG=1
@@ -174,6 +174,10 @@ ifdef LLAMA_DEBUG
174174
MK_CFLAGS += -O0 -g
175175
MK_CXXFLAGS += -O0 -g
176176
MK_LDFLAGS += -g
177+
178+
ifeq ($(UNAME_S),Linux)
179+
MK_CXXFLAGS += -Wp,-D_GLIBCXX_ASSERTIONS
180+
endif
177181
else
178182
MK_CPPFLAGS += -DNDEBUG
179183
endif
@@ -648,7 +652,7 @@ beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS)
648652
finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
649653
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
650654

651-
export-lora: examples/export-lora/export-lora.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
655+
export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
652656
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
653657

654658
speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
@@ -657,6 +661,9 @@ speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS)
657661
parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
658662
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
659663

664+
lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
665+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
666+
660667
ifdef LLAMA_METAL
661668
metal: examples/metal/metal.cpp ggml.o $(OBJS)
662669
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
@@ -698,28 +705,28 @@ vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
698705
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
699706
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
700707

701-
tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
708+
tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
702709
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
703710

704-
tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
711+
tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
705712
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
706713

707-
tests/test-double-float: tests/test-double-float.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
714+
tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
708715
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
709716

710-
tests/test-grad0: tests/test-grad0.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
717+
tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
711718
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
712719

713-
tests/test-opt: tests/test-opt.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
720+
tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
714721
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
715722

716-
tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
723+
tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
717724
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
718725

719-
tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
726+
tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
720727
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
721728

722-
tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
729+
tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
723730
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
724731

725732
tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
@@ -734,5 +741,8 @@ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMM
734741
tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
735742
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
736743

744+
tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
745+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
746+
737747
tests/test-c.o: tests/test-c.c llama.h
738748
$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
1010

1111
### Hot topics
1212

13+
- Using `llama.cpp` with AWS instances: https://github.com/ggerganov/llama.cpp/discussions/4225
14+
- Looking for contributions to improve and maintain the `server` example: https://github.com/ggerganov/llama.cpp/issues/4216
1315
- Collecting Apple Silicon performance stats: https://github.com/ggerganov/llama.cpp/discussions/4167
1416

1517
----
@@ -114,6 +116,8 @@ as the main playground for developing new features for the [ggml](https://github
114116
- [nat/openplayground](https://github.com/nat/openplayground)
115117
- [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui)
116118
- [withcatai/catai](https://github.com/withcatai/catai)
119+
- [semperai/amica](https://github.com/semperai/amica)
120+
- [psugihara/FreeChat](https://github.com/psugihara/FreeChat)
117121

118122
---
119123

@@ -320,7 +324,7 @@ mpirun -hostfile hostfile -n 3 ./main -m ./models/7B/ggml-model-q4_0.gguf -n 128
320324
321325
### BLAS Build
322326
323-
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). BLAS doesn't affect the normal generation performance. There are currently three different implementations of it:
327+
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). CPU-only BLAS implementations do not affect normal generation performance. GPU-backed BLAS implementations, e.g. cuBLAS, hipBLAS and CLBlast, may also improve generation performance. There are currently several different BLAS implementations available for build and use:
324328

325329
- #### Accelerate Framework:
326330

@@ -892,7 +896,7 @@ Additionally, there the following images, similar to the above:
892896
- `ghcr.io/ggerganov/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
893897
- `ghcr.io/ggerganov/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
894898

895-
The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](.devops/) and the Gitlab Action defined in [.github/workflows/docker.yml](.github/workflows/docker.yml). If you need different settings (for example, a different CUDA or ROCm library, you'll need to build the images locally for now).
899+
The GPU-enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](.github/workflows/docker.yml). If you need different settings (for example, a different CUDA or ROCm library), you'll need to build the images locally for now.
896900
897901
#### Usage
898902

common/CMakeLists.txt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,12 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
1111
if(NOT IS_DIRECTORY "${GIT_DIR}")
1212
file(READ ${GIT_DIR} REAL_GIT_DIR_LINK)
1313
string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK})
14-
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
14+
string(FIND "${REAL_GIT_DIR}" "/" SLASH_POS)
15+
if (SLASH_POS EQUAL 0)
16+
set(GIT_DIR "${REAL_GIT_DIR}")
17+
else()
18+
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
19+
endif()
1520
endif()
1621

1722
set(GIT_INDEX "${GIT_DIR}/index")
@@ -26,7 +31,7 @@ add_custom_command(
2631
COMMENT "Generating build details from Git"
2732
COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
2833
-DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
29-
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/build-info.cmake"
34+
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/gen-build-info-cpp.cmake"
3035
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
3136
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
3237
VERBATIM

common/grammar-parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ namespace grammar_parser {
190190
pos = parse_space(pos + 1, is_nested);
191191
} else if (*pos == '*' || *pos == '+' || *pos == '?') { // repetition operator
192192
if (last_sym_start == out_elements.size()) {
193-
throw std::runtime_error(std::string("expecting preceeding item to */+/? at ") + pos);
193+
throw std::runtime_error(std::string("expecting preceding item to */+/? at ") + pos);
194194
}
195195

196196
// apply transformation to previous symbol (last_sym_start to end) according to

0 commit comments

Comments
 (0)