-
Notifications
You must be signed in to change notification settings - Fork 12k
llava : introduce libmtmd #12849
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
llava : introduce libmtmd #12849
Changes from 8 commits
235340d
96bf95e
94564ac
7cc4108
a9ef623
3b25bd9
1576c82
117bf73
a6625fa
430dbd8
6ed09b7
aed3216
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
# llava (legacy) | ||
|
||
add_library(llava OBJECT | ||
llava.cpp | ||
llava.h | ||
|
@@ -22,12 +24,41 @@ if (BUILD_SHARED_LIBS) | |
install(TARGETS llava_shared LIBRARY) | ||
endif() | ||
|
||
# llava2 | ||
|
||
add_library(llava2 OBJECT | ||
llava2.cpp | ||
llava2.h | ||
clip.cpp | ||
clip.h | ||
clip-impl.h | ||
) | ||
|
||
target_link_libraries(llava2 PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT}) | ||
|
||
target_include_directories(llava2 PUBLIC .) | ||
target_include_directories(llava2 PUBLIC ../..) | ||
target_include_directories(llava2 PUBLIC ../../common) # for stb_image.h | ||
|
||
target_compile_features(llava2 PRIVATE cxx_std_17) | ||
|
||
add_library(llava2_static STATIC $<TARGET_OBJECTS:llava2>) | ||
if (BUILD_SHARED_LIBS) | ||
set_target_properties(llava2 PROPERTIES POSITION_INDEPENDENT_CODE ON) | ||
target_compile_definitions(llava2 PRIVATE LLAMA_SHARED LLAMA_BUILD) | ||
add_library(llava2_shared SHARED $<TARGET_OBJECTS:llava2>) | ||
target_link_libraries(llava2_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT}) | ||
install(TARGETS llava2_shared LIBRARY) | ||
endif() | ||
|
||
if (NOT MSVC) | ||
target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h | ||
target_compile_options(llava2 PRIVATE -Wno-cast-qual) # stb_image.h | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably better to wrap |
||
endif() | ||
|
||
if(TARGET BUILD_INFO) | ||
add_dependencies(llava BUILD_INFO) | ||
add_dependencies(llava2 BUILD_INFO) | ||
endif() | ||
|
||
set(TARGET llama-llava-cli) | ||
|
@@ -55,7 +86,7 @@ set(TARGET llama-gemma3-cli) | |
add_executable(${TARGET} gemma3-cli.cpp) | ||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-gemma3-cli) | ||
install(TARGETS ${TARGET} RUNTIME) | ||
target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT}) | ||
target_link_libraries(${TARGET} PRIVATE common llava2 ${CMAKE_THREAD_LIBS_INIT}) | ||
target_compile_features(${TARGET} PRIVATE cxx_std_17) | ||
|
||
set(TARGET llama-llava-clip-quantize-cli) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,15 @@ | ||
#include "ggml.h" | ||
#include "gguf.h" | ||
|
||
#include "clip.h" | ||
|
||
#include <climits> | ||
#include <cstdarg> | ||
#include <string> | ||
#include <map> | ||
#include <sstream> | ||
#include <vector> | ||
#include <memory> | ||
|
||
// Internal header for clip.cpp | ||
|
||
|
@@ -120,6 +123,23 @@ static projector_type clip_projector_type_from_string(const std::string & str) { | |
return PROJECTOR_TYPE_UNKNOWN; | ||
} | ||
|
||
// RGB uint8 image
struct clip_image_u8 {
    // Image dimensions (presumably width and height in pixels - TODO confirm).
    // Zero-initialized so that a default-constructed image is a well-defined
    // empty image instead of carrying indeterminate sizes.
    int nx = 0;
    int ny = 0;

    // raw pixel bytes
    std::vector<uint8_t> buf;
};
|
||
// RGB float32 image (NHWC)
// Memory layout: RGBRGBRGB...
struct clip_image_f32 {
    // Image dimensions (presumably width and height in pixels - TODO confirm).
    // Zero-initialized so that a default-constructed image is a well-defined
    // empty image instead of carrying indeterminate sizes.
    int nx = 0;
    int ny = 0;

    // interleaved float samples, see the memory layout note above
    std::vector<float> buf;
};
|
||
// | ||
// logging | ||
// | ||
|
@@ -178,6 +198,28 @@ static void clip_log_internal(enum ggml_log_level level, const char * format, .. | |
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__) | ||
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, __VA_ARGS__) | ||
|
||
// | ||
// cpp wrappers | ||
// | ||
|
||
struct clip_image_u8_deleter { | ||
void operator()(clip_image_u8 * val) { clip_image_u8_free(val); } | ||
}; | ||
|
||
struct clip_image_f32_deleter { | ||
void operator()(clip_image_f32 * val) { clip_image_f32_free(val); } | ||
}; | ||
|
||
struct clip_image_f32_batch_deleter { | ||
void operator()(clip_image_f32_batch * val) { clip_image_f32_batch_free(val); } | ||
}; | ||
|
||
typedef std::unique_ptr<clip_image_u8, clip_image_u8_deleter> clip_image_u8_ptr; | ||
typedef std::unique_ptr<clip_image_f32, clip_image_f32_deleter> clip_image_f32_ptr; | ||
typedef std::unique_ptr<clip_image_f32_batch, clip_image_f32_batch_deleter> clip_image_f32_batch_ptr; | ||
|
||
// TODO @ngxson : we're currently having a naming clash between struct clip_image_size and function clip_image_size() | ||
Comment on lines
+205
to
+221
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is what I was talking about in #12834 (comment). In a follow-up PR, I'll use this inside clip.cpp |
||
|
||
// | ||
// common utils | ||
// | ||
|
@@ -214,6 +256,20 @@ static void string_replace_all(std::string & s, const std::string & search, cons | |
s = std::move(builder); | ||
} | ||
|
||
// split string by a `std::string delim` instead of `char delim`
//
// Returns the pieces of `s` in order. Leading, trailing or adjacent delimiters
// produce empty pieces, so the result always contains at least one element.
// An empty `delimiter` would match at every position without consuming input,
// so in that case `s` is returned unsplit as the only piece.
static std::vector<std::string> string_split_str(std::string s, const std::string & delimiter) {
    std::vector<std::string> tokens;
    if (delimiter.empty()) {
        // guard: find("") always succeeds at the current offset -> endless loop
        tokens.push_back(std::move(s));
        return tokens;
    }
    // walk a start offset forward instead of erasing the consumed prefix,
    // which avoids shifting the remaining characters on every match
    size_t start = 0;
    size_t pos   = 0;
    while ((pos = s.find(delimiter, start)) != std::string::npos) {
        tokens.push_back(s.substr(start, pos - start));
        start = pos + delimiter.length();
    }
    // remainder after the last delimiter (may be empty)
    tokens.push_back(s.substr(start));
    return tokens;
}
|
||
// | ||
// gguf utils | ||
// | ||
|
@@ -271,3 +327,9 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) { | |
return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0); | ||
} | ||
} | ||
|
||
// | ||
// API used internally with llava2 | ||
// | ||
|
||
projector_type clip_get_projector_type(const struct clip_ctx * ctx); |
Uh oh!
There was an error while loading. Please reload this page.