1
- #ifndef CLIP_H
2
- #define CLIP_H
1
+ #pragma once
3
2
4
3
#include "ggml.h"
5
4
#include <stddef.h>
6
5
#include <stdint.h>
7
6
8
- #ifdef LLAMA_SHARED
9
- # if defined(_WIN32 ) && !defined(__MINGW32__ )
10
- # ifdef LLAMA_BUILD
11
- # define CLIP_API __declspec(dllexport)
12
- # else
13
- # define CLIP_API __declspec(dllimport)
14
- # endif
15
- # else
16
- # define CLIP_API __attribute__ ((visibility ("default")))
17
- # endif
18
- #else
19
- # define CLIP_API
20
- #endif
21
-
22
- #ifdef __cplusplus
23
- extern "C" {
24
- #endif
25
-
26
7
struct clip_ctx ;
27
8
28
9
struct clip_image_size {
@@ -39,97 +20,80 @@ struct clip_context_params {
39
20
enum ggml_log_level verbosity ;
40
21
};
41
22
42
- // deprecated, use clip_init
43
- CLIP_API struct clip_ctx * clip_model_load (const char * fname , int verbosity );
44
-
45
- CLIP_API struct clip_ctx * clip_init (const char * fname , struct clip_context_params ctx_params );
23
+ struct clip_ctx * clip_init (const char * fname , struct clip_context_params ctx_params );
46
24
47
- CLIP_API void clip_free (struct clip_ctx * ctx );
25
+ void clip_free (struct clip_ctx * ctx );
48
26
49
- CLIP_API size_t clip_embd_nbytes (const struct clip_ctx * ctx );
50
- CLIP_API size_t clip_embd_nbytes_by_img (const struct clip_ctx * ctx , int img_w , int img_h );
27
+ size_t clip_embd_nbytes (const struct clip_ctx * ctx );
28
+ size_t clip_embd_nbytes_by_img (const struct clip_ctx * ctx , int img_w , int img_h );
51
29
52
- CLIP_API int32_t clip_get_image_size (const struct clip_ctx * ctx );
53
- CLIP_API int32_t clip_get_patch_size (const struct clip_ctx * ctx );
54
- CLIP_API int32_t clip_get_hidden_size (const struct clip_ctx * ctx );
30
+ int32_t clip_get_image_size (const struct clip_ctx * ctx );
31
+ int32_t clip_get_patch_size (const struct clip_ctx * ctx );
32
+ int32_t clip_get_hidden_size (const struct clip_ctx * ctx );
55
33
56
34
// TODO: should return an enum, not a string
57
- CLIP_API const char * clip_patch_merge_type (const struct clip_ctx * ctx );
35
+ const char * clip_patch_merge_type (const struct clip_ctx * ctx );
58
36
59
- CLIP_API const int32_t * clip_image_grid (const struct clip_ctx * ctx );
60
- CLIP_API size_t get_clip_image_grid_size (const struct clip_ctx * ctx );
37
+ const int32_t * clip_image_grid (const struct clip_ctx * ctx );
38
+ size_t get_clip_image_grid_size (const struct clip_ctx * ctx );
61
39
62
- GGML_DEPRECATED (CLIP_API int clip_n_patches (const struct clip_ctx * ctx ),
63
- "use clip_n_output_tokens instead" );
64
- GGML_DEPRECATED (CLIP_API int clip_n_patches_by_img (const struct clip_ctx * ctx , struct clip_image_f32 * img ),
65
- "use clip_n_output_tokens instead" );
66
-
67
- CLIP_API int clip_n_output_tokens (const struct clip_ctx * ctx , struct clip_image_f32 * img );
40
+ int clip_n_output_tokens (const struct clip_ctx * ctx , struct clip_image_f32 * img );
68
41
69
42
// for M-RoPE, this will be the number of token positions in X and Y directions
70
43
// for other models, X will be the total number of tokens and Y will be 1
71
- CLIP_API int clip_n_output_tokens_x (const struct clip_ctx * ctx , struct clip_image_f32 * img );
72
- CLIP_API int clip_n_output_tokens_y (const struct clip_ctx * ctx , struct clip_image_f32 * img );
44
+ int clip_n_output_tokens_x (const struct clip_ctx * ctx , struct clip_image_f32 * img );
45
+ int clip_n_output_tokens_y (const struct clip_ctx * ctx , struct clip_image_f32 * img );
73
46
74
47
// this should be equal to the embedding dimension of the text model
75
- CLIP_API int clip_n_mmproj_embd (const struct clip_ctx * ctx );
48
+ int clip_n_mmproj_embd (const struct clip_ctx * ctx );
76
49
77
- CLIP_API int clip_uhd_num_image_embeds_col (struct clip_ctx * ctx_clip );
78
- CLIP_API void clip_add_load_image_size (struct clip_ctx * ctx_clip , struct clip_image_size * load_image_size );
79
- CLIP_API struct clip_image_size * clip_get_load_image_size (struct clip_ctx * ctx_clip );
50
+ int clip_uhd_num_image_embeds_col (struct clip_ctx * ctx_clip );
51
+ void clip_add_load_image_size (struct clip_ctx * ctx_clip , struct clip_image_size * load_image_size );
52
+ struct clip_image_size * clip_get_load_image_size (struct clip_ctx * ctx_clip );
80
53
81
- CLIP_API struct clip_image_size * clip_image_size_init (void );
82
- CLIP_API struct clip_image_u8 * clip_image_u8_init (void );
83
- CLIP_API struct clip_image_f32 * clip_image_f32_init (void );
84
- CLIP_API struct clip_image_f32_batch * clip_image_f32_batch_init (void ); // only used by libllava
54
+ struct clip_image_size * clip_image_size_init (void );
55
+ struct clip_image_u8 * clip_image_u8_init (void );
56
+ struct clip_image_f32 * clip_image_f32_init (void );
57
+ struct clip_image_f32_batch * clip_image_f32_batch_init (void ); // only used by libllava
85
58
86
59
// nx, ny are the output image dimensions
87
- CLIP_API unsigned char * clip_image_u8_get_data (struct clip_image_u8 * img , uint32_t * nx , uint32_t * ny );
60
+ unsigned char * clip_image_u8_get_data (struct clip_image_u8 * img , uint32_t * nx , uint32_t * ny );
88
61
89
- CLIP_API void clip_image_size_free (struct clip_image_size * img_size );
90
- CLIP_API void clip_image_u8_free (struct clip_image_u8 * img );
91
- CLIP_API void clip_image_f32_free (struct clip_image_f32 * img );
92
- CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch );
93
- CLIP_API void clip_image_f32_batch_free (struct clip_image_f32_batch * batch );
62
+ void clip_image_size_free (struct clip_image_size * img_size );
63
+ void clip_image_u8_free (struct clip_image_u8 * img );
64
+ void clip_image_f32_free (struct clip_image_f32 * img );
65
+ void clip_image_u8_batch_free (struct clip_image_u8_batch * batch );
66
+ void clip_image_f32_batch_free (struct clip_image_f32_batch * batch );
94
67
95
68
// used for accessing the underlying data of clip_image_f32_batch
96
- CLIP_API size_t clip_image_f32_batch_n_images (const struct clip_image_f32_batch * batch ); // equivalent to batch->size()
97
- CLIP_API size_t clip_image_f32_batch_nx (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->nx
98
- CLIP_API size_t clip_image_f32_batch_ny (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->ny
99
- CLIP_API struct clip_image_f32 * clip_image_f32_get_img (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->data
69
+ size_t clip_image_f32_batch_n_images (const struct clip_image_f32_batch * batch ); // equivalent to batch->size()
70
+ size_t clip_image_f32_batch_nx (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->nx
71
+ size_t clip_image_f32_batch_ny (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->ny
72
+ struct clip_image_f32 * clip_image_f32_get_img (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->data
100
73
101
74
/**
102
75
* Build image from pixels decoded by other libraries instead of stb_image.h for better performance.
103
76
* The memory layout is RGBRGBRGB..., input buffer length must be 3*nx*ny bytes
104
77
*/
105
- CLIP_API void clip_build_img_from_pixels (const unsigned char * rgb_pixels , int nx , int ny , struct clip_image_u8 * img );
78
+ void clip_build_img_from_pixels (const unsigned char * rgb_pixels , int nx , int ny , struct clip_image_u8 * img );
106
79
107
- CLIP_API bool clip_image_load_from_file (const char * fname , struct clip_image_u8 * img );
80
+ bool clip_image_load_from_file (const char * fname , struct clip_image_u8 * img );
108
81
109
82
/** interpret bytes as an image file with length bytes_length, and use the result to populate img */
110
- CLIP_API bool clip_image_load_from_bytes (const unsigned char * bytes , size_t bytes_length , struct clip_image_u8 * img );
83
+ bool clip_image_load_from_bytes (const unsigned char * bytes , size_t bytes_length , struct clip_image_u8 * img );
111
84
112
85
/** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
113
- CLIP_API bool clip_image_preprocess (struct clip_ctx * ctx , const struct clip_image_u8 * img , struct clip_image_f32_batch * res_imgs );
114
-
115
- CLIP_API struct ggml_tensor * clip_get_newline_tensor (const struct clip_ctx * ctx );
116
-
117
- CLIP_API bool clip_image_encode (struct clip_ctx * ctx , int n_threads , struct clip_image_f32 * img , float * vec );
118
- CLIP_API bool clip_image_batch_encode (struct clip_ctx * ctx , int n_threads , const struct clip_image_f32_batch * imgs , float * vec );
119
-
120
- CLIP_API bool clip_model_quantize (const char * fname_inp , const char * fname_out , int itype );
121
-
122
- CLIP_API int clip_is_minicpmv (const struct clip_ctx * ctx );
123
- CLIP_API bool clip_is_glm (const struct clip_ctx * ctx );
124
- CLIP_API bool clip_is_qwen2vl (const struct clip_ctx * ctx );
125
- CLIP_API bool clip_is_llava (const struct clip_ctx * ctx );
126
- CLIP_API bool clip_is_gemma3 (const struct clip_ctx * ctx );
86
+ bool clip_image_preprocess (struct clip_ctx * ctx , const struct clip_image_u8 * img , struct clip_image_f32_batch * res_imgs );
127
87
128
- CLIP_API bool clip_encode_float_image ( struct clip_ctx * ctx , int n_threads , float * img , int h , int w , float * vec );
88
+ struct ggml_tensor * clip_get_newline_tensor ( const struct clip_ctx * ctx );
129
89
90
+ bool clip_image_encode (struct clip_ctx * ctx , int n_threads , struct clip_image_f32 * img , float * vec );
91
+ bool clip_image_batch_encode (struct clip_ctx * ctx , int n_threads , const struct clip_image_f32_batch * imgs , float * vec );
130
92
131
- #ifdef __cplusplus
132
- }
133
- #endif
93
+ int clip_is_minicpmv (const struct clip_ctx * ctx );
94
+ bool clip_is_glm (const struct clip_ctx * ctx );
95
+ bool clip_is_qwen2vl (const struct clip_ctx * ctx );
96
+ bool clip_is_llava (const struct clip_ctx * ctx );
97
+ bool clip_is_gemma3 (const struct clip_ctx * ctx );
134
98
135
- #endif // CLIP_H
99
+ bool clip_encode_float_image ( struct clip_ctx * ctx , int n_threads , float * img , int h , int w , float * vec );
0 commit comments