Skip to content

Commit 02114c2

Browse files
committed
refactor guidance params in lib
1 parent a5dbce5 commit 02114c2

File tree

3 files changed

+66
-77
lines changed

examples/cli/main.cpp

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -963,11 +963,12 @@ int main(int argc, const char* argv[]) {
963963
params.style_ratio,
964964
params.normalize_input,
965965
params.input_id_images_path.c_str(),
966-
params.skip_layers.data(),
967-
params.skip_layers.size(),
968-
params.slg_scale,
969-
params.skip_layer_start,
970-
params.skip_layer_end);
966+
sd_slg_params_t{params.skip_layers.data(),
967+
params.skip_layers.size(),
968+
params.slg_scale,
969+
params.skip_layer_start,
970+
params.skip_layer_end},
971+
sd_apg_params_t{1, 0, 0});
971972
} else {
972973
sd_image_t input_image = {(uint32_t)params.width,
973974
(uint32_t)params.height,
@@ -1032,11 +1033,12 @@ int main(int argc, const char* argv[]) {
10321033
params.style_ratio,
10331034
params.normalize_input,
10341035
params.input_id_images_path.c_str(),
1035-
params.skip_layers.data(),
1036-
params.skip_layers.size(),
1037-
params.slg_scale,
1038-
params.skip_layer_start,
1039-
params.skip_layer_end);
1036+
sd_slg_params_t{params.skip_layers.data(),
1037+
params.skip_layers.size(),
1038+
params.slg_scale,
1039+
params.skip_layer_start,
1040+
params.skip_layer_end},
1041+
sd_apg_params_t{1, 0, 0});
10401042
}
10411043
}
10421044

@@ -1075,19 +1077,19 @@ int main(int argc, const char* argv[]) {
10751077

10761078
std::string dummy_name, ext, lc_ext;
10771079
bool is_jpg;
1078-
size_t last = params.output_path.find_last_of(".");
1080+
size_t last = params.output_path.find_last_of(".");
10791081
size_t last_path = std::min(params.output_path.find_last_of("/"),
10801082
params.output_path.find_last_of("\\"));
1081-
if (last != std::string::npos // filename has extension
1082-
&& (last_path == std::string::npos || last > last_path)) {
1083+
if (last != std::string::npos // filename has extension
1084+
&& (last_path == std::string::npos || last > last_path)) {
10831085
dummy_name = params.output_path.substr(0, last);
10841086
ext = lc_ext = params.output_path.substr(last);
10851087
std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower);
10861088
is_jpg = lc_ext == ".jpg" || lc_ext == ".jpeg" || lc_ext == ".jpe";
10871089
} else {
10881090
dummy_name = params.output_path;
10891091
ext = lc_ext = "";
1090-
is_jpg = false;
1092+
is_jpg = false;
10911093
}
10921094
// appending ".png" to absent or unknown extension
10931095
if (!is_jpg && lc_ext != ".png") {
@@ -1099,7 +1101,7 @@ int main(int argc, const char* argv[]) {
10991101
continue;
11001102
}
11011103
std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ext : dummy_name + ext;
1102-
if(is_jpg) {
1104+
if (is_jpg) {
11031105
stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
11041106
results[i].data, 90, get_image_params(params, params.seed + i).c_str());
11051107
printf("save result JPEG image to '%s'\n", final_image_path.c_str());

stable-diffusion.cpp

Lines changed: 31 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -800,11 +800,11 @@ class StableDiffusionGGML {
800800
const std::vector<float>& sigmas,
801801
int start_merge_step,
802802
SDCondition id_cond,
803-
std::vector<int> skip_layers = {},
804-
float slg_scale = 0,
805-
float skip_layer_start = 0.01,
806-
float skip_layer_end = 0.2,
807-
ggml_tensor* noise_mask = nullptr) {
803+
sd_slg_params_t slg_params = {NULL, 0, 0, 0, 0},
804+
sd_apg_params_t apg_params = {1, 0, 0},
805+
ggml_tensor* noise_mask = nullptr) {
806+
std::vector<int> skip_layers(slg_params.skip_layers, slg_params.skip_layers + slg_params.skip_layers_count);
807+
808808
LOG_DEBUG("Sample");
809809
struct ggml_init_params params;
810810
size_t data_size = ggml_row_size(init_latent->type, init_latent->ne[0]);
@@ -827,7 +827,7 @@ class StableDiffusionGGML {
827827
struct ggml_tensor* noised_input = ggml_dup_tensor(work_ctx, noise);
828828

829829
bool has_unconditioned = cfg_scale != 1.0 && uncond.c_crossattn != NULL;
830-
bool has_skiplayer = slg_scale != 0.0 && skip_layers.size() > 0;
830+
bool has_skiplayer = slg_params.scale != 0.0 && skip_layers.size() > 0;
831831

832832
// denoise wrapper
833833
struct ggml_tensor* out_cond = ggml_dup_tensor(work_ctx, x);
@@ -847,13 +847,8 @@ class StableDiffusionGGML {
847847
}
848848
struct ggml_tensor* denoised = ggml_dup_tensor(work_ctx, x);
849849

850-
// TODO do not hardcode
851-
float apg_eta = .08f;
852-
float apg_momentum = -.5f;
853-
float apg_norm_treshold = 15.0f;
854-
855850
std::vector<float> apg_momentum_buffer;
856-
if (apg_momentum != 0)
851+
if (apg_params.momentum != 0)
857852
apg_momentum_buffer.resize((size_t)ggml_nelements(denoised));
858853

859854
auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* {
@@ -936,7 +931,7 @@ class StableDiffusionGGML {
936931
}
937932

938933
int step_count = sigmas.size();
939-
bool is_skiplayer_step = has_skiplayer && step > (int)(skip_layer_start * step_count) && step < (int)(skip_layer_end * step_count);
934+
bool is_skiplayer_step = has_skiplayer && step > (int)(slg_params.skip_layer_start * step_count) && step < (int)(slg_params.skip_layer_end * step_count);
940935
float* skip_layer_data = NULL;
941936
if (is_skiplayer_step) {
942937
LOG_DEBUG("Skipping layers at step %d\n", step);
@@ -970,37 +965,37 @@ class StableDiffusionGGML {
970965
float dot = 0;
971966
for (int i = 0; i < ne_elements; i++) {
972967
float delta = positive_data[i] - negative_data[i];
973-
if (apg_momentum != 0) {
974-
delta += apg_momentum * apg_momentum_buffer[i];
968+
if (apg_params.momentum != 0) {
969+
delta += apg_params.momentum * apg_momentum_buffer[i];
975970
apg_momentum_buffer[i] = delta;
976971
}
977-
if (apg_norm_treshold > 0) {
972+
if (apg_params.norm_treshold > 0) {
978973
diff_norm += delta * delta;
979974
}
980-
if (apg_eta != 1.0f) {
975+
if (apg_params.eta != 1.0f) {
981976
cond_norm_sq += positive_data[i] * positive_data[i];
982977
dot += positive_data[i] * delta;
983978
}
984979
deltas[i] = delta;
985980
}
986-
if (apg_norm_treshold > 0) {
981+
if (apg_params.norm_treshold > 0) {
987982
diff_norm = std::sqrtf(diff_norm);
988-
apg_scale_factor = std::min(1.0f, apg_norm_treshold / diff_norm);
983+
apg_scale_factor = std::min(1.0f, apg_params.norm_treshold / diff_norm);
989984
}
990-
if (apg_eta != 1.0f) {
985+
if (apg_params.eta != 1.0f) {
991986
dot *= apg_scale_factor;
992987
// pre-normalize (avoids one square root and ne_elements extra divs)
993988
dot /= cond_norm_sq;
994989
}
995990

996991
for (int i = 0; i < ne_elements; i++) {
997992
deltas[i] *= apg_scale_factor;
998-
if (apg_eta != 1.0f) {
993+
if (apg_params.eta != 1.0f) {
999994
float apg_parallel = dot * positive_data[i];
1000995
float apg_orthogonal = deltas[i] - apg_parallel;
1001996

1002997
// tweak deltas
1003-
deltas[i] = apg_orthogonal + apg_eta * apg_parallel;
998+
deltas[i] = apg_orthogonal + apg_params.eta * apg_parallel;
1004999
}
10051000
}
10061001

@@ -1019,7 +1014,7 @@ class StableDiffusionGGML {
10191014
}
10201015
}
10211016
if (is_skiplayer_step) {
1022-
latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_scale;
1017+
latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_params.scale;
10231018
}
10241019
// v = latent_result, eps = latent_result
10251020
// denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
@@ -1265,11 +1260,9 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
12651260
float style_ratio,
12661261
bool normalize_input,
12671262
std::string input_id_images_path,
1268-
std::vector<int> skip_layers = {},
1269-
float slg_scale = 0,
1270-
float skip_layer_start = 0.01,
1271-
float skip_layer_end = 0.2,
1272-
ggml_tensor* masked_image = NULL) {
1263+
sd_slg_params_t slg_params,
1264+
sd_apg_params_t apg_params,
1265+
ggml_tensor* masked_image = NULL) {
12731266
if (seed < 0) {
12741267
// Generally, when using the provided command line, the seed is always >0.
12751268
// However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library
@@ -1522,10 +1515,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
15221515
sigmas,
15231516
start_merge_step,
15241517
id_cond,
1525-
skip_layers,
1526-
slg_scale,
1527-
skip_layer_start,
1528-
skip_layer_end,
1518+
slg_params,
1519+
apg_params,
15291520
noise_mask);
15301521

15311522
// struct ggml_tensor* x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");
@@ -1595,12 +1586,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
15951586
float style_ratio,
15961587
bool normalize_input,
15971588
const char* input_id_images_path_c_str,
1598-
int* skip_layers = NULL,
1599-
size_t skip_layers_count = 0,
1600-
float slg_scale = 0,
1601-
float skip_layer_start = 0.01,
1602-
float skip_layer_end = 0.2) {
1603-
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
1589+
sd_slg_params_t slg_params,
1590+
sd_apg_params_t apg_params) {
16041591
LOG_DEBUG("txt2img %dx%d", width, height);
16051592
if (sd_ctx == NULL) {
16061593
return NULL;
@@ -1674,10 +1661,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
16741661
style_ratio,
16751662
normalize_input,
16761663
input_id_images_path_c_str,
1677-
skip_layers_vec,
1678-
slg_scale,
1679-
skip_layer_start,
1680-
skip_layer_end);
1664+
slg_params,
1665+
apg_params);
16811666

16821667
size_t t1 = ggml_time_ms();
16831668

@@ -1707,12 +1692,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
17071692
float style_ratio,
17081693
bool normalize_input,
17091694
const char* input_id_images_path_c_str,
1710-
int* skip_layers = NULL,
1711-
size_t skip_layers_count = 0,
1712-
float slg_scale = 0,
1713-
float skip_layer_start = 0.01,
1714-
float skip_layer_end = 0.2) {
1715-
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
1695+
sd_slg_params_t slg_params,
1696+
sd_apg_params_t apg_params) {
17161697
LOG_DEBUG("img2img %dx%d", width, height);
17171698
if (sd_ctx == NULL) {
17181699
return NULL;
@@ -1854,10 +1835,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
18541835
style_ratio,
18551836
normalize_input,
18561837
input_id_images_path_c_str,
1857-
skip_layers_vec,
1858-
slg_scale,
1859-
skip_layer_start,
1860-
skip_layer_end,
1838+
slg_params,
1839+
apg_params,
18611840
masked_image);
18621841

18631842
size_t t2 = ggml_time_ms();

stable-diffusion.h

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,20 @@ typedef struct {
127127
uint8_t* data;
128128
} sd_image_t;
129129

130+
typedef struct {
131+
float eta;
132+
float momentum;
133+
float norm_treshold;
134+
} sd_apg_params_t;
135+
136+
typedef struct {
137+
int* skip_layers;
138+
size_t skip_layers_count;
139+
float scale;
140+
float skip_layer_start;
141+
float skip_layer_end;
142+
} sd_slg_params_t;
143+
130144
typedef struct sd_ctx_t sd_ctx_t;
131145

132146
SD_API sd_ctx_t* new_sd_ctx(const char* model_path,
@@ -172,11 +186,8 @@ SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx,
172186
float style_strength,
173187
bool normalize_input,
174188
const char* input_id_images_path,
175-
int* skip_layers,
176-
size_t skip_layers_count,
177-
float slg_scale,
178-
float skip_layer_start,
179-
float skip_layer_end);
189+
sd_slg_params_t slg_params,
190+
sd_apg_params_t apg_params);
180191

181192
SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
182193
sd_image_t init_image,
@@ -199,11 +210,8 @@ SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
199210
float style_strength,
200211
bool normalize_input,
201212
const char* input_id_images_path,
202-
int* skip_layers,
203-
size_t skip_layers_count,
204-
float slg_scale,
205-
float skip_layer_start,
206-
float skip_layer_end);
213+
sd_slg_params_t slg_params,
214+
sd_apg_params_t apg_params);
207215

208216
SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
209217
sd_image_t init_image,

0 commit comments

Comments
 (0)