@@ -800,11 +800,11 @@ class StableDiffusionGGML {
800
800
const std::vector<float >& sigmas,
801
801
int start_merge_step,
802
802
SDCondition id_cond,
803
- std::vector< int > skip_layers = {},
804
- float slg_scale = 0 ,
805
- float skip_layer_start = 0.01 ,
806
- float skip_layer_end = 0.2 ,
807
- ggml_tensor* noise_mask = nullptr ) {
803
+ sd_slg_params_t slg_params = {NULL , 0 , 0 , 0 , 0 },
804
+ sd_apg_params_t apg_params = { 1 , 0 , 0 } ,
805
+ ggml_tensor* noise_mask = nullptr ) {
806
+ std::vector< int > skip_layers (slg_params. skip_layers , slg_params. skip_layers + slg_params. skip_layers_count );
807
+
808
808
LOG_DEBUG (" Sample" );
809
809
struct ggml_init_params params;
810
810
size_t data_size = ggml_row_size (init_latent->type , init_latent->ne [0 ]);
@@ -827,7 +827,7 @@ class StableDiffusionGGML {
827
827
struct ggml_tensor * noised_input = ggml_dup_tensor (work_ctx, noise);
828
828
829
829
bool has_unconditioned = cfg_scale != 1.0 && uncond.c_crossattn != NULL ;
830
- bool has_skiplayer = slg_scale != 0.0 && skip_layers.size () > 0 ;
830
+ bool has_skiplayer = slg_params. scale != 0.0 && skip_layers.size () > 0 ;
831
831
832
832
// denoise wrapper
833
833
struct ggml_tensor * out_cond = ggml_dup_tensor (work_ctx, x);
@@ -847,13 +847,8 @@ class StableDiffusionGGML {
847
847
}
848
848
struct ggml_tensor * denoised = ggml_dup_tensor (work_ctx, x);
849
849
850
- // TODO do not hardcode
851
- float apg_eta = .08f ;
852
- float apg_momentum = -.5f ;
853
- float apg_norm_treshold = 15 .0f ;
854
-
855
850
std::vector<float > apg_momentum_buffer;
856
- if (apg_momentum != 0 )
851
+ if (apg_params. momentum != 0 )
857
852
apg_momentum_buffer.resize ((size_t )ggml_nelements (denoised));
858
853
859
854
auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* {
@@ -936,7 +931,7 @@ class StableDiffusionGGML {
936
931
}
937
932
938
933
int step_count = sigmas.size ();
939
- bool is_skiplayer_step = has_skiplayer && step > (int )(skip_layer_start * step_count) && step < (int )(skip_layer_end * step_count);
934
+ bool is_skiplayer_step = has_skiplayer && step > (int )(slg_params. skip_layer_start * step_count) && step < (int )(slg_params. skip_layer_end * step_count);
940
935
float * skip_layer_data = NULL ;
941
936
if (is_skiplayer_step) {
942
937
LOG_DEBUG (" Skipping layers at step %d\n " , step);
@@ -970,37 +965,37 @@ class StableDiffusionGGML {
970
965
float dot = 0 ;
971
966
for (int i = 0 ; i < ne_elements; i++) {
972
967
float delta = positive_data[i] - negative_data[i];
973
- if (apg_momentum != 0 ) {
974
- delta += apg_momentum * apg_momentum_buffer[i];
968
+ if (apg_params. momentum != 0 ) {
969
+ delta += apg_params. momentum * apg_momentum_buffer[i];
975
970
apg_momentum_buffer[i] = delta;
976
971
}
977
- if (apg_norm_treshold > 0 ) {
972
+ if (apg_params. norm_treshold > 0 ) {
978
973
diff_norm += delta * delta;
979
974
}
980
- if (apg_eta != 1 .0f ) {
975
+ if (apg_params. eta != 1 .0f ) {
981
976
cond_norm_sq += positive_data[i] * positive_data[i];
982
977
dot += positive_data[i] * delta;
983
978
}
984
979
deltas[i] = delta;
985
980
}
986
- if (apg_norm_treshold > 0 ) {
981
+ if (apg_params. norm_treshold > 0 ) {
987
982
diff_norm = std::sqrtf (diff_norm);
988
- apg_scale_factor = std::min (1 .0f , apg_norm_treshold / diff_norm);
983
+ apg_scale_factor = std::min (1 .0f , apg_params. norm_treshold / diff_norm);
989
984
}
990
- if (apg_eta != 1 .0f ) {
985
+ if (apg_params. eta != 1 .0f ) {
991
986
dot *= apg_scale_factor;
992
987
// pre-normalize (avoids one square root and ne_elements extra divs)
993
988
dot /= cond_norm_sq;
994
989
}
995
990
996
991
for (int i = 0 ; i < ne_elements; i++) {
997
992
deltas[i] *= apg_scale_factor;
998
- if (apg_eta != 1 .0f ) {
993
+ if (apg_params. eta != 1 .0f ) {
999
994
float apg_parallel = dot * positive_data[i];
1000
995
float apg_orthogonal = deltas[i] - apg_parallel;
1001
996
1002
997
// tweak deltas
1003
- deltas[i] = apg_orthogonal + apg_eta * apg_parallel;
998
+ deltas[i] = apg_orthogonal + apg_params. eta * apg_parallel;
1004
999
}
1005
1000
}
1006
1001
@@ -1019,7 +1014,7 @@ class StableDiffusionGGML {
1019
1014
}
1020
1015
}
1021
1016
if (is_skiplayer_step) {
1022
- latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_scale ;
1017
+ latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_params. scale ;
1023
1018
}
1024
1019
// v = latent_result, eps = latent_result
1025
1020
// denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
@@ -1265,11 +1260,9 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
1265
1260
float style_ratio,
1266
1261
bool normalize_input,
1267
1262
std::string input_id_images_path,
1268
- std::vector<int > skip_layers = {},
1269
- float slg_scale = 0 ,
1270
- float skip_layer_start = 0.01 ,
1271
- float skip_layer_end = 0.2 ,
1272
- ggml_tensor* masked_image = NULL ) {
1263
+ sd_slg_params_t slg_params,
1264
+ sd_apg_params_t apg_params,
1265
+ ggml_tensor* masked_image = NULL ) {
1273
1266
if (seed < 0 ) {
1274
1267
// Generally, when using the provided command line, the seed is always >0.
1275
1268
// However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library
@@ -1522,10 +1515,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
1522
1515
sigmas,
1523
1516
start_merge_step,
1524
1517
id_cond,
1525
- skip_layers,
1526
- slg_scale,
1527
- skip_layer_start,
1528
- skip_layer_end,
1518
+ slg_params,
1519
+ apg_params,
1529
1520
noise_mask);
1530
1521
1531
1522
// struct ggml_tensor* x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");
@@ -1595,12 +1586,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
1595
1586
float style_ratio,
1596
1587
bool normalize_input,
1597
1588
const char * input_id_images_path_c_str,
1598
- int * skip_layers = NULL ,
1599
- size_t skip_layers_count = 0 ,
1600
- float slg_scale = 0 ,
1601
- float skip_layer_start = 0.01 ,
1602
- float skip_layer_end = 0.2 ) {
1603
- std::vector<int > skip_layers_vec (skip_layers, skip_layers + skip_layers_count);
1589
+ sd_slg_params_t slg_params,
1590
+ sd_apg_params_t apg_params) {
1604
1591
LOG_DEBUG (" txt2img %dx%d" , width, height);
1605
1592
if (sd_ctx == NULL ) {
1606
1593
return NULL ;
@@ -1674,10 +1661,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
1674
1661
style_ratio,
1675
1662
normalize_input,
1676
1663
input_id_images_path_c_str,
1677
- skip_layers_vec,
1678
- slg_scale,
1679
- skip_layer_start,
1680
- skip_layer_end);
1664
+ slg_params,
1665
+ apg_params);
1681
1666
1682
1667
size_t t1 = ggml_time_ms ();
1683
1668
@@ -1707,12 +1692,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
1707
1692
float style_ratio,
1708
1693
bool normalize_input,
1709
1694
const char * input_id_images_path_c_str,
1710
- int * skip_layers = NULL ,
1711
- size_t skip_layers_count = 0 ,
1712
- float slg_scale = 0 ,
1713
- float skip_layer_start = 0.01 ,
1714
- float skip_layer_end = 0.2 ) {
1715
- std::vector<int > skip_layers_vec (skip_layers, skip_layers + skip_layers_count);
1695
+ sd_slg_params_t slg_params,
1696
+ sd_apg_params_t apg_params) {
1716
1697
LOG_DEBUG (" img2img %dx%d" , width, height);
1717
1698
if (sd_ctx == NULL ) {
1718
1699
return NULL ;
@@ -1854,10 +1835,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
1854
1835
style_ratio,
1855
1836
normalize_input,
1856
1837
input_id_images_path_c_str,
1857
- skip_layers_vec,
1858
- slg_scale,
1859
- skip_layer_start,
1860
- skip_layer_end,
1838
+ slg_params,
1839
+ apg_params,
1861
1840
masked_image);
1862
1841
1863
1842
size_t t2 = ggml_time_ms ();
0 commit comments