Skip to content

Commit a697518

Browse files
committed
Add CosXL support (broken)
cosxl: smol cleanup CosXL: fix schedule choice Rename EDMVDenoiser Avoid inf for EDMVDenoiser + discrete schedule make parametrization flags public Fix CosXL with empty negative prompts Instruct-p2p support support 2 conditionings cfg Do not re-encode the exact same image twice pix2pix: fixes for 2-cfg Fix pix2pix latent inputs + improve inpainting a bit + fix naming prepare for other pix2pix-like models Support sdxl ip2p CosXL edit: fix reference image embeddings Support 2-cond cfg properly in cli fix typo in help
1 parent 10c6501 commit a697518

File tree

7 files changed

+351
-237
lines changed

7 files changed

+351
-237
lines changed

denoiser.hpp

Lines changed: 74 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -168,24 +168,21 @@ struct AYSSchedule : SigmaSchedule {
168168
std::vector<float> inputs;
169169
std::vector<float> results(n + 1);
170170

171-
switch (version) {
172-
case VERSION_SD2: /* fallthrough */
173-
LOG_WARN("AYS not designed for SD2.X models");
174-
case VERSION_SD1:
175-
LOG_INFO("AYS using SD1.5 noise levels");
176-
inputs = noise_levels[0];
177-
break;
178-
case VERSION_SDXL:
179-
LOG_INFO("AYS using SDXL noise levels");
180-
inputs = noise_levels[1];
181-
break;
182-
case VERSION_SVD:
183-
LOG_INFO("AYS using SVD noise levels");
184-
inputs = noise_levels[2];
185-
break;
186-
default:
187-
LOG_ERROR("Version not compatable with AYS scheduler");
188-
return results;
171+
if (sd_version_is_sd2((SDVersion)version)) {
172+
LOG_WARN("AYS not designed for SD2.X models");
173+
} /* fallthrough */
174+
else if (sd_version_is_sd1((SDVersion)version)) {
175+
LOG_INFO("AYS using SD1.5 noise levels");
176+
inputs = noise_levels[0];
177+
} else if (sd_version_is_sdxl((SDVersion)version)) {
178+
LOG_INFO("AYS using SDXL noise levels");
179+
inputs = noise_levels[1];
180+
} else if (version == VERSION_SVD) {
181+
LOG_INFO("AYS using SVD noise levels");
182+
inputs = noise_levels[2];
183+
} else {
184+
LOG_ERROR("Version not compatable with AYS scheduler");
185+
return results;
189186
}
190187

191188
/* Stretches those pre-calculated reference levels out to the desired
@@ -346,6 +343,31 @@ struct CompVisVDenoiser : public CompVisDenoiser {
346343
}
347344
};
348345

346+
struct EDMVDenoiser : public CompVisVDenoiser {
347+
float min_sigma = 0.002;
348+
float max_sigma = 120.0;
349+
350+
EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0) : min_sigma(min_sigma), max_sigma(max_sigma) {
351+
schedule = std::make_shared<ExponentialSchedule>();
352+
}
353+
354+
float t_to_sigma(float t) {
355+
return std::exp(t * 4/(float)TIMESTEPS);
356+
}
357+
358+
float sigma_to_t(float s) {
359+
return 0.25 * std::log(s);
360+
}
361+
362+
float sigma_min() {
363+
return min_sigma;
364+
}
365+
366+
float sigma_max() {
367+
return max_sigma;
368+
}
369+
};
370+
349371
float time_snr_shift(float alpha, float t) {
350372
if (alpha == 1.0f) {
351373
return t;
@@ -1019,7 +1041,7 @@ static void sample_k_diffusion(sample_method_t method,
10191041
// also needed to invert the behavior of CompVisDenoiser
10201042
// (k-diffusion's LMSDiscreteScheduler)
10211043
float beta_start = 0.00085f;
1022-
float beta_end = 0.0120f;
1044+
float beta_end = 0.0120f;
10231045
std::vector<double> alphas_cumprod;
10241046
std::vector<double> compvis_sigmas;
10251047

@@ -1030,8 +1052,9 @@ static void sample_k_diffusion(sample_method_t method,
10301052
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
10311053
(1.0f -
10321054
std::pow(sqrtf(beta_start) +
1033-
(sqrtf(beta_end) - sqrtf(beta_start)) *
1034-
((float)i / (TIMESTEPS - 1)), 2));
1055+
(sqrtf(beta_end) - sqrtf(beta_start)) *
1056+
((float)i / (TIMESTEPS - 1)),
1057+
2));
10351058
compvis_sigmas[i] =
10361059
std::sqrt((1 - alphas_cumprod[i]) /
10371060
alphas_cumprod[i]);
@@ -1061,7 +1084,8 @@ static void sample_k_diffusion(sample_method_t method,
10611084
// - pred_prev_sample -> "x_t-1"
10621085
int timestep =
10631086
roundf(TIMESTEPS -
1064-
i * ((float)TIMESTEPS / steps)) - 1;
1087+
i * ((float)TIMESTEPS / steps)) -
1088+
1;
10651089
// 1. get previous step value (=t-1)
10661090
int prev_timestep = timestep - TIMESTEPS / steps;
10671091
// The sigma here is chosen to cause the
@@ -1086,10 +1110,9 @@ static void sample_k_diffusion(sample_method_t method,
10861110
float* vec_x = (float*)x->data;
10871111
for (int j = 0; j < ggml_nelements(x); j++) {
10881112
vec_x[j] *= std::sqrt(sigma * sigma + 1) /
1089-
sigma;
1113+
sigma;
10901114
}
1091-
}
1092-
else {
1115+
} else {
10931116
// For the subsequent steps after the first one,
10941117
// at this point x = latents or x = sample, and
10951118
// needs to be prescaled with x <- sample / c_in
@@ -1127,9 +1150,8 @@ static void sample_k_diffusion(sample_method_t method,
11271150
float alpha_prod_t = alphas_cumprod[timestep];
11281151
// Note final_alpha_cumprod = alphas_cumprod[0] due to
11291152
// trailing timestep spacing
1130-
float alpha_prod_t_prev = prev_timestep >= 0 ?
1131-
alphas_cumprod[prev_timestep] : alphas_cumprod[0];
1132-
float beta_prod_t = 1 - alpha_prod_t;
1153+
float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
1154+
float beta_prod_t = 1 - alpha_prod_t;
11331155
// 3. compute predicted original sample from predicted
11341156
// noise also called "predicted x_0" of formula (12)
11351157
// from https://arxiv.org/pdf/2010.02502.pdf
@@ -1145,7 +1167,7 @@ static void sample_k_diffusion(sample_method_t method,
11451167
vec_pred_original_sample[j] =
11461168
(vec_x[j] / std::sqrt(sigma * sigma + 1) -
11471169
std::sqrt(beta_prod_t) *
1148-
vec_model_output[j]) *
1170+
vec_model_output[j]) *
11491171
(1 / std::sqrt(alpha_prod_t));
11501172
}
11511173
}
@@ -1159,8 +1181,8 @@ static void sample_k_diffusion(sample_method_t method,
11591181
// sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
11601182
// sqrt(1 - alpha_t/alpha_t-1)
11611183
float beta_prod_t_prev = 1 - alpha_prod_t_prev;
1162-
float variance = (beta_prod_t_prev / beta_prod_t) *
1163-
(1 - alpha_prod_t / alpha_prod_t_prev);
1184+
float variance = (beta_prod_t_prev / beta_prod_t) *
1185+
(1 - alpha_prod_t / alpha_prod_t_prev);
11641186
float std_dev_t = eta * std::sqrt(variance);
11651187
// 6. compute "direction pointing to x_t" of formula
11661188
// (12) from https://arxiv.org/pdf/2010.02502.pdf
@@ -1179,8 +1201,8 @@ static void sample_k_diffusion(sample_method_t method,
11791201
std::pow(std_dev_t, 2)) *
11801202
vec_model_output[j];
11811203
vec_x[j] = std::sqrt(alpha_prod_t_prev) *
1182-
vec_pred_original_sample[j] +
1183-
pred_sample_direction;
1204+
vec_pred_original_sample[j] +
1205+
pred_sample_direction;
11841206
}
11851207
}
11861208
if (eta > 0) {
@@ -1208,7 +1230,7 @@ static void sample_k_diffusion(sample_method_t method,
12081230
// by Semi-Linear Consistency Function with Trajectory
12091231
// Mapping", arXiv:2402.19159 [cs.CV]
12101232
float beta_start = 0.00085f;
1211-
float beta_end = 0.0120f;
1233+
float beta_end = 0.0120f;
12121234
std::vector<double> alphas_cumprod;
12131235
std::vector<double> compvis_sigmas;
12141236

@@ -1219,8 +1241,9 @@ static void sample_k_diffusion(sample_method_t method,
12191241
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
12201242
(1.0f -
12211243
std::pow(sqrtf(beta_start) +
1222-
(sqrtf(beta_end) - sqrtf(beta_start)) *
1223-
((float)i / (TIMESTEPS - 1)), 2));
1244+
(sqrtf(beta_end) - sqrtf(beta_start)) *
1245+
((float)i / (TIMESTEPS - 1)),
1246+
2));
12241247
compvis_sigmas[i] =
12251248
std::sqrt((1 - alphas_cumprod[i]) /
12261249
alphas_cumprod[i]);
@@ -1235,13 +1258,10 @@ static void sample_k_diffusion(sample_method_t method,
12351258
for (int i = 0; i < steps; i++) {
12361259
// Analytic form for TCD timesteps
12371260
int timestep = TIMESTEPS - 1 -
1238-
(TIMESTEPS / original_steps) *
1239-
(int)floor(i * ((float)original_steps / steps));
1261+
(TIMESTEPS / original_steps) *
1262+
(int)floor(i * ((float)original_steps / steps));
12401263
// 1. get previous step value
1241-
int prev_timestep = i >= steps - 1 ? 0 :
1242-
TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
1243-
(int)floor((i + 1) *
1244-
((float)original_steps / steps));
1264+
int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps));
12451265
// Here timestep_s is tau_n' in Algorithm 4. The _s
12461266
// notation appears to be that from C. Lu,
12471267
// "DPM-Solver: A Fast ODE Solver for Diffusion
@@ -1258,10 +1278,9 @@ static void sample_k_diffusion(sample_method_t method,
12581278
float* vec_x = (float*)x->data;
12591279
for (int j = 0; j < ggml_nelements(x); j++) {
12601280
vec_x[j] *= std::sqrt(sigma * sigma + 1) /
1261-
sigma;
1281+
sigma;
12621282
}
1263-
}
1264-
else {
1283+
} else {
12651284
float* vec_x = (float*)x->data;
12661285
for (int j = 0; j < ggml_nelements(x); j++) {
12671286
vec_x[j] *= std::sqrt(sigma * sigma + 1);
@@ -1294,15 +1313,14 @@ static void sample_k_diffusion(sample_method_t method,
12941313
// DPM-Solver. In fact, we have alpha_{t_n} =
12951314
// \sqrt{\hat{alpha_n}}, [...]"
12961315
float alpha_prod_t = alphas_cumprod[timestep];
1297-
float beta_prod_t = 1 - alpha_prod_t;
1316+
float beta_prod_t = 1 - alpha_prod_t;
12981317
// Note final_alpha_cumprod = alphas_cumprod[0] since
12991318
// TCD is always "trailing"
1300-
float alpha_prod_t_prev = prev_timestep >= 0 ?
1301-
alphas_cumprod[prev_timestep] : alphas_cumprod[0];
1319+
float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
13021320
// The subscript _s are the only portion in this
13031321
// section (2) unique to TCD
13041322
float alpha_prod_s = alphas_cumprod[timestep_s];
1305-
float beta_prod_s = 1 - alpha_prod_s;
1323+
float beta_prod_s = 1 - alpha_prod_s;
13061324
// 3. Compute the predicted noised sample x_s based on
13071325
// the model parameterization
13081326
//
@@ -1317,7 +1335,7 @@ static void sample_k_diffusion(sample_method_t method,
13171335
vec_pred_original_sample[j] =
13181336
(vec_x[j] / std::sqrt(sigma * sigma + 1) -
13191337
std::sqrt(beta_prod_t) *
1320-
vec_model_output[j]) *
1338+
vec_model_output[j]) *
13211339
(1 / std::sqrt(alpha_prod_t));
13221340
}
13231341
}
@@ -1339,9 +1357,9 @@ static void sample_k_diffusion(sample_method_t method,
13391357
// pred_epsilon = model_output
13401358
vec_x[j] =
13411359
std::sqrt(alpha_prod_s) *
1342-
vec_pred_original_sample[j] +
1360+
vec_pred_original_sample[j] +
13431361
std::sqrt(beta_prod_s) *
1344-
vec_model_output[j];
1362+
vec_model_output[j];
13451363
}
13461364
}
13471365
// 4. Sample and inject noise z ~ N(0, I) for
@@ -1357,7 +1375,7 @@ static void sample_k_diffusion(sample_method_t method,
13571375
// In this case, x is still pred_noised_sample,
13581376
// continue in-place
13591377
ggml_tensor_set_f32_randn(noise, rng);
1360-
float* vec_x = (float*)x->data;
1378+
float* vec_x = (float*)x->data;
13611379
float* vec_noise = (float*)noise->data;
13621380
for (int j = 0; j < ggml_nelements(x); j++) {
13631381
// Corresponding to (35) in Zheng et
@@ -1366,10 +1384,10 @@ static void sample_k_diffusion(sample_method_t method,
13661384
vec_x[j] =
13671385
std::sqrt(alpha_prod_t_prev /
13681386
alpha_prod_s) *
1369-
vec_x[j] +
1387+
vec_x[j] +
13701388
std::sqrt(1 - alpha_prod_t_prev /
1371-
alpha_prod_s) *
1372-
vec_noise[j];
1389+
alpha_prod_s) *
1390+
vec_noise[j];
13731391
}
13741392
}
13751393
}

0 commit comments

Comments
 (0)