@@ -168,24 +168,21 @@ struct AYSSchedule : SigmaSchedule {
168
168
std::vector<float > inputs;
169
169
std::vector<float > results (n + 1 );
170
170
171
- switch (version) {
172
- case VERSION_SD2: /* fallthrough */
173
- LOG_WARN (" AYS not designed for SD2.X models" );
174
- case VERSION_SD1:
175
- LOG_INFO (" AYS using SD1.5 noise levels" );
176
- inputs = noise_levels[0 ];
177
- break ;
178
- case VERSION_SDXL:
179
- LOG_INFO (" AYS using SDXL noise levels" );
180
- inputs = noise_levels[1 ];
181
- break ;
182
- case VERSION_SVD:
183
- LOG_INFO (" AYS using SVD noise levels" );
184
- inputs = noise_levels[2 ];
185
- break ;
186
- default :
187
- LOG_ERROR (" Version not compatable with AYS scheduler" );
188
- return results;
171
+ if (sd_version_is_sd2 ((SDVersion)version)) {
172
+ LOG_WARN (" AYS not designed for SD2.X models" );
173
+ } /* fallthrough */
174
+ else if (sd_version_is_sd1 ((SDVersion)version)) {
175
+ LOG_INFO (" AYS using SD1.5 noise levels" );
176
+ inputs = noise_levels[0 ];
177
+ } else if (sd_version_is_sdxl ((SDVersion)version)) {
178
+ LOG_INFO (" AYS using SDXL noise levels" );
179
+ inputs = noise_levels[1 ];
180
+ } else if (version == VERSION_SVD) {
181
+ LOG_INFO (" AYS using SVD noise levels" );
182
+ inputs = noise_levels[2 ];
183
+ } else {
184
+ LOG_ERROR (" Version not compatable with AYS scheduler" );
185
+ return results;
189
186
}
190
187
191
188
/* Stretches those pre-calculated reference levels out to the desired
@@ -346,6 +343,31 @@ struct CompVisVDenoiser : public CompVisDenoiser {
346
343
}
347
344
};
348
345
346
+ struct EDMVDenoiser : public CompVisVDenoiser {
347
+ float min_sigma = 0.002 ;
348
+ float max_sigma = 120.0 ;
349
+
350
+ EDMVDenoiser (float min_sigma = 0.002 , float max_sigma = 120.0 ) : min_sigma(min_sigma), max_sigma(max_sigma) {
351
+ schedule = std::make_shared<ExponentialSchedule>();
352
+ }
353
+
354
+ float t_to_sigma (float t) {
355
+ return std::exp (t * 4 /(float )TIMESTEPS);
356
+ }
357
+
358
+ float sigma_to_t (float s) {
359
+ return 0.25 * std::log (s);
360
+ }
361
+
362
+ float sigma_min () {
363
+ return min_sigma;
364
+ }
365
+
366
+ float sigma_max () {
367
+ return max_sigma;
368
+ }
369
+ };
370
+
349
371
float time_snr_shift (float alpha, float t) {
350
372
if (alpha == 1 .0f ) {
351
373
return t;
@@ -1019,7 +1041,7 @@ static void sample_k_diffusion(sample_method_t method,
1019
1041
// also needed to invert the behavior of CompVisDenoiser
1020
1042
// (k-diffusion's LMSDiscreteScheduler)
1021
1043
float beta_start = 0 .00085f ;
1022
- float beta_end = 0 .0120f ;
1044
+ float beta_end = 0 .0120f ;
1023
1045
std::vector<double > alphas_cumprod;
1024
1046
std::vector<double > compvis_sigmas;
1025
1047
@@ -1030,8 +1052,9 @@ static void sample_k_diffusion(sample_method_t method,
1030
1052
(i == 0 ? 1 .0f : alphas_cumprod[i - 1 ]) *
1031
1053
(1 .0f -
1032
1054
std::pow (sqrtf (beta_start) +
1033
- (sqrtf (beta_end) - sqrtf (beta_start)) *
1034
- ((float )i / (TIMESTEPS - 1 )), 2 ));
1055
+ (sqrtf (beta_end) - sqrtf (beta_start)) *
1056
+ ((float )i / (TIMESTEPS - 1 )),
1057
+ 2 ));
1035
1058
compvis_sigmas[i] =
1036
1059
std::sqrt ((1 - alphas_cumprod[i]) /
1037
1060
alphas_cumprod[i]);
@@ -1061,7 +1084,8 @@ static void sample_k_diffusion(sample_method_t method,
1061
1084
// - pred_prev_sample -> "x_t-1"
1062
1085
int timestep =
1063
1086
roundf (TIMESTEPS -
1064
- i * ((float )TIMESTEPS / steps)) - 1 ;
1087
+ i * ((float )TIMESTEPS / steps)) -
1088
+ 1 ;
1065
1089
// 1. get previous step value (=t-1)
1066
1090
int prev_timestep = timestep - TIMESTEPS / steps;
1067
1091
// The sigma here is chosen to cause the
@@ -1086,10 +1110,9 @@ static void sample_k_diffusion(sample_method_t method,
1086
1110
float * vec_x = (float *)x->data ;
1087
1111
for (int j = 0 ; j < ggml_nelements (x); j++) {
1088
1112
vec_x[j] *= std::sqrt (sigma * sigma + 1 ) /
1089
- sigma;
1113
+ sigma;
1090
1114
}
1091
- }
1092
- else {
1115
+ } else {
1093
1116
// For the subsequent steps after the first one,
1094
1117
// at this point x = latents or x = sample, and
1095
1118
// needs to be prescaled with x <- sample / c_in
@@ -1127,9 +1150,8 @@ static void sample_k_diffusion(sample_method_t method,
1127
1150
float alpha_prod_t = alphas_cumprod[timestep];
1128
1151
// Note final_alpha_cumprod = alphas_cumprod[0] due to
1129
1152
// trailing timestep spacing
1130
- float alpha_prod_t_prev = prev_timestep >= 0 ?
1131
- alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1132
- float beta_prod_t = 1 - alpha_prod_t ;
1153
+ float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1154
+ float beta_prod_t = 1 - alpha_prod_t ;
1133
1155
// 3. compute predicted original sample from predicted
1134
1156
// noise also called "predicted x_0" of formula (12)
1135
1157
// from https://arxiv.org/pdf/2010.02502.pdf
@@ -1145,7 +1167,7 @@ static void sample_k_diffusion(sample_method_t method,
1145
1167
vec_pred_original_sample[j] =
1146
1168
(vec_x[j] / std::sqrt (sigma * sigma + 1 ) -
1147
1169
std::sqrt (beta_prod_t ) *
1148
- vec_model_output[j]) *
1170
+ vec_model_output[j]) *
1149
1171
(1 / std::sqrt (alpha_prod_t ));
1150
1172
}
1151
1173
}
@@ -1159,8 +1181,8 @@ static void sample_k_diffusion(sample_method_t method,
1159
1181
// sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
1160
1182
// sqrt(1 - alpha_t/alpha_t-1)
1161
1183
float beta_prod_t_prev = 1 - alpha_prod_t_prev;
1162
- float variance = (beta_prod_t_prev / beta_prod_t ) *
1163
- (1 - alpha_prod_t / alpha_prod_t_prev);
1184
+ float variance = (beta_prod_t_prev / beta_prod_t ) *
1185
+ (1 - alpha_prod_t / alpha_prod_t_prev);
1164
1186
float std_dev_t = eta * std::sqrt (variance);
1165
1187
// 6. compute "direction pointing to x_t" of formula
1166
1188
// (12) from https://arxiv.org/pdf/2010.02502.pdf
@@ -1179,8 +1201,8 @@ static void sample_k_diffusion(sample_method_t method,
1179
1201
std::pow (std_dev_t , 2 )) *
1180
1202
vec_model_output[j];
1181
1203
vec_x[j] = std::sqrt (alpha_prod_t_prev) *
1182
- vec_pred_original_sample[j] +
1183
- pred_sample_direction;
1204
+ vec_pred_original_sample[j] +
1205
+ pred_sample_direction;
1184
1206
}
1185
1207
}
1186
1208
if (eta > 0 ) {
@@ -1208,7 +1230,7 @@ static void sample_k_diffusion(sample_method_t method,
1208
1230
// by Semi-Linear Consistency Function with Trajectory
1209
1231
// Mapping", arXiv:2402.19159 [cs.CV]
1210
1232
float beta_start = 0 .00085f ;
1211
- float beta_end = 0 .0120f ;
1233
+ float beta_end = 0 .0120f ;
1212
1234
std::vector<double > alphas_cumprod;
1213
1235
std::vector<double > compvis_sigmas;
1214
1236
@@ -1219,8 +1241,9 @@ static void sample_k_diffusion(sample_method_t method,
1219
1241
(i == 0 ? 1 .0f : alphas_cumprod[i - 1 ]) *
1220
1242
(1 .0f -
1221
1243
std::pow (sqrtf (beta_start) +
1222
- (sqrtf (beta_end) - sqrtf (beta_start)) *
1223
- ((float )i / (TIMESTEPS - 1 )), 2 ));
1244
+ (sqrtf (beta_end) - sqrtf (beta_start)) *
1245
+ ((float )i / (TIMESTEPS - 1 )),
1246
+ 2 ));
1224
1247
compvis_sigmas[i] =
1225
1248
std::sqrt ((1 - alphas_cumprod[i]) /
1226
1249
alphas_cumprod[i]);
@@ -1235,13 +1258,10 @@ static void sample_k_diffusion(sample_method_t method,
1235
1258
for (int i = 0 ; i < steps; i++) {
1236
1259
// Analytic form for TCD timesteps
1237
1260
int timestep = TIMESTEPS - 1 -
1238
- (TIMESTEPS / original_steps) *
1239
- (int )floor (i * ((float )original_steps / steps));
1261
+ (TIMESTEPS / original_steps) *
1262
+ (int )floor (i * ((float )original_steps / steps));
1240
1263
// 1. get previous step value
1241
- int prev_timestep = i >= steps - 1 ? 0 :
1242
- TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
1243
- (int )floor ((i + 1 ) *
1244
- ((float )original_steps / steps));
1264
+ int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int )floor ((i + 1 ) * ((float )original_steps / steps));
1245
1265
// Here timestep_s is tau_n' in Algorithm 4. The _s
1246
1266
// notation appears to be that from C. Lu,
1247
1267
// "DPM-Solver: A Fast ODE Solver for Diffusion
@@ -1258,10 +1278,9 @@ static void sample_k_diffusion(sample_method_t method,
1258
1278
float * vec_x = (float *)x->data ;
1259
1279
for (int j = 0 ; j < ggml_nelements (x); j++) {
1260
1280
vec_x[j] *= std::sqrt (sigma * sigma + 1 ) /
1261
- sigma;
1281
+ sigma;
1262
1282
}
1263
- }
1264
- else {
1283
+ } else {
1265
1284
float * vec_x = (float *)x->data ;
1266
1285
for (int j = 0 ; j < ggml_nelements (x); j++) {
1267
1286
vec_x[j] *= std::sqrt (sigma * sigma + 1 );
@@ -1294,15 +1313,14 @@ static void sample_k_diffusion(sample_method_t method,
1294
1313
// DPM-Solver. In fact, we have alpha_{t_n} =
1295
1314
// \sqrt{\hat{alpha_n}}, [...]"
1296
1315
float alpha_prod_t = alphas_cumprod[timestep];
1297
- float beta_prod_t = 1 - alpha_prod_t ;
1316
+ float beta_prod_t = 1 - alpha_prod_t ;
1298
1317
// Note final_alpha_cumprod = alphas_cumprod[0] since
1299
1318
// TCD is always "trailing"
1300
- float alpha_prod_t_prev = prev_timestep >= 0 ?
1301
- alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1319
+ float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1302
1320
// The subscript _s are the only portion in this
1303
1321
// section (2) unique to TCD
1304
1322
float alpha_prod_s = alphas_cumprod[timestep_s];
1305
- float beta_prod_s = 1 - alpha_prod_s;
1323
+ float beta_prod_s = 1 - alpha_prod_s;
1306
1324
// 3. Compute the predicted noised sample x_s based on
1307
1325
// the model parameterization
1308
1326
//
@@ -1317,7 +1335,7 @@ static void sample_k_diffusion(sample_method_t method,
1317
1335
vec_pred_original_sample[j] =
1318
1336
(vec_x[j] / std::sqrt (sigma * sigma + 1 ) -
1319
1337
std::sqrt (beta_prod_t ) *
1320
- vec_model_output[j]) *
1338
+ vec_model_output[j]) *
1321
1339
(1 / std::sqrt (alpha_prod_t ));
1322
1340
}
1323
1341
}
@@ -1339,9 +1357,9 @@ static void sample_k_diffusion(sample_method_t method,
1339
1357
// pred_epsilon = model_output
1340
1358
vec_x[j] =
1341
1359
std::sqrt (alpha_prod_s) *
1342
- vec_pred_original_sample[j] +
1360
+ vec_pred_original_sample[j] +
1343
1361
std::sqrt (beta_prod_s) *
1344
- vec_model_output[j];
1362
+ vec_model_output[j];
1345
1363
}
1346
1364
}
1347
1365
// 4. Sample and inject noise z ~ N(0, I) for
@@ -1357,7 +1375,7 @@ static void sample_k_diffusion(sample_method_t method,
1357
1375
// In this case, x is still pred_noised_sample,
1358
1376
// continue in-place
1359
1377
ggml_tensor_set_f32_randn (noise, rng);
1360
- float * vec_x = (float *)x->data ;
1378
+ float * vec_x = (float *)x->data ;
1361
1379
float * vec_noise = (float *)noise->data ;
1362
1380
for (int j = 0 ; j < ggml_nelements (x); j++) {
1363
1381
// Corresponding to (35) in Zheng et
@@ -1366,10 +1384,10 @@ static void sample_k_diffusion(sample_method_t method,
1366
1384
vec_x[j] =
1367
1385
std::sqrt (alpha_prod_t_prev /
1368
1386
alpha_prod_s) *
1369
- vec_x[j] +
1387
+ vec_x[j] +
1370
1388
std::sqrt (1 - alpha_prod_t_prev /
1371
- alpha_prod_s) *
1372
- vec_noise[j];
1389
+ alpha_prod_s) *
1390
+ vec_noise[j];
1373
1391
}
1374
1392
}
1375
1393
}
0 commit comments