// Reciprocal of 255. Multiplying an 8-bit quantity (0..255) by this constant rescales it
// into the 0..1 range without a division at the use site.
constexpr float kInv255{1.0f / 255.0f};
// Limits `value` to the closed interval [0, 1].
// Inputs below 0 yield 0, inputs above 1 yield 1, anything in between is returned unchanged.
static inline float clamp01(float value) {
    return std::max(0.0f, std::min(1.0f, value));
}
// Rounds `value` to the nearest integer and saturates the result to [0, 255].
//
// The return value is used both as an index into 256-entry lookup tables and as a channel
// value that is narrowed to uchar, so it must never leave the 8-bit range.
// NaN is mapped to 0.
static inline int clampByte(float value) {
    // Negated comparison on purpose: NaN compares false against everything, so it is caught
    // here instead of reaching the float-to-int conversion (undefined for NaN).
    if (!(value > 0.0f))
        return 0;
    if (value >= 255.0f)
        return 255;
    // Adding 0.5 before truncation rounds half-up for the remaining positive range.
    return static_cast<int>(value + 0.5f);
}
// Returns the weighted brightness of an RGB triple: 0.299 * r + 0.587 * g + 0.114 * b
// (the Rec. 601 luma weights). The result is in the same units as the inputs.
static inline float luma(float r, float g, float b) {
    return (0.299f * r) + (0.587f * g) + (0.114f * b);
}
// Hermite smoothstep: maps `value` to 0 at or below `edge0`, to 1 at or above `edge1`, and
// to the smooth curve 3t^2 - 2t^3 in between, where t is the normalised position of `value`
// inside [edge0, edge1].
//
// When the two edges coincide the interval has no width; the function then degrades to a
// hard step at `edge0` (0 below it, 1 otherwise).
static inline float smoothstep(float edge0, float edge1, float value) {
    // Guard the division below against a zero-width interval.
    if (edge1 == edge0)
        return value < edge0 ? 0.0f : 1.0f;
    // Same [0, 1] clamp that clamp01 performs, written out so the helper is self-contained.
    const float t = std::max(0.0f, std::min(1.0f, (value - edge0) / (edge1 - edge0)));
    return t * t * (3.0f - (2.0f * t));
}
// Linear interpolation between `a` and `b`.
// `amount` = 0 returns `a`, `amount` = 1 returns `b`; values outside [0, 1] extrapolate.
static inline float lerp(float a, float b, float amount) {
    return a + ((b - a) * amount);
}
// Builds a 256-entry lookup table from a sampled curve.
// Entry i holds the curve value read at position i (0..255), converted to float and
// clamped to [0, 1] by clamp01.
// NOTE(review): `sampled_curve` is not declared in the lines visible here, and neither
// `curve` nor `frame_number` is referenced by them — presumably the sampled curve is
// derived from those two parameters; confirm against the full definition.
53 static std::array<float, 256> build_response_lut(
const AnimatedCurve& curve, int64_t frame_number) {
// Zero-initialised so every slot has a defined value before the fill loop runs.
54 std::array<float, 256> lut{};
61 for (
size_t i = 0; i < lut.size(); ++i)
62 lut[i] = clamp01(
static_cast<float>(sampled_curve.
GetValue(
static_cast<int64_t
>(i))));
// Per-brightness parameter tables for the denoiser.
//
// Every table has 256 entries and is zero-initialised. build_pixel_lut fills slot i of each
// table from slot i of the response table; the pixel loop then reads all tables at one
// shared index per pixel.
struct DenoisePixelLut {
    // Threshold / keep pairs handed to shrink_detail for the brightness signal:
    // "fine" applies to the finest detail band, "mid" to the band between the two blurs.
    std::array<float, 256> fine_threshold{};
    std::array<float, 256> mid_threshold{};
    std::array<float, 256> fine_keep{};
    std::array<float, 256> mid_keep{};

    // Colour-channel variants. The fine threshold is scaled separately for red/blue (rb)
    // and for green (g); the mid threshold and both keep factors are shared by all channels.
    std::array<float, 256> chroma_fine_threshold_rb{};
    std::array<float, 256> chroma_fine_threshold_g{};
    std::array<float, 256> chroma_mid_threshold{};
    std::array<float, 256> chroma_fine_keep{};
    std::array<float, 256> chroma_mid_keep{};

    // Temporal blend weight before motion gating is applied.
    std::array<float, 256> temporal_mix_base{};
};
79 static DenoisePixelLut build_pixel_lut(
const std::array<float, 256>& response_lut,
80 float strength_value,
float detail_value,
81 float temporal_value,
float color_noise_value) {
83 const float strength_overdrive = strength_value * (1.0f + (0.85f * strength_value * strength_value));
84 const float detail_structural_keep = detail_value * (0.62f + (0.34f * detail_value));
85 const float detail_fine_keep = detail_value * detail_value * detail_value * 0.35f;
86 const float inverse_detail = 1.0f - detail_value;
87 const float fine_threshold_scale = 24.0f + (inverse_detail * 42.0f);
88 const float mid_threshold_scale = 9.0f + (inverse_detail * 20.0f);
89 const float fine_keep_scale = 0.95f + (inverse_detail * 0.24f);
90 const float mid_keep_scale = 0.46f + (inverse_detail * 0.22f);
91 const float chroma_scale_rb = 0.75f + (color_noise_value * 0.55f);
92 const float chroma_scale_g = 0.80f + (color_noise_value * 0.45f);
93 const float chroma_mid_scale = 0.85f + (color_noise_value * 0.45f);
94 const float temporal_scale = temporal_value * strength_value;
96 for (
size_t i = 0; i < response_lut.size(); ++i) {
97 const float response = response_lut[i];
98 const float noise_amount = strength_overdrive * response;
99 const float fine_threshold = 4.0f + (noise_amount * fine_threshold_scale);
100 const float mid_threshold = 2.0f + (noise_amount * mid_threshold_scale);
101 const float fine_keep = std::max(detail_fine_keep, 1.0f - (noise_amount * fine_keep_scale));
102 const float mid_keep = std::max(detail_structural_keep, 1.0f - (noise_amount * mid_keep_scale));
104 lut.fine_threshold[i] = fine_threshold;
105 lut.mid_threshold[i] = mid_threshold;
106 lut.fine_keep[i] = fine_keep;
107 lut.mid_keep[i] = mid_keep;
108 lut.chroma_fine_threshold_rb[i] = fine_threshold * chroma_scale_rb;
109 lut.chroma_fine_threshold_g[i] = fine_threshold * chroma_scale_g;
110 lut.chroma_mid_threshold[i] = mid_threshold * chroma_mid_scale;
111 lut.chroma_fine_keep[i] = std::max(0.0f, fine_keep - (color_noise_value * noise_amount * 0.55f));
112 lut.chroma_mid_keep[i] = std::max(0.0f, mid_keep - (color_noise_value * noise_amount * 0.25f));
113 lut.temporal_mix_base[i] = temporal_scale * response;
// Grow-only set of nine float planes used as working storage by the denoiser
// (fine_*, base_* and tmp_*, one plane per colour channel).
//
// The frame-processing code keeps one instance per thread (thread_local) and calls ensure()
// before use, so the planes are allocated once and reused while the frame size does not grow.
struct DenoiseScratch {
    // Number of floats every plane is guaranteed to hold. 0 means nothing is allocated yet.
    int capacity = 0;

    std::unique_ptr<float[]> fine_r;
    std::unique_ptr<float[]> fine_g;
    std::unique_ptr<float[]> fine_b;
    std::unique_ptr<float[]> base_r;
    std::unique_ptr<float[]> base_g;
    std::unique_ptr<float[]> base_b;
    std::unique_ptr<float[]> tmp_r;
    std::unique_ptr<float[]> tmp_g;
    std::unique_ptr<float[]> tmp_b;

    // Makes sure every plane can hold at least `pixel_count` floats.
    //
    // A request that fits the current capacity (including zero or negative counts) is a
    // no-op and leaves the existing buffers and their contents untouched. Growing replaces
    // all nine planes; their previous contents are discarded and the new storage is
    // uninitialised.
    void ensure(int pixel_count) {
        if (pixel_count <= capacity)
            return;

        const size_t count = static_cast<size_t>(pixel_count);
        fine_r.reset(new float[count]);
        fine_g.reset(new float[count]);
        fine_b.reset(new float[count]);
        base_r.reset(new float[count]);
        base_g.reset(new float[count]);
        base_b.reset(new float[count]);
        tmp_r.reset(new float[count]);
        tmp_g.reset(new float[count]);
        tmp_b.reset(new float[count]);

        // Committed last: if an allocation above throws, capacity still describes the
        // smallest plane, so the next call retries instead of trusting a short buffer.
        capacity = pixel_count;
    }
};
146 static inline void read_unpremultiplied_rgb(
const uchar* px,
float& r,
float& g,
float& b) {
148 r =
static_cast<float>(px[0]);
149 g =
static_cast<float>(px[1]);
150 b =
static_cast<float>(px[2]);
152 const float alpha =
static_cast<float>(px[3]);
153 const float unpremultiply = alpha > 0.0f ? 255.0f / alpha : 0.0f;
154 r =
static_cast<float>(px[0]) * unpremultiply;
155 g =
static_cast<float>(px[1]) * unpremultiply;
156 b =
static_cast<float>(px[2]) * unpremultiply;
160 static void box_blur_image_rgb(
const uchar* input_pixels,
int input_stride,
161 float* dst_r,
float* dst_g,
float* dst_b,
162 float* tmp_r,
float* tmp_g,
float* tmp_b,
163 int width,
int height,
int radius) {
164 const int window = (radius * 2) + 1;
165 const float inv_window = 1.0f /
static_cast<float>(window);
167 #pragma omp parallel for if(width * height >= 16384) schedule(static)
168 for (
int y = 0; y < height; ++y) {
169 const int row = y * width;
170 const uchar* row_pixels = input_pixels + (y * input_stride);
171 float first_r, first_g, first_b;
172 read_unpremultiplied_rgb(row_pixels, first_r, first_g, first_b);
173 float sum_r = first_r *
static_cast<float>(radius + 1);
174 float sum_g = first_g *
static_cast<float>(radius + 1);
175 float sum_b = first_b *
static_cast<float>(radius + 1);
176 for (
int i = 1; i <= radius; ++i) {
177 float sample_r, sample_g, sample_b;
178 read_unpremultiplied_rgb(row_pixels + (std::min(i, width - 1) * 4), sample_r, sample_g, sample_b);
184 for (
int x = 0; x < width; ++x) {
185 const int idx = row + x;
186 tmp_r[idx] = sum_r * inv_window;
187 tmp_g[idx] = sum_g * inv_window;
188 tmp_b[idx] = sum_b * inv_window;
190 float add_r, add_g, add_b;
191 float sub_r, sub_g, sub_b;
192 read_unpremultiplied_rgb(row_pixels + (std::min(x + radius + 1, width - 1) * 4), add_r, add_g, add_b);
193 read_unpremultiplied_rgb(row_pixels + (std::max(x - radius, 0) * 4), sub_r, sub_g, sub_b);
194 sum_r += add_r - sub_r;
195 sum_g += add_g - sub_g;
196 sum_b += add_b - sub_b;
200 #pragma omp parallel for if(width * height >= 16384) schedule(static)
201 for (
int x = 0; x < width; ++x) {
202 float sum_r = tmp_r[x] *
static_cast<float>(radius + 1);
203 float sum_g = tmp_g[x] *
static_cast<float>(radius + 1);
204 float sum_b = tmp_b[x] *
static_cast<float>(radius + 1);
205 for (
int i = 1; i <= radius; ++i) {
206 const int idx = std::min(i, height - 1) * width + x;
212 for (
int y = 0; y < height; ++y) {
213 const int idx = y * width + x;
214 dst_r[idx] = sum_r * inv_window;
215 dst_g[idx] = sum_g * inv_window;
216 dst_b[idx] = sum_b * inv_window;
217 const int add_idx = std::min(y + radius + 1, height - 1) * width + x;
218 const int sub_idx = std::max(y - radius, 0) * width + x;
219 sum_r += tmp_r[add_idx] - tmp_r[sub_idx];
220 sum_g += tmp_g[add_idx] - tmp_g[sub_idx];
221 sum_b += tmp_b[add_idx] - tmp_b[sub_idx];
// Box-blurs three float planes (one per colour channel) into three destination planes.
//
// The blur is separable: a horizontal sliding-window pass writes tmp_*, then a vertical
// pass over tmp_* writes dst_*. Samples that would fall outside the plane are replaced by
// the nearest edge sample.
//
// src_r/g/b  input planes of width * height floats, indexed y * width + x
// dst_r/g/b  output planes of the same size
// tmp_r/g/b  scratch planes of the same size; overwritten when a blur is performed
// radius     half-width of the (2 * radius + 1) window
//
// A radius of zero or less performs no blur and simply copies src_* to dst_*.
// A plane with no samples is left untouched.
static void box_blur_rgb(const float* src_r, const float* src_g, const float* src_b,
                         float* dst_r, float* dst_g, float* dst_b,
                         float* tmp_r, float* tmp_g, float* tmp_b,
                         int width, int height, int radius) {
    // The edge clamps below index with width - 1 / height - 1, which is only valid for a
    // non-empty plane.
    if (width <= 0 || height <= 0)
        return;

    if (radius <= 0) {
        // Nothing to average: the result is the input. Skip the copy when the caller
        // passed the same plane as source and destination.
        const int pixel_count = width * height;
        if (dst_r != src_r) std::copy(src_r, src_r + pixel_count, dst_r);
        if (dst_g != src_g) std::copy(src_g, src_g + pixel_count, dst_g);
        if (dst_b != src_b) std::copy(src_b, src_b + pixel_count, dst_b);
        return;
    }

    const int window = (radius * 2) + 1;
    const float inv_window = 1.0f / static_cast<float>(window);

    // Horizontal pass: src -> tmp.
    #pragma omp parallel for if(width * height >= 16384) schedule(static)
    for (int y = 0; y < height; ++y) {
        const int row = y * width;

        // Prime the running sums with the window centred on x = 0: the left half plus the
        // centre all clamp to the first sample, hence the (radius + 1) weight.
        float sum_r = src_r[row] * static_cast<float>(radius + 1);
        float sum_g = src_g[row] * static_cast<float>(radius + 1);
        float sum_b = src_b[row] * static_cast<float>(radius + 1);
        for (int i = 1; i <= radius; ++i) {
            const int idx = row + std::min(i, width - 1);
            sum_r += src_r[idx];
            sum_g += src_g[idx];
            sum_b += src_b[idx];
        }

        for (int x = 0; x < width; ++x) {
            const int idx = row + x;
            tmp_r[idx] = sum_r * inv_window;
            tmp_g[idx] = sum_g * inv_window;
            tmp_b[idx] = sum_b * inv_window;
            // Slide the window one sample right: add the one entering, drop the one leaving.
            const int add_idx = row + std::min(x + radius + 1, width - 1);
            const int sub_idx = row + std::max(x - radius, 0);
            sum_r += src_r[add_idx] - src_r[sub_idx];
            sum_g += src_g[add_idx] - src_g[sub_idx];
            sum_b += src_b[add_idx] - src_b[sub_idx];
        }
    }

    // Vertical pass: tmp -> dst, same sliding window run down each column.
    #pragma omp parallel for if(width * height >= 16384) schedule(static)
    for (int x = 0; x < width; ++x) {
        float sum_r = tmp_r[x] * static_cast<float>(radius + 1);
        float sum_g = tmp_g[x] * static_cast<float>(radius + 1);
        float sum_b = tmp_b[x] * static_cast<float>(radius + 1);
        for (int i = 1; i <= radius; ++i) {
            const int idx = std::min(i, height - 1) * width + x;
            sum_r += tmp_r[idx];
            sum_g += tmp_g[idx];
            sum_b += tmp_b[idx];
        }

        for (int y = 0; y < height; ++y) {
            const int idx = y * width + x;
            dst_r[idx] = sum_r * inv_window;
            dst_g[idx] = sum_g * inv_window;
            dst_b[idx] = sum_b * inv_window;
            const int add_idx = std::min(y + radius + 1, height - 1) * width + x;
            const int sub_idx = std::max(y - radius, 0) * width + x;
            sum_r += tmp_r[add_idx] - tmp_r[sub_idx];
            sum_g += tmp_g[add_idx] - tmp_g[sub_idx];
            sum_b += tmp_b[add_idx] - tmp_b[sub_idx];
        }
    }
}
// Attenuates a signed detail value.
//
// value      signed detail amplitude
// threshold  amplitude up to which the detail is treated as noise
// keep       fraction of the below-threshold amplitude that is retained
//
// At or below the threshold the whole value is scaled by `keep`. Above it, only the first
// `threshold` units are scaled by `keep` and the excess passes through unchanged, so the
// two branches meet at |value| == threshold and the sign of `value` is preserved.
static inline float shrink_detail(float value, float threshold, float keep) {
    const float magnitude = std::abs(value);
    if (magnitude <= threshold)
        return value * keep;
    const float sign = value < 0.0f ? -1.0f : 1.0f;
    return sign * (threshold * keep + (magnitude - threshold));
}
314 init_effect_details();
// Fills in the effect's descriptive metadata.
// NOTE(review): only the `info.description` assignment is visible in this excerpt; any
// other fields this method sets are not shown here — confirm against the full definition.
328 void DenoiseImage::init_effect_details()
// One-sentence summary of what the effect does.
333 info.
description =
"Reduces visible grain and color speckles in video frames.";
// Forgets the stored previous input frame.
// The frame-processing code only blends with the previous frame when `previous_input_` is
// non-null, so after this call the next processed frame gets no temporal blending.
// NOTE(review): only the `previous_input_` reset is visible in this excerpt; other state
// this method may clear is not shown here.
338 void DenoiseImage::reset_temporal_history()
// A default-constructed QImage is a null image.
340 previous_input_ = QImage();
346 std::shared_ptr<QImage> frame_image = frame->GetImage();
347 if (!frame_image || frame_image->isNull()) {
348 reset_temporal_history();
352 const float strength_value = clamp01(
static_cast<float>(
strength.
GetValue(frame_number)));
353 if (strength_value <= 0.0f) {
354 reset_temporal_history();
358 if (frame_image->format() != QImage::Format_RGBA8888_Premultiplied)
359 *frame_image = frame_image->convertToFormat(QImage::Format_RGBA8888_Premultiplied);
361 const int width = frame_image->width();
362 const int height = frame_image->height();
363 if (width <= 0 || height <= 0) {
364 reset_temporal_history();
368 const float detail_value = clamp01(
static_cast<float>(
detail.
GetValue(frame_number)));
369 const float temporal_value = clamp01(
static_cast<float>(
temporal.
GetValue(frame_number)));
371 const float color_noise_value = clamp01(
static_cast<float>(
color_noise.
GetValue(frame_number)));
374 const QImage* input_image = frame_image.get();
375 if (temporal_value > 0.0f) {
376 input_copy = frame_image->copy();
377 input_image = &input_copy;
379 reset_temporal_history();
381 const bool temporal_valid =
382 temporal_value > 0.0f &&
384 frame_number == last_frame_ + 1 &&
385 !previous_input_.isNull() &&
386 previous_input_.width() == width &&
387 previous_input_.height() == height &&
388 previous_input_.format() == QImage::Format_RGBA8888_Premultiplied;
390 const std::array<float, 256> response_lut = build_response_lut(
response_curve, frame_number);
391 QImage output(width, height, QImage::Format_RGBA8888_Premultiplied);
393 const uchar* prev_pixels = temporal_valid ? previous_input_.constBits() :
nullptr;
394 const uchar* input_pixels = input_image->constBits();
395 uchar* output_pixels = output.bits();
396 const int input_stride = input_image->bytesPerLine();
397 const int output_stride = output.bytesPerLine();
398 const int prev_stride = temporal_valid ? previous_input_.bytesPerLine() : 0;
399 const int pixel_count = width * height;
400 const DenoisePixelLut pixel_lut = build_pixel_lut(response_lut, strength_value, detail_value, temporal_value, color_noise_value);
401 const float inverse_detail = 1.0f - detail_value;
402 const float temporal_gate_scale = 0.45f + (0.55f * inverse_detail);
403 const float temporal_luma_scale = 0.32f + (inverse_detail * 0.28f);
404 const float temporal_chroma_scale = 0.65f + (color_noise_value * 0.35f);
405 const float motion_low = 0.020f + ((1.0f - motion_safety_value) * 0.080f);
406 const float motion_high = motion_low + 0.055f + ((1.0f - motion_safety_value) * 0.100f);
409 thread_local DenoiseScratch scratch;
410 scratch.ensure(pixel_count);
411 float* fine_r = scratch.fine_r.get();
412 float* fine_g = scratch.fine_g.get();
413 float* fine_b = scratch.fine_b.get();
414 float* base_r = scratch.base_r.get();
415 float* base_g = scratch.base_g.get();
416 float* base_b = scratch.base_b.get();
417 float* tmp_r = scratch.tmp_r.get();
418 float* tmp_g = scratch.tmp_g.get();
419 float* tmp_b = scratch.tmp_b.get();
421 const int fine_radius = 1 + (strength_value >= 0.75f && detail_value <= 0.45f ? 1 : 0);
422 int coarse_radius = 3;
423 if (strength_value >= 0.95f && detail_value <= 0.15f)
425 else if (strength_value >= 0.80f && detail_value <= 0.35f)
427 else if (strength_value >= 0.65f && detail_value <= 0.55f)
430 box_blur_image_rgb(input_pixels, input_stride, fine_r, fine_g, fine_b, tmp_r, tmp_g, tmp_b, width, height, fine_radius);
431 box_blur_rgb(fine_r, fine_g, fine_b, base_r, base_g, base_b, tmp_r, tmp_g, tmp_b, width, height, coarse_radius);
433 #pragma omp parallel for if(width * height >= 16384) schedule(static)
434 for (
int y = 0; y < height; ++y) {
435 for (
int x = 0; x < width; ++x) {
436 const int p = (y * width) + x;
437 const uchar* input_pixel = input_pixels + (y * input_stride) + (x * 4);
438 float center_r, center_g, center_b;
439 read_unpremultiplied_rgb(input_pixel, center_r, center_g, center_b);
440 const float center_y = luma(center_r, center_g, center_b);
441 const float fine_y_value = luma(fine_r[p], fine_g[p], fine_b[p]);
442 const float base_y_value = luma(base_r[p], base_g[p], base_b[p]);
443 const float fine_detail_y = center_y - fine_y_value;
444 const float mid_detail_y = fine_y_value - base_y_value;
446 const int response_index = clampByte(center_y);
447 const float fine_threshold = pixel_lut.fine_threshold[response_index];
448 const float mid_threshold = pixel_lut.mid_threshold[response_index];
449 const float fine_keep = pixel_lut.fine_keep[response_index];
450 const float mid_keep = pixel_lut.mid_keep[response_index];
451 const float chroma_mid_threshold = pixel_lut.chroma_mid_threshold[response_index];
452 const float chroma_fine_keep = pixel_lut.chroma_fine_keep[response_index];
453 const float chroma_mid_keep = pixel_lut.chroma_mid_keep[response_index];
455 float out_y = base_y_value +
456 shrink_detail(mid_detail_y, mid_threshold, mid_keep) +
457 shrink_detail(fine_detail_y, fine_threshold, fine_keep);
459 const float fine_detail_r = center_r - fine_r[p];
460 const float mid_detail_r = fine_r[p] - base_r[p];
461 const float fine_detail_g = center_g - fine_g[p];
462 const float mid_detail_g = fine_g[p] - base_g[p];
463 const float fine_detail_b = center_b - fine_b[p];
464 const float mid_detail_b = fine_b[p] - base_b[p];
465 float out_r = base_r[p] +
466 shrink_detail(mid_detail_r, chroma_mid_threshold, chroma_mid_keep) +
467 shrink_detail(fine_detail_r, pixel_lut.chroma_fine_threshold_rb[response_index], chroma_fine_keep);
468 float out_g = base_g[p] +
469 shrink_detail(mid_detail_g, chroma_mid_threshold, chroma_mid_keep) +
470 shrink_detail(fine_detail_g, pixel_lut.chroma_fine_threshold_g[response_index], chroma_fine_keep);
471 float out_b = base_b[p] +
472 shrink_detail(mid_detail_b, chroma_mid_threshold, chroma_mid_keep) +
473 shrink_detail(fine_detail_b, pixel_lut.chroma_fine_threshold_rb[response_index], chroma_fine_keep);
475 const float reconstructed_y = luma(out_r, out_g, out_b);
476 out_r += out_y - reconstructed_y;
477 out_g += out_y - reconstructed_y;
478 out_b += out_y - reconstructed_y;
480 if (temporal_valid) {
481 const uchar* prev_pixel = prev_pixels + (y * prev_stride) + (x * 4);
485 if (prev_pixel[3] == 255) {
486 prev_r =
static_cast<float>(prev_pixel[0]);
487 prev_g =
static_cast<float>(prev_pixel[1]);
488 prev_b =
static_cast<float>(prev_pixel[2]);
490 const float prev_alpha =
static_cast<float>(prev_pixel[3]);
491 const float prev_unpremultiply = prev_alpha > 0.0f ? 255.0f / prev_alpha : 0.0f;
492 prev_r =
static_cast<float>(prev_pixel[0]) * prev_unpremultiply;
493 prev_g =
static_cast<float>(prev_pixel[1]) * prev_unpremultiply;
494 prev_b =
static_cast<float>(prev_pixel[2]) * prev_unpremultiply;
496 const float prev_y = luma(prev_r, prev_g, prev_b);
498 const float luma_motion = std::abs(center_y - prev_y) * kInv255;
499 const float color_motion = (
500 std::abs(center_r - prev_r) +
501 std::abs(center_g - prev_g) +
502 std::abs(center_b - prev_b)) * (kInv255 / 3.0f);
503 const float motion = std::max(luma_motion, color_motion * 0.75f);
504 const float motion_gate = 1.0f - smoothstep(motion_low, motion_high, motion);
505 const float temporal_mix = std::min(0.35f, pixel_lut.temporal_mix_base[response_index] * motion_gate * temporal_gate_scale);
507 const float temporal_luma_mix = temporal_mix * temporal_luma_scale;
508 const float temporal_chroma_mix = temporal_mix * temporal_chroma_scale;
510 const float out_chroma_r = out_r - out_y;
511 const float out_chroma_g = out_g - out_y;
512 const float out_chroma_b = out_b - out_y;
513 out_y = lerp(out_y, prev_y, temporal_luma_mix);
514 out_r = out_y + lerp(out_chroma_r, prev_r - prev_y, temporal_chroma_mix);
515 out_g = out_y + lerp(out_chroma_g, prev_g - prev_y, temporal_chroma_mix);
516 out_b = out_y + lerp(out_chroma_b, prev_b - prev_y, temporal_chroma_mix);
519 uchar* output_pixel = output_pixels + (y * output_stride) + (x * 4);
520 if (input_pixel[3] == 255) {
521 output_pixel[0] =
static_cast<uchar
>(clampByte(out_r));
522 output_pixel[1] =
static_cast<uchar
>(clampByte(out_g));
523 output_pixel[2] =
static_cast<uchar
>(clampByte(out_b));
525 const float alpha =
static_cast<float>(input_pixel[3]);
526 const float premultiply = alpha * kInv255;
527 output_pixel[0] =
static_cast<uchar
>(clampByte(out_r * premultiply));
528 output_pixel[1] =
static_cast<uchar
>(clampByte(out_g * premultiply));
529 output_pixel[2] =
static_cast<uchar
>(clampByte(out_b * premultiply));
531 output_pixel[3] = input_pixel[3];
535 *frame_image = output;
536 if (temporal_value > 0.0f)
537 previous_input_ = input_copy;
538 last_frame_ = frame_number;
564 }
catch (
const std::exception&) {
565 throw InvalidJSON(
"JSON is invalid (missing keys or invalid data types)");
572 if (!root[
"strength"].isNull())
574 if (!root[
"detail"].isNull())
576 if (!root[
"temporal"].isNull())
578 if (!root[
"motion_safety"].isNull())
580 if (!root[
"color_noise"].isNull())
582 if (!root[
"response_curve"].isNull())
584 reset_temporal_history();
597 root[
"response_curve"][
"channel"] =
"response";
599 return root.toStyledString();