LibGfx+LibPDF: Simpler and faster N-D linear sampling

Previously, if we wanted to to e.g. do linear interpolation in 2-D, we'd get a sample point like (1.3, 4.4), then get 4 samples around it at (1, 4), (2, 4), (1, 5), (2, 5), then reduce the 4 samples to 2 samples by computing the combined samples `0.3 * f(1, 4) + 0.7 * f(2, 4)` and `0.3 * f(1, 5) + 0.8 * f(2, 5)`, and then 1-D linearly blending between these two samples with the factor 0.4. In the end we'd multiply the first value by 0.3 * 0.4, the second by 0.7 * 0.4, the third by 0.3 * 0.6, and the third by 0.7 * 0.6, and then sum them all up. This requires computing and storing 2**N samples, followed by another 2**N iterations to combine the 2**N sampls to a single value. (N is in practice either 4 or 3, so 2**N isn't super huge.) Instead, for every sample we can directly compute the product of weights and sum them up directly. This lets us omit the second loop and storing 2**N values, in exchange for doing an additional O(n) work to compute the product. Takes Build/lagom/bin/image --no-output --invert-cmyk \ --assign-color-profile \ Build/lagom/Root/res/icc/Adobe/CMYK/USWebCoatedSWOP.icc \ --convert-to-color-profile serenity-sRGB.icc \ cmyk.jpg form 3.42s to 3.08s on my machine, almost 10% faster (and less code). Here cmyk.jpg is a 2253x3080 cmyk jpeg, and USWebCoatedSWOP.icc is an mft2 profile with input tables with 256 samples and a 9x9x9x9 CLUT. The LibPDF change is covered by TEST_CASE(sampled) in LibPDF.cpp, and the LibGfx change is basically the same change as the one in LibPDF (where the test results don't change) and the output subjectively looks identical. So hopefully this causes indeed no behavior change :^)
Author: https://github.com/nico Commit: https://github.com/SerenityOS/serenity/commit/f562c470e2 Pull-request: https://github.com/SerenityOS/serenity/pull/23050
2024-02-02 12:05:12 -05:00 · 2024-02-02 12:05:12 -05:00 · f562c470e2 · 2024-07-17 21:26:19 +09:00
commit f562c470e2
parent c4780f4ee1
2 changed files with 16 additions and 20 deletions
--- a/Userland/Libraries/LibGfx/ICC/TagTypes.h
+++ b/Userland/Libraries/LibGfx/ICC/TagTypes.h
@ -46,23 +46,20 @@ inline FloatVector3 lerp_nd(Function<unsigned(size_t)> size, Function<FloatVecto
        factor.append(ec - left_index[i]);
    }

-    Vector<FloatVector3> samples;
-    samples.resize(1u << x.size());
+    FloatVector3 sample_output {};
    // The i'th bit of mask indicates if the i'th coordinate is rounded up or down.
    Vector<unsigned> coordinates;
    coordinates.resize(x.size());
    for (size_t mask = 0; mask < (1u << x.size()); ++mask) {
-        for (size_t i = 0; i < x.size(); ++i)
+        float sample_weight = 1.0f;
+        for (size_t i = 0; i < x.size(); ++i) {
            coordinates[i] = left_index[i] + ((mask >> i) & 1u);
-        samples[mask] = sample(coordinates);
+            sample_weight *= ((mask >> i) & 1u) ? factor[i] : 1.0f - factor[i];
+        }
+        sample_output += sample(coordinates) * sample_weight;
    }

-    for (int i = static_cast<int>(x.size() - 1); i >= 0; --i) {
-        for (size_t mask = 0; mask < (1u << i); ++mask)
-            samples[mask] = mix(samples[mask], samples[mask | (1u << i)], factor[i]);
-    }
-
-    return samples[0];
+    return sample_output;
 }

 using S15Fixed16 = FixedPoint<16, i32>;
--- a/Userland/Libraries/LibPDF/Function.cpp
+++ b/Userland/Libraries/LibPDF/Function.cpp
@ -206,23 +206,22 @@ PDFErrorOr<ReadonlySpan<float>> SampledFunction::evaluate(ReadonlySpan<float> xs
        // then 2 by interpolating along y, then 1 by interpolating along x.
        // So for the general case, we create 2**N samples, and then for each coordinate, we cut the number of samples in half
        // by interpolating along that coordinate.
-        Vector<float> samples;
-        samples.resize(1 << m_domain.size());
+        // Instead of storing all the 2**N samples, we can calculate the product of weights for each corner,
+        // and sum up the weighted samples.
+        float sample_output = 0;
        // The i'th bit of mask indicates if the i'th coordinate is rounded up or down.
        Vector<int> coordinates;
        coordinates.resize(m_domain.size());
        for (size_t mask = 0; mask < (1u << m_domain.size()); ++mask) {
-            for (size_t i = 0; i < m_domain.size(); ++i)
+            float sample_weight = 1.0f;
+            for (size_t i = 0; i < m_domain.size(); ++i) {
                coordinates[i] = m_left_index[i] + ((mask >> i) & 1u);
-            samples[mask] = sample(coordinates, r);
+                sample_weight *= ((mask >> i) & 1u) ? m_inputs[i] : (1.0f - m_inputs[i]);
+            }
+            sample_output += sample(coordinates, r) * sample_weight;
        }

-        for (int i = static_cast<int>(m_domain.size() - 1); i >= 0; --i) {
-            for (size_t mask = 0; mask < (1u << i); ++mask)
-                samples[mask] = mix(samples[mask], samples[mask | (1 << i)], m_inputs[i]);
-        }
-
-        float result = interpolate(samples[0], 0.0f, 255.0f, m_decode[r].lower, m_decode[r].upper);
+        float result = interpolate(sample_output, 0.0f, 255.0f, m_decode[r].lower, m_decode[r].upper);
        m_outputs[r] = clamp(result, m_range[r].lower, m_range[r].upper);
    }