From 796bc56a13adf74fec6e5718f3f1792c25067783 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Thu, 21 Mar 2019 16:55:31 +0100 Subject: [PATCH] PNGLoader: Reduce unfiltering branchiness even more. Use a dummy scanline for y=0 filled with all zeroes to avoid having to check y on every iteration before grabbing color data from scanline[y - 1]. --- SharedGraphics/PNGLoader.cpp | 39 ++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/SharedGraphics/PNGLoader.cpp b/SharedGraphics/PNGLoader.cpp index 204c6959967..1fb246e8894 100644 --- a/SharedGraphics/PNGLoader.cpp +++ b/SharedGraphics/PNGLoader.cpp @@ -162,8 +162,9 @@ union [[gnu::packed]] Pixel { static_assert(sizeof(Pixel) == 4); template -[[gnu::always_inline]] static inline void unfilter_impl(const GraphicsBitmap& bitmap, int y) +[[gnu::always_inline]] static inline void unfilter_impl(const GraphicsBitmap& bitmap, int y, const void* dummy_scanline_data) { + auto* dummy_scanline = (const Pixel*)dummy_scanline_data; if constexpr (filter_type == 1) { auto* pixels = (Pixel*)bitmap.scanline(y); for (int i = 0; i < bitmap.width(); ++i) { @@ -181,11 +182,12 @@ template } if constexpr (filter_type == 2) { auto* pixels = (Pixel*)bitmap.scanline(y); + auto* pixels_y_minus_1 = y == 0 ? dummy_scanline : (Pixel*)bitmap.scanline(y - 1); for (int i = 0; i < bitmap.width(); ++i) { auto& x = pixels[i]; swap(x.r, x.b); Pixel b; - if (y != 0) b.rgba = bitmap.scanline(y - 1)[i]; + b.rgba = pixels_y_minus_1[i].rgba; x.r += b.r; x.g += b.g; x.b += b.b; @@ -197,13 +199,14 @@ template if constexpr (filter_type == 3) { auto* pixels = (Pixel*)bitmap.scanline(y); + auto* pixels_y_minus_1 = y == 0 ? dummy_scanline : (Pixel*)bitmap.scanline(y - 1); for (int i = 0; i < bitmap.width(); ++i) { auto& x = pixels[i]; swap(x.r, x.b); Pixel a; Pixel b; if (i != 0) a.rgba = bitmap.scanline(y)[i - 1]; - if (y != 0) b.rgba = bitmap.scanline(y - 1)[i]; + b.rgba = pixels_y_minus_1[i].rgba; x.r = x.r + ((a.r + b.r) / 2); x.g = x.g + ((a.g + b.g) / 2); x.b = x.b + ((a.b + b.b) / 2); @@ -215,15 +218,18 @@ template if constexpr (filter_type == 4) { auto* pixels = (Pixel*)bitmap.scanline(y); + auto* pixels_y_minus_1 = y == 0 ? dummy_scanline : (Pixel*)bitmap.scanline(y - 1); for (int i = 0; i < bitmap.width(); ++i) { auto& x = pixels[i]; swap(x.r, x.b); Pixel a; Pixel b; Pixel c; - if (i != 0) a.rgba = bitmap.scanline(y)[i - 1]; - if (y != 0) b.rgba = bitmap.scanline(y - 1)[i]; - if (y != 0 && i != 0) c.rgba = bitmap.scanline(y - 1)[i - 1]; + if (i != 0) { + a.rgba = bitmap.scanline(y)[i - 1]; + c.rgba = pixels_y_minus_1[i - 1].rgba; + } + b.rgba = pixels_y_minus_1[i].rgba; x.r += paeth_predictor(a.r, b.r, c.r); x.g += paeth_predictor(a.g, b.g, c.g); x.b += paeth_predictor(a.b, b.b, c.b); @@ -233,9 +239,6 @@ template } } - - - [[gnu::noinline]] static void unfilter(PNGLoadingContext& context) { { @@ -266,6 +269,8 @@ template } } + auto dummy_scanline = ByteBuffer::create_zeroed(context.width * sizeof(RGBA32)); + Stopwatch sw("load_png_impl: unfilter: process"); for (int y = 0; y < context.height; ++y) { auto filter = context.scanlines[y].filter; @@ -273,30 +278,30 @@ template continue; if (filter == 1) { if (context.has_alpha()) - unfilter_impl(*context.bitmap, y); + unfilter_impl(*context.bitmap, y, dummy_scanline.pointer()); else - unfilter_impl(*context.bitmap, y); + unfilter_impl(*context.bitmap, y, dummy_scanline.pointer()); continue; } if (filter == 2) { if (context.has_alpha()) - unfilter_impl(*context.bitmap, y); + unfilter_impl(*context.bitmap, y, dummy_scanline.pointer()); else - unfilter_impl(*context.bitmap, y); + unfilter_impl(*context.bitmap, y, dummy_scanline.pointer()); continue; } if (filter == 3) { if (context.has_alpha()) - unfilter_impl(*context.bitmap, y); + unfilter_impl(*context.bitmap, y, dummy_scanline.pointer()); else - unfilter_impl(*context.bitmap, y); + unfilter_impl(*context.bitmap, y, dummy_scanline.pointer()); continue; } if (filter == 4) { if (context.has_alpha()) - unfilter_impl(*context.bitmap, y); + unfilter_impl(*context.bitmap, y, dummy_scanline.pointer()); else - unfilter_impl(*context.bitmap, y); + unfilter_impl(*context.bitmap, y, dummy_scanline.pointer()); continue; } }