LibSoftGPU: Use FloatVector4
pixel format for Image
This increases memory usage for textures 4-fold, but allows us to completely remove calls to `(un)pack_color` in a very hot loop.
This commit is contained in:
parent
6090e79191
commit
9a1364d784
Notes:
sideshowbarker
2024-07-17 12:02:22 +09:00
Author: https://github.com/gmta Commit: https://github.com/SerenityOS/serenity/commit/9a1364d784 Pull-request: https://github.com/SerenityOS/serenity/pull/13835 Reviewed-by: https://github.com/sunverwerth ✅
2 changed files with 120 additions and 124 deletions
|
@ -9,10 +9,123 @@
|
|||
|
||||
namespace SoftGPU {
|
||||
|
||||
static constexpr FloatVector4 unpack_color(void const* ptr, GPU::ImageFormat format)
|
||||
{
|
||||
constexpr auto one_over_255 = 1.0f / 255;
|
||||
switch (format) {
|
||||
case GPU::ImageFormat::RGB888: {
|
||||
auto rgb = reinterpret_cast<u8 const*>(ptr);
|
||||
return {
|
||||
rgb[0] * one_over_255,
|
||||
rgb[1] * one_over_255,
|
||||
rgb[2] * one_over_255,
|
||||
1.0f,
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::BGR888: {
|
||||
auto bgr = reinterpret_cast<u8 const*>(ptr);
|
||||
return {
|
||||
bgr[2] * one_over_255,
|
||||
bgr[1] * one_over_255,
|
||||
bgr[0] * one_over_255,
|
||||
1.0f,
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::RGBA8888: {
|
||||
auto rgba = *reinterpret_cast<u32 const*>(ptr);
|
||||
return {
|
||||
(rgba & 0xff) * one_over_255,
|
||||
((rgba >> 8) & 0xff) * one_over_255,
|
||||
((rgba >> 16) & 0xff) * one_over_255,
|
||||
((rgba >> 24) & 0xff) * one_over_255,
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::BGRA8888: {
|
||||
auto bgra = *reinterpret_cast<u32 const*>(ptr);
|
||||
return {
|
||||
((bgra >> 16) & 0xff) * one_over_255,
|
||||
((bgra >> 8) & 0xff) * one_over_255,
|
||||
(bgra & 0xff) * one_over_255,
|
||||
((bgra >> 24) & 0xff) * one_over_255,
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::RGB565: {
|
||||
auto rgb = *reinterpret_cast<u16 const*>(ptr);
|
||||
return {
|
||||
((rgb >> 11) & 0x1f) / 31.f,
|
||||
((rgb >> 5) & 0x3f) / 63.f,
|
||||
(rgb & 0x1f) / 31.f,
|
||||
1.0f
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::L8: {
|
||||
auto luminance = *reinterpret_cast<u8 const*>(ptr);
|
||||
auto clamped_luminance = luminance * one_over_255;
|
||||
return {
|
||||
clamped_luminance,
|
||||
clamped_luminance,
|
||||
clamped_luminance,
|
||||
1.0f,
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::L8A8: {
|
||||
auto luminance_and_alpha = reinterpret_cast<u8 const*>(ptr);
|
||||
auto clamped_luminance = luminance_and_alpha[0] * one_over_255;
|
||||
return {
|
||||
clamped_luminance,
|
||||
clamped_luminance,
|
||||
clamped_luminance,
|
||||
luminance_and_alpha[1] * one_over_255,
|
||||
};
|
||||
}
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr void pack_color(FloatVector4 const& color, void* ptr, GPU::ImageFormat format)
|
||||
{
|
||||
auto r = static_cast<u8>(clamp(color.x(), 0.0f, 1.0f) * 255);
|
||||
auto g = static_cast<u8>(clamp(color.y(), 0.0f, 1.0f) * 255);
|
||||
auto b = static_cast<u8>(clamp(color.z(), 0.0f, 1.0f) * 255);
|
||||
auto a = static_cast<u8>(clamp(color.w(), 0.0f, 1.0f) * 255);
|
||||
|
||||
switch (format) {
|
||||
case GPU::ImageFormat::RGB888:
|
||||
reinterpret_cast<u8*>(ptr)[0] = r;
|
||||
reinterpret_cast<u8*>(ptr)[1] = g;
|
||||
reinterpret_cast<u8*>(ptr)[2] = b;
|
||||
return;
|
||||
case GPU::ImageFormat::BGR888:
|
||||
reinterpret_cast<u8*>(ptr)[2] = b;
|
||||
reinterpret_cast<u8*>(ptr)[1] = g;
|
||||
reinterpret_cast<u8*>(ptr)[0] = r;
|
||||
return;
|
||||
case GPU::ImageFormat::RGBA8888:
|
||||
*reinterpret_cast<u32*>(ptr) = r | (g << 8) | (b << 16) | (a << 24);
|
||||
return;
|
||||
case GPU::ImageFormat::BGRA8888:
|
||||
*reinterpret_cast<u32*>(ptr) = b | (g << 8) | (r << 16) | (a << 24);
|
||||
return;
|
||||
case GPU::ImageFormat::RGB565:
|
||||
*reinterpret_cast<u16*>(ptr) = (r & 0x1f) | ((g & 0x3f) << 5) | ((b & 0x1f) << 11);
|
||||
return;
|
||||
case GPU::ImageFormat::L8:
|
||||
*reinterpret_cast<u8*>(ptr) = r;
|
||||
return;
|
||||
case GPU::ImageFormat::L8A8:
|
||||
reinterpret_cast<u8*>(ptr)[0] = r;
|
||||
reinterpret_cast<u8*>(ptr)[1] = a;
|
||||
return;
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
}
|
||||
|
||||
Image::Image(void* const ownership_token, unsigned width, unsigned height, unsigned depth, unsigned max_levels, unsigned layers)
|
||||
: GPU::Image(ownership_token)
|
||||
, m_num_layers(layers)
|
||||
, m_mipmap_buffers(FixedArray<RefPtr<Typed3DBuffer<GPU::ColorType>>>::must_create_but_fixme_should_propagate_errors(layers * max_levels))
|
||||
, m_mipmap_buffers(FixedArray<RefPtr<Typed3DBuffer<FloatVector4>>>::must_create_but_fixme_should_propagate_errors(layers * max_levels))
|
||||
{
|
||||
VERIFY(width > 0);
|
||||
VERIFY(height > 0);
|
||||
|
@ -27,7 +140,7 @@ Image::Image(void* const ownership_token, unsigned width, unsigned height, unsig
|
|||
unsigned level;
|
||||
for (level = 0; level < max_levels; ++level) {
|
||||
for (unsigned layer = 0; layer < layers; ++layer)
|
||||
m_mipmap_buffers[layer * layers + level] = MUST(Typed3DBuffer<GPU::ColorType>::try_create(width, height, depth));
|
||||
m_mipmap_buffers[layer * layers + level] = MUST(Typed3DBuffer<FloatVector4>::try_create(width, height, depth));
|
||||
|
||||
if (width <= 1 && height <= 1 && depth <= 1)
|
||||
break;
|
||||
|
|
|
@ -8,132 +8,15 @@
|
|||
#pragma once
|
||||
|
||||
#include <AK/FixedArray.h>
|
||||
#include <AK/RefCounted.h>
|
||||
#include <AK/RefPtr.h>
|
||||
#include <LibGPU/Enums.h>
|
||||
#include <LibGPU/Image.h>
|
||||
#include <LibGPU/ImageDataLayout.h>
|
||||
#include <LibGPU/ImageFormat.h>
|
||||
#include <LibGfx/Vector3.h>
|
||||
#include <LibGfx/Vector4.h>
|
||||
#include <LibSoftGPU/Buffer/Typed3DBuffer.h>
|
||||
#include <LibSoftGPU/Config.h>
|
||||
|
||||
namespace SoftGPU {
|
||||
|
||||
inline static FloatVector4 unpack_color(void const* ptr, GPU::ImageFormat format)
|
||||
{
|
||||
constexpr auto one_over_255 = 1.0f / 255;
|
||||
switch (format) {
|
||||
case GPU::ImageFormat::RGB888: {
|
||||
auto rgb = reinterpret_cast<u8 const*>(ptr);
|
||||
return {
|
||||
rgb[0] * one_over_255,
|
||||
rgb[1] * one_over_255,
|
||||
rgb[2] * one_over_255,
|
||||
1.0f,
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::BGR888: {
|
||||
auto bgr = reinterpret_cast<u8 const*>(ptr);
|
||||
return {
|
||||
bgr[2] * one_over_255,
|
||||
bgr[1] * one_over_255,
|
||||
bgr[0] * one_over_255,
|
||||
1.0f,
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::RGBA8888: {
|
||||
auto rgba = *reinterpret_cast<u32 const*>(ptr);
|
||||
return {
|
||||
(rgba & 0xff) * one_over_255,
|
||||
((rgba >> 8) & 0xff) * one_over_255,
|
||||
((rgba >> 16) & 0xff) * one_over_255,
|
||||
((rgba >> 24) & 0xff) * one_over_255,
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::BGRA8888: {
|
||||
auto bgra = *reinterpret_cast<u32 const*>(ptr);
|
||||
return {
|
||||
((bgra >> 16) & 0xff) * one_over_255,
|
||||
((bgra >> 8) & 0xff) * one_over_255,
|
||||
(bgra & 0xff) * one_over_255,
|
||||
((bgra >> 24) & 0xff) * one_over_255,
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::RGB565: {
|
||||
auto rgb = *reinterpret_cast<u16 const*>(ptr);
|
||||
return {
|
||||
((rgb >> 11) & 0x1f) / 31.f,
|
||||
((rgb >> 5) & 0x3f) / 63.f,
|
||||
(rgb & 0x1f) / 31.f,
|
||||
1.0f
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::L8: {
|
||||
auto luminance = *reinterpret_cast<u8 const*>(ptr);
|
||||
auto clamped_luminance = luminance * one_over_255;
|
||||
return {
|
||||
clamped_luminance,
|
||||
clamped_luminance,
|
||||
clamped_luminance,
|
||||
1.0f,
|
||||
};
|
||||
}
|
||||
case GPU::ImageFormat::L8A8: {
|
||||
auto luminance_and_alpha = reinterpret_cast<u8 const*>(ptr);
|
||||
auto clamped_luminance = luminance_and_alpha[0] * one_over_255;
|
||||
return {
|
||||
clamped_luminance,
|
||||
clamped_luminance,
|
||||
clamped_luminance,
|
||||
luminance_and_alpha[1] * one_over_255,
|
||||
};
|
||||
}
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
}
|
||||
|
||||
inline static void pack_color(FloatVector4 const& color, void* ptr, GPU::ImageFormat format)
|
||||
{
|
||||
auto r = static_cast<u8>(clamp(color.x(), 0.0f, 1.0f) * 255);
|
||||
auto g = static_cast<u8>(clamp(color.y(), 0.0f, 1.0f) * 255);
|
||||
auto b = static_cast<u8>(clamp(color.z(), 0.0f, 1.0f) * 255);
|
||||
auto a = static_cast<u8>(clamp(color.w(), 0.0f, 1.0f) * 255);
|
||||
|
||||
switch (format) {
|
||||
case GPU::ImageFormat::RGB888:
|
||||
reinterpret_cast<u8*>(ptr)[0] = r;
|
||||
reinterpret_cast<u8*>(ptr)[1] = g;
|
||||
reinterpret_cast<u8*>(ptr)[2] = b;
|
||||
return;
|
||||
case GPU::ImageFormat::BGR888:
|
||||
reinterpret_cast<u8*>(ptr)[2] = b;
|
||||
reinterpret_cast<u8*>(ptr)[1] = g;
|
||||
reinterpret_cast<u8*>(ptr)[0] = r;
|
||||
return;
|
||||
case GPU::ImageFormat::RGBA8888:
|
||||
*reinterpret_cast<u32*>(ptr) = r | (g << 8) | (b << 16) | (a << 24);
|
||||
return;
|
||||
case GPU::ImageFormat::BGRA8888:
|
||||
*reinterpret_cast<u32*>(ptr) = b | (g << 8) | (r << 16) | (a << 24);
|
||||
return;
|
||||
case GPU::ImageFormat::RGB565:
|
||||
*reinterpret_cast<u16*>(ptr) = (r & 0x1f) | ((g & 0x3f) << 5) | ((b & 0x1f) << 11);
|
||||
return;
|
||||
case GPU::ImageFormat::L8:
|
||||
*reinterpret_cast<u8*>(ptr) = r;
|
||||
return;
|
||||
case GPU::ImageFormat::L8A8:
|
||||
reinterpret_cast<u8*>(ptr)[0] = r;
|
||||
reinterpret_cast<u8*>(ptr)[1] = a;
|
||||
return;
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
}
|
||||
|
||||
class Image final : public GPU::Image {
|
||||
public:
|
||||
Image(void* const ownership_token, unsigned width, unsigned height, unsigned depth, unsigned max_levels, unsigned layers);
|
||||
|
@ -149,12 +32,12 @@ public:
|
|||
|
||||
FloatVector4 texel(unsigned layer, unsigned level, int x, int y, int z) const
|
||||
{
|
||||
return unpack_color(texel_pointer(layer, level, x, y, z), GPU::ImageFormat::BGRA8888);
|
||||
return *texel_pointer(layer, level, x, y, z);
|
||||
}
|
||||
|
||||
void set_texel(unsigned layer, unsigned level, int x, int y, int z, FloatVector4 const& color)
|
||||
{
|
||||
pack_color(color, texel_pointer(layer, level, x, y, z), GPU::ImageFormat::BGRA8888);
|
||||
*texel_pointer(layer, level, x, y, z) = color;
|
||||
}
|
||||
|
||||
virtual void write_texels(unsigned layer, unsigned level, Vector3<unsigned> const& offset, Vector3<unsigned> const& size, void const* data, GPU::ImageDataLayout const& layout) override;
|
||||
|
@ -162,12 +45,12 @@ public:
|
|||
virtual void copy_texels(GPU::Image const& source, unsigned source_layer, unsigned source_level, Vector3<unsigned> const& source_offset, Vector3<unsigned> const& size, unsigned destination_layer, unsigned destination_level, Vector3<unsigned> const& destination_offset) override;
|
||||
|
||||
private:
|
||||
void const* texel_pointer(unsigned layer, unsigned level, int x, int y, int z) const
|
||||
FloatVector4 const* texel_pointer(unsigned layer, unsigned level, int x, int y, int z) const
|
||||
{
|
||||
return m_mipmap_buffers[layer * m_num_layers + level]->buffer_pointer(x, y, z);
|
||||
}
|
||||
|
||||
void* texel_pointer(unsigned layer, unsigned level, int x, int y, int z)
|
||||
FloatVector4* texel_pointer(unsigned layer, unsigned level, int x, int y, int z)
|
||||
{
|
||||
return m_mipmap_buffers[layer * m_num_layers + level]->buffer_pointer(x, y, z);
|
||||
}
|
||||
|
@ -176,7 +59,7 @@ private:
|
|||
unsigned m_num_levels { 0 };
|
||||
unsigned m_num_layers { 0 };
|
||||
|
||||
FixedArray<RefPtr<Typed3DBuffer<GPU::ColorType>>> m_mipmap_buffers;
|
||||
FixedArray<RefPtr<Typed3DBuffer<FloatVector4>>> m_mipmap_buffers;
|
||||
|
||||
bool m_width_is_power_of_two { false };
|
||||
bool m_height_is_power_of_two { false };
|
||||
|
|
Loading…
Add table
Reference in a new issue