浏览代码

Optimize the Painter::blit() loop a bit. ~3% fewer cycles, I'll take it.

Andreas Kling 6 年之前
父节点
当前提交
f651405694
共有 4 个文件被更改,包括 22 次插入和 15 次删除
  1. 2 2
      AK/StdLibExtras.h
  2. 0 9
      Widgets/GraphicsBitmap.cpp
  3. 10 0
      Widgets/GraphicsBitmap.h
  4. 10 4
      Widgets/Painter.cpp

+ 2 - 2
AK/StdLibExtras.h

@@ -20,7 +20,7 @@ ALWAYS_INLINE void fast_dword_copy(dword* dest, const dword* src, size_t count)
 #ifdef SERENITY
     asm volatile(
         "rep movsl\n"
-        : "=S"(src), "=D"(dest)
+        : "=S"(src), "=D"(dest), "=c"(count)
         : "S"(src), "D"(dest), "c"(count)
         : "memory"
     );
@@ -34,7 +34,7 @@ ALWAYS_INLINE void fast_dword_fill(dword* dest, dword value, size_t count)
 #ifdef SERENITY
     asm volatile(
         "rep stosl\n"
-        : "=D"(dest)
+        : "=D"(dest), "=c"(count)
         : "D"(dest), "c"(count), "a"(value)
         : "memory"
     );

+ 0 - 9
Widgets/GraphicsBitmap.cpp

@@ -55,12 +55,3 @@ GraphicsBitmap::~GraphicsBitmap()
     m_data = nullptr;
 }
 
-RGBA32* GraphicsBitmap::scanline(int y)
-{
-    return reinterpret_cast<RGBA32*>((((byte*)m_data) + (y * m_pitch)));
-}
-
-const RGBA32* GraphicsBitmap::scanline(int y) const
-{
-    return reinterpret_cast<const RGBA32*>((((const byte*)m_data) + (y * m_pitch)));
-}

+ 10 - 0
Widgets/GraphicsBitmap.h

@@ -46,3 +46,13 @@ private:
     Region* m_server_region { nullptr };
 #endif
 };
+
+inline RGBA32* GraphicsBitmap::scanline(int y)
+{
+    return reinterpret_cast<RGBA32*>((((byte*)m_data) + (y * m_pitch)));
+}
+
+inline const RGBA32* GraphicsBitmap::scanline(int y) const
+{
+    return reinterpret_cast<const RGBA32*>((((const byte*)m_data) + (y * m_pitch)));
+}

+ 10 - 4
Widgets/Painter.cpp

@@ -226,9 +226,15 @@ void Painter::blit(const Point& position, const GraphicsBitmap& source)
     Rect dst_rect(position, source.size());
     dst_rect.intersect(m_clip_rect);
 
-    for (int y = 0; y < dst_rect.height(); ++y) {
-        auto* dst_scanline = m_target->scanline(position.y() + y);
-        auto* src_scanline = source.scanline(y);
-        fast_dword_copy(dst_scanline + dst_rect.x(), src_scanline + (dst_rect.x() - position.x()), dst_rect.width());
+    RGBA32* dst = m_target->scanline(position.y()) + dst_rect.x();
+    const RGBA32* src= source.scanline(0) + (dst_rect.x() - position.x());
+
+    const unsigned dst_skip = m_target->width();
+    const unsigned src_skip = source.width();
+
+    for (int i = dst_rect.height() - 1; i >= 0; --i) {
+        fast_dword_copy(dst, src, dst_rect.width());
+        dst += dst_skip;
+        src += src_skip;
     }
 }