LibSoftGPU: Optimize GPU::Vertex lists

On my machine, benchmarking 3DFileViewer revealed ~2.5% of CPU time
spent in `Vector<GPU::Vertex>::try_append`. By carefully managing list
capacities, we can remove this method from profiles altogether.
This commit is contained in:
Jelle Raaijmakers 2023-10-05 23:02:53 +02:00
parent c978891dda
commit 403d3bbdaf
Notes: sideshowbarker 2024-07-17 02:08:15 +09:00
2 changed files with 12 additions and 5 deletions

View file

@@ -76,6 +76,10 @@ FLATTEN static void clip_plane(Vector<GPU::Vertex>& input_list, Vector<GPU::Vert
if (input_list_size == 0)
return;
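// Each input vertex emits at most two output vertices in the loop below (an intersection
// point and the vertex itself), so reserving twice the input size makes the unchecked appends safe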
if (input_list_size * 2 > output_list.capacity())
output_list.ensure_capacity(input_list_size * 2);
auto const* prev_vec = &input_list.data()[0];
auto is_prev_point_within_plane = point_within_plane<plane>(*prev_vec, clip_plane);
@@ -84,10 +88,10 @@ FLATTEN static void clip_plane(Vector<GPU::Vertex>& input_list, Vector<GPU::Vert
auto const is_curr_point_within_plane = point_within_plane<plane>(curr_vec, clip_plane);
if (is_curr_point_within_plane != is_prev_point_within_plane)
output_list.append(clip_intersection_point<plane>(*prev_vec, curr_vec, clip_plane));
output_list.unchecked_append(clip_intersection_point<plane>(*prev_vec, curr_vec, clip_plane));
if (is_curr_point_within_plane)
output_list.append(curr_vec);
output_list.unchecked_append(curr_vec);
prev_vec = &curr_vec;
is_prev_point_within_plane = is_curr_point_within_plane;

View file

@@ -766,6 +766,9 @@ Device::Device(Gfx::IntSize size)
{
m_options.scissor_box = m_frame_buffer->rect();
m_options.viewport = m_frame_buffer->rect();
// Reserve room for one triangle up front: draw_primitives() clears this list with capacity
// and then appends each triangle's 3 vertices unchecked
m_clipped_vertices.ensure_capacity(3);
}
GPU::DeviceInfo Device::info() const
@@ -1112,9 +1115,9 @@ void Device::draw_primitives(GPU::PrimitiveType primitive_type, Vector<GPU::Vert
// Clip triangles
for (auto& triangle : m_triangle_list) {
m_clipped_vertices.clear_with_capacity();
m_clipped_vertices.append(triangle.vertices[0]);
m_clipped_vertices.append(triangle.vertices[1]);
m_clipped_vertices.append(triangle.vertices[2]);
m_clipped_vertices.unchecked_append(triangle.vertices[0]);
m_clipped_vertices.unchecked_append(triangle.vertices[1]);
m_clipped_vertices.unchecked_append(triangle.vertices[2]);
m_clipper.clip_triangle_against_frustum(m_clipped_vertices);
if (m_clip_planes.size() > 0)