Device.cpp 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030
  1. /*
  2. * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
  3. * Copyright (c) 2021, Jesse Buhagiar <jooster669@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Function.h>
  8. #include <LibCore/ElapsedTimer.h>
  9. #include <LibGfx/Painter.h>
  10. #include <LibGfx/Vector2.h>
  11. #include <LibGfx/Vector3.h>
  12. #include <LibSoftGPU/Config.h>
  13. #include <LibSoftGPU/Device.h>
  14. namespace SoftGPU {
  // Rasterizer statistics counters. In the visible part of this file they are only
  // touched through the INCREASE_STATISTICS_COUNTER macro inside rasterize_triangle().
  // Counts calls to rasterize_triangle() (incremented at function entry, before any early return).
  15. static long long g_num_rasterized_triangles;
  // Counts pixels that passed the triangle coverage (and scissor) test.
  16. static long long g_num_pixels;
  // Counts pixel shader invocations.
  17. static long long g_num_pixels_shaded;
  // Counts pixels written through the blending path.
  18. static long long g_num_pixels_blended;
  // NOTE(review): not referenced in the visible part of the file; presumably counts texture sampler calls - confirm.
  19. static long long g_num_sampler_calls;
  // Integer vector types used for the rasterizer's fixed-point-free integer edge math.
  20. using IntVector2 = Gfx::Vector2<int>;
  21. using IntVector3 = Gfx::Vector3<int>;
  22. constexpr static int edge_function(const IntVector2& a, const IntVector2& b, const IntVector2& c)
  23. {
  24. return ((c.x() - a.x()) * (b.y() - a.y()) - (c.y() - a.y()) * (b.x() - a.x()));
  25. }
  26. template<typename T>
  27. constexpr static T interpolate(const T& v0, const T& v1, const T& v2, const FloatVector3& barycentric_coords)
  28. {
  29. return v0 * barycentric_coords.x() + v1 * barycentric_coords.y() + v2 * barycentric_coords.z();
  30. }
  // Linear blend between `x` and `y`: returns `x` for interp == 0 and `y` for interp == 1.
  // `interp` is not clamped, so values outside [0, 1] extrapolate.
  template<typename T>
  constexpr static T mix(const T& x, const T& y, float interp)
  {
      auto const weight_of_x = 1 - interp;
      return x * weight_of_x + y * interp;
  }
  36. ALWAYS_INLINE constexpr static Gfx::RGBA32 to_rgba32(const FloatVector4& v)
  37. {
  38. auto clamped = v.clamped(0, 1);
  39. u8 r = clamped.x() * 255;
  40. u8 g = clamped.y() * 255;
  41. u8 b = clamped.z() * 255;
  42. u8 a = clamped.w() * 255;
  43. return a << 24 | r << 16 | g << 8 | b;
  44. }
  45. static FloatVector4 to_vec4(Gfx::RGBA32 rgba)
  46. {
  47. auto constexpr one_over_255 = 1.0f / 255;
  48. return {
  49. ((rgba >> 16) & 0xff) * one_over_255,
  50. ((rgba >> 8) & 0xff) * one_over_255,
  51. (rgba & 0xff) * one_over_255,
  52. ((rgba >> 24) & 0xff) * one_over_255,
  53. };
  54. }
  55. static Gfx::IntRect scissor_box_to_window_coordinates(Gfx::IntRect const& scissor_box, Gfx::IntRect const& window_rect)
  56. {
  57. return scissor_box.translated(0, window_rect.height() - 2 * scissor_box.y() - scissor_box.height());
  58. }
  59. static constexpr void setup_blend_factors(BlendFactor mode, FloatVector4& constant, float& src_alpha, float& dst_alpha, float& src_color, float& dst_color)
  60. {
  61. constant = { 0.0f, 0.0f, 0.0f, 0.0f };
  62. src_alpha = 0;
  63. dst_alpha = 0;
  64. src_color = 0;
  65. dst_color = 0;
  66. switch (mode) {
  67. case BlendFactor::Zero:
  68. break;
  69. case BlendFactor::One:
  70. constant = { 1.0f, 1.0f, 1.0f, 1.0f };
  71. break;
  72. case BlendFactor::SrcColor:
  73. src_color = 1;
  74. break;
  75. case BlendFactor::OneMinusSrcColor:
  76. constant = { 1.0f, 1.0f, 1.0f, 1.0f };
  77. src_color = -1;
  78. break;
  79. case BlendFactor::SrcAlpha:
  80. src_alpha = 1;
  81. break;
  82. case BlendFactor::OneMinusSrcAlpha:
  83. constant = { 1.0f, 1.0f, 1.0f, 1.0f };
  84. src_alpha = -1;
  85. break;
  86. case BlendFactor::DstAlpha:
  87. dst_alpha = 1;
  88. break;
  89. case BlendFactor::OneMinusDstAlpha:
  90. constant = { 1.0f, 1.0f, 1.0f, 1.0f };
  91. dst_alpha = -1;
  92. break;
  93. case BlendFactor::DstColor:
  94. dst_color = 1;
  95. break;
  96. case BlendFactor::OneMinusDstColor:
  97. constant = { 1.0f, 1.0f, 1.0f, 1.0f };
  98. dst_color = -1;
  99. break;
  100. case BlendFactor::SrcAlphaSaturate:
  101. // FIXME: How do we implement this?
  102. break;
  103. default:
  104. VERIFY_NOT_REACHED();
  105. }
  106. }
  // Rasterizes a single triangle into `render_target`, working on square blocks of
  // RASTERIZER_BLOCK_SIZE x RASTERIZER_BLOCK_SIZE pixels.
  //
  // For every block that overlaps the triangle's bounding box (clipped to the render target
  // and, when enabled, to the scissor box) the following steps are performed:
  //   1. A per-row coverage bitmask (`pixel_mask`) is generated from the integer edge functions.
  //   2. If depth testing is enabled, pixels failing `options.depth_func` are removed from the mask
  //      and the depth of passing pixels is kept in `depth_staging`.
  //   3. `pixel_shader(uv, vertex_color, fog_fragment_depth)` is invoked for every pixel left in the
  //      mask; the resulting colors are kept in `pixel_staging`.
  //   4. If alpha testing is enabled, pixels failing the alpha test are removed from the mask.
  //   5. Depth values are written (only if depth test and depth write are both enabled), then the
  //      colors are blended or copied into `render_target`, honoring `options.color_mask`.
  //
  // The dimensions of `render_target` must be multiples of RASTERIZER_BLOCK_SIZE (VERIFY'd).
  // Vertex positions are taken from `window_coordinates` and truncated to integers.
  107. template<typename PS>
  108. static void rasterize_triangle(const RasterizerOptions& options, Gfx::Bitmap& render_target, DepthBuffer& depth_buffer, const Triangle& triangle, PS pixel_shader)
  109. {
  110. INCREASE_STATISTICS_COUNTER(g_num_rasterized_triangles, 1);
  111. // Since the algorithm is based on blocks of uniform size, we need
  112. // to ensure that our render_target size is actually a multiple of the block size
  113. VERIFY((render_target.width() % RASTERIZER_BLOCK_SIZE) == 0);
  114. VERIFY((render_target.height() % RASTERIZER_BLOCK_SIZE) == 0);
  115. // Return if alpha testing is a no-op
  116. if (options.enable_alpha_test && options.alpha_test_func == AlphaTestFunction::Never)
  117. return;
  118. // Vertices
  119. Vertex const vertex0 = triangle.vertices[0];
  120. Vertex const vertex1 = triangle.vertices[1];
  121. Vertex const vertex2 = triangle.vertices[2];
  122. // Calculate area of the triangle for later tests
  123. IntVector2 const v0 { static_cast<int>(vertex0.window_coordinates.x()), static_cast<int>(vertex0.window_coordinates.y()) };
  124. IntVector2 const v1 { static_cast<int>(vertex1.window_coordinates.x()), static_cast<int>(vertex1.window_coordinates.y()) };
  125. IntVector2 const v2 { static_cast<int>(vertex2.window_coordinates.x()), static_cast<int>(vertex2.window_coordinates.y()) };
  126. int area = edge_function(v0, v1, v2);
  // A degenerate (zero area) triangle covers no pixels; this also avoids the division below.
  127. if (area == 0)
  128. return;
  // Edge values multiplied by this reciprocal yield barycentric weights (see the loops below).
  129. auto const one_over_area = 1.0f / area;
  // Coefficients of the source and destination blend factors; they stay zero when blending is disabled.
  130. FloatVector4 src_constant {};
  131. float src_factor_src_alpha = 0;
  132. float src_factor_dst_alpha = 0;
  133. float src_factor_src_color = 0;
  134. float src_factor_dst_color = 0;
  135. FloatVector4 dst_constant {};
  136. float dst_factor_src_alpha = 0;
  137. float dst_factor_dst_alpha = 0;
  138. float dst_factor_src_color = 0;
  139. float dst_factor_dst_color = 0;
  140. if (options.enable_blending) {
  141. setup_blend_factors(
  142. options.blend_source_factor,
  143. src_constant,
  144. src_factor_src_alpha,
  145. src_factor_dst_alpha,
  146. src_factor_src_color,
  147. src_factor_dst_color);
  148. setup_blend_factors(
  149. options.blend_destination_factor,
  150. dst_constant,
  151. dst_factor_src_alpha,
  152. dst_factor_dst_alpha,
  153. dst_factor_src_color,
  154. dst_factor_dst_color);
  155. }
  156. // Obey top-left rule:
  157. // This sets up "zero" for later pixel coverage tests.
  158. // Depending on where on the triangle the edge is located
  159. // it is either tested against 0 or 1, effectively
  160. // turning "< 0" into "<= 0"
  161. IntVector3 zero { 1, 1, 1 };
  162. if (v1.y() > v0.y() || (v1.y() == v0.y() && v1.x() < v0.x()))
  163. zero.set_z(0);
  164. if (v2.y() > v1.y() || (v2.y() == v1.y() && v2.x() < v1.x()))
  165. zero.set_x(0);
  166. if (v0.y() > v2.y() || (v0.y() == v2.y() && v0.x() < v2.x()))
  167. zero.set_y(0);
  168. // This function calculates the 3 edge values for the pixel relative to the triangle.
  169. auto calculate_edge_values = [v0, v1, v2](const IntVector2& p) -> IntVector3 {
  170. return {
  171. edge_function(v1, v2, p),
  172. edge_function(v2, v0, p),
  173. edge_function(v0, v1, p),
  174. };
  175. };
  176. // This function tests whether a point as identified by its 3 edge values lies within the triangle
  177. auto test_point = [zero](const IntVector3& edges) -> bool {
  178. return edges.x() >= zero.x()
  179. && edges.y() >= zero.y()
  180. && edges.z() >= zero.z();
  181. };
  182. // Calculate block-based bounds
  183. auto render_bounds = render_target.rect();
  184. if (options.scissor_enabled)
  185. render_bounds.intersect(scissor_box_to_window_coordinates(options.scissor_box, render_target.rect()));
  186. int const block_padding = RASTERIZER_BLOCK_SIZE - 1;
  // bx0/by0 are the first block column/row to visit, bx1/by1 are one past the last (rounded up via block_padding).
  187. // clang-format off
  188. int const bx0 = max(render_bounds.left(), min(min(v0.x(), v1.x()), v2.x())) / RASTERIZER_BLOCK_SIZE;
  189. int const bx1 = (min(render_bounds.right(), max(max(v0.x(), v1.x()), v2.x())) + block_padding) / RASTERIZER_BLOCK_SIZE;
  190. int const by0 = max(render_bounds.top(), min(min(v0.y(), v1.y()), v2.y())) / RASTERIZER_BLOCK_SIZE;
  191. int const by1 = (min(render_bounds.bottom(), max(max(v0.y(), v1.y()), v2.y())) + block_padding) / RASTERIZER_BLOCK_SIZE;
  192. // clang-format on
  // One coverage bitmask per block row: bit x is set while pixel x of that row is still to be drawn.
  193. u8 pixel_mask[RASTERIZER_BLOCK_SIZE];
  194. static_assert(RASTERIZER_BLOCK_SIZE <= sizeof(decltype(*pixel_mask)) * 8, "RASTERIZER_BLOCK_SIZE must be smaller than the pixel_mask's width in bits");
  // Per-block scratch storage for shaded colors and for depth values that passed the depth test.
  195. FloatVector4 pixel_staging[RASTERIZER_BLOCK_SIZE][RASTERIZER_BLOCK_SIZE];
  196. float depth_staging[RASTERIZER_BLOCK_SIZE][RASTERIZER_BLOCK_SIZE];
  197. // Fog depths
  198. float const vertex0_eye_absz = fabs(vertex0.eye_coordinates.z());
  199. float const vertex1_eye_absz = fabs(vertex1.eye_coordinates.z());
  200. float const vertex2_eye_absz = fabs(vertex2.eye_coordinates.z());
  201. // FIXME: implement stencil testing
  202. // Iterate over all blocks within the bounds of the triangle
  203. for (int by = by0; by < by1; by++) {
  204. for (int bx = bx0; bx < bx1; bx++) {
  205. // Edge values of the 4 block corners
  206. // clang-format off
  207. auto b0 = calculate_edge_values({ bx * RASTERIZER_BLOCK_SIZE, by * RASTERIZER_BLOCK_SIZE });
  208. auto b1 = calculate_edge_values({ bx * RASTERIZER_BLOCK_SIZE + RASTERIZER_BLOCK_SIZE, by * RASTERIZER_BLOCK_SIZE });
  209. auto b2 = calculate_edge_values({ bx * RASTERIZER_BLOCK_SIZE, by * RASTERIZER_BLOCK_SIZE + RASTERIZER_BLOCK_SIZE });
  210. auto b3 = calculate_edge_values({ bx * RASTERIZER_BLOCK_SIZE + RASTERIZER_BLOCK_SIZE, by * RASTERIZER_BLOCK_SIZE + RASTERIZER_BLOCK_SIZE });
  211. // clang-format on
  212. // If the whole block is outside any of the triangle edges we can discard it completely
  213. // We test this by and'ing the relevant edge function values together for all block corners
  214. // and checking if the negative sign bit is set for all of them
  215. if ((b0.x() & b1.x() & b2.x() & b3.x()) & 0x80000000)
  216. continue;
  217. if ((b0.y() & b1.y() & b2.y() & b3.y()) & 0x80000000)
  218. continue;
  219. if ((b0.z() & b1.z() & b2.z() & b3.z()) & 0x80000000)
  220. continue;
  221. // edge value derivatives
  222. auto dbdx = (b1 - b0) / RASTERIZER_BLOCK_SIZE;
  223. auto dbdy = (b2 - b0) / RASTERIZER_BLOCK_SIZE;
  224. // step edge value after each horizontal span: 1 down, BLOCK_SIZE left
  225. auto step_y = dbdy - dbdx * RASTERIZER_BLOCK_SIZE;
  // Render target coordinates of the block's top-left pixel.
  226. int x0 = bx * RASTERIZER_BLOCK_SIZE;
  227. int y0 = by * RASTERIZER_BLOCK_SIZE;
  228. // Generate the coverage mask
  229. if (!options.scissor_enabled && test_point(b0) && test_point(b1) && test_point(b2) && test_point(b3)) {
  230. INCREASE_STATISTICS_COUNTER(g_num_pixels, RASTERIZER_BLOCK_SIZE * RASTERIZER_BLOCK_SIZE);
  231. // The block is fully contained within the triangle. Fill the mask with all 1s
  232. for (int y = 0; y < RASTERIZER_BLOCK_SIZE; y++)
  // -1 converted to u8 has every bit set.
  233. pixel_mask[y] = -1;
  234. } else {
  235. // The block overlaps at least one triangle edge.
  236. // We need to test coverage of every pixel within the block.
  237. auto coords = b0;
  238. for (int y = 0; y < RASTERIZER_BLOCK_SIZE; y++, coords += step_y) {
  239. pixel_mask[y] = 0;
  240. for (int x = 0; x < RASTERIZER_BLOCK_SIZE; x++, coords += dbdx) {
  241. if (test_point(coords) && (!options.scissor_enabled || render_bounds.contains(x0 + x, y0 + y))) {
  242. INCREASE_STATISTICS_COUNTER(g_num_pixels, 1);
  243. pixel_mask[y] |= 1 << x;
  244. }
  245. }
  246. }
  247. }
  248. // AND the depth mask onto the coverage mask
  249. if (options.enable_depth_test) {
  250. int z_pass_count = 0;
  251. auto coords = b0;
  252. for (int y = 0; y < RASTERIZER_BLOCK_SIZE; y++, coords += step_y) {
  253. if (pixel_mask[y] == 0) {
  // Empty row: advance coords across the whole row so that the step_y increment
  // of the outer loop still lands on the start of the next row.
  254. coords += dbdx * RASTERIZER_BLOCK_SIZE;
  255. continue;
  256. }
  257. auto* depth = &depth_buffer.scanline(y0 + y)[x0];
  258. for (int x = 0; x < RASTERIZER_BLOCK_SIZE; x++, coords += dbdx, depth++) {
  259. if (~pixel_mask[y] & (1 << x))
  260. continue;
  261. auto barycentric = FloatVector3(coords.x(), coords.y(), coords.z()) * one_over_area;
  262. float z = interpolate(vertex0.window_coordinates.z(), vertex1.window_coordinates.z(), vertex2.window_coordinates.z(), barycentric);
  263. // FIXME: Also apply depth_offset_factor which depends on the depth gradient
  264. z += options.depth_offset_constant * NumericLimits<float>::epsilon();
  265. bool pass = false;
  266. switch (options.depth_func) {
  267. case DepthTestFunction::Always:
  268. pass = true;
  269. break;
  270. case DepthTestFunction::Never:
  271. pass = false;
  272. break;
  273. case DepthTestFunction::Greater:
  274. pass = z > *depth;
  275. break;
  276. case DepthTestFunction::GreaterOrEqual:
  277. pass = z >= *depth;
  278. break;
  279. case DepthTestFunction::NotEqual:
  // Without SSE the bit patterns are compared instead; see the explanation in the Equal case below.
  280. #ifdef __SSE__
  281. pass = z != *depth;
  282. #else
  283. pass = bit_cast<u32>(z) != bit_cast<u32>(*depth);
  284. #endif
  285. break;
  286. case DepthTestFunction::Equal:
  287. #ifdef __SSE__
  288. pass = z == *depth;
  289. #else
  290. //
  291. // This is an interesting quirk that occurs due to us using the x87 FPU when Serenity is
  292. // compiled for the i386 target. When we calculate our depth value to be stored in the buffer,
  293. // it is an 80-bit x87 floating point number, however, when stored into the DepthBuffer, this is
  294. // truncated to 32 bits. This 48 bit loss of precision means that when x87 `FCOMP` is eventually
  295. // used here the comparison fails.
  296. // This could be solved by using a `long double` for the depth buffer, however this would take
  297. // up significantly more space and is completely overkill for a depth buffer. As such, comparing
  298. // the first 32-bits of this depth value is "good enough" that if we get a hit on it being
  299. // equal, we can pretty much guarantee that it's actually equal.
  300. //
  301. pass = bit_cast<u32>(z) == bit_cast<u32>(*depth);
  302. #endif
  303. break;
  304. case DepthTestFunction::LessOrEqual:
  305. pass = z <= *depth;
  306. break;
  307. case DepthTestFunction::Less:
  308. pass = z < *depth;
  309. break;
  310. }
  311. if (!pass) {
  // The bit is known to be set at this point (checked above), so XOR clears it.
  312. pixel_mask[y] ^= 1 << x;
  313. continue;
  314. }
  // Remember the depth; it is only written to the depth buffer after the alpha test below.
  315. depth_staging[y][x] = z;
  316. z_pass_count++;
  317. }
  318. }
  319. // Nice, no pixels passed the depth test -> block rejected by early z
  320. if (z_pass_count == 0)
  321. continue;
  322. }
  323. // Draw the pixels according to the previously generated mask
  324. auto coords = b0;
  325. for (int y = 0; y < RASTERIZER_BLOCK_SIZE; y++, coords += step_y) {
  326. if (pixel_mask[y] == 0) {
  327. coords += dbdx * RASTERIZER_BLOCK_SIZE;
  328. continue;
  329. }
  330. auto* pixel = pixel_staging[y];
  331. for (int x = 0; x < RASTERIZER_BLOCK_SIZE; x++, coords += dbdx, pixel++) {
  332. if (~pixel_mask[y] & (1 << x))
  333. continue;
  334. // Perspective correct barycentric coordinates
  335. auto barycentric = FloatVector3(coords.x(), coords.y(), coords.z()) * one_over_area;
  // window_coordinates.w() holds the reciprocal of the clip space w (stored by Device::draw_primitives).
  336. auto const w_coordinates = FloatVector3 {
  337. vertex0.window_coordinates.w(),
  338. vertex1.window_coordinates.w(),
  339. vertex2.window_coordinates.w(),
  340. };
  341. float const interpolated_reciprocal_w = interpolate(w_coordinates.x(), w_coordinates.y(), w_coordinates.z(), barycentric);
  342. float const interpolated_w = 1 / interpolated_reciprocal_w;
  343. barycentric = barycentric * w_coordinates * interpolated_w;
  344. // FIXME: make this more generic. We want to interpolate more than just color and uv
  345. FloatVector4 vertex_color;
  346. if (options.shade_smooth) {
  347. vertex_color = interpolate(vertex0.color, vertex1.color, vertex2.color, barycentric);
  348. } else {
  // Flat shading: the whole triangle uses the color of its first vertex.
  349. vertex_color = vertex0.color;
  350. }
  351. auto uv = interpolate(vertex0.tex_coord, vertex1.tex_coord, vertex2.tex_coord, barycentric);
  352. // Calculate depth of fragment for fog
  353. //
  354. // OpenGL 1.5 spec chapter 3.10: "An implementation may choose to approximate the
  355. // eye-coordinate distance from the eye to each fragment center by |Ze|."
  356. float fog_fragment_depth = interpolate(vertex0_eye_absz, vertex1_eye_absz, vertex2_eye_absz, barycentric);
  357. *pixel = pixel_shader(uv, vertex_color, fog_fragment_depth);
  358. INCREASE_STATISTICS_COUNTER(g_num_pixels_shaded, 1);
  359. }
  360. }
  // Alpha test: compares the shaded alpha (w component) against the reference value and
  // removes failing pixels from the mask. Never was handled at function entry, Always needs no work.
  361. if (options.enable_alpha_test && options.alpha_test_func != AlphaTestFunction::Always) {
  362. for (int y = 0; y < RASTERIZER_BLOCK_SIZE; y++) {
  363. if (pixel_mask[y] == 0)
  364. continue;
  365. auto src = pixel_staging[y];
  366. for (int x = 0; x < RASTERIZER_BLOCK_SIZE; x++, src++) {
  367. if (~pixel_mask[y] & (1 << x))
  368. continue;
  369. bool passed = true;
  370. switch (options.alpha_test_func) {
  371. case AlphaTestFunction::Less:
  372. passed = src->w() < options.alpha_test_ref_value;
  373. break;
  374. case AlphaTestFunction::Equal:
  375. passed = src->w() == options.alpha_test_ref_value;
  376. break;
  377. case AlphaTestFunction::LessOrEqual:
  378. passed = src->w() <= options.alpha_test_ref_value;
  379. break;
  380. case AlphaTestFunction::Greater:
  381. passed = src->w() > options.alpha_test_ref_value;
  382. break;
  383. case AlphaTestFunction::NotEqual:
  384. passed = src->w() != options.alpha_test_ref_value;
  385. break;
  386. case AlphaTestFunction::GreaterOrEqual:
  387. passed = src->w() >= options.alpha_test_ref_value;
  388. break;
  389. case AlphaTestFunction::Never:
  390. case AlphaTestFunction::Always:
  391. VERIFY_NOT_REACHED();
  392. }
  393. if (!passed)
  394. pixel_mask[y] ^= (1 << x);
  395. }
  396. }
  397. }
  398. // Write to depth buffer
  399. if (options.enable_depth_test && options.enable_depth_write) {
  400. for (int y = 0; y < RASTERIZER_BLOCK_SIZE; y++) {
  401. if (pixel_mask[y] == 0)
  402. continue;
  403. auto* depth = &depth_buffer.scanline(y0 + y)[x0];
  404. for (int x = 0; x < RASTERIZER_BLOCK_SIZE; x++, depth++) {
  405. if (~pixel_mask[y] & (1 << x))
  406. continue;
  407. *depth = depth_staging[y][x];
  408. }
  409. }
  410. }
  411. // We will not update the color buffer at all
  412. if (!options.color_mask || !options.enable_color_write)
  413. continue;
  414. if (options.enable_blending) {
  415. // Blend color values from pixel_staging into render_target
  416. for (int y = 0; y < RASTERIZER_BLOCK_SIZE; y++) {
  417. auto src = pixel_staging[y];
  418. auto dst = &render_target.scanline(y0 + y)[x0];
  419. for (int x = 0; x < RASTERIZER_BLOCK_SIZE; x++, src++, dst++) {
  420. if (~pixel_mask[y] & (1 << x))
  421. continue;
  422. auto float_dst = to_vec4(*dst);
  // Evaluate the blend factors from the coefficients prepared by setup_blend_factors().
  423. auto src_factor = src_constant
  424. + *src * src_factor_src_color
  425. + FloatVector4(src->w(), src->w(), src->w(), src->w()) * src_factor_src_alpha
  426. + float_dst * src_factor_dst_color
  427. + FloatVector4(float_dst.w(), float_dst.w(), float_dst.w(), float_dst.w()) * src_factor_dst_alpha;
  428. auto dst_factor = dst_constant
  429. + *src * dst_factor_src_color
  430. + FloatVector4(src->w(), src->w(), src->w(), src->w()) * dst_factor_src_alpha
  431. + float_dst * dst_factor_dst_color
  432. + FloatVector4(float_dst.w(), float_dst.w(), float_dst.w(), float_dst.w()) * dst_factor_dst_alpha;
  // Only the bits selected by color_mask are replaced; all other bits keep the destination's value.
  433. *dst = (*dst & ~options.color_mask) | (to_rgba32(*src * src_factor + float_dst * dst_factor) & options.color_mask);
  434. INCREASE_STATISTICS_COUNTER(g_num_pixels_blended, 1);
  435. }
  436. }
  437. } else {
  438. // Copy color values from pixel_staging into render_target
  439. for (int y = 0; y < RASTERIZER_BLOCK_SIZE; y++) {
  440. auto src = pixel_staging[y];
  441. auto dst = &render_target.scanline(y + y0)[x0];
  442. for (int x = 0; x < RASTERIZER_BLOCK_SIZE; x++, src++, dst++) {
  443. if (~pixel_mask[y] & (1 << x))
  444. continue;
  445. *dst = (*dst & ~options.color_mask) | (to_rgba32(*src) & options.color_mask);
  446. }
  447. }
  448. }
  449. }
  450. }
  451. }
  452. static Gfx::IntSize closest_multiple(const Gfx::IntSize& min_size, size_t step)
  453. {
  454. int width = ((min_size.width() + step - 1) / step) * step;
  455. int height = ((min_size.height() + step - 1) / step) * step;
  456. return { width, height };
  457. }
  // Creates the device's BGRA8888 render target and its depth buffer. Both are sized to
  // `min_size` rounded up (via closest_multiple) to a multiple of RASTERIZER_BLOCK_SIZE, which
  // is the size requirement that rasterize_triangle() VERIFYs for the render target.
  // The scissor box is initialized to cover the whole render target.
  // NOTE(review): a failed bitmap allocation is not propagated to the caller
  // (release_value_but_fixme_should_propagate_errors) - presumably this aborts; confirm.
  458. Device::Device(const Gfx::IntSize& min_size)
  459. : m_render_target { Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRA8888, closest_multiple(min_size, RASTERIZER_BLOCK_SIZE)).release_value_but_fixme_should_propagate_errors() }
  460. , m_depth_buffer { adopt_own(*new DepthBuffer(closest_multiple(min_size, RASTERIZER_BLOCK_SIZE))) }
  461. {
  462. m_options.scissor_box = m_render_target->rect();
  463. }
  464. DeviceInfo Device::info() const
  465. {
  466. return {
  467. .vendor_name = "SerenityOS",
  468. .device_name = "SoftGPU",
  469. .num_texture_units = NUM_SAMPLERS
  470. };
  471. }
  472. static void generate_texture_coordinates(Vertex& vertex, RasterizerOptions const& options)
  473. {
  474. auto generate_coordinate = [&](size_t config_index) -> float {
  475. auto mode = options.texcoord_generation_config[config_index].mode;
  476. switch (mode) {
  477. case TexCoordGenerationMode::ObjectLinear: {
  478. auto coefficients = options.texcoord_generation_config[config_index].coefficients;
  479. return coefficients.dot(vertex.position);
  480. }
  481. case TexCoordGenerationMode::EyeLinear: {
  482. auto coefficients = options.texcoord_generation_config[config_index].coefficients;
  483. return coefficients.dot(vertex.eye_coordinates);
  484. }
  485. case TexCoordGenerationMode::SphereMap: {
  486. auto const eye_unit = vertex.eye_coordinates.normalized();
  487. FloatVector3 const eye_unit_xyz = { eye_unit.x(), eye_unit.y(), eye_unit.z() };
  488. auto const normal = vertex.normal;
  489. auto reflection = eye_unit_xyz - normal * 2 * normal.dot(eye_unit_xyz);
  490. reflection.set_z(reflection.z() + 1);
  491. auto const reflection_value = (config_index == 0) ? reflection.x() : reflection.y();
  492. return reflection_value / (2 * reflection.length()) + 0.5f;
  493. }
  494. case TexCoordGenerationMode::ReflectionMap: {
  495. auto const eye_unit = vertex.eye_coordinates.normalized();
  496. FloatVector3 const eye_unit_xyz = { eye_unit.x(), eye_unit.y(), eye_unit.z() };
  497. auto const normal = vertex.normal;
  498. auto reflection = eye_unit_xyz - normal * 2 * normal.dot(eye_unit_xyz);
  499. switch (config_index) {
  500. case 0:
  501. return reflection.x();
  502. case 1:
  503. return reflection.y();
  504. case 2:
  505. return reflection.z();
  506. default:
  507. VERIFY_NOT_REACHED();
  508. }
  509. }
  510. case TexCoordGenerationMode::NormalMap: {
  511. auto const normal = vertex.normal;
  512. switch (config_index) {
  513. case 0:
  514. return normal.x();
  515. case 1:
  516. return normal.y();
  517. case 2:
  518. return normal.z();
  519. default:
  520. VERIFY_NOT_REACHED();
  521. }
  522. }
  523. default:
  524. VERIFY_NOT_REACHED();
  525. }
  526. };
  527. auto const enabled_coords = options.texcoord_generation_enabled_coordinates;
  528. vertex.tex_coord = {
  529. ((enabled_coords & TexCoordGenerationCoordinate::S) > 0) ? generate_coordinate(0) : vertex.tex_coord.x(),
  530. ((enabled_coords & TexCoordGenerationCoordinate::T) > 0) ? generate_coordinate(1) : vertex.tex_coord.y(),
  531. ((enabled_coords & TexCoordGenerationCoordinate::R) > 0) ? generate_coordinate(2) : vertex.tex_coord.z(),
  532. ((enabled_coords & TexCoordGenerationCoordinate::Q) > 0) ? generate_coordinate(3) : vertex.tex_coord.w(),
  533. };
  534. }
  535. void Device::draw_primitives(PrimitiveType primitive_type, FloatMatrix4x4 const& model_view_transform, FloatMatrix3x3 const& normal_transform,
  536. FloatMatrix4x4 const& projection_transform, FloatMatrix4x4 const& texture_transform, Vector<Vertex> const& vertices,
  537. Vector<size_t> const& enabled_texture_units)
  538. {
  539. // At this point, the user has effectively specified that they are done with defining the geometry
  540. // of what they want to draw. We now need to do a few things (https://www.khronos.org/opengl/wiki/Rendering_Pipeline_Overview):
  541. //
  542. // 1. Transform all of the vertices in the current vertex list into eye space by multiplying the model-view matrix
  543. // 2. Transform all of the vertices from eye space into clip space by multiplying by the projection matrix
  544. // 3. If culling is enabled, we cull the desired faces (https://learnopengl.com/Advanced-OpenGL/Face-culling)
  545. // 4. Each element of the vertex is then divided by w to bring the positions into NDC (Normalized Device Coordinates)
  546. // 5. The vertices are sorted (for the rasteriser, how are we doing this? 3Dfx did this top to bottom in terms of vertex y coordinates)
  547. // 6. The vertices are then sent off to the rasteriser and drawn to the screen
  548. float scr_width = m_render_target->width();
  549. float scr_height = m_render_target->height();
  550. m_triangle_list.clear_with_capacity();
  551. m_processed_triangles.clear_with_capacity();
  552. // Let's construct some triangles
  553. if (primitive_type == PrimitiveType::Triangles) {
  554. Triangle triangle;
  555. for (size_t i = 0; i < vertices.size(); i += 3) {
  556. triangle.vertices[0] = vertices.at(i);
  557. triangle.vertices[1] = vertices.at(i + 1);
  558. triangle.vertices[2] = vertices.at(i + 2);
  559. m_triangle_list.append(triangle);
  560. }
  561. } else if (primitive_type == PrimitiveType::Quads) {
  562. // We need to construct two triangles to form the quad
  563. Triangle triangle;
  564. VERIFY(vertices.size() % 4 == 0);
  565. for (size_t i = 0; i < vertices.size(); i += 4) {
  566. // Triangle 1
  567. triangle.vertices[0] = vertices.at(i);
  568. triangle.vertices[1] = vertices.at(i + 1);
  569. triangle.vertices[2] = vertices.at(i + 2);
  570. m_triangle_list.append(triangle);
  571. // Triangle 2
  572. triangle.vertices[0] = vertices.at(i + 2);
  573. triangle.vertices[1] = vertices.at(i + 3);
  574. triangle.vertices[2] = vertices.at(i);
  575. m_triangle_list.append(triangle);
  576. }
  577. } else if (primitive_type == PrimitiveType::TriangleFan) {
  578. Triangle triangle;
  579. triangle.vertices[0] = vertices.at(0); // Root vertex is always the vertex defined first
  580. for (size_t i = 1; i < vertices.size() - 1; i++) // This is technically `n-2` triangles. We start at index 1
  581. {
  582. triangle.vertices[1] = vertices.at(i);
  583. triangle.vertices[2] = vertices.at(i + 1);
  584. m_triangle_list.append(triangle);
  585. }
  586. } else if (primitive_type == PrimitiveType::TriangleStrip) {
  587. Triangle triangle;
  588. for (size_t i = 0; i < vertices.size() - 2; i++) {
  589. if (i % 2 == 0) {
  590. triangle.vertices[0] = vertices.at(i);
  591. triangle.vertices[1] = vertices.at(i + 1);
  592. triangle.vertices[2] = vertices.at(i + 2);
  593. } else {
  594. triangle.vertices[0] = vertices.at(i + 1);
  595. triangle.vertices[1] = vertices.at(i);
  596. triangle.vertices[2] = vertices.at(i + 2);
  597. }
  598. m_triangle_list.append(triangle);
  599. }
  600. }
  601. // Now let's transform each triangle and send that to the GPU
  602. auto const depth_half_range = (m_options.depth_max - m_options.depth_min) / 2;
  603. auto const depth_halfway = (m_options.depth_min + m_options.depth_max) / 2;
  604. for (auto& triangle : m_triangle_list) {
  605. // Transform vertices into eye coordinates using the model-view transform
  606. triangle.vertices[0].eye_coordinates = model_view_transform * triangle.vertices[0].position;
  607. triangle.vertices[1].eye_coordinates = model_view_transform * triangle.vertices[1].position;
  608. triangle.vertices[2].eye_coordinates = model_view_transform * triangle.vertices[2].position;
  609. // Transform eye coordinates into clip coordinates using the projection transform
  610. triangle.vertices[0].clip_coordinates = projection_transform * triangle.vertices[0].eye_coordinates;
  611. triangle.vertices[1].clip_coordinates = projection_transform * triangle.vertices[1].eye_coordinates;
  612. triangle.vertices[2].clip_coordinates = projection_transform * triangle.vertices[2].eye_coordinates;
  613. // At this point, we're in clip space
  614. // Here's where we do the clipping. This is a really crude implementation of the
  615. // https://learnopengl.com/Getting-started/Coordinate-Systems
  616. // "Note that if only a part of a primitive e.g. a triangle is outside the clipping volume OpenGL
  617. // will reconstruct the triangle as one or more triangles to fit inside the clipping range. "
  618. //
  619. // ALL VERTICES ARE DEFINED IN A CLOCKWISE ORDER
  620. // Okay, let's do some face culling first
  621. m_clipped_vertices.clear_with_capacity();
  622. m_clipped_vertices.append(triangle.vertices[0]);
  623. m_clipped_vertices.append(triangle.vertices[1]);
  624. m_clipped_vertices.append(triangle.vertices[2]);
  625. m_clipper.clip_triangle_against_frustum(m_clipped_vertices);
  626. if (m_clipped_vertices.size() < 3)
  627. continue;
  628. for (auto& vec : m_clipped_vertices) {
  629. // To normalized device coordinates (NDC)
  630. auto const one_over_w = 1 / vec.clip_coordinates.w();
  631. auto const ndc_coordinates = FloatVector4 {
  632. vec.clip_coordinates.x() * one_over_w,
  633. vec.clip_coordinates.y() * one_over_w,
  634. vec.clip_coordinates.z() * one_over_w,
  635. one_over_w,
  636. };
  637. // To window coordinates
  638. // FIXME: implement viewport functionality
  639. vec.window_coordinates = {
  640. scr_width / 2 + ndc_coordinates.x() * scr_width / 2,
  641. scr_height / 2 - ndc_coordinates.y() * scr_height / 2,
  642. depth_half_range * ndc_coordinates.z() + depth_halfway,
  643. ndc_coordinates.w(),
  644. };
  645. }
  646. Triangle tri;
  647. tri.vertices[0] = m_clipped_vertices[0];
  648. for (size_t i = 1; i < m_clipped_vertices.size() - 1; i++) {
  649. tri.vertices[1] = m_clipped_vertices[i];
  650. tri.vertices[2] = m_clipped_vertices[i + 1];
  651. m_processed_triangles.append(tri);
  652. }
  653. }
  654. for (auto& triangle : m_processed_triangles) {
  655. // Let's calculate the (signed) area of the triangle
  656. // https://cp-algorithms.com/geometry/oriented-triangle-area.html
  657. float dxAB = triangle.vertices[0].window_coordinates.x() - triangle.vertices[1].window_coordinates.x(); // A.x - B.x
  658. float dxBC = triangle.vertices[1].window_coordinates.x() - triangle.vertices[2].window_coordinates.x(); // B.X - C.x
  659. float dyAB = triangle.vertices[0].window_coordinates.y() - triangle.vertices[1].window_coordinates.y();
  660. float dyBC = triangle.vertices[1].window_coordinates.y() - triangle.vertices[2].window_coordinates.y();
  661. float area = (dxAB * dyBC) - (dxBC * dyAB);
  662. if (area == 0.0f)
  663. continue;
  664. if (m_options.enable_culling) {
  665. bool is_front = (m_options.front_face == WindingOrder::CounterClockwise ? area < 0 : area > 0);
  666. if (!is_front && m_options.cull_back)
  667. continue;
  668. if (is_front && m_options.cull_front)
  669. continue;
  670. }
  671. if (area > 0)
  672. swap(triangle.vertices[0], triangle.vertices[1]);
  673. // Transform normals
  674. triangle.vertices[0].normal = normal_transform * triangle.vertices[0].normal;
  675. triangle.vertices[1].normal = normal_transform * triangle.vertices[1].normal;
  676. triangle.vertices[2].normal = normal_transform * triangle.vertices[2].normal;
  677. if (m_options.normalization_enabled) {
  678. triangle.vertices[0].normal.normalize();
  679. triangle.vertices[1].normal.normalize();
  680. triangle.vertices[2].normal.normalize();
  681. }
  682. // Generate texture coordinates if at least one coordinate is enabled
  683. if (m_options.texcoord_generation_enabled_coordinates != TexCoordGenerationCoordinate::None) {
  684. generate_texture_coordinates(triangle.vertices[0], m_options);
  685. generate_texture_coordinates(triangle.vertices[1], m_options);
  686. generate_texture_coordinates(triangle.vertices[2], m_options);
  687. }
  688. // Apply texture transformation
  689. // FIXME: implement multi-texturing: texcoords should be stored per texture unit
  690. triangle.vertices[0].tex_coord = texture_transform * triangle.vertices[0].tex_coord;
  691. triangle.vertices[1].tex_coord = texture_transform * triangle.vertices[1].tex_coord;
  692. triangle.vertices[2].tex_coord = texture_transform * triangle.vertices[2].tex_coord;
  693. submit_triangle(triangle, enabled_texture_units);
  694. }
  695. }
  696. void Device::submit_triangle(const Triangle& triangle, Vector<size_t> const& enabled_texture_units)
  697. {
  698. rasterize_triangle(m_options, *m_render_target, *m_depth_buffer, triangle, [this, &enabled_texture_units](FloatVector4 const& uv, FloatVector4 const& color, float fog_depth) -> FloatVector4 {
  699. FloatVector4 fragment = color;
  700. for (size_t i : enabled_texture_units) {
  701. // FIXME: implement GL_TEXTURE_1D, GL_TEXTURE_3D and GL_TEXTURE_CUBE_MAP
  702. auto const& sampler = m_samplers[i];
  703. FloatVector4 texel = sampler.sample_2d({ uv.x(), uv.y() });
  704. INCREASE_STATISTICS_COUNTER(g_num_sampler_calls, 1);
  705. // FIXME: Implement more blend modes
  706. switch (sampler.config().fixed_function_texture_env_mode) {
  707. case TextureEnvMode::Modulate:
  708. fragment = fragment * texel;
  709. break;
  710. case TextureEnvMode::Replace:
  711. fragment = texel;
  712. break;
  713. case TextureEnvMode::Decal: {
  714. float src_alpha = fragment.w();
  715. float one_minus_src_alpha = 1 - src_alpha;
  716. fragment.set_x(texel.x() * src_alpha + fragment.x() * one_minus_src_alpha);
  717. fragment.set_y(texel.y() * src_alpha + fragment.y() * one_minus_src_alpha);
  718. fragment.set_z(texel.z() * src_alpha + fragment.z() * one_minus_src_alpha);
  719. break;
  720. }
  721. default:
  722. VERIFY_NOT_REACHED();
  723. }
  724. }
  725. // Calculate fog
  726. // Math from here: https://opengl-notes.readthedocs.io/en/latest/topics/texturing/aliasing.html
  727. if (m_options.fog_enabled) {
  728. float factor = 0.0f;
  729. switch (m_options.fog_mode) {
  730. case FogMode::Linear:
  731. factor = (m_options.fog_end - fog_depth) / (m_options.fog_end - m_options.fog_start);
  732. break;
  733. case FogMode::Exp:
  734. factor = expf(-m_options.fog_density * fog_depth);
  735. break;
  736. case FogMode::Exp2:
  737. factor = expf(-((m_options.fog_density * fog_depth) * (m_options.fog_density * fog_depth)));
  738. break;
  739. default:
  740. VERIFY_NOT_REACHED();
  741. }
  742. // Mix texel's RGB with fog's RBG - leave alpha alone
  743. fragment.set_x(mix(m_options.fog_color.x(), fragment.x(), factor));
  744. fragment.set_y(mix(m_options.fog_color.y(), fragment.y(), factor));
  745. fragment.set_z(mix(m_options.fog_color.z(), fragment.z(), factor));
  746. }
  747. return fragment;
  748. });
  749. }
  750. void Device::resize(const Gfx::IntSize& min_size)
  751. {
  752. wait_for_all_threads();
  753. m_render_target = Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRA8888, closest_multiple(min_size, RASTERIZER_BLOCK_SIZE)).release_value_but_fixme_should_propagate_errors();
  754. m_depth_buffer = adopt_own(*new DepthBuffer(m_render_target->size()));
  755. }
  756. void Device::clear_color(const FloatVector4& color)
  757. {
  758. wait_for_all_threads();
  759. uint8_t r = static_cast<uint8_t>(clamp(color.x(), 0.0f, 1.0f) * 255);
  760. uint8_t g = static_cast<uint8_t>(clamp(color.y(), 0.0f, 1.0f) * 255);
  761. uint8_t b = static_cast<uint8_t>(clamp(color.z(), 0.0f, 1.0f) * 255);
  762. uint8_t a = static_cast<uint8_t>(clamp(color.w(), 0.0f, 1.0f) * 255);
  763. auto const fill_color = Gfx::Color(r, g, b, a);
  764. if (m_options.scissor_enabled) {
  765. auto fill_rect = m_render_target->rect();
  766. fill_rect.intersect(scissor_box_to_window_coordinates(m_options.scissor_box, fill_rect));
  767. Gfx::Painter painter { *m_render_target };
  768. painter.fill_rect(fill_rect, fill_color);
  769. return;
  770. }
  771. m_render_target->fill(fill_color);
  772. }
  773. void Device::clear_depth(float depth)
  774. {
  775. wait_for_all_threads();
  776. if (m_options.scissor_enabled) {
  777. m_depth_buffer->clear(scissor_box_to_window_coordinates(m_options.scissor_box, m_render_target->rect()), depth);
  778. return;
  779. }
  780. m_depth_buffer->clear(depth);
  781. }
  782. void Device::blit(Gfx::Bitmap const& source, int x, int y)
  783. {
  784. wait_for_all_threads();
  785. INCREASE_STATISTICS_COUNTER(g_num_pixels, source.width() * source.height());
  786. INCREASE_STATISTICS_COUNTER(g_num_pixels_shaded, source.width() * source.height());
  787. Gfx::Painter painter { *m_render_target };
  788. painter.blit({ x, y }, source, source.rect(), 1.0f, true);
  789. }
  790. void Device::blit_to(Gfx::Bitmap& target)
  791. {
  792. wait_for_all_threads();
  793. Gfx::Painter painter { target };
  794. painter.blit({ 0, 0 }, *m_render_target, m_render_target->rect(), 1.0f, false);
  795. if constexpr (ENABLE_STATISTICS_OVERLAY)
  796. draw_statistics_overlay(target);
  797. }
  798. void Device::draw_statistics_overlay(Gfx::Bitmap& target)
  799. {
  800. static Core::ElapsedTimer timer;
  801. static String debug_string;
  802. static int frame_counter;
  803. frame_counter++;
  804. int milliseconds = 0;
  805. if (timer.is_valid())
  806. milliseconds = timer.elapsed();
  807. else
  808. timer.start();
  809. Gfx::Painter painter { target };
  810. if (milliseconds > 500) {
  811. if (g_num_pixels == 0)
  812. g_num_pixels = 1;
  813. int num_rendertarget_pixels = m_render_target->width() * m_render_target->height();
  814. StringBuilder builder;
  815. builder.append(String::formatted("Timings : {:.1}ms {:.1}FPS\n",
  816. static_cast<double>(milliseconds) / frame_counter,
  817. (milliseconds > 0) ? 1000.0 * frame_counter / milliseconds : 9999.0));
  818. builder.append(String::formatted("Triangles : {}\n", g_num_rasterized_triangles));
  819. builder.append(String::formatted("Pixels : {}, Shaded: {}%, Blended: {}%, Overdraw: {}%\n",
  820. g_num_pixels,
  821. g_num_pixels_shaded * 100 / g_num_pixels,
  822. g_num_pixels_blended * 100 / g_num_pixels_shaded,
  823. g_num_pixels_shaded * 100 / num_rendertarget_pixels - 100));
  824. builder.append(String::formatted("Sampler calls: {}\n", g_num_sampler_calls));
  825. debug_string = builder.to_string();
  826. frame_counter = 0;
  827. timer.start();
  828. }
  829. g_num_rasterized_triangles = 0;
  830. g_num_pixels = 0;
  831. g_num_pixels_shaded = 0;
  832. g_num_pixels_blended = 0;
  833. g_num_sampler_calls = 0;
  834. auto& font = Gfx::FontDatabase::default_fixed_width_font();
  835. for (int y = -1; y < 2; y++)
  836. for (int x = -1; x < 2; x++)
  837. if (x != 0 && y != 0)
  838. painter.draw_text(target.rect().translated(x + 2, y + 2), debug_string, font, Gfx::TextAlignment::TopLeft, Gfx::Color::Black);
  839. painter.draw_text(target.rect().translated(2, 2), debug_string, font, Gfx::TextAlignment::TopLeft, Gfx::Color::White);
  840. }
  841. void Device::wait_for_all_threads() const
  842. {
  843. // FIXME: Wait for all render threads to finish when multithreading is being implemented
  844. }
  845. void Device::set_options(const RasterizerOptions& options)
  846. {
  847. wait_for_all_threads();
  848. m_options = options;
  849. // FIXME: Recreate or reinitialize render threads here when multithreading is being implemented
  850. }
  851. Gfx::RGBA32 Device::get_backbuffer_pixel(int x, int y)
  852. {
  853. // FIXME: Reading individual pixels is very slow, rewrite this to transfer whole blocks
  854. if (x < 0 || y < 0 || x >= m_render_target->width() || y >= m_render_target->height())
  855. return 0;
  856. return m_render_target->scanline(y)[x];
  857. }
  858. float Device::get_depthbuffer_value(int x, int y)
  859. {
  860. // FIXME: Reading individual pixels is very slow, rewrite this to transfer whole blocks
  861. if (x < 0 || y < 0 || x >= m_render_target->width() || y >= m_render_target->height())
  862. return 1.0f;
  863. return m_depth_buffer->scanline(y)[x];
  864. }
  865. NonnullRefPtr<Image> Device::create_image(ImageFormat format, unsigned width, unsigned height, unsigned depth, unsigned levels, unsigned layers)
  866. {
  867. VERIFY(width > 0);
  868. VERIFY(height > 0);
  869. VERIFY(depth > 0);
  870. VERIFY(levels > 0);
  871. VERIFY(layers > 0);
  872. return adopt_ref(*new Image(format, width, height, depth, levels, layers));
  873. }
  874. void Device::set_sampler_config(unsigned sampler, SamplerConfig const& config)
  875. {
  876. m_samplers[sampler].set_config(config);
  877. }
  878. }