28typedef struct ALIGN(16)
Mat3 {
39typedef struct ALIGN(16)
Mat4 {
53static inline Mat3 mat3_new_column_major(
float m00,
float m01,
float m02,
float m10,
float m11,
float m12,
float m20,
54 float m21,
float m22) {
69static inline Mat4 mat4_new_column_major(
float m00,
float m01,
float m02,
float m03,
float m10,
float m11,
float m12,
70 float m13,
float m20,
float m21,
float m22,
float m23,
float m30,
float m31,
71 float m32,
float m33) {
92static inline void mat3_print(
Mat3 mat,
const char* name) {
93 printf(
"%s = \n", name);
94 for (
int i = 0; i < 3; i++) {
96 for (
int j = 0; j < 3; j++) {
97 printf(
"%8.4f", mat.m[j][i]);
98 if (j < 2) printf(
", ");
105static inline void mat4_print(
Mat4 m,
const char* name) {
106 printf(
"%s = \n", name);
107 for (
int i = 0; i < 4; i++) {
109 for (
int j = 0; j < 4; j++) {
110 printf(
"%6.3f ", m.m[j][i]);
120static inline Mat3 mat3_identity(
void) {
return (
Mat3){{{1.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 0.0f}, {0.0f, 0.0f, 1.0f}}}; }
122static inline bool mat3_equal(
Mat3 a,
Mat3 b) {
123 static float EPSILON = 1e-6f;
124 const float* pa = &a.m[0][0];
125 const float* pb = &b.m[0][0];
128 if (!simd_equals_eps(simd_load(pa), simd_load(pb), EPSILON))
return false;
131 if (!simd_equals_eps(simd_load(pa + 4), simd_load(pb + 4), EPSILON))
return false;
134 float da = pa[8] - pb[8];
135 return fabsf(da) <= EPSILON;
138static inline bool mat4_equal(
Mat4 a,
Mat4 b) {
139 static float EPSILON = 1e-6f;
141 return simd_equals_eps(a.cols[0], b.cols[0], EPSILON) && simd_equals_eps(a.cols[1], b.cols[1], EPSILON) &&
142 simd_equals_eps(a.cols[2], b.cols[2], EPSILON) && simd_equals_eps(a.cols[3], b.cols[3], EPSILON);
145static inline Mat3 mat3_diag(
Mat3 m) {
146 return (
Mat3){{{m.m[0][0], 0.0f, 0.0f}, {0.0f, m.m[1][1], 0.0f}, {0.0f, 0.0f, m.m[2][2]}}};
149static inline Mat4 mat4_identity(
void) {
151 m.cols[0] = simd_set(1.0f, 0.0f, 0.0f, 0.0f);
152 m.cols[1] = simd_set(0.0f, 1.0f, 0.0f, 0.0f);
153 m.cols[2] = simd_set(0.0f, 0.0f, 1.0f, 0.0f);
154 m.cols[3] = simd_set(0.0f, 0.0f, 0.0f, 1.0f);
158static inline Mat4 mat4_diag(
Mat4 m) {
162 r.cols[0] = simd_set(simd_get_x(m.cols[0]), 0, 0, 0);
165 r.cols[1] = simd_set(0, m.m[1][1], 0, 0);
166 r.cols[2] = simd_set(0, 0, m.m[2][2], 0);
167 r.cols[3] = simd_set(0, 0, 0, m.m[3][3]);
178 for (
int col = 0; col < 3; col++) {
179 for (
int row = 0; row < 3; row++) {
180 result.m[col][row] = 0.0f;
181 for (
int k = 0; k < 3; k++) {
182 result.m[col][row] += a.m[k][row] * b.m[col][k];
189static inline Mat3 mat3_add_scalar(
Mat3 m,
float scalar) {
191 simd_vec_t s_vec = simd_set1(scalar);
194 const float* src = &m.m[0][0];
195 float* dst = &result.m[0][0];
197 simd_store(dst, simd_add(simd_load(src), s_vec));
198 simd_store(dst + 4, simd_add(simd_load(src + 4), s_vec));
201 dst[8] = src[8] + scalar;
207 const float* pa = &a.m[0][0];
208 const float* pb = &b.m[0][0];
209 float* pr = &result.m[0][0];
212 simd_store(pr, simd_add(simd_load(pa), simd_load(pb)));
215 simd_store(pr + 4, simd_add(simd_load(pa + 4), simd_load(pb + 4)));
218 pr[8] = pa[8] + pb[8];
222static inline Mat3 mat3_scalar_mul(
Mat3 m,
float scalar) {
224 simd_vec_t s_vec = simd_set1(scalar);
226 const float* src = &m.m[0][0];
227 float* dst = &result.m[0][0];
229 simd_store(dst, simd_mul(simd_load(src), s_vec));
230 simd_store(dst + 4, simd_mul(simd_load(src + 4), s_vec));
231 dst[8] = src[8] * scalar;
236static inline float mat3_determinant(
Mat3 m) {
238 return m.m[0][0] * (m.m[1][1] * m.m[2][2] - m.m[2][1] * m.m[1][2]) -
239 m.m[0][1] * (m.m[1][0] * m.m[2][2] - m.m[2][0] * m.m[1][2]) +
240 m.m[0][2] * (m.m[1][0] * m.m[2][1] - m.m[2][0] * m.m[1][1]);
246 const float tolerance = 1e-6f;
249 *L = mat3_identity();
250 *P = mat3_identity();
252 for (
int k = 0; k < 3; ++k) {
255 float max_val = fabsf(U->m[k][k]);
256 for (
int i = k + 1; i < 3; ++i) {
257 float val = fabsf(U->m[k][i]);
263 if (max_val < tolerance)
return false;
266 if (pivot_row != k) {
267 for (
int j = 0; j < 3; ++j) {
270 U->m[j][k] = U->m[j][pivot_row];
271 U->m[j][pivot_row] = tmp;
273 P->m[j][k] = P->m[j][pivot_row];
274 P->m[j][pivot_row] = tmp;
277 L->m[j][k] = L->m[j][pivot_row];
278 L->m[j][pivot_row] = tmp;
283 for (
int i = k + 1; i < 3; ++i) {
284 float factor = U->m[k][i] / U->m[k][k];
286 for (
int j = k; j < 3; ++j) {
287 U->m[j][i] -= factor * U->m[j][k];
294static inline Vec3 forward_substitution_mat3(
Mat3 L,
Vec3 b) {
296 x.
x = b.
x / L.m[0][0];
297 x.
y = (b.
y - L.m[0][1] * x.
x) / L.m[1][1];
298 x.
z = (b.
z - L.m[0][2] * x.
x - L.m[1][2] * x.
y) / L.m[2][2];
302static inline Vec3 backward_substitution_mat3(
Mat3 U,
Vec3 b) {
304 x.
z = b.
z / U.m[2][2];
305 x.
y = (b.
y - U.m[2][1] * x.
z) / U.m[1][1];
306 x.
x = (b.
x - U.m[1][0] * x.
y - U.m[2][0] * x.
z) / U.m[0][0];
310static inline Mat3 mat3_exp(
Mat3 A,
int terms) {
311 Mat3 result = mat3_identity();
312 Mat3 power_A = mat3_identity();
313 float factorial = 1.0f;
314 for (
int n = 1; n < terms; ++n) {
315 factorial *= (float)n;
316 power_A = mat3_mul(power_A, A);
317 Mat3 term = mat3_scalar_mul(power_A, 1.0f / factorial);
318 result = mat3_add(result, term);
326 for (
int i = 0; i < 4; i++) {
332 simd_vec_t b_col = b.cols[i];
335 simd_vec_t x = simd_splat_x(b_col);
336 simd_vec_t y = simd_splat_y(b_col);
337 simd_vec_t z = simd_splat_z(b_col);
338 simd_vec_t w = simd_splat_w(b_col);
342 simd_vec_t r = simd_mul(a.cols[0], x);
343 r = simd_add(r, simd_mul(a.cols[1], y));
344 r = simd_add(r, simd_mul(a.cols[2], z));
345 r = simd_add(r, simd_mul(a.cols[3], w));
357 simd_vec_t vx = simd_set1(v.
x);
358 simd_vec_t vy = simd_set1(v.
y);
359 simd_vec_t vz = simd_set1(v.
z);
363 simd_vec_t c0 = simd_load(&m.m[0][0]);
365 simd_vec_t c1 = simd_load(&m.m[1][0]);
367 simd_vec_t c2 = simd_load(&m.m[2][0]);
370 simd_vec_t res = simd_mul(c0, vx);
371 res = simd_add(res, simd_mul(c1, vy));
372 res = simd_add(res, simd_mul(c2, vz));
376 simd_vec_t temp = res;
388 simd_vec_t vec = simd_load((
const float*)&v);
390 simd_vec_t x = simd_splat_x(vec);
391 simd_vec_t y = simd_splat_y(vec);
392 simd_vec_t z = simd_splat_z(vec);
393 simd_vec_t w = simd_splat_w(vec);
395 simd_vec_t res = simd_mul(m.cols[0], x);
396 res = simd_add(res, simd_mul(m.cols[1], y));
397 res = simd_add(res, simd_mul(m.cols[2], z));
398 res = simd_add(res, simd_mul(m.cols[3], w));
401 simd_store((
float*)&out, res);
405static inline Mat4 mat4_div(
Mat4 a,
float b) {
407 simd_vec_t div = simd_set1(b);
408 result.cols[0] = simd_div(a.cols[0], div);
409 result.cols[1] = simd_div(a.cols[1], div);
410 result.cols[2] = simd_div(a.cols[2], div);
411 result.cols[3] = simd_div(a.cols[3], div);
419static inline Mat4 mat4_translate(
Vec3 translation) {
420 Mat4 m = mat4_identity();
421 m.cols[3] = simd_set(translation.
x, translation.
y, translation.
z, 1.0f);
425static inline Mat4 mat4_scale(
Vec3 scale) {
426 Mat4 m = mat4_identity();
427 m.cols[0] = simd_mul(m.cols[0], simd_set1(scale.
x));
428 m.cols[1] = simd_mul(m.cols[1], simd_set1(scale.
y));
429 m.cols[2] = simd_mul(m.cols[2], simd_set1(scale.
z));
433static inline Mat4 mat4_rotate_x(
float angle) {
434 float c = cosf(angle);
435 float s = sinf(angle);
436 Mat4 m = mat4_identity();
437 m.cols[1] = simd_set(0.0f, c, s, 0.0f);
438 m.cols[2] = simd_set(0.0f, -s, c, 0.0f);
442static inline Mat4 mat4_rotate_y(
float angle) {
443 float c = cosf(angle);
444 float s = sinf(angle);
445 Mat4 m = mat4_identity();
446 m.cols[0] = simd_set(c, 0.0f, -s, 0.0f);
447 m.cols[2] = simd_set(s, 0.0f, c, 0.0f);
451static inline Mat4 mat4_rotate_z(
float angle) {
452 float c = cosf(angle);
453 float s = sinf(angle);
454 Mat4 m = mat4_identity();
455 m.cols[0] = simd_set(c, s, 0.0f, 0.0f);
456 m.cols[1] = simd_set(-s, c, 0.0f, 0.0f);
460static inline Mat4 mat4_rotate(
Vec3 axis,
float angle) {
461 SimdVec3 a = vec3_normalize(vec3_load(axis));
462 float c = cosf(angle);
463 float s = sinf(angle);
466 Mat4 m = mat4_identity();
469 m.m[0][0] = t * a.x * a.x + c;
470 m.m[1][1] = t * a.y * a.y + c;
471 m.m[2][2] = t * a.z * a.z + c;
476 m.m[1][0] = t * a.x * a.y - s * a.z;
480 m.m[2][0] = t * a.x * a.z + s * a.y;
484 m.m[0][1] = t * a.x * a.y + s * a.z;
488 m.m[2][1] = t * a.y * a.z - s * a.x;
492 m.m[0][2] = t * a.x * a.z - s * a.y;
496 m.m[1][2] = t * a.y * a.z + s * a.x;
505static inline Mat4 mat4_transpose(
Mat4 m) {
515static inline float mat4_determinant(
Mat4 m) {
517 float m00 = m.m[0][0], m10 = m.m[0][1], m20 = m.m[0][2], m30 = m.m[0][3];
519 float m01 = m.m[1][0], m11 = m.m[1][1], m21 = m.m[1][2], m31 = m.m[1][3];
521 float m02 = m.m[2][0], m12 = m.m[2][1], m22 = m.m[2][2], m32 = m.m[2][3];
523 float m03 = m.m[3][0], m13 = m.m[3][1], m23 = m.m[3][2], m33 = m.m[3][3];
528 float det = m00 * (m11 * (m22 * m33 - m32 * m23) - m21 * (m12 * m33 - m32 * m13) + m31 * (m12 * m23 - m22 * m13)) -
529 m10 * (m01 * (m22 * m33 - m32 * m23) - m21 * (m02 * m33 - m32 * m03) + m31 * (m02 * m23 - m22 * m03)) +
530 m20 * (m01 * (m12 * m33 - m32 * m13) - m11 * (m02 * m33 - m32 * m03) + m31 * (m02 * m13 - m12 * m03)) -
531 m30 * (m01 * (m12 * m23 - m22 * m13) - m11 * (m02 * m23 - m22 * m03) + m21 * (m02 * m13 - m12 * m03));
536static inline Mat4 mat4_inverse(
Mat4 m) {
538 float m00 = m.m[0][0], m01 = m.m[1][0], m02 = m.m[2][0], m03 = m.m[3][0];
539 float m10 = m.m[0][1], m11 = m.m[1][1], m12 = m.m[2][1], m13 = m.m[3][1];
540 float m20 = m.m[0][2], m21 = m.m[1][2], m22 = m.m[2][2], m23 = m.m[3][2];
541 float m30 = m.m[0][3], m31 = m.m[1][3], m32 = m.m[2][3], m33 = m.m[3][3];
545 float Fac0 = m22 * m33 - m32 * m23;
546 float Fac1 = m21 * m33 - m31 * m23;
547 float Fac2 = m21 * m32 - m31 * m22;
548 float Fac3 = m20 * m33 - m30 * m23;
549 float Fac4 = m20 * m32 - m30 * m22;
550 float Fac5 = m20 * m31 - m30 * m21;
552 float Fac6 = m02 * m13 - m12 * m03;
553 float Fac7 = m01 * m13 - m11 * m03;
554 float Fac8 = m01 * m12 - m11 * m02;
555 float Fac9 = m00 * m13 - m10 * m03;
556 float Fac10 = m00 * m12 - m10 * m02;
557 float Fac11 = m00 * m11 - m10 * m01;
562 simd_vec_t col0 = simd_set((m11 * Fac0 - m12 * Fac1 + m13 * Fac2), -(m10 * Fac0 - m12 * Fac3 + m13 * Fac4),
563 (m10 * Fac1 - m11 * Fac3 + m13 * Fac5), -(m10 * Fac2 - m11 * Fac4 + m12 * Fac5));
566 simd_vec_t col1 = simd_set(-(m01 * Fac0 - m02 * Fac1 + m03 * Fac2), (m00 * Fac0 - m02 * Fac3 + m03 * Fac4),
567 -(m00 * Fac1 - m01 * Fac3 + m03 * Fac5), (m00 * Fac2 - m01 * Fac4 + m02 * Fac5));
570 simd_vec_t col2 = simd_set((m31 * Fac6 - m32 * Fac7 + m33 * Fac8), -(m30 * Fac6 - m32 * Fac9 + m33 * Fac10),
571 (m30 * Fac7 - m31 * Fac9 + m33 * Fac11), -(m30 * Fac8 - m31 * Fac10 + m32 * Fac11));
574 simd_vec_t col3 = simd_set(-(m21 * Fac6 - m22 * Fac7 + m23 * Fac8), (m20 * Fac6 - m22 * Fac9 + m23 * Fac10),
575 -(m20 * Fac7 - m21 * Fac9 + m23 * Fac11), (m20 * Fac8 - m21 * Fac10 + m22 * Fac11));
580 simd_vec_t input_col0 = simd_set(m00, m10, m20, m30);
581 float det = simd_dot4(input_col0, col0);
584 if (fabsf(det) < 1e-8f) {
585 return mat4_identity();
589 simd_vec_t inv_det_vec = simd_set1(1.0f / det);
592 inv.cols[0] = simd_mul(col0, inv_det_vec);
593 inv.cols[1] = simd_mul(col1, inv_det_vec);
594 inv.cols[2] = simd_mul(col2, inv_det_vec);
595 inv.cols[3] = simd_mul(col3, inv_det_vec);
604static inline Mat4 mat4_ortho(
float left,
float right,
float bottom,
float top,
float near,
float far) {
605 Mat4 m = mat4_identity();
607 m.cols[0] = simd_set(2.0f / (right - left), 0, 0, 0);
608 m.cols[1] = simd_set(0, 2.0f / (top - bottom), 0, 0);
609 m.cols[2] = simd_set(0, 0, -2.0f / (far - near), 0);
612 m.cols[3] = simd_set(-(right + left) / (right - left), -(top + bottom) / (top - bottom),
613 -(far + near) / (far - near), 1.0f);
617static inline Mat4 mat4_perspective(
float fov_radians,
float aspect,
float near,
float far) {
618 float tan_half_fov = tanf(fov_radians / 2.0f);
620 m.cols[0] = simd_set(1.0f / (aspect * tan_half_fov), 0, 0, 0);
621 m.cols[1] = simd_set(0, 1.0f / tan_half_fov, 0, 0);
622 m.cols[2] = simd_set(0, 0, -(far + near) / (far - near), -1.0f);
623 m.cols[3] = simd_set(0, 0, -(2.0f * far * near) / (far - near), 0.0f);
627static inline Mat4 mat4_look_at(SimdVec3 eye, SimdVec3 target, SimdVec3 up) {
628 SimdVec3 z = vec3_normalize(vec3_sub(eye, target));
629 SimdVec3 x = vec3_normalize(vec3_cross(up, z));
630 SimdVec3 y = vec3_cross(z, x);
633 view.cols[0] = simd_set(x.x, y.x, z.x, 0.0f);
634 view.cols[1] = simd_set(x.y, y.y, z.y, 0.0f);
635 view.cols[2] = simd_set(x.z, y.z, z.z, 0.0f);
638 view.cols[3] = simd_set(-vec3_dot(x, eye), -vec3_dot(y, eye), -vec3_dot(z, eye), 1.0f);
644 res.cols[0] = simd_add(a.cols[0], b.cols[0]);
645 res.cols[1] = simd_add(a.cols[1], b.cols[1]);
646 res.cols[2] = simd_add(a.cols[2], b.cols[2]);
647 res.cols[3] = simd_add(a.cols[3], b.cols[3]);
653 res.cols[0] = simd_sub(a.cols[0], b.cols[0]);
654 res.cols[1] = simd_sub(a.cols[1], b.cols[1]);
655 res.cols[2] = simd_sub(a.cols[2], b.cols[2]);
656 res.cols[3] = simd_sub(a.cols[3], b.cols[3]);
660static inline Mat4 mat4_scalar_mul(
Mat4 a,
float s) {
662 simd_vec_t v = simd_set1(s);
663 res.cols[0] = simd_mul(a.cols[0], v);
664 res.cols[1] = simd_mul(a.cols[1], v);
665 res.cols[2] = simd_mul(a.cols[2], v);
666 res.cols[3] = simd_mul(a.cols[3], v);
#define simd_transpose4(r0, r1, r2, r3)
Transposes a 4x4 matrix defined by 4 row/col vectors in-place.
A 3x3 matrix for 2D transformations and 3D rotations.
A 4x4 matrix for 3D transformations.
3D vector storage type (12 bytes, unaligned).
4D vector storage type (16 bytes, naturally aligned).