30 SimdVec3 sv0 = vec3_load(v0);
31 SimdVec3 sv1 = vec3_load(v1);
33 sv0 = vec3_normalize(sv0);
35 float dot = vec3_dot(sv0, sv1);
36 sv1 = vec3_sub(sv1, vec3_mul(sv0, dot));
37 sv1 = vec3_normalize(sv1);
39 SimdVec3 sv2 = vec3_cross(sv0, sv1);
41 return (
OrthonormalBasis){vec3_store(sv0), vec3_store(sv1), vec3_store(sv2)};
57 Mat3 V = mat3_identity();
59 const int MAX_ITERS = 32;
60 const float EPSILON = 1e-10f;
62 for (
int iter = 0; iter < MAX_ITERS; ++iter) {
64 float max = fabsf(A.m[0][1]);
66 if (fabsf(A.m[0][2]) > max) {
69 max = fabsf(A.m[0][2]);
71 if (fabsf(A.m[1][2]) > max) {
74 max = fabsf(A.m[1][2]);
77 if (max < EPSILON)
break;
79 float app = A.m[p][p];
80 float aqq = A.m[q][q];
81 float apq = A.m[p][q];
83 float phi = 0.5f * atanf((2.0f * apq) / (aqq - app + 1e-20f));
87 for (
int r = 0; r < 3; ++r) {
88 float arp = A.m[r][p];
89 float arq = A.m[r][q];
90 A.m[r][p] = c * arp - s * arq;
91 A.m[r][q] = s * arp + c * arq;
94 for (
int r = 0; r < 3; ++r) {
95 float arp = A.m[p][r];
96 float arq = A.m[q][r];
97 A.m[p][r] = c * arp - s * arq;
98 A.m[q][r] = s * arp + c * arq;
101 A.m[p][p] = c * c * app - 2 * s * c * apq + s * s * aqq;
102 A.m[q][q] = s * s * app + 2 * s * c * apq + c * c * aqq;
103 A.m[p][q] = A.m[q][p] = 0.0f;
105 for (
int r = 0; r < 3; ++r) {
106 float vrp = V.m[r][p];
107 float vrq = V.m[r][q];
108 V.m[r][p] = c * vrp - s * vrq;
109 V.m[r][q] = s * vrp + c * vrq;
123 for (
int i = 0; i < 3; ++i) {
124 for (
int j = 0; j < 3; ++j) {
126 for (
int k = 0; k < 3; ++k) {
127 ATA.m[i][j] += A.m[i][k] * A.m[j][k];
135 int order[3] = {0, 1, 2};
138 for (
int i = 0; i < 2; ++i) {
139 for (
int j = i + 1; j < 3; ++j) {
140 if (vals[order[i]] < vals[order[j]]) {
148 Vec3 sorted_eigen = {vals[order[0]], vals[order[1]], vals[order[2]]};
151 for (
int i = 0; i < 3; ++i) {
152 for (
int j = 0; j < 3; ++j) {
153 V_sorted.m[i][j] = V->m[order[i]][j];
158 S->
x = sqrtf(fmaxf(0.0f, sorted_eigen.
x));
159 S->
y = sqrtf(fmaxf(0.0f, sorted_eigen.
y));
160 S->
z = sqrtf(fmaxf(0.0f, sorted_eigen.
z));
162 SimdVec3 colA[3] = {vec3_load((
Vec3){A.m[0][0], A.m[0][1], A.m[0][2]}),
163 vec3_load((
Vec3){A.m[1][0], A.m[1][1], A.m[1][2]}),
164 vec3_load((
Vec3){A.m[2][0], A.m[2][1], A.m[2][2]})};
166 float* s_arr = (
float*)S;
167 for (
int i = 0; i < 3; ++i) {
168 float sigma = s_arr[i];
170 Vec3 v_col = {V->m[i][0], V->m[i][1], V->m[i][2]};
172 vec3_add(vec3_add(vec3_mul(colA[0], v_col.
x), vec3_mul(colA[1], v_col.
y)), vec3_mul(colA[2], v_col.
z));
173 SimdVec3 u_col = vec3_mul(Av, 1.0f / sigma);
174 Vec3 res = vec3_store(u_col);
179 U->m[i][0] = U->m[i][1] = U->m[i][2] = 0.0f;
183 if (S->
y > 1e-6f && S->
z < 1e-6f) {
184 SimdVec3 u0 = vec3_load((
Vec3){U->m[0][0], U->m[0][1], U->m[0][2]});
185 SimdVec3 u1 = vec3_load((
Vec3){U->m[1][0], U->m[1][1], U->m[1][2]});
186 SimdVec3 u2 = vec3_cross(u0, u1);
187 Vec3 res = vec3_store(u2);
193 if (mat3_determinant(*U) < 0.0f) {
194 U->m[2][0] = -U->m[2][0];
195 U->m[2][1] = -U->m[2][1];
196 U->m[2][2] = -U->m[2][2];
206 for (
int i = 0; i < 4; ++i) {
210 for (
int i = 0; i < 4; ++i) {
212 for (
int j = 0; j < i; ++j) {
213 float r = vec4_dot(q[j], v[i]);
217 q[i] = vec4_sub(q[i], vec4_mul(q[j], r));
220 float norm = vec4_length(q[i]);
221 if (norm < 1e-6f) norm = 1e-6f;
224 q[i] = vec4_mul(q[i], 1.0f / norm);
227 for (
int j = 0; j < i; ++j) {
232 for (
int i = 0; i < 4; ++i) {
240static inline void mat4_power_iteration(
Mat4 A,
Vec4* eigenvector,
float* eigenvalue,
int max_iter,
float tol) {
241 SimdVec4 v = vec4_load((
Vec4){1.0f, 0.0f, 0.0f, 0.0f});
243 float lambda_old = 0.0f;
245 for (
int iter = 0; iter < max_iter; ++iter) {
246 Vec4 v_scalar = vec4_store(v);
247 Vec4 Av_scalar = mat4_mul_vec4(A, v_scalar);
248 Av = vec4_load(Av_scalar);
250 *eigenvalue = vec4_dot(Av, v);
251 Av = vec4_normalize(Av);
253 if (fabsf(*eigenvalue - lambda_old) < tol) {
256 lambda_old = *eigenvalue;
260 *eigenvector = vec4_store(v);
263static inline float mat4_norm_frobenius(
Mat4 A) {
264 SimdVec4 c0 = {.v = A.cols[0]};
265 SimdVec4 c1 = {.v = A.cols[1]};
266 SimdVec4 c2 = {.v = A.cols[2]};
267 SimdVec4 c3 = {.v = A.cols[3]};
270 sum += vec4_dot(c0, c0);
271 sum += vec4_dot(c1, c1);
272 sum += vec4_dot(c2, c2);
273 sum += vec4_dot(c3, c3);
278static inline bool mat3_is_positive_definite(
Mat3 A) {
279 if (A.m[0][0] <= 0.0f)
return false;
280 float det2 = A.m[0][0] * A.m[1][1] - A.m[1][0] * A.m[0][1];
281 if (det2 <= 0.0f)
return false;
282 if (mat3_determinant(A) <= 0.0f)
return false;
286static inline float mat4_condition_number(
Mat4 A) {
287 float norm_A = mat4_norm_frobenius(A);
288 Mat4 A_inv = mat4_inverse(A);
289 float norm_A_inv = mat4_norm_frobenius(A_inv);
290 return norm_A * norm_A_inv;
296 const float tolerance = 1e-6f;
298 *L = mat4_identity();
299 *P = mat4_identity();
301 for (
int k = 0; k < 4; ++k) {
303 float max_val = fabsf(U->m[k][k]);
304 for (
int i = k + 1; i < 4; ++i) {
305 float val = fabsf(U->m[k][i]);
312 if (max_val < tolerance)
return false;
314 if (pivot_row != k) {
315 for (
int j = 0; j < 4; ++j) {
318 U->m[j][k] = U->m[j][pivot_row];
319 U->m[j][pivot_row] = tmp;
321 P->m[j][k] = P->m[j][pivot_row];
322 P->m[j][pivot_row] = tmp;
325 L->m[j][k] = L->m[j][pivot_row];
326 L->m[j][pivot_row] = tmp;
331 for (
int i = k + 1; i < 4; ++i) {
332 float factor = U->m[k][i] / U->m[k][k];
334 for (
int j = k; j < 4; ++j) {
335 U->m[j][i] -= factor * U->m[j][k];
342static inline Vec4 forward_substitution_mat4(
Mat4 L,
Vec4 b) {
345 x_arr[0] = b.
x / L.m[0][0];
346 x_arr[1] = (b.
y - L.m[0][1] * x_arr[0]) / L.m[1][1];
347 x_arr[2] = (b.z - (L.m[0][2] * x_arr[0] + L.m[1][2] * x_arr[1])) / L.m[2][2];
348 x_arr[3] = (b.w - (L.m[0][3] * x_arr[0] + L.m[1][3] * x_arr[1] + L.m[2][3] * x_arr[2])) / L.m[3][3];
352static inline Vec4 backward_substitution_mat4(
Mat4 U,
Vec4 b) {
355 x_arr[3] = b.
w / U.m[3][3];
356 x_arr[2] = (b.
z - U.m[3][2] * x_arr[3]) / U.m[2][2];
357 x_arr[1] = (b.y - (U.m[2][1] * x_arr[2] + U.m[3][1] * x_arr[3])) / U.m[1][1];
358 x_arr[0] = (b.x - (U.m[1][0] * x_arr[1] + U.m[2][0] * x_arr[2] + U.m[3][0] * x_arr[3])) / U.m[0][0];
364 if (!mat4_lu(A, &L, &U, &P)) {
368 Pb.
x = P.m[0][0] * b.
x + P.m[1][0] * b.
y + P.m[2][0] * b.
z + P.m[3][0] * b.
w;
369 Pb.
y = P.m[0][1] * b.
x + P.m[1][1] * b.
y + P.m[2][1] * b.
z + P.m[3][1] * b.
w;
370 Pb.
z = P.m[0][2] * b.
x + P.m[1][2] * b.
y + P.m[2][2] * b.
z + P.m[3][2] * b.
w;
371 Pb.
w = P.m[0][3] * b.
x + P.m[1][3] * b.
y + P.m[2][3] * b.
z + P.m[3][3] * b.
w;
373 Vec4 y = forward_substitution_mat4(L, Pb);
374 return backward_substitution_mat4(U, y);
379 if (!mat3_lu(A, &L, &U, &P))
return (
Vec3){0};
382 Pb.
x = P.m[0][0] * b.
x + P.m[1][0] * b.
y + P.m[2][0] * b.
z;
383 Pb.
y = P.m[0][1] * b.
x + P.m[1][1] * b.
y + P.m[2][1] * b.
z;
384 Pb.
z = P.m[0][2] * b.
x + P.m[1][2] * b.
y + P.m[2][2] * b.
z;
386 Vec3 y = forward_substitution_mat3(L, Pb);
387 return backward_substitution_mat3(U, y);
Matrix operations and mathematical utilities.
A 3x3 matrix for 2D transformations and 3D rotations.
A 4x4 matrix for 3D transformations.
3D vector storage type (12 bytes, unaligned).
4D vector storage type (16 bytes, naturally aligned).
float w
W component (also used for homogeneous coordinates)