solidc
Robust collection of general-purpose cross-platform C libraries and data structures designed for rapid and safe development in C
Loading...
Searching...
No Matches
vec.h
1
23#ifndef __VEC_SIMD_H__
24#define __VEC_SIMD_H__
25
26#include <stdio.h>
27#ifdef __cplusplus
28extern "C" {
29#endif
30
31#include "simd.h"
32
33#include <math.h>
34#include <stdbool.h>
35#include <stdint.h>
36#include <string.h>
37
38/* ==================================================
39 Storage Types (Unaligned, Standard C Layout)
40 ================================================== */
41
52typedef struct Vec2 {
53 float x;
54 float y;
55} Vec2;
56
63typedef struct Vec3 {
64 float x;
65 float y;
66 float z;
67} Vec3;
68
75typedef struct Vec4 {
76 float x;
77 float y;
78 float z;
79 float w;
80} Vec4;
81
82/* ==================================================
83 Compute Types (128-bit Aligned, Register Mapped)
84 ================================================== */
85
103typedef union ALIGN(16) SimdVec2 {
104 simd_vec_t v;
105 float f32[4];
106 struct {
107 float x;
108 float y;
109 };
110} SimdVec2;
111
124typedef union ALIGN(16) SimdVec3 {
125 simd_vec_t v;
126 float f32[4];
127 struct {
128 float x;
129 float y;
130 float z;
131 float w;
132 };
133} SimdVec3;
134
150typedef union ALIGN(16) SimdVec4 {
151 simd_vec_t v;
152 float f32[4];
153 struct {
154 float x;
155 float y;
156 float z;
157 float w;
158 };
159} SimdVec4;
160
161/* ==================================================
162 SimdVec2 Operations
163 ================================================== */
164
170static inline void vec2_print(const Vec2 v, const char* name) {
171 if (name) {
172 printf("%s: ", name);
173 }
174 printf("Vec2(%f, %f)\n", v.x, v.y);
175}
176
182static inline void vec3_print(const Vec3 v, const char* name) {
183 if (name) {
184 printf("%s: ", name);
185 }
186 printf("Vec3(%.4f, %.4f, %.4f)\n", v.x, v.y, v.z);
187}
188
194static inline void vec3_print_ex(const Vec3 v, const char* name) {
195 if (name) {
196 printf("%s: ", name);
197 }
198 printf("Vec3(%f, %f, %f)\n", v.x, v.y, v.z);
199}
200
206static inline void vec4_print(const Vec4 v, const char* name) {
207 if (name) {
208 printf("%s: ", name);
209 }
210 printf("Vec4(%.4f, %.4f, %.4f, %.4f)\n", v.x, v.y, v.z, v.w);
211}
212
225static inline SimdVec2 vec2_load(Vec2 v) {
226 SimdVec2 res;
227 // Set Z/W to 0.0f to ensure dot products and other operations
228 // don't accumulate garbage from uninitialized memory
229 res.v = simd_set(v.x, v.y, 0.0f, 0.0f);
230 return res;
231}
232
244static inline Vec2 vec2_store(SimdVec2 v) {
245 // Direct scalar access from union is cleaner than simd_store for just 2 floats
246 return (Vec2){v.x, v.y};
247}
248
260static inline SimdVec2 vec2_add(SimdVec2 a, SimdVec2 b) { return (SimdVec2){.v = simd_add(a.v, b.v)}; }
261
271static inline SimdVec2 vec2_sub(SimdVec2 a, SimdVec2 b) { return (SimdVec2){.v = simd_sub(a.v, b.v)}; }
272
284static inline SimdVec2 vec2_mul(SimdVec2 a, float s) { return (SimdVec2){.v = simd_mul(a.v, simd_set1(s))}; }
285
299static inline float vec2_dot(SimdVec2 a, SimdVec2 b) {
300 // 2D dot can use 3D dot if Z=0 (which we ensure on load),
301 // but scalar extraction is more efficient for just 2 multiplies.
302 // SIMD multiply then scalar extraction:
303 // simd_vec_t mul = simd_mul(a.v, b.v);
304 // However, direct scalar access is simpler:
305 return (a.x * b.x) + (a.y * b.y);
306}
307
319static inline float vec2_length_sq(SimdVec2 v) { return vec2_dot(v, v); }
320
332static inline float vec2_length(SimdVec2 v) { return sqrtf(vec2_length_sq(v)); }
333
348static inline SimdVec2 vec2_normalize(SimdVec2 v) {
349 // Reuse simd_normalize3 since Z=0. This uses rsqrt or proper sqrt
350 // depending on platform and gives us proper normalization.
351 return (SimdVec2){.v = simd_normalize3(v.v)};
352}
353
375static inline SimdVec2 vec2_rotate(SimdVec2 v, float angle) {
376 float c = cosf(angle);
377 float s = sinf(angle);
378
379 // Broadcast components to all lanes for vectorized linear combination
380 simd_vec_t x = simd_splat_x(v.v);
381 simd_vec_t y = simd_splat_y(v.v);
382
383 // Rotation matrix columns:
384 // col0 = {c, s, 0, 0} - maps X component
385 // col1 = {-s, c, 0, 0} - maps Y component
386 simd_vec_t c0 = simd_set(c, s, 0.0f, 0.0f);
387 simd_vec_t c1 = simd_set(-s, c, 0.0f, 0.0f);
388
389 // result = x * col0 + y * col1
390 return (SimdVec2){.v = simd_add(simd_mul(x, c0), simd_mul(y, c1))};
391}
392
403static inline float vec2_distance_sq(SimdVec2 a, SimdVec2 b) { return vec2_length_sq(vec2_sub(b, a)); }
404
412static inline float vec2_distance(SimdVec2 a, SimdVec2 b) { return sqrtf(vec2_distance_sq(a, b)); }
413
424static inline SimdVec2 vec2_lerp(SimdVec2 a, SimdVec2 b, float t) {
425 // Result = a + (b - a) * t
426 SimdVec2 diff = vec2_sub(b, a);
427 SimdVec2 part = vec2_mul(diff, t);
428 return vec2_add(a, part);
429}
430
435static inline SimdVec2 vec2_project(SimdVec2 a, SimdVec2 b) {
436 float b_len_sq = vec2_length_sq(b);
437 if (b_len_sq < 1e-6f) return (SimdVec2){{0}}; // Handle zero-length B
438
439 float scale = vec2_dot(a, b) / b_len_sq;
440 return vec2_mul(b, scale);
441}
442
447static inline SimdVec2 vec2_reject(SimdVec2 a, SimdVec2 b) { return vec2_sub(a, vec2_project(a, b)); }
448
453static inline SimdVec2 vec2_perpendicular(SimdVec2 v) {
454 // 2D Perp is just swapping X and Y and negating one.
455 // We can use simd_set for clarity or swizzle macros if defined.
456 // X' = -Y, Y' = X
457 return (SimdVec2){.v = simd_set(-v.y, v.x, 0.0f, 0.0f)};
458}
459
460/* ==================================================
461 SimdVec3 Operations
462 ================================================== */
463
473static inline SimdVec3 vec3_load(Vec3 v) {
474 SimdVec3 res;
475 // Set W to 0.0f to make accidental dot4/hadd operations safe
476 res.v = simd_set(v.x, v.y, v.z, 0.0f);
477 return res;
478}
479
488static inline Vec3 vec3_store(SimdVec3 v) { return (Vec3){v.x, v.y, v.z}; }
489
499static inline SimdVec3 vec3_add(SimdVec3 a, SimdVec3 b) { return (SimdVec3){.v = simd_add(a.v, b.v)}; }
500
510static inline SimdVec3 vec3_sub(SimdVec3 a, SimdVec3 b) { return (SimdVec3){.v = simd_sub(a.v, b.v)}; }
511
521static inline SimdVec3 vec3_mul(SimdVec3 a, float s) { return (SimdVec3){.v = simd_mul(a.v, simd_set1(s))}; }
522
536static inline SimdVec3 vec3_scale(SimdVec3 a, SimdVec3 b) { return (SimdVec3){.v = simd_mul(a.v, b.v)}; }
537
553static inline float vec3_dot(SimdVec3 a, SimdVec3 b) { return simd_dot3(a.v, b.v); }
554
578static inline SimdVec3 vec3_cross(SimdVec3 a, SimdVec3 b) { return (SimdVec3){.v = simd_cross(a.v, b.v)}; }
579
592static inline float vec3_length_sq(SimdVec3 v) { return simd_length_sq3(v.v); }
593
602static inline float vec3_length(SimdVec3 v) { return simd_length3(v.v); }
603
617static inline SimdVec3 vec3_normalize(SimdVec3 v) { return (SimdVec3){.v = simd_normalize3(v.v)}; }
618
640static inline SimdVec3 vec3_normalize_fast(SimdVec3 v) { return (SimdVec3){.v = simd_normalize3_fast(v.v)}; }
641
649static inline float vec3_distance_sq(SimdVec3 a, SimdVec3 b) { return vec3_length_sq(vec3_sub(b, a)); }
650
658static inline float vec3_distance(SimdVec3 a, SimdVec3 b) { return sqrtf(vec3_distance_sq(a, b)); }
659
670static inline SimdVec3 vec3_lerp(SimdVec3 a, SimdVec3 b, float t) {
671 SimdVec3 diff = vec3_sub(b, a);
672 return vec3_add(a, vec3_mul(diff, t));
673}
674
684static inline SimdVec3 vec3_project(SimdVec3 a, SimdVec3 b) {
685 float b_len_sq = vec3_length_sq(b);
686 if (b_len_sq < 1e-6f) return (SimdVec3){{0}};
687
688 float scale = vec3_dot(a, b) / b_len_sq;
689 return vec3_mul(b, scale);
690}
691
701static inline SimdVec3 vec3_reject(SimdVec3 a, SimdVec3 b) { return vec3_sub(a, vec3_project(a, b)); }
702
715static inline SimdVec3 vec3_perpendicular(SimdVec3 v) {
716 // Strategy: Cross v with the world axis it is LEAST parallel to.
717 // If |v.y| < 0.9, cross with Y-axis (0,1,0).
718 // Otherwise cross with X-axis (1,0,0).
719
720 // We access scalars here because conditional logic is cleaner in scalar
721 // than trying to construct a branchless SIMD mask for this specific logic.
722 SimdVec3 axis;
723 if (fabsf(v.y) < 0.99f) {
724 axis = vec3_load((Vec3){0.0f, 1.0f, 0.0f}); // Up
725 } else {
726 axis = vec3_load((Vec3){1.0f, 0.0f, 0.0f}); // Right
727 }
728
729 return vec3_normalize(vec3_cross(v, axis));
730}
731
732/* ==================================================
733 SimdVec4 Operations
734 ================================================== */
735
745static inline SimdVec4 vec4_load(Vec4 v) {
746 SimdVec4 res;
747 res.v = simd_set(v.x, v.y, v.z, v.w);
748 return res;
749}
750
757static inline Vec4 vec4_store(SimdVec4 v) { return (Vec4){v.x, v.y, v.z, v.w}; }
758
767static inline float vec4_length(SimdVec4 v) { return simd_length4(v.v); }
768
777static inline float vec4_length_sq(SimdVec4 v) { return simd_length_sq4(v.v); }
778
786static inline SimdVec4 vec4_add(SimdVec4 a, SimdVec4 b) { return (SimdVec4){.v = simd_add(a.v, b.v)}; }
787
795static inline SimdVec4 vec4_sub(SimdVec4 a, SimdVec4 b) { return (SimdVec4){.v = simd_sub(a.v, b.v)}; }
796
804static inline SimdVec4 vec4_mul(SimdVec4 a, float s) { return (SimdVec4){.v = simd_mul(a.v, simd_set1(s))}; }
805
816static inline SimdVec4 vec4_div(SimdVec4 a, float s) {
817 // Check against a small epsilon to avoid Division by Zero
818 if (fabsf(s) < 1e-8f) {
819 return (SimdVec4){.v = simd_set_zero()};
820 }
821
822 // Multiplication by reciprocal is faster than division
823 return vec4_mul(a, 1.0f / s);
824}
825
838static inline float vec4_dot(SimdVec4 a, SimdVec4 b) { return simd_dot4(a.v, b.v); }
839
849static inline SimdVec4 vec4_scale(SimdVec4 a, SimdVec4 b) { return (SimdVec4){.v = simd_mul(a.v, b.v)}; }
850
859static inline SimdVec4 vec4_normalize(SimdVec4 a) { return (SimdVec4){.v = simd_normalize4(a.v)}; }
860
861/* ==================================================
862 Rotations (Optimized)
863 ================================================== */
864
891static inline SimdVec4 vec4_rotate_x(SimdVec4 v, float angle) {
892 float c = cosf(angle);
893 float s = sinf(angle);
894
895 // Apply rotation matrix to Y and Z components
896 // X and W are preserved
897 float ny = v.y * c - v.z * s;
898 float nz = v.y * s + v.z * c;
899
900 return (SimdVec4){.v = simd_set(v.x, ny, nz, v.w)};
901}
902
924static inline SimdVec4 vec4_rotate_y(SimdVec4 v, float angle) {
925 float c = cosf(angle);
926 float s = sinf(angle);
927
928 // Apply rotation matrix to X and Z components
929 // Y and W are preserved
930 float nx = v.x * c + v.z * s;
931 float nz = -v.x * s + v.z * c;
932
933 return (SimdVec4){.v = simd_set(nx, v.y, nz, v.w)};
934}
935
959static inline SimdVec4 vec4_rotate_z(SimdVec4 v, float angle) {
960 float c = cosf(angle);
961 float s = sinf(angle);
962
963 // Apply rotation matrix to X and Y components
964 // Z and W are preserved
965 float nx = v.x * c - v.y * s;
966 float ny = v.x * s + v.y * c;
967
968 return (SimdVec4){.v = simd_set(nx, ny, v.z, v.w)};
969}
970
971/* ==================================================
972 Utility Functions
973 ================================================== */
974
993static inline bool vec3_equals(Vec3 a, Vec3 b, float epsilon) {
994 // Load with matching Z/W handling to ensure comparison works
995 SimdVec3 sa = vec3_load(a);
996 SimdVec3 sb = vec3_load(b);
997 return simd_equals_eps(sa.v, sb.v, epsilon);
998}
999
1013static inline bool vec4_equals(Vec4 a, Vec4 b, float epsilon) {
1014 SimdVec4 sa = vec4_load(a);
1015 SimdVec4 sb = vec4_load(b);
1016 return simd_equals_eps(sa.v, sb.v, epsilon);
1017}
1018
1026static inline float vec4_distance_sq(SimdVec4 a, SimdVec4 b) { return vec4_length_sq(vec4_sub(b, a)); }
1027
1035static inline float vec4_distance(SimdVec4 a, SimdVec4 b) { return sqrtf(vec4_distance_sq(a, b)); }
1036
1047static inline SimdVec4 vec4_lerp(SimdVec4 a, SimdVec4 b, float t) {
1048 SimdVec4 diff = vec4_sub(b, a);
1049 return vec4_add(a, vec4_mul(diff, t));
1050}
1051
1061static inline SimdVec4 vec4_project(SimdVec4 a, SimdVec4 b) {
1062 float b_len_sq = vec4_length_sq(b);
1063 if (b_len_sq < 1e-6f) return (SimdVec4){{0}};
1064
1065 float scale = vec4_dot(a, b) / b_len_sq;
1066 return vec4_mul(b, scale);
1067}
1068
1078static inline SimdVec4 vec4_reject(SimdVec4 a, SimdVec4 b) { return vec4_sub(a, vec4_project(a, b)); }
1079
1083static inline SimdVec4 vec4_min(SimdVec4 a, SimdVec4 b) { return (SimdVec4){.v = simd_min(a.v, b.v)}; }
1084
1088static inline SimdVec4 vec4_max(SimdVec4 a, SimdVec4 b) { return (SimdVec4){.v = simd_max(a.v, b.v)}; }
1089
1093static inline SimdVec4 vec4_abs(SimdVec4 v) { return (SimdVec4){.v = simd_abs(v.v)}; }
1094
1098static inline float vec4_sum(SimdVec4 v) { return simd_hadd(v.v); }
1099
1100#ifdef __cplusplus
1101}
1102#endif
1103
1104#endif // __VEC_SIMD_H__
Portable Single Instruction Multiple Data (SIMD) Intrinsics Wrapper.
2D vector storage type (8 bytes, unaligned).
Definition vec.h:52
float y
Y component.
Definition vec.h:54
float x
X component.
Definition vec.h:53
3D vector storage type (12 bytes, unaligned).
Definition vec.h:63
float x
X component.
Definition vec.h:64
float z
Z component.
Definition vec.h:66
float y
Y component.
Definition vec.h:65
4D vector storage type (16 bytes, naturally aligned).
Definition vec.h:75
float y
Y component.
Definition vec.h:77
float w
W component (also used for homogeneous coordinates)
Definition vec.h:79
float x
X component.
Definition vec.h:76
float z
Z component.
Definition vec.h:78