solidc — example listing: /home/runner/work/solidc/solidc/include/vec.h

#ifndef __VEC_SIMD_H__

#define __VEC_SIMD_H__


#include <stdio.h>

#ifdef __cplusplus

extern "C" {

#endif


#include "simd.h"


#include <math.h>

#include <stdbool.h>

#include <stdint.h>

#include <string.h>


/* ==================================================

   Storage Types (Unaligned, Standard C Layout)

   ================================================== */


/** 2D vector storage type: two floats, x then y, with no alignment attribute. */
typedef struct Vec2 {
    float x, y; /* X and Y components */
} Vec2;


/** 3D vector storage type: three floats, x, y, z, with no alignment attribute. */
typedef struct Vec3 {
    float x, y, z; /* X, Y and Z components */
} Vec3;


/** 4D vector storage type: four floats, x, y, z, w, with no alignment attribute. */
typedef struct Vec4 {
    float x, y, z, w; /* X, Y, Z and W components */
} Vec4;


/* ==================================================

   Compute Types (128-bit Aligned, Register Mapped)

   ================================================== */


/**
 * 2D compute type, declared with ALIGN(16).
 *
 * One storage area viewed three ways: the SIMD value `v`, the float array
 * `f32[0..3]`, or the named fields `x` and `y` (the remaining two floats
 * have no named field in this union).
 */
typedef union ALIGN(16) SimdVec2 {
    /* Kept first: `(SimdVec2){{0}}` initializers in this header target the first member. */
    simd_vec_t v;
    float f32[4];
    struct {
        float x, y;
    };
} SimdVec2;


/**
 * 3D compute type, declared with ALIGN(16).
 *
 * One storage area viewed three ways: the SIMD value `v`, the float array
 * `f32[0..3]`, or the named fields `x`, `y`, `z`, `w`.
 */
typedef union ALIGN(16) SimdVec3 {
    /* Kept first: `(SimdVec3){{0}}` initializers in this header target the first member. */
    simd_vec_t v;
    float f32[4];
    struct {
        float x, y, z, w;
    };
} SimdVec3;


/**
 * 4D compute type, declared with ALIGN(16).
 *
 * One storage area viewed three ways: the SIMD value `v`, the float array
 * `f32[0..3]`, or the named fields `x`, `y`, `z`, `w`.
 */
typedef union ALIGN(16) SimdVec4 {
    /* Kept first: `(SimdVec4){{0}}` initializers in this header target the first member. */
    simd_vec_t v;
    float f32[4];
    struct {
        float x, y, z, w;
    };
} SimdVec4;


/* ==================================================

   SimdVec2 Operations

   ================================================== */


/**
 * Write a Vec2 to stdout as "Vec2(x, y)" followed by a newline.
 *
 * @param v    Vector to print; components use printf's default %f precision.
 * @param name Optional label. When non-NULL, "name: " is printed first.
 */
static inline void vec2_print(const Vec2 v, const char* name) {
    if (name != NULL) {
        printf("%s: ", name);
    }
    printf("Vec2(%f, %f)\n", v.x, v.y);
}


/**
 * Write a Vec3 to stdout as "Vec3(x, y, z)" followed by a newline,
 * with four digits after the decimal point per component.
 *
 * @param v    Vector to print.
 * @param name Optional label. When non-NULL, "name: " is printed first.
 */
static inline void vec3_print(const Vec3 v, const char* name) {
    if (name != NULL) {
        printf("%s: ", name);
    }
    printf("Vec3(%.4f, %.4f, %.4f)\n", v.x, v.y, v.z);
}


/**
 * Same output shape as vec3_print, but each component is formatted with
 * plain %f (printf's default precision) instead of %.4f.
 *
 * @param v    Vector to print.
 * @param name Optional label. When non-NULL, "name: " is printed first.
 */
static inline void vec3_print_ex(const Vec3 v, const char* name) {
    if (name != NULL) {
        printf("%s: ", name);
    }
    printf("Vec3(%f, %f, %f)\n", v.x, v.y, v.z);
}


/**
 * Write a Vec4 to stdout as "Vec4(x, y, z, w)" followed by a newline,
 * with four digits after the decimal point per component.
 *
 * @param v    Vector to print.
 * @param name Optional label. When non-NULL, "name: " is printed first.
 */
static inline void vec4_print(const Vec4 v, const char* name) {
    if (name != NULL) {
        printf("%s: ", name);
    }
    printf("Vec4(%.4f, %.4f, %.4f, %.4f)\n", v.x, v.y, v.z, v.w);
}


/**
 * Convert a storage Vec2 into a SimdVec2.
 *
 * The values (v.x, v.y, 0.0f, 0.0f) are handed to simd_set, so the two
 * slots beyond x/y hold zero instead of indeterminate data. Other helpers
 * in this header (e.g. vec2_normalize) rely on that.
 */
static inline SimdVec2 vec2_load(Vec2 v) {
    return (SimdVec2){.v = simd_set(v.x, v.y, 0.0f, 0.0f)};
}


/**
 * Convert a SimdVec2 back to a storage Vec2 by copying the union's
 * x and y fields; nothing else in the SIMD value is read.
 */
static inline Vec2 vec2_store(SimdVec2 v) {
    Vec2 out;
    out.x = v.x;
    out.y = v.y;
    return out;
}


/** Returns a + b, computed by simd_add on the full SIMD values. */
static inline SimdVec2 vec2_add(SimdVec2 a, SimdVec2 b) {
    SimdVec2 sum;
    sum.v = simd_add(a.v, b.v);
    return sum;
}


/** Returns a - b, computed by simd_sub on the full SIMD values. */
static inline SimdVec2 vec2_sub(SimdVec2 a, SimdVec2 b) {
    SimdVec2 diff;
    diff.v = simd_sub(a.v, b.v);
    return diff;
}


/** Returns a scaled by s: simd_mul of a with simd_set1(s). */
static inline SimdVec2 vec2_mul(SimdVec2 a, float s) {
    SimdVec2 scaled;
    scaled.v = simd_mul(a.v, simd_set1(s));
    return scaled;
}


/**
 * 2D dot product of a and b.
 *
 * Computed in scalar code from the union's x and y fields only, so
 * whatever sits in the remaining slots of either operand is ignored.
 */
static inline float vec2_dot(SimdVec2 a, SimdVec2 b) {
    return a.x * b.x + a.y * b.y;
}


/** Squared length of v: vec2_dot of v with itself. */
static inline float vec2_length_sq(SimdVec2 v) {
    const float len_sq = vec2_dot(v, v);
    return len_sq;
}


/** Length of v: sqrtf of vec2_length_sq(v). */
static inline float vec2_length(SimdVec2 v) {
    const float len_sq = vec2_length_sq(v);
    return sqrtf(len_sq);
}


/**
 * Normalize v by passing its whole SIMD value to simd_normalize3.
 *
 * This is only a 2D normalization when the slot after y is zero, which is
 * what vec2_load produces.
 * NOTE(review): simd_normalize3 lives in simd.h and is not visible here;
 * its zero-length behaviour should be confirmed there.
 */
static inline SimdVec2 vec2_normalize(SimdVec2 v) {
    SimdVec2 unit;
    unit.v = simd_normalize3(v.v);
    return unit;
}


/**
 * Rotate v by `angle` (radians, as taken by cosf/sinf).
 *
 * Evaluates x * (cos, sin, 0, 0) + y * (-sin, cos, 0, 0), i.e.
 *   x' = x*cos - y*sin
 *   y' = x*sin + y*cos
 * with the last two slots of the result coming out as zero products.
 */
static inline SimdVec2 vec2_rotate(SimdVec2 v, float angle) {
    const float cos_a = cosf(angle);
    const float sin_a = sinf(angle);

    /* Splat each input component so it can scale a whole matrix column. */
    const simd_vec_t vx = simd_splat_x(v.v);
    const simd_vec_t vy = simd_splat_y(v.v);

    /* Columns of the 2x2 rotation matrix, padded with zeros. */
    const simd_vec_t col_x = simd_set(cos_a, sin_a, 0.0f, 0.0f);
    const simd_vec_t col_y = simd_set(-sin_a, cos_a, 0.0f, 0.0f);

    SimdVec2 rotated;
    rotated.v = simd_add(simd_mul(vx, col_x), simd_mul(vy, col_y));
    return rotated;
}


/** Squared distance between a and b: squared length of (b - a). */
static inline float vec2_distance_sq(SimdVec2 a, SimdVec2 b) {
    const SimdVec2 delta = vec2_sub(b, a);
    return vec2_length_sq(delta);
}


/** Distance between a and b: sqrtf of vec2_distance_sq(a, b). */
static inline float vec2_distance(SimdVec2 a, SimdVec2 b) {
    const float dist_sq = vec2_distance_sq(a, b);
    return sqrtf(dist_sq);
}


/**
 * Linear interpolation: a + (b - a) * t.
 * t is not clamped, so values outside [0, 1] extrapolate.
 */
static inline SimdVec2 vec2_lerp(SimdVec2 a, SimdVec2 b, float t) {
    return vec2_add(a, vec2_mul(vec2_sub(b, a), t));
}


/**
 * Project a onto b: b * (dot(a, b) / |b|^2).
 *
 * When |b|^2 is below 1e-6f the division is skipped and a
 * zero-initialized SimdVec2 is returned instead.
 */
static inline SimdVec2 vec2_project(SimdVec2 a, SimdVec2 b) {
    const float denom = vec2_length_sq(b);
    if (denom < 1e-6f) {
        return (SimdVec2){{0}};
    }

    return vec2_mul(b, vec2_dot(a, b) / denom);
}


/** Rejection of a from b: a minus vec2_project(a, b). */
static inline SimdVec2 vec2_reject(SimdVec2 a, SimdVec2 b) {
    const SimdVec2 along_b = vec2_project(a, b);
    return vec2_sub(a, along_b);
}


/**
 * Perpendicular of v: (x, y) becomes (-y, x).
 * The result is rebuilt with simd_set, with the last two slots set to 0.
 */
static inline SimdVec2 vec2_perpendicular(SimdVec2 v) {
    const float perp_x = -v.y;
    const float perp_y = v.x;

    SimdVec2 out;
    out.v = simd_set(perp_x, perp_y, 0.0f, 0.0f);
    return out;
}


/* ==================================================

   SimdVec3 Operations

   ================================================== */


/**
 * Convert a storage Vec3 into a SimdVec3.
 *
 * The values (v.x, v.y, v.z, 0.0f) are handed to simd_set; the zero in
 * the fourth slot keeps four-wide operations from picking up stray data.
 */
static inline SimdVec3 vec3_load(Vec3 v) {
    return (SimdVec3){.v = simd_set(v.x, v.y, v.z, 0.0f)};
}


/** Convert a SimdVec3 back to a storage Vec3 by copying x, y and z (w is dropped). */
static inline Vec3 vec3_store(SimdVec3 v) {
    Vec3 out;
    out.x = v.x;
    out.y = v.y;
    out.z = v.z;
    return out;
}


/** Returns a + b, computed by simd_add on the full SIMD values. */
static inline SimdVec3 vec3_add(SimdVec3 a, SimdVec3 b) {
    SimdVec3 sum;
    sum.v = simd_add(a.v, b.v);
    return sum;
}


/** Returns a - b, computed by simd_sub on the full SIMD values. */
static inline SimdVec3 vec3_sub(SimdVec3 a, SimdVec3 b) {
    SimdVec3 diff;
    diff.v = simd_sub(a.v, b.v);
    return diff;
}


/** Returns a scaled by s: simd_mul of a with simd_set1(s). */
static inline SimdVec3 vec3_mul(SimdVec3 a, float s) {
    SimdVec3 scaled;
    scaled.v = simd_mul(a.v, simd_set1(s));
    return scaled;
}


/** Component-wise product of a and b, computed by simd_mul. */
static inline SimdVec3 vec3_scale(SimdVec3 a, SimdVec3 b) {
    SimdVec3 prod;
    prod.v = simd_mul(a.v, b.v);
    return prod;
}


/** Dot product of a and b, as computed by simd_dot3. */
static inline float vec3_dot(SimdVec3 a, SimdVec3 b) {
    const float dot = simd_dot3(a.v, b.v);
    return dot;
}


/** Cross product a x b, as computed by simd_cross (argument order preserved). */
static inline SimdVec3 vec3_cross(SimdVec3 a, SimdVec3 b) {
    SimdVec3 cross;
    cross.v = simd_cross(a.v, b.v);
    return cross;
}


/** Squared length of v, as computed by simd_length_sq3. */
static inline float vec3_length_sq(SimdVec3 v) {
    const float len_sq = simd_length_sq3(v.v);
    return len_sq;
}


/** Length of v, as computed by simd_length3. */
static inline float vec3_length(SimdVec3 v) {
    const float len = simd_length3(v.v);
    return len;
}


/**
 * Normalize v via simd_normalize3.
 * NOTE(review): zero-length handling is decided inside simd.h — confirm there.
 */
static inline SimdVec3 vec3_normalize(SimdVec3 v) {
    SimdVec3 unit;
    unit.v = simd_normalize3(v.v);
    return unit;
}


/**
 * Normalize v via simd_normalize3_fast.
 * NOTE(review): presumably trades accuracy for speed compared with
 * vec3_normalize; the precision contract is defined in simd.h — confirm there.
 */
static inline SimdVec3 vec3_normalize_fast(SimdVec3 v) {
    SimdVec3 unit;
    unit.v = simd_normalize3_fast(v.v);
    return unit;
}


/** Squared distance between a and b: squared length of (b - a). */
static inline float vec3_distance_sq(SimdVec3 a, SimdVec3 b) {
    const SimdVec3 delta = vec3_sub(b, a);
    return vec3_length_sq(delta);
}


/** Distance between a and b: sqrtf of vec3_distance_sq(a, b). */
static inline float vec3_distance(SimdVec3 a, SimdVec3 b) {
    const float dist_sq = vec3_distance_sq(a, b);
    return sqrtf(dist_sq);
}


/**
 * Linear interpolation: a + (b - a) * t.
 * t is not clamped, so values outside [0, 1] extrapolate.
 */
static inline SimdVec3 vec3_lerp(SimdVec3 a, SimdVec3 b, float t) {
    const SimdVec3 step = vec3_mul(vec3_sub(b, a), t);
    return vec3_add(a, step);
}


/**
 * Project a onto b: b * (dot(a, b) / |b|^2).
 *
 * When |b|^2 is below 1e-6f the division is skipped and a
 * zero-initialized SimdVec3 is returned instead.
 */
static inline SimdVec3 vec3_project(SimdVec3 a, SimdVec3 b) {
    const float denom = vec3_length_sq(b);
    if (denom < 1e-6f) {
        return (SimdVec3){{0}};
    }

    return vec3_mul(b, vec3_dot(a, b) / denom);
}


/** Rejection of a from b: a minus vec3_project(a, b). */
static inline SimdVec3 vec3_reject(SimdVec3 a, SimdVec3 b) {
    const SimdVec3 along_b = vec3_project(a, b);
    return vec3_sub(a, along_b);
}


/**
 * Build a vector perpendicular to v.
 *
 * A helper axis is chosen and the normalized cross product of v with it
 * is returned:
 *   - (0, 1, 0) when |v.y| < 0.99
 *   - (1, 0, 0) otherwise, since v's y component is then too dominant for
 *     the Y axis to be a safe helper
 *
 * The choice is made in scalar code on v.y alone; v is not required to be
 * unit length, so the 0.99 threshold is only meaningful for roughly
 * normalized input.
 */
static inline SimdVec3 vec3_perpendicular(SimdVec3 v) {
    const bool use_y_axis = fabsf(v.y) < 0.99f;
    const Vec3 helper = use_y_axis ? (Vec3){0.0f, 1.0f, 0.0f}   // Up
                                   : (Vec3){1.0f, 0.0f, 0.0f};  // Right

    return vec3_normalize(vec3_cross(v, vec3_load(helper)));
}


/* ==================================================

   SimdVec4 Operations

   ================================================== */


/** Convert a storage Vec4 into a SimdVec4 via simd_set(x, y, z, w). */
static inline SimdVec4 vec4_load(Vec4 v) {
    return (SimdVec4){.v = simd_set(v.x, v.y, v.z, v.w)};
}


/** Convert a SimdVec4 back to a storage Vec4 by copying x, y, z and w. */
static inline Vec4 vec4_store(SimdVec4 v) {
    Vec4 out;
    out.x = v.x;
    out.y = v.y;
    out.z = v.z;
    out.w = v.w;
    return out;
}


/** Length of v, as computed by simd_length4. */
static inline float vec4_length(SimdVec4 v) {
    const float len = simd_length4(v.v);
    return len;
}


/** Squared length of v, as computed by simd_length_sq4. */
static inline float vec4_length_sq(SimdVec4 v) {
    const float len_sq = simd_length_sq4(v.v);
    return len_sq;
}


/** Returns a + b, computed by simd_add. */
static inline SimdVec4 vec4_add(SimdVec4 a, SimdVec4 b) {
    SimdVec4 sum;
    sum.v = simd_add(a.v, b.v);
    return sum;
}


/** Returns a - b, computed by simd_sub. */
static inline SimdVec4 vec4_sub(SimdVec4 a, SimdVec4 b) {
    SimdVec4 diff;
    diff.v = simd_sub(a.v, b.v);
    return diff;
}


/** Returns a scaled by s: simd_mul of a with simd_set1(s). */
static inline SimdVec4 vec4_mul(SimdVec4 a, float s) {
    SimdVec4 scaled;
    scaled.v = simd_mul(a.v, simd_set1(s));
    return scaled;
}


/**
 * Divide a by the scalar s.
 *
 * When |s| < 1e-8f the result of simd_set_zero() is returned instead of
 * dividing. Otherwise a is multiplied by the reciprocal 1.0f / s, which
 * can differ from a true per-component division in the last bit.
 */
static inline SimdVec4 vec4_div(SimdVec4 a, float s) {
    const bool near_zero = fabsf(s) < 1e-8f;
    if (near_zero) {
        SimdVec4 zero;
        zero.v = simd_set_zero();
        return zero;
    }

    const float inv = 1.0f / s;
    return vec4_mul(a, inv);
}


/** Dot product of a and b, as computed by simd_dot4. */
static inline float vec4_dot(SimdVec4 a, SimdVec4 b) {
    const float dot = simd_dot4(a.v, b.v);
    return dot;
}


/** Component-wise product of a and b, computed by simd_mul. */
static inline SimdVec4 vec4_scale(SimdVec4 a, SimdVec4 b) {
    SimdVec4 prod;
    prod.v = simd_mul(a.v, b.v);
    return prod;
}


/**
 * Normalize a via simd_normalize4.
 * NOTE(review): zero-length handling is decided inside simd.h — confirm there.
 */
static inline SimdVec4 vec4_normalize(SimdVec4 a) {
    SimdVec4 unit;
    unit.v = simd_normalize4(a.v);
    return unit;
}


/* ==================================================

   Rotations (Optimized)

   ================================================== */


/**
 * Rotate v about the X axis by `angle` (radians, as taken by cosf/sinf).
 *
 *   y' = y*cos - z*sin
 *   z' = y*sin + z*cos
 *
 * x and w are copied through unchanged.
 */
static inline SimdVec4 vec4_rotate_x(SimdVec4 v, float angle) {
    const float cos_a = cosf(angle);
    const float sin_a = sinf(angle);

    const float rot_y = v.y * cos_a - v.z * sin_a;
    const float rot_z = v.y * sin_a + v.z * cos_a;

    SimdVec4 out;
    out.v = simd_set(v.x, rot_y, rot_z, v.w);
    return out;
}


/**
 * Rotate v about the Y axis by `angle` (radians, as taken by cosf/sinf).
 *
 *   x' =  x*cos + z*sin
 *   z' = -x*sin + z*cos
 *
 * y and w are copied through unchanged.
 */
static inline SimdVec4 vec4_rotate_y(SimdVec4 v, float angle) {
    const float cos_a = cosf(angle);
    const float sin_a = sinf(angle);

    const float rot_x = v.x * cos_a + v.z * sin_a;
    const float rot_z = -v.x * sin_a + v.z * cos_a;

    SimdVec4 out;
    out.v = simd_set(rot_x, v.y, rot_z, v.w);
    return out;
}


/**
 * Rotate v about the Z axis by `angle` (radians, as taken by cosf/sinf).
 *
 *   x' = x*cos - y*sin
 *   y' = x*sin + y*cos
 *
 * z and w are copied through unchanged.
 */
static inline SimdVec4 vec4_rotate_z(SimdVec4 v, float angle) {
    const float cos_a = cosf(angle);
    const float sin_a = sinf(angle);

    const float rot_x = v.x * cos_a - v.y * sin_a;
    const float rot_y = v.x * sin_a + v.y * cos_a;

    SimdVec4 out;
    out.v = simd_set(rot_x, rot_y, v.z, v.w);
    return out;
}


/* ==================================================

   Utility Functions

   ================================================== */


/**
 * Approximate equality of two storage Vec3 values.
 *
 * Both inputs go through vec3_load, so the fourth slot is 0 on each side
 * and cannot make the comparison fail; the verdict comes from
 * simd_equals_eps.
 *
 * @param a       First vector.
 * @param b       Second vector.
 * @param epsilon Tolerance forwarded to simd_equals_eps.
 */
static inline bool vec3_equals(Vec3 a, Vec3 b, float epsilon) {
    const SimdVec3 lhs = vec3_load(a);
    const SimdVec3 rhs = vec3_load(b);
    return simd_equals_eps(lhs.v, rhs.v, epsilon);
}


/**
 * Approximate equality of two storage Vec4 values.
 *
 * Both inputs go through vec4_load and the verdict comes from
 * simd_equals_eps.
 *
 * @param a       First vector.
 * @param b       Second vector.
 * @param epsilon Tolerance forwarded to simd_equals_eps.
 */
static inline bool vec4_equals(Vec4 a, Vec4 b, float epsilon) {
    const SimdVec4 lhs = vec4_load(a);
    const SimdVec4 rhs = vec4_load(b);
    return simd_equals_eps(lhs.v, rhs.v, epsilon);
}


/** Squared distance between a and b: squared length of (b - a). */
static inline float vec4_distance_sq(SimdVec4 a, SimdVec4 b) {
    const SimdVec4 delta = vec4_sub(b, a);
    return vec4_length_sq(delta);
}


/** Distance between a and b: sqrtf of vec4_distance_sq(a, b). */
static inline float vec4_distance(SimdVec4 a, SimdVec4 b) {
    const float dist_sq = vec4_distance_sq(a, b);
    return sqrtf(dist_sq);
}


/**
 * Linear interpolation: a + (b - a) * t.
 * t is not clamped, so values outside [0, 1] extrapolate.
 */
static inline SimdVec4 vec4_lerp(SimdVec4 a, SimdVec4 b, float t) {
    const SimdVec4 step = vec4_mul(vec4_sub(b, a), t);
    return vec4_add(a, step);
}


/**
 * Project a onto b: b * (dot(a, b) / |b|^2).
 *
 * When |b|^2 is below 1e-6f the division is skipped and a
 * zero-initialized SimdVec4 is returned instead.
 */
static inline SimdVec4 vec4_project(SimdVec4 a, SimdVec4 b) {
    const float denom = vec4_length_sq(b);
    if (denom < 1e-6f) {
        return (SimdVec4){{0}};
    }

    return vec4_mul(b, vec4_dot(a, b) / denom);
}


/** Rejection of a from b: a minus vec4_project(a, b). */
static inline SimdVec4 vec4_reject(SimdVec4 a, SimdVec4 b) {
    const SimdVec4 along_b = vec4_project(a, b);
    return vec4_sub(a, along_b);
}


/** Minimum of a and b, as computed by simd_min. */
static inline SimdVec4 vec4_min(SimdVec4 a, SimdVec4 b) {
    SimdVec4 lo;
    lo.v = simd_min(a.v, b.v);
    return lo;
}


/** Maximum of a and b, as computed by simd_max. */
static inline SimdVec4 vec4_max(SimdVec4 a, SimdVec4 b) {
    SimdVec4 hi;
    hi.v = simd_max(a.v, b.v);
    return hi;
}


/** Absolute value of v, as computed by simd_abs. */
static inline SimdVec4 vec4_abs(SimdVec4 v) {
    SimdVec4 magnitude;
    magnitude.v = simd_abs(v.v);
    return magnitude;
}


/** Sum of v's components, as computed by simd_hadd. */
static inline float vec4_sum(SimdVec4 v) {
    const float total = simd_hadd(v.v);
    return total;
}


#ifdef __cplusplus

}

#endif


#endif  // __VEC_SIMD_H__

simd.h
Portable Single Instruction Multiple Data (SIMD) Intrinsics Wrapper.

Vec2
2D vector storage type (8 bytes, unaligned).
Definition vec.h:52

Vec2::y
float y
Y component.
Definition vec.h:54

Vec2::x
float x
X component.
Definition vec.h:53

Vec3
3D vector storage type (12 bytes, unaligned).
Definition vec.h:63

Vec3::x
float x
X component.
Definition vec.h:64

Vec3::z
float z
Z component.
Definition vec.h:66

Vec3::y
float y
Y component.
Definition vec.h:65

Vec4
4D vector storage type (16 bytes, naturally aligned).
Definition vec.h:75

Vec4::y
float y
Y component.
Definition vec.h:77

Vec4::w
float w
W component (also used for homogeneous coordinates)
Definition vec.h:79

Vec4::x
float x
X component.
Definition vec.h:76

Vec4::z
float z
Z component.
Definition vec.h:78
a	First vector
b	Second vector
epsilon	Maximum allowed difference per component (e.g., 1e-6f)