diff --git a/packages/vecmath/src/colors/Color.zig b/packages/vecmath/src/colors/Color.zig index dc1f0d7..d407bda 100644 --- a/packages/vecmath/src/colors/Color.zig +++ b/packages/vecmath/src/colors/Color.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Color = extern struct { r: u8, @@ -33,26 +33,26 @@ pub const Color = extern struct { pub inline fn l(comptime literal: []const u8) Color { if (literal.len == 4 and literal[0] == '#') { // #RGB - return .initOpaque( + return initOpaque( (std.fmt.parseUnsigned(u8, literal[1..2], 16) catch unreachable) * 0x11, (std.fmt.parseUnsigned(u8, literal[2..3], 16) catch unreachable) * 0x11, (std.fmt.parseUnsigned(u8, literal[3..4], 16) catch unreachable) * 0x11, ); } else if (literal.len == 5 and literal[0] == '#') { // #RGBA - return .init( + return init( (std.fmt.parseUnsigned(u8, literal[1..2], 16) catch unreachable) * 0x11, (std.fmt.parseUnsigned(u8, literal[2..3], 16) catch unreachable) * 0x11, (std.fmt.parseUnsigned(u8, literal[3..4], 16) catch unreachable) * 0x11, (std.fmt.parseUnsigned(u8, literal[4..5], 16) catch unreachable) * 0x11, ); } else if (literal.len == 7 and literal[0] == '#') { // #RRGGBB - return .initOpaque( + return initOpaque( (std.fmt.parseUnsigned(u8, literal[1..3], 16) catch unreachable), (std.fmt.parseUnsigned(u8, literal[3..5], 16) catch unreachable), (std.fmt.parseUnsigned(u8, literal[5..7], 16) catch unreachable), ); } else if (literal.len == 9 and literal[0] == '#') { // #RRGGBBAA - return .init( + return init( (std.fmt.parseUnsigned(u8, literal[1..3], 16) catch unreachable), (std.fmt.parseUnsigned(u8, literal[3..5], 16) catch unreachable), (std.fmt.parseUnsigned(u8, literal[5..7], 16) catch unreachable), diff --git a/packages/vecmath/src/matrices/Matrix3x2.zig b/packages/vecmath/src/matrices/Matrix3x2.zig index f69fa54..7cd6048 100644 --- a/packages/vecmath/src/matrices/Matrix3x2.zig +++ 
b/packages/vecmath/src/matrices/Matrix3x2.zig @@ -1,82 +1,290 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Matrix3x2 = extern struct { - ix: f32, - iy: f32, - jx: f32, - jy: f32, - tx: f32, - ty: f32, + // zig fmt: off + ix: f32, iy: f32, + jx: f32, jy: f32, + tx: f32, ty: f32, + // zig fmt: on pub const Array = [6]f32; - pub const identity = init(1, 0, 0, 1, 0, 0); + pub const identity = init( + // zig fmt: off + 1, 0, + 0, 1, + 0, 0, + // zig fmt: on + ); - // --- INIT --- + // --- INIT ---------------------------------------------------------------- - pub inline fn init(ix: f32, iy: f32, jx: f32, jy: f32, tx: f32, ty: f32) Matrix3x2 { - return .{ .ix = ix, .iy = iy, .jx = jx, .jy = jy, .tx = tx, .ty = ty }; + pub inline fn init( + // zig fmt: off + ix: f32, iy: f32, + jx: f32, jy: f32, + tx: f32, ty: f32, + // zig fmt: on + ) Matrix3x2 { + return .{ + // zig fmt: off + .ix = ix, .iy = iy, + .jx = jx, .jy = jy, + .tx = tx, .ty = ty, + // zig fmt: on + }; } - pub inline fn initTranslation(t: Vector2) Matrix3x2 { - return .{ .ix = 1, .iy = 0, .jx = 0, .jy = 1, .tx = t.x, .ty = t.y }; + pub inline fn initVersors(i: vm.Vector2, j: vm.Vector2, t: vm.Vector2) Matrix3x2 { + return .{ + // zig fmt: off + .ix = i.x, .iy = i.y, + .jx = j.x, .jy = j.y, + .tx = t.x, .ty = t.y, + // zig fmt: on + }; } - pub inline fn initRotation(angle_turns: f32) Matrix3x2 { - const c, const s = cossin(angle_turns).asArray(); - return .{ .ix = c, .iy = s, .jx = -s, .jy = c, .tx = 0, .ty = 0 }; + pub inline fn initTranslation(t: vm.Vector2) Matrix3x2 { + return .{ + // zig fmt: off + .ix = 1, .iy = 0, + .jx = 0, .jy = 1, + .tx = t.x, .ty = t.y, + // zig fmt: on + }; } - pub inline fn initScale(s: Vector2) Matrix3x2 { - return .{ .ix = s.x, .iy = 0, .jx = 0, .jy = s.y, .tx = 0, .ty = 0 }; + pub inline fn initRotation(c: vm.Complex) Matrix3x2 { + return .{ + // zig fmt: off + .ix = c.re, .iy = c.im, + .jx = -c.im, .jy = c.re, + .tx = 
0, .ty = 0, + // zig fmt: on + }; + } + + pub inline fn initScale(s: vm.Vector2) Matrix3x2 { + return .{ + // zig fmt: off + .ix = s.x, .iy = 0, + .jx = 0, .jy = s.y, + .tx = 0, .ty = 0, + // zig fmt: on + }; + } + + pub inline fn initTranslationRotation(t: vm.Vector2, c: vm.Complex) Matrix3x2 { + return .{ + // zig fmt: off + .ix = c.re, .iy = c.im, + .jx = -c.im, .jy = c.re, + .tx = t.x, .ty = t.y, + // zig fmt: on + }; + } + + pub inline fn initTranslationScale(t: vm.Vector2, s: vm.Vector2) Matrix3x2 { + return .{ + // zig fmt: off + .ix = s.x, .iy = 0, + .jx = 0, .jy = s.y, + .tx = t.x, .ty = t.y, + // zig fmt: on + }; + } + + pub inline fn initTranslationRotationScale(t: vm.Vector2, c: vm.Complex, s: vm.Vector2) Matrix3x2 { + return .{ + // zig fmt: off + .ix = s.x * c.re, .iy = s.x * c.im, + .jx = s.y * -c.im, .jy = s.y * c.re, + .tx = t.x, .ty = t.y, + // zig fmt: on + }; } pub inline fn initArray(array: Array) Matrix3x2 { return @bitCast(array); } - // --- CONVERSION --- + // --- CONVERSION ---------------------------------------------------------- pub inline fn asArray(self: Matrix3x2) Array { return @bitCast(self); } - pub inline fn asArrayPtr(self: *Matrix3x2) *Array { - return @ptrCast(self); + // --- ACCESSORS ----------------------------------------------------------- + + pub inline fn getIVersor(self: Matrix3x2) vm.Vector2 { + return .{ .x = self.ix, .y = self.iy }; } - pub inline fn asArrayConstPtr(self: *const Matrix3x2) *const Array { - return @ptrCast(self); + pub inline fn getJVersor(self: Matrix3x2) vm.Vector2 { + return .{ .x = self.jx, .y = self.jy }; } - // --- TRANSFORM --- + pub inline fn getTranslationVector(self: Matrix3x2) vm.Vector2 { + return .{ .x = self.tx, .y = self.ty }; + } - pub inline fn transformPoint(self: Matrix3x2, p: Vector2) Vector2 { + // --- COMPONENT-WISE ------------------------------------------------------ + + pub inline fn add(self: Matrix3x2, other: Matrix3x2) Matrix3x2 { + return .{ + .ix = self.ix + other.ix, + 
.iy = self.iy + other.iy, + .jx = self.jx + other.jx, + .jy = self.jy + other.jy, + .tx = self.tx + other.tx, + .ty = self.ty + other.ty, + }; + } + + pub inline fn sub(self: Matrix3x2, other: Matrix3x2) Matrix3x2 { + return .{ + .ix = self.ix - other.ix, + .iy = self.iy - other.iy, + .jx = self.jx - other.jx, + .jy = self.jy - other.jy, + .tx = self.tx - other.tx, + .ty = self.ty - other.ty, + }; + } + + pub inline fn mulScalar(self: Matrix3x2, scalar: f32) Matrix3x2 { + return .{ + .ix = self.ix * scalar, + .iy = self.iy * scalar, + .jx = self.jx * scalar, + .jy = self.jy * scalar, + .tx = self.tx * scalar, + .ty = self.ty * scalar, + }; + } + + pub inline fn divScalar(self: Matrix3x2, scalar: f32) Matrix3x2 { + return .{ + .ix = self.ix / scalar, + .iy = self.iy / scalar, + .jx = self.jx / scalar, + .jy = self.jy / scalar, + .tx = self.tx / scalar, + .ty = self.ty / scalar, + }; + } + + // --- TRANSFORM ----------------------------------------------------------- + + // TODO Move to vm.Vector2 + pub inline fn transformPoint(self: Matrix3x2, p: vm.Vector2) vm.Vector2 { return .{ .x = p.x * self.ix + p.y * self.jx + self.tx, .y = p.x * self.iy + p.y * self.jy + self.ty, }; } - pub inline fn transformPoint_x8(self: Matrix3x2, p: Vector2x8) Vector2x8 { + // TODO Move to vm.Vector2x8 + pub inline fn transformPoint_x8(self: Matrix3x2, p: vm.Vector2x8) vm.Vector2x8 { return .{ - .x = p.x * ps(self.ix) + p.y * ps(self.jx) + ps(self.tx), - .y = p.x * ps(self.iy) + p.y * ps(self.jy) + ps(self.ty), + .x = p.x * vm.ps(self.ix) + p.y * vm.ps(self.jx) + vm.ps(self.tx), + .y = p.x * vm.ps(self.iy) + p.y * vm.ps(self.jy) + vm.ps(self.ty), }; } - pub inline fn transformVector(self: Matrix3x2, v: Vector2) Vector2 { + // TODO Move to vm.Vector2 + pub inline fn transformVector(self: Matrix3x2, v: vm.Vector2) vm.Vector2 { return .{ .x = v.x * self.ix + v.y * self.jx, .y = v.x * self.iy + v.y * self.jy, }; } - pub inline fn transformVector_x8(self: Matrix3x2, v: Vector2x8) Vector2x8 { 
+ // TODO Move to vm.Vector2x8 + pub inline fn transformVector_x8(self: Matrix3x2, v: vm.Vector2x8) vm.Vector2x8 { return .{ - .x = v.x * ps(self.ix) + v.y * ps(self.jx), - .y = v.x * ps(self.iy) + v.y * ps(self.jy), + .x = v.x * vm.ps(self.ix) + v.y * vm.ps(self.jx), + .y = v.x * vm.ps(self.iy) + v.y * vm.ps(self.jy), + }; + } + + // --- COMPOSE ------------------------------------------------------------- + + pub inline fn mulMatrix(self: Matrix3x2, other: Matrix3x2) Matrix3x2 { + return .{ + .ix = other.ix * self.ix + other.iy * self.jx, + .iy = other.ix * self.iy + other.iy * self.jy, + .jx = other.jx * self.ix + other.jy * self.jx, + .jy = other.jx * self.iy + other.jy * self.jy, + .tx = other.tx * self.ix + other.ty * self.jx + self.tx, + .ty = other.tx * self.iy + other.ty * self.jy + self.ty, + }; + } + + pub inline fn mulMatrix_x8(self: Matrix3x2, other: vm.Matrix3x2x8) vm.Matrix3x2x8 { + return .{ + .ix = other.ix * vm.ps(self.ix) + other.iy * vm.ps(self.jx), + .iy = other.ix * vm.ps(self.iy) + other.iy * vm.ps(self.jy), + .jx = other.jx * vm.ps(self.ix) + other.jy * vm.ps(self.jx), + .jy = other.jx * vm.ps(self.iy) + other.jy * vm.ps(self.jy), + .tx = other.tx * vm.ps(self.ix) + other.ty * vm.ps(self.jx) + vm.ps(self.tx), + .ty = other.tx * vm.ps(self.iy) + other.ty * vm.ps(self.jy) + vm.ps(self.ty), + }; + } + + // INVERSION DERIVATION + // + // Imagine matrix extended to 3×3 like so: + // + // ⎡ix jx tx⎤ + // A = ⎢iy jy ty⎥ + // ⎣ 0 0 1⎦ + // + // Then: + // + // det A = ix · jy − jx · iy + // + // ⎡ ⎡jy ty⎤ ⎡iy ty⎤ ⎡iy jy⎤⎤T + // ⎢ det ⎣ 0 1⎦ −det ⎣ 0 1⎦ det ⎣ 0 0⎦⎥ + // 1 ⎢ ⎡jx tx⎤ ⎡ix tx⎤ ⎡ix jx⎤⎥ 1 ⎡ jy −jx j×t⎤ + // inv A = ⎯⎯⎯⎯⎯ ⎢−det ⎣ 0 1⎦ det ⎣ 0 1⎦ −det ⎣ 0 0⎦⎥ = ⎯⎯⎯⎯⎯ ⎢−iy ix −i×t⎥ + // det A ⎢ ⎡jx tx⎤ ⎡ix tx⎤ ⎡ix jx⎤⎥ det A ⎣ 0 0 det A⎦ + // ⎣ det ⎣jy ty⎦ −det ⎣iy ty⎦ det ⎣iy jy⎦⎦ + // + // After multiplying by 1 / (det A), the third row becomes [0 0 1]. + // When A is orthonormal, we can assume det A = 1. 
+ + pub inline fn inverseOrthonormal(self: Matrix3x2) Matrix3x2 { + std.debug.assert(self.ix == self.jy and self.iy == -self.jx); + const ix = self.ix; + const iy = self.jx; + const jx = self.iy; + const jy = self.jy; + return .{ + .ix = ix, + .iy = iy, + .jx = jx, + .jy = jy, + .tx = -(self.tx * ix + self.ty * jx), + .ty = -(self.tx * iy + self.ty * jy), + }; + } + + pub inline fn inverseAffine(self: Matrix3x2) Matrix3x2 { + const inv_det = 1.0 / (self.ix * self.jy - self.jx * self.iy); + const ix = self.jy; + const iy = -self.iy; + const jx = -self.jx; + const jy = self.ix; + return .{ + .ix = inv_det * ix, + .iy = inv_det * iy, + .jx = inv_det * jx, + .jy = inv_det * jy, + .tx = -inv_det * (self.tx * ix + self.ty * jx), + .ty = -inv_det * (self.tx * iy + self.ty * jy), }; } }; diff --git a/packages/vecmath/src/matrices/Matrix3x2x8.zig b/packages/vecmath/src/matrices/Matrix3x2x8.zig index 6cb3c45..4bd189d 100644 --- a/packages/vecmath/src/matrices/Matrix3x2x8.zig +++ b/packages/vecmath/src/matrices/Matrix3x2x8.zig @@ -1,2 +1,443 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); + +pub const Matrix3x2x8 = struct { + // zig fmt: off + ix: vm.f32x8, iy: vm.f32x8, + jx: vm.f32x8, jy: vm.f32x8, + tx: vm.f32x8, ty: vm.f32x8, + // zig fmt: on + + pub const identity = initSingle( + // zig fmt: off + 1, 0, + 0, 1, + 0, 0, + // zig fmt: on + ); + + // --- INIT ---------------------------------------------------------------- + + pub inline fn init( + // zig fmt: off + ix: vm.f32x8, iy: vm.f32x8, + jx: vm.f32x8, jy: vm.f32x8, + tx: vm.f32x8, ty: vm.f32x8, + // zig fmt: on + ) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = ix, .iy = iy, + .jx = jx, .jy = jy, + .tx = tx, .ty = ty, + // zig fmt: on + }; + } + + pub inline fn initSingle( + // zig fmt: off + ix: f32, iy: f32, + jx: f32, jy: f32, + tx: f32, ty: f32, + // zig fmt: on + ) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = vm.ps(ix), .iy = vm.ps(iy), + .jx = 
vm.ps(jx), .jy = vm.ps(jy), + .tx = vm.ps(tx), .ty = vm.ps(ty), + // zig fmt: on + }; + } + + pub inline fn initVersors(i: vm.Vector2x8, j: vm.Vector2x8, t: vm.Vector2x8) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = i.x, .iy = i.y, + .jx = j.x, .jy = j.y, + .tx = t.x, .ty = t.y, + // zig fmt: on + }; + } + + pub inline fn initVersorsSingle(i: vm.Vector2, j: vm.Vector2, t: vm.Vector2) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = vm.ps(i.x), .iy = vm.ps(i.y), + .jx = vm.ps(j.x), .jy = vm.ps(j.y), + .tx = vm.ps(t.x), .ty = vm.ps(t.y), + // zig fmt: on + }; + } + + pub inline fn initTranslation(t: vm.Vector2x8) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = vm.ps(1), .iy = vm.ps(0), + .jx = vm.ps(0), .jy = vm.ps(1), + .tx = t.x, .ty = t.y, + // zig fmt: on + }; + } + + pub inline fn initTranslationSingle(t: vm.Vector2) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = vm.ps(1), .iy = vm.ps(0), + .jx = vm.ps(0), .jy = vm.ps(1), + .tx = vm.ps(t.x), .ty = vm.ps(t.y), + // zig fmt: on + }; + } + + pub inline fn initRotation(c: vm.Complex_x8) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = c.re, .iy = c.im, + .jx = -c.im, .jy = c.re, + .tx = vm.ps(0), .ty = vm.ps(0), + // zig fmt: on + }; + } + + pub inline fn initRotationSingle(c: vm.Complex) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = vm.ps( c.re), .iy = vm.ps(c.im), + .jx = vm.ps(-c.im), .jy = vm.ps(c.re), + .tx = vm.ps(0), .ty = vm.ps(0), + // zig fmt: on + }; + } + + pub inline fn initScale(s: vm.Vector2x8) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = s.x, .iy = vm.ps(0), + .jx = vm.ps(0), .jy = s.y, + .tx = vm.ps(0), .ty = vm.ps(0), + // zig fmt: on + }; + } + + pub inline fn initScaleSingle(s: vm.Vector2) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = vm.ps(s.x), .iy = vm.ps(0), + .jx = vm.ps(0), .jy = vm.ps(s.y), + .tx = vm.ps(0), .ty = vm.ps(0), + // zig fmt: on + }; + } + + pub inline fn initTranslationRotation(t: vm.Vector2x8, c: vm.Complex_x8) Matrix3x2x8 { + return 
.{ + // zig fmt: off + .ix = c.re, .iy = c.im, + .jx = -c.im, .jy = c.re, + .tx = t.x, .ty = t.y, + // zig fmt: on + }; + } + + pub inline fn initTranslationRotationSingle(t: vm.Vector2, c: vm.Complex) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = vm.ps( c.re), .iy = vm.ps(c.im), + .jx = vm.ps(-c.im), .jy = vm.ps(c.re), + .tx = vm.ps( t.x), .ty = vm.ps(t.y), + // zig fmt: on + }; + } + + pub inline fn initTranslationScale(t: vm.Vector2x8, s: vm.Vector2x8) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = s.x, .iy = vm.ps(0), + .jx = vm.ps(0), .jy = s.y, + .tx = t.x, .ty = t.y, + // zig fmt: on + }; + } + + pub inline fn initTranslationScaleSingle(t: vm.Vector2, s: vm.Vector2) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = vm.ps(s.x), .iy = vm.ps(0), + .jx = vm.ps(0), .jy = vm.ps(s.y), + .tx = vm.ps(t.x), .ty = vm.ps(t.y), + // zig fmt: on + }; + } + + pub inline fn initTranslationRotationScale(t: vm.Vector2x8, c: vm.Complex_x8, s: vm.Vector2x8) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = s.x * c.re, .iy = s.x * c.im, + .jx = s.y * -c.im, .jy = s.y * c.re, + .tx = t.x, .ty = t.y, + // zig fmt: on + }; + } + + pub inline fn initTranslationRotationScaleSingle(t: vm.Vector2, c: vm.Complex, s: vm.Vector2) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = vm.ps(s.x * c.re), .iy = vm.ps(s.x * c.im), + .jx = vm.ps(s.y * -c.im), .jy = vm.ps(s.y * c.re), + .tx = vm.ps(t.x), .ty = vm.ps(t.y), + // zig fmt: on + }; + } + + pub inline fn initArrayOfMatrices(matrices: [8]vm.Matrix3x2) Matrix3x2x8 { + const vector: @Vector(48, f32) = @as([48]f32, @bitCast(matrices)); + return .{ + .ix = @shuffle(f32, vector, undefined, [_]i32{ 0, 6, 12, 18, 24, 30, 36, 42 }), + .iy = @shuffle(f32, vector, undefined, [_]i32{ 1, 7, 13, 19, 25, 31, 37, 43 }), + .jx = @shuffle(f32, vector, undefined, [_]i32{ 2, 8, 14, 20, 26, 32, 38, 44 }), + .jy = @shuffle(f32, vector, undefined, [_]i32{ 3, 9, 15, 21, 27, 33, 39, 45 }), + .tx = @shuffle(f32, vector, undefined, [_]i32{ 4, 10, 
16, 22, 28, 34, 40, 46 }), + .ty = @shuffle(f32, vector, undefined, [_]i32{ 5, 11, 17, 23, 29, 35, 41, 47 }), + }; + } + + pub inline fn splat(matrix: vm.Matrix3x2) Matrix3x2x8 { + return .{ + // zig fmt: off + .ix = vm.ps(matrix.ix), .iy = vm.ps(matrix.iy), + .jx = vm.ps(matrix.jx), .jy = vm.ps(matrix.jy), + .tx = vm.ps(matrix.tx), .ty = vm.ps(matrix.ty), + // zig fmt: on + }; + } + + // --- CONVERSION ---------------------------------------------------------- + + pub inline fn asArrayOfMatrices(self: Matrix3x2x8) [8]vm.Matrix3x2 { + const vector: @Vector(48, f32) = self.ix ++ self.iy ++ self.jx ++ self.jy ++ self.tx ++ self.ty; + return @bitCast(@as([48]f32, @shuffle(f32, vector, undefined, [_]i32{ + 0, 8, 16, 24, 32, 40, + 1, 9, 17, 25, 33, 41, + 2, 10, 18, 26, 34, 42, + 3, 11, 19, 27, 35, 43, + 4, 12, 20, 28, 36, 44, + 5, 13, 21, 29, 37, 45, + 6, 14, 22, 30, 38, 46, + 7, 15, 23, 31, 39, 47, + }))); + } + + pub inline fn unpack(self: Matrix3x2x8) [6]vm.f32x8 { + return .{ self.ix, self.iy, self.jx, self.jy, self.tx, self.ty }; + } + + // --- LOAD AND STORE ------------------------------------------------------ + + pub inline fn loadArrayOfMatrices(self: *Matrix3x2x8, array: *const [8]vm.Matrix3x2) void { + const vector: @Vector(48, f32) = @as(*const [48]f32, @ptrCast(array)).*; + self.ix = @shuffle(f32, vector, undefined, [_]i32{ 0, 6, 12, 18, 24, 30, 36, 42 }); + self.iy = @shuffle(f32, vector, undefined, [_]i32{ 1, 7, 13, 19, 25, 31, 37, 43 }); + self.jx = @shuffle(f32, vector, undefined, [_]i32{ 2, 8, 14, 20, 26, 32, 38, 44 }); + self.jy = @shuffle(f32, vector, undefined, [_]i32{ 3, 9, 15, 21, 27, 33, 39, 45 }); + self.tx = @shuffle(f32, vector, undefined, [_]i32{ 4, 10, 16, 22, 28, 34, 40, 46 }); + self.ty = @shuffle(f32, vector, undefined, [_]i32{ 5, 11, 17, 23, 29, 35, 41, 47 }); + } + + pub inline fn storeArrayOfMatrices(self: *const Matrix3x2x8, array: *[8]vm.Matrix3x2) void { + const vector: @Vector(48, f32) = self.ix ++ self.iy ++ self.jx ++ self.jy ++ 
self.tx ++ self.ty; + @as(*[48]f32, @ptrCast(array)).* = @shuffle(f32, vector, undefined, [_]i32{ + 0, 8, 16, 24, 32, 40, + 1, 9, 17, 25, 33, 41, + 2, 10, 18, 26, 34, 42, + 3, 11, 19, 27, 35, 43, + 4, 12, 20, 28, 36, 44, + 5, 13, 21, 29, 37, 45, + 6, 14, 22, 30, 38, 46, + 7, 15, 23, 31, 39, 47, + }); + } + + // --- ACCESSORS ----------------------------------------------------------- + + pub inline fn getIVersor(self: Matrix3x2x8) vm.Vector2x8 { + return .{ .x = self.ix, .y = self.iy }; + } + + pub inline fn getJVersor(self: Matrix3x2x8) vm.Vector2x8 { + return .{ .x = self.jx, .y = self.jy }; + } + + pub inline fn getTranslationVector(self: Matrix3x2x8) vm.Vector2x8 { + return .{ .x = self.tx, .y = self.ty }; + } + + // --- COMPONENT-WISE ------------------------------------------------------ + + pub inline fn add(self: Matrix3x2x8, other: Matrix3x2x8) Matrix3x2x8 { + return .{ + .ix = self.ix + other.ix, + .iy = self.iy + other.iy, + .jx = self.jx + other.jx, + .jy = self.jy + other.jy, + .tx = self.tx + other.tx, + .ty = self.ty + other.ty, + }; + } + + pub inline fn sub(self: Matrix3x2x8, other: Matrix3x2x8) Matrix3x2x8 { + return .{ + .ix = self.ix - other.ix, + .iy = self.iy - other.iy, + .jx = self.jx - other.jx, + .jy = self.jy - other.jy, + .tx = self.tx - other.tx, + .ty = self.ty - other.ty, + }; + } + + pub inline fn mulScalar(self: Matrix3x2x8, scalar: vm.f32x8) Matrix3x2x8 { + return .{ + .ix = self.ix * scalar, + .iy = self.iy * scalar, + .jx = self.jx * scalar, + .jy = self.jy * scalar, + .tx = self.tx * scalar, + .ty = self.ty * scalar, + }; + } + + pub inline fn mulScalarSingle(self: Matrix3x2x8, scalar: f32) Matrix3x2x8 { + return .{ + .ix = self.ix * vm.ps(scalar), + .iy = self.iy * vm.ps(scalar), + .jx = self.jx * vm.ps(scalar), + .jy = self.jy * vm.ps(scalar), + .tx = self.tx * vm.ps(scalar), + .ty = self.ty * vm.ps(scalar), + }; + } + + pub inline fn divScalar(self: Matrix3x2x8, scalar: vm.f32x8) Matrix3x2x8 { + return .{ + .ix = self.ix / scalar, 
+ .iy = self.iy / scalar, + .jx = self.jx / scalar, + .jy = self.jy / scalar, + .tx = self.tx / scalar, + .ty = self.ty / scalar, + }; + } + + pub inline fn divScalarSingle(self: Matrix3x2x8, scalar: f32) Matrix3x2x8 { + return .{ + .ix = self.ix / vm.ps(scalar), + .iy = self.iy / vm.ps(scalar), + .jx = self.jx / vm.ps(scalar), + .jy = self.jy / vm.ps(scalar), + .tx = self.tx / vm.ps(scalar), + .ty = self.ty / vm.ps(scalar), + }; + } + + // --- TRANSFORM ----------------------------------------------------------- + + // TODO Move to vm.Vector2x8 + pub inline fn transformPoint(self: Matrix3x2x8, p: vm.Vector2x8) vm.Vector2x8 { + return .{ + .x = p.x * self.ix + p.y * self.jx + self.tx, + .y = p.x * self.iy + p.y * self.jy + self.ty, + }; + } + + // TODO Move to vm.Vector2x8 + pub inline fn transformPointSingle(self: Matrix3x2x8, p: vm.Vector2) vm.Vector2x8 { + return .{ + .x = vm.ps(p.x) * self.ix + vm.ps(p.y) * self.jx + self.tx, + .y = vm.ps(p.x) * self.iy + vm.ps(p.y) * self.jy + self.ty, + }; + } + + // TODO Move to vm.Vector2x8 + pub inline fn transformVector(self: Matrix3x2x8, v: vm.Vector2x8) vm.Vector2x8 { + return .{ + .x = v.x * self.ix + v.y * self.jx, + .y = v.x * self.iy + v.y * self.jy, + }; + } + + // TODO Move to vm.Vector2x8 + pub inline fn transformVectorSingle(self: Matrix3x2x8, v: vm.Vector2) vm.Vector2x8 { + return .{ + .x = vm.ps(v.x) * self.ix + vm.ps(v.y) * self.jx, + .y = vm.ps(v.x) * self.iy + vm.ps(v.y) * self.jy, + }; + } + + // --- COMPOSE ------------------------------------------------------------- + + pub inline fn mulMatrix(self: Matrix3x2x8, other: Matrix3x2x8) Matrix3x2x8 { + return .{ + .ix = other.ix * self.ix + other.iy * self.jx, + .iy = other.ix * self.iy + other.iy * self.jy, + .jx = other.jx * self.ix + other.jy * self.jx, + .jy = other.jx * self.iy + other.jy * self.jy, + .tx = other.tx * self.ix + other.ty * self.jx + self.tx, + .ty = other.tx * self.iy + other.ty * self.jy + self.ty, + }; + } + + pub inline fn 
mulMatrixSingle(self: Matrix3x2x8, other: vm.Matrix3x2) Matrix3x2x8 { + return .{ + .ix = vm.ps(other.ix) * self.ix + vm.ps(other.iy) * self.jx, + .iy = vm.ps(other.ix) * self.iy + vm.ps(other.iy) * self.jy, + .jx = vm.ps(other.jx) * self.ix + vm.ps(other.jy) * self.jx, + .jy = vm.ps(other.jx) * self.iy + vm.ps(other.jy) * self.jy, + .tx = vm.ps(other.tx) * self.ix + vm.ps(other.ty) * self.jx + self.tx, + .ty = vm.ps(other.tx) * self.iy + vm.ps(other.ty) * self.jy + self.ty, + }; + } + + pub inline fn premulMatrixSingle(self: Matrix3x2x8, other: vm.Matrix3x2) Matrix3x2x8 { + return .{ + .ix = self.ix * vm.ps(other.ix) + self.iy * vm.ps(other.jx), + .iy = self.ix * vm.ps(other.iy) + self.iy * vm.ps(other.jy), + .jx = self.jx * vm.ps(other.ix) + self.jy * vm.ps(other.jx), + .jy = self.jx * vm.ps(other.iy) + self.jy * vm.ps(other.jy), + .tx = self.tx * vm.ps(other.ix) + self.ty * vm.ps(other.jx) + vm.ps(other.tx), + .ty = self.tx * vm.ps(other.iy) + self.ty * vm.ps(other.jy) + vm.ps(other.ty), + }; + } + + pub inline fn inverseOrthonormal(self: Matrix3x2x8) Matrix3x2x8 { + std.debug.assert(@reduce(.And, self.ix == self.jy) and @reduce(.And, self.iy == -self.jx)); + const ix = self.ix; + const iy = self.jx; + const jx = self.iy; + const jy = self.jy; + return .{ + .ix = ix, + .iy = iy, + .jx = jx, + .jy = jy, + .tx = -(self.tx * ix + self.ty * jx), + .ty = -(self.tx * iy + self.ty * jy), + }; + } + + pub inline fn inverseAffine(self: Matrix3x2x8) Matrix3x2x8 { + const inv_det = vm.ps(1.0) / (self.ix * self.jy - self.jx * self.iy); + const ix = self.jy; + const iy = -self.iy; + const jx = -self.jx; + const jy = self.ix; + return .{ + .ix = inv_det * ix, + .iy = inv_det * iy, + .jx = inv_det * jx, + .jy = inv_det * jy, + .tx = -inv_det * (self.tx * ix + self.ty * jx), + .ty = -inv_det * (self.tx * iy + self.ty * jy), + }; + } +}; diff --git a/packages/vecmath/src/matrices/Matrix4x4.zig b/packages/vecmath/src/matrices/Matrix4x4.zig index 19d1518..615ad81 100644 --- 
a/packages/vecmath/src/matrices/Matrix4x4.zig +++ b/packages/vecmath/src/matrices/Matrix4x4.zig @@ -1,39 +1,68 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Matrix4x4 = extern struct { - ix: f32, - iy: f32, - iz: f32, - iw: f32, - jx: f32, - jy: f32, - jz: f32, - jw: f32, - kx: f32, - ky: f32, - kz: f32, - kw: f32, - tx: f32, - ty: f32, - tz: f32, - tw: f32, + // zig fmt: off + ix: f32, iy: f32, iz: f32, iw: f32, + jx: f32, jy: f32, jz: f32, jw: f32, + kx: f32, ky: f32, kz: f32, kw: f32, + tx: f32, ty: f32, tz: f32, tw: f32, + // zig fmt: on pub const Array = [16]f32; - pub const identity = init(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); + pub const identity = init( + // zig fmt: off + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1, + // zig fmt: on + ); - // --- INIT --- + // --- INIT ---------------------------------------------------------------- - pub inline fn init(ix: f32, iy: f32, iz: f32, iw: f32, jx: f32, jy: f32, jz: f32, jw: f32, kx: f32, ky: f32, kz: f32, kw: f32, tx: f32, ty: f32, tz: f32, tw: f32) Matrix4x4 { - return .{ .ix = ix, .iy = iy, .iz = iz, .iw = iw, .jx = jx, .jy = jy, .jz = jz, .jw = jw, .kx = kx, .ky = ky, .kz = kz, .kw = kw, .tx = tx, .ty = ty, .tz = tz, .tw = tw }; + pub inline fn init( + // zig fmt: off + ix: f32, iy: f32, iz: f32, iw: f32, + jx: f32, jy: f32, jz: f32, jw: f32, + kx: f32, ky: f32, kz: f32, kw: f32, + tx: f32, ty: f32, tz: f32, tw: f32, + // zig fmt: on + ) Matrix4x4 { + return .{ + // zig fmt: off + .ix = ix, .iy = iy, .iz = iz, .iw = iw, + .jx = jx, .jy = jy, .jz = jz, .jw = jw, + .kx = kx, .ky = ky, .kz = kz, .kw = kw, + .tx = tx, .ty = ty, .tz = tz, .tw = tw, + // zig fmt: on + }; } - pub inline fn initTranslation(t: Vector3) Matrix4x4 { - return .{ .ix = 1, .iy = 0, .iz = 0, .iw = 0, .jx = 0, .jy = 1, .jz = 0, .jw = 0, .kx = 0, .ky = 0, .kz = 1, .kw = 0, .tx = t.x, .ty = t.y, .tz = t.z, .tw = 1 }; + pub inline fn initVersors(i: vm.Vector4, j: 
vm.Vector4, k: vm.Vector4, t: vm.Vector4) Matrix4x4 { + return .{ + // zig fmt: off + .ix = i.x, .iy = i.y, .iz = i.z, .iw = i.w, + .jx = j.x, .jy = j.y, .jz = j.z, .jw = j.w, + .kx = k.x, .ky = k.y, .kz = k.z, .kw = k.w, + .tx = t.x, .ty = t.y, .tz = t.z, .tw = t.w, + // zig fmt: on + }; } - pub inline fn initRotation(q: Quaternion) Matrix4x4 { + pub inline fn initTranslation(t: vm.Vector3) Matrix4x4 { + return .{ + // zig fmt: off + .ix = 1, .iy = 0, .iz = 0, .iw = 0, + .jx = 0, .jy = 1, .jz = 0, .jw = 0, + .kx = 0, .ky = 0, .kz = 1, .kw = 0, + .tx = t.x, .ty = t.y, .tz = t.z, .tw = 1, + // zig fmt: on + }; + } + + pub inline fn initRotation(q: vm.Quaternion) Matrix4x4 { const xx = q.x * q.x; const xy = q.x * q.y; const xz = q.x * q.z; @@ -44,34 +73,198 @@ pub const Matrix4x4 = extern struct { const zz = q.z * q.z; const zw = q.z * q.w; - return .{ .ix = 1 - 2 * (yy + zz), .jx = 2 * (xy + zw), .kx = 2 * (xz - yw), .tx = 0, .iy = 2 * (xy - zw), .jy = 1 - 2 * (xx + zz), .ky = 2 * (yz + xw), .ty = 0, .iz = 2 * (xz + yw), .jz = 2 * (yz - xw), .kz = 1 - 2 * (xx + yy), .tz = 0, .iw = 0, .jw = 0, .kw = 0, .tw = 1 }; + return .{ + // zig fmt: off + .ix = 1 - 2 * (yy + zz), .iy = 2 * (xy - zw), .iz = 2 * (xz + yw), .iw = 0, + .jx = 2 * (xy + zw), .jy = 1 - 2 * (xx + zz), .jz = 2 * (yz - xw), .jw = 0, + .kx = 2 * (xz - yw), .ky = 2 * (yz + xw), .kz = 1 - 2 * (xx + yy), .kw = 0, + .tx = 0, .ty = 0, .tz = 0, .tw = 1, + // zig fmt: on + }; } - pub inline fn initScale(s: Vector3) Matrix4x4 { - return .{ .ix = s.x, .iy = 0, .iz = 0, .iw = 0, .jx = 0, .jy = s.y, .jz = 0, .jw = 0, .kx = 0, .ky = 0, .kz = s.z, .kw = 0, .tx = 0, .ty = 0, .tz = 0, .tw = 1 }; + pub inline fn initScale(s: vm.Vector3) Matrix4x4 { + return .{ + // zig fmt: off + .ix = s.x, .iy = 0, .iz = 0, .iw = 0, + .jx = 0, .jy = s.y, .jz = 0, .jw = 0, + .kx = 0, .ky = 0, .kz = s.z, .kw = 0, + .tx = 0, .ty = 0, .tz = 0, .tw = 1, + // zig fmt: on + }; + } + + pub inline fn initTranslationRotation(t: vm.Vector3, q: 
vm.Quaternion) Matrix4x4 { + const xx = q.x * q.x; + const xy = q.x * q.y; + const xz = q.x * q.z; + const xw = q.x * q.w; + const yy = q.y * q.y; + const yz = q.y * q.z; + const yw = q.y * q.w; + const zz = q.z * q.z; + const zw = q.z * q.w; + + return .{ + // zig fmt: off + .ix = 1 - 2 * (yy + zz), .iy = 2 * (xy - zw), .iz = 2 * (xz + yw), .iw = 0, + .jx = 2 * (xy + zw), .jy = 1 - 2 * (xx + zz), .jz = 2 * (yz - xw), .jw = 0, + .kx = 2 * (xz - yw), .ky = 2 * (yz + xw), .kz = 1 - 2 * (xx + yy), .kw = 0, + .tx = t.x, .ty = t.y, .tz = t.z, .tw = 1, + // zig fmt: on + }; + } + + pub inline fn initTranslationScale(t: vm.Vector3, s: vm.Vector3) Matrix4x4 { + return .{ + // zig fmt: off + .ix = s.x, .iy = 0, .iz = 0, .iw = 0, + .jx = 0, .jy = s.y, .jz = 0, .jw = 0, + .kx = 0, .ky = 0, .kz = s.z, .kw = 0, + .tx = t.x, .ty = t.y, .tz = t.z, .tw = 1, + // zig fmt: on + }; + } + + pub inline fn initTranslationRotationScale(t: vm.Vector3, q: vm.Quaternion, s: vm.Vector3) Matrix4x4 { + const xx = q.x * q.x; + const xy = q.x * q.y; + const xz = q.x * q.z; + const xw = q.x * q.w; + const yy = q.y * q.y; + const yz = q.y * q.z; + const yw = q.y * q.w; + const zz = q.z * q.z; + const zw = q.z * q.w; + + return .{ + // zig fmt: off + .ix = s.x * (1 - 2 * (yy + zz)), .iy = s.x * 2 * (xy - zw), .iz = s.x * 2 * (xz + yw), .iw = 0, + .jx = s.y * 2 * (xy + zw), .jy = s.y * (1 - 2 * (xx + zz)), .jz = s.y * 2 * (yz - xw), .jw = 0, + .kx = s.z * 2 * (xz - yw), .ky = s.z * 2 * (yz + xw), .kz = s.z * (1 - 2 * (xx + yy)), .kw = 0, + .tx = t.x, .ty = t.y, .tz = t.z, .tw = 1, + // zig fmt: on + }; } pub inline fn initArray(array: Array) Matrix4x4 { return @bitCast(array); } - // --- CONVERSION --- + // --- CONVERSION ---------------------------------------------------------- pub inline fn asArray(self: Matrix4x4) Array { return @bitCast(self); } - pub inline fn asArrayPtr(self: *Matrix4x4) *Array { - return @ptrCast(self); + // --- ACCESSORS 
----------------------------------------------------------- + + pub inline fn getIVersor(self: Matrix4x4) vm.Vector4 { + return .{ .x = self.ix, .y = self.iy, .z = self.iz, .w = self.iw }; } - pub inline fn asArrayConstPtr(self: *const Matrix4x4) *const Array { - return @ptrCast(self); + pub inline fn getJVersor(self: Matrix4x4) vm.Vector4 { + return .{ .x = self.jx, .y = self.jy, .z = self.jz, .w = self.jw }; } - // --- TRANSFORM --- + pub inline fn getKVersor(self: Matrix4x4) vm.Vector4 { + return .{ .x = self.kx, .y = self.ky, .z = self.kz, .w = self.kw }; + } - pub inline fn transformPoint(self: Matrix4x4, p: Vector3) Vector3 { + pub inline fn getTranslationVector(self: Matrix4x4) vm.Vector4 { + return .{ .x = self.tx, .y = self.ty, .z = self.tz, .w = self.tw }; + } + + // --- COMPONENT-WISE ------------------------------------------------------ + + pub inline fn add(self: Matrix4x4, other: Matrix4x4) Matrix4x4 { + return .{ + .ix = self.ix + other.ix, + .iy = self.iy + other.iy, + .iz = self.iz + other.iz, + .iw = self.iw + other.iw, + .jx = self.jx + other.jx, + .jy = self.jy + other.jy, + .jz = self.jz + other.jz, + .jw = self.jw + other.jw, + .kx = self.kx + other.kx, + .ky = self.ky + other.ky, + .kz = self.kz + other.kz, + .kw = self.kw + other.kw, + .tx = self.tx + other.tx, + .ty = self.ty + other.ty, + .tz = self.tz + other.tz, + .tw = self.tw + other.tw, + }; + } + + pub inline fn sub(self: Matrix4x4, other: Matrix4x4) Matrix4x4 { + return .{ + .ix = self.ix - other.ix, + .iy = self.iy - other.iy, + .iz = self.iz - other.iz, + .iw = self.iw - other.iw, + .jx = self.jx - other.jx, + .jy = self.jy - other.jy, + .jz = self.jz - other.jz, + .jw = self.jw - other.jw, + .kx = self.kx - other.kx, + .ky = self.ky - other.ky, + .kz = self.kz - other.kz, + .kw = self.kw - other.kw, + .tx = self.tx - other.tx, + .ty = self.ty - other.ty, + .tz = self.tz - other.tz, + .tw = self.tw - other.tw, + }; + } + + pub inline fn mulScalar(self: Matrix4x4, scalar: f32) 
Matrix4x4 { + return .{ + .ix = self.ix * scalar, + .iy = self.iy * scalar, + .iz = self.iz * scalar, + .iw = self.iw * scalar, + .jx = self.jx * scalar, + .jy = self.jy * scalar, + .jz = self.jz * scalar, + .jw = self.jw * scalar, + .kx = self.kx * scalar, + .ky = self.ky * scalar, + .kz = self.kz * scalar, + .kw = self.kw * scalar, + .tx = self.tx * scalar, + .ty = self.ty * scalar, + .tz = self.tz * scalar, + .tw = self.tw * scalar, + }; + } + + pub inline fn divScalar(self: Matrix4x4, scalar: f32) Matrix4x4 { + return .{ + .ix = self.ix / scalar, + .iy = self.iy / scalar, + .iz = self.iz / scalar, + .iw = self.iw / scalar, + .jx = self.jx / scalar, + .jy = self.jy / scalar, + .jz = self.jz / scalar, + .jw = self.jw / scalar, + .kx = self.kx / scalar, + .ky = self.ky / scalar, + .kz = self.kz / scalar, + .kw = self.kw / scalar, + .tx = self.tx / scalar, + .ty = self.ty / scalar, + .tz = self.tz / scalar, + .tw = self.tw / scalar, + }; + } + + // --- TRANSFORM ----------------------------------------------------------- + + // TODO Move to vm.Vector3 + pub inline fn transformPoint(self: Matrix4x4, p: vm.Vector3) vm.Vector3 { return .{ .x = p.x * self.ix + p.y * self.jx + p.z * self.kx + self.tx, .y = p.x * self.iy + p.y * self.jy + p.z * self.ky + self.ty, @@ -79,15 +272,17 @@ pub const Matrix4x4 = extern struct { }; } - pub inline fn transformPoint_x8(self: Matrix4x4, p: Vector3x8) Vector3x8 { + // TODO Move to vm.Vector3x8 + pub inline fn transformPoint_x8(self: Matrix4x4, p: vm.Vector3x8) vm.Vector3x8 { return .{ - .x = p.x * ps(self.ix) + p.y * ps(self.jx) + p.z * ps(self.kx) + ps(self.tx), - .y = p.x * ps(self.iy) + p.y * ps(self.jy) + p.z * ps(self.ky) + ps(self.ty), - .z = p.x * ps(self.iz) + p.y * ps(self.jz) + p.z * ps(self.kz) + ps(self.tz), + .x = p.x * vm.ps(self.ix) + p.y * vm.ps(self.jx) + p.z * vm.ps(self.kx) + vm.ps(self.tx), + .y = p.x * vm.ps(self.iy) + p.y * vm.ps(self.jy) + p.z * vm.ps(self.ky) + vm.ps(self.ty), + .z = p.x * vm.ps(self.iz) + 
p.y * vm.ps(self.jz) + p.z * vm.ps(self.kz) + vm.ps(self.tz), }; } - pub inline fn transformVector(self: Matrix4x4, v: Vector3) Vector3 { + // TODO Move to vm.Vector3 + pub inline fn transformVector(self: Matrix4x4, v: vm.Vector3) vm.Vector3 { return .{ .x = v.x * self.ix + v.y * self.jx + v.z * self.kx, .y = v.x * self.iy + v.y * self.jy + v.z * self.ky, @@ -95,15 +290,17 @@ pub const Matrix4x4 = extern struct { }; } - pub inline fn transformVector_x8(self: Matrix4x4, v: Vector3x8) Vector3x8 { + // TODO Move to vm.Vector3x8 + pub inline fn transformVector_x8(self: Matrix4x4, v: vm.Vector3x8) vm.Vector3x8 { return .{ - .x = v.x * ps(self.ix) + v.y * ps(self.jx) + v.z * ps(self.kx), - .y = v.x * ps(self.iy) + v.y * ps(self.jy) + v.z * ps(self.ky), - .z = v.x * ps(self.iz) + v.y * ps(self.jz) + v.z * ps(self.kz), + .x = v.x * vm.ps(self.ix) + v.y * vm.ps(self.jx) + v.z * vm.ps(self.kx), + .y = v.x * vm.ps(self.iy) + v.y * vm.ps(self.jy) + v.z * vm.ps(self.ky), + .z = v.x * vm.ps(self.iz) + v.y * vm.ps(self.jz) + v.z * vm.ps(self.kz), }; } - pub inline fn transformHomogeneous(self: Matrix4x4, h: Vector4) Vector4 { + // TODO Move to vm.Vector4 + pub inline fn transformHomogeneous(self: Matrix4x4, h: vm.Vector4) vm.Vector4 { return .{ .x = h.x * self.ix + h.y * self.jx + h.z * self.kx + h.w * self.tx, .y = h.x * self.iy + h.y * self.jy + h.z * self.ky + h.w * self.ty, @@ -112,12 +309,293 @@ pub const Matrix4x4 = extern struct { }; } - pub inline fn transformHomogeneous_x8(self: Matrix4x4, h: Vector4x8) Vector4x8 { + // TODO Move to vm.Vector4x8 + pub inline fn transformHomogeneous_x8(self: Matrix4x4, h: vm.Vector4x8) vm.Vector4x8 { return .{ - .x = h.x * ps(self.ix) + h.y * ps(self.jx) + h.z * ps(self.kx) + h.w * ps(self.tx), - .y = h.x * ps(self.iy) + h.y * ps(self.jy) + h.z * ps(self.ky) + h.w * ps(self.ty), - .z = h.x * ps(self.iz) + h.y * ps(self.jz) + h.z * ps(self.kz) + h.w * ps(self.tz), - .w = h.x * ps(self.iw) + h.y * ps(self.jw) + h.z * ps(self.kw) + h.w * 
ps(self.tw), + .x = h.x * vm.ps(self.ix) + h.y * vm.ps(self.jx) + h.z * vm.ps(self.kx) + h.w * vm.ps(self.tx), + .y = h.x * vm.ps(self.iy) + h.y * vm.ps(self.jy) + h.z * vm.ps(self.ky) + h.w * vm.ps(self.ty), + .z = h.x * vm.ps(self.iz) + h.y * vm.ps(self.jz) + h.z * vm.ps(self.kz) + h.w * vm.ps(self.tz), + .w = h.x * vm.ps(self.iw) + h.y * vm.ps(self.jw) + h.z * vm.ps(self.kw) + h.w * vm.ps(self.tw), + }; + } + + // --- COMPOSE ------------------------------------------------------------- + + /// Returns the product self · other, i.e. the transform that applies `other` first and `self` second. The caller asserts that W rows of all matrices are equal to [0 0 0 1]. + pub fn mulMatrixAffine(self: Matrix4x4, other: Matrix4x4) Matrix4x4 { + std.debug.assert(self.iw == 0 and self.jw == 0 and self.kw == 0 and self.tw == 1); + std.debug.assert(other.iw == 0 and other.jw == 0 and other.kw == 0 and other.tw == 1); + return .{ + .ix = other.ix * self.ix + other.iy * self.jx + other.iz * self.kx, + .iy = other.ix * self.iy + other.iy * self.jy + other.iz * self.ky, + .iz = other.ix * self.iz + other.iy * self.jz + other.iz * self.kz, + .iw = 0, + .jx = other.jx * self.ix + other.jy * self.jx + other.jz * self.kx, + .jy = other.jx * self.iy + other.jy * self.jy + other.jz * self.ky, + .jz = other.jx * self.iz + other.jy * self.jz + other.jz * self.kz, + .jw = 0, + .kx = other.kx * self.ix + other.ky * self.jx + other.kz * self.kx, + .ky = other.kx * self.iy + other.ky * self.jy + other.kz * self.ky, + .kz = other.kx * self.iz + other.ky * self.jz + other.kz * self.kz, + .kw = 0, + .tx = other.tx * self.ix + other.ty * self.jx + other.tz * self.kx + self.tx, // other's translation transformed as a point by self (same as mulMatrixFull with other.tw == 1) + .ty = other.tx * self.iy + other.ty * self.jy + other.tz * self.ky + self.ty, + .tz = other.tx * self.iz + other.ty * self.jz + other.tz * self.kz + self.tz, + .tw = 1, + }; + } + + // TODO Move to vm.Matrix4x4x8 + /// The caller asserts that W rows of all matrices are equal to [0 0 0 1]. 
+ pub fn mulMatrixAffine_x8(self: Matrix4x4, other: vm.Matrix4x4x8) vm.Matrix4x4x8 { + std.debug.assert(self.iw == 0 and self.jw == 0 and self.kw == 0 and self.tw == 1); + std.debug.assert(@reduce(.And, (other.iw == vm.ps(0)) & (other.jw == vm.ps(0)) & (other.kw == vm.ps(0)) & (other.tw == vm.ps(1)))); + return .{ + .ix = other.ix * vm.ps(self.ix) + other.iy * vm.ps(self.jx) + other.iz * vm.ps(self.kx), + .iy = other.ix * vm.ps(self.iy) + other.iy * vm.ps(self.jy) + other.iz * vm.ps(self.ky), + .iz = other.ix * vm.ps(self.iz) + other.iy * vm.ps(self.jz) + other.iz * vm.ps(self.kz), + .iw = vm.ps(0), + .jx = other.jx * vm.ps(self.ix) + other.jy * vm.ps(self.jx) + other.jz * vm.ps(self.kx), + .jy = other.jx * vm.ps(self.iy) + other.jy * vm.ps(self.jy) + other.jz * vm.ps(self.ky), + .jz = other.jx * vm.ps(self.iz) + other.jy * vm.ps(self.jz) + other.jz * vm.ps(self.kz), + .jw = vm.ps(0), + .kx = other.kx * vm.ps(self.ix) + other.ky * vm.ps(self.jx) + other.kz * vm.ps(self.kx), + .ky = other.kx * vm.ps(self.iy) + other.ky * vm.ps(self.jy) + other.kz * vm.ps(self.ky), + .kz = other.kx * vm.ps(self.iz) + other.ky * vm.ps(self.jz) + other.kz * vm.ps(self.kz), + .kw = vm.ps(0), + .tx = other.tx * vm.ps(self.ix) + other.ty * vm.ps(self.jx) + other.tz * vm.ps(self.kx) + vm.ps(self.tx), // other's translation transformed as a point by self (same as mulMatrixFull_x8 with other.tw == 1) + .ty = other.tx * vm.ps(self.iy) + other.ty * vm.ps(self.jy) + other.tz * vm.ps(self.ky) + vm.ps(self.ty), + .tz = other.tx * vm.ps(self.iz) + other.ty * vm.ps(self.jz) + other.tz * vm.ps(self.kz) + vm.ps(self.tz), + .tw = vm.ps(1), + }; + } + + pub fn mulMatrixFull(self: Matrix4x4, other: Matrix4x4) Matrix4x4 { + return .{ + .ix = other.ix * self.ix + other.iy * self.jx + other.iz * self.kx + other.iw * self.tx, + .iy = other.ix * self.iy + other.iy * self.jy + other.iz * self.ky + other.iw * self.ty, + .iz = other.ix * self.iz + other.iy * self.jz + other.iz * self.kz + other.iw * self.tz, + .iw = other.ix * self.iw + other.iy * self.jw + other.iz * self.kw + other.iw * self.tw, + .jx = 
other.jx * self.ix + other.jy * self.jx + other.jz * self.kx + other.jw * self.tx, + .jy = other.jx * self.iy + other.jy * self.jy + other.jz * self.ky + other.jw * self.ty, + .jz = other.jx * self.iz + other.jy * self.jz + other.jz * self.kz + other.jw * self.tz, + .jw = other.jx * self.iw + other.jy * self.jw + other.jz * self.kw + other.jw * self.tw, + .kx = other.kx * self.ix + other.ky * self.jx + other.kz * self.kx + other.kw * self.tx, + .ky = other.kx * self.iy + other.ky * self.jy + other.kz * self.ky + other.kw * self.ty, + .kz = other.kx * self.iz + other.ky * self.jz + other.kz * self.kz + other.kw * self.tz, + .kw = other.kx * self.iw + other.ky * self.jw + other.kz * self.kw + other.kw * self.tw, + .tx = other.tx * self.ix + other.ty * self.jx + other.tz * self.kx + other.tw * self.tx, + .ty = other.tx * self.iy + other.ty * self.jy + other.tz * self.ky + other.tw * self.ty, + .tz = other.tx * self.iz + other.ty * self.jz + other.tz * self.kz + other.tw * self.tz, + .tw = other.tx * self.iw + other.ty * self.jw + other.tz * self.kw + other.tw * self.tw, + }; + } + + // TODO Move to vm.Matrix4x4x8 + pub fn mulMatrixFull_x8(self: Matrix4x4, other: vm.Matrix4x4x8) vm.Matrix4x4x8 { + return .{ + .ix = other.ix * vm.ps(self.ix) + other.iy * vm.ps(self.jx) + other.iz * vm.ps(self.kx) + other.iw * vm.ps(self.tx), + .iy = other.ix * vm.ps(self.iy) + other.iy * vm.ps(self.jy) + other.iz * vm.ps(self.ky) + other.iw * vm.ps(self.ty), + .iz = other.ix * vm.ps(self.iz) + other.iy * vm.ps(self.jz) + other.iz * vm.ps(self.kz) + other.iw * vm.ps(self.tz), + .iw = other.ix * vm.ps(self.iw) + other.iy * vm.ps(self.jw) + other.iz * vm.ps(self.kw) + other.iw * vm.ps(self.tw), + .jx = other.jx * vm.ps(self.ix) + other.jy * vm.ps(self.jx) + other.jz * vm.ps(self.kx) + other.jw * vm.ps(self.tx), + .jy = other.jx * vm.ps(self.iy) + other.jy * vm.ps(self.jy) + other.jz * vm.ps(self.ky) + other.jw * vm.ps(self.ty), + .jz = other.jx * vm.ps(self.iz) + other.jy * vm.ps(self.jz) 
+ other.jz * vm.ps(self.kz) + other.jw * vm.ps(self.tz), + .jw = other.jx * vm.ps(self.iw) + other.jy * vm.ps(self.jw) + other.jz * vm.ps(self.kw) + other.jw * vm.ps(self.tw), + .kx = other.kx * vm.ps(self.ix) + other.ky * vm.ps(self.jx) + other.kz * vm.ps(self.kx) + other.kw * vm.ps(self.tx), + .ky = other.kx * vm.ps(self.iy) + other.ky * vm.ps(self.jy) + other.kz * vm.ps(self.ky) + other.kw * vm.ps(self.ty), + .kz = other.kx * vm.ps(self.iz) + other.ky * vm.ps(self.jz) + other.kz * vm.ps(self.kz) + other.kw * vm.ps(self.tz), + .kw = other.kx * vm.ps(self.iw) + other.ky * vm.ps(self.jw) + other.kz * vm.ps(self.kw) + other.kw * vm.ps(self.tw), + .tx = other.tx * vm.ps(self.ix) + other.ty * vm.ps(self.jx) + other.tz * vm.ps(self.kx) + other.tw * vm.ps(self.tx), + .ty = other.tx * vm.ps(self.iy) + other.ty * vm.ps(self.jy) + other.tz * vm.ps(self.ky) + other.tw * vm.ps(self.ty), + .tz = other.tx * vm.ps(self.iz) + other.ty * vm.ps(self.jz) + other.tz * vm.ps(self.kz) + other.tw * vm.ps(self.tz), + .tw = other.tx * vm.ps(self.iw) + other.ty * vm.ps(self.jw) + other.tz * vm.ps(self.kw) + other.tw * vm.ps(self.tw), + }; + } + + // INVERSION DERIVATION (affine and orthonormal case) + // + // We assume the matrix looks like so: + // + // ⎡ix jx kx tx⎤ + // ⎢iy jy ky ty⎥ + // A = ⎢iz jz kz tz⎥ + // ⎣ 0 0 0 1⎦ + // + // Then: + // ⎡ix jx kx⎤ + // det A = det ⎢iy jy ky⎥ + // ⎣iz jz kz⎦ + // + // ⎡ ⎡jy ky ty⎤ ⎡iy ky ty⎤ ⎡iy jy ty⎤ ⎡iy jy ky⎤⎤T + // ⎢ det ⎢jz kz tz⎥ −det ⎢iz kz tz⎥ det ⎢iz jz tz⎥ −det ⎢iz jz kz⎥⎥ + // ⎢ ⎣ 0 0 1⎦ ⎣ 0 0 1⎦ ⎣ 0 0 1⎦ ⎣ 0 0 0⎦⎥ + // ⎢ ⎡jx kx tx⎤ ⎡ix kx tx⎤ ⎡ix jx tx⎤ ⎡ix jx kx⎤⎥ + // ⎢−det ⎢jz kz tz⎥ det ⎢iz kz tz⎥ −det ⎢iz jz tz⎥ det ⎢iz jz kz⎥⎥ + // 1 ⎢ ⎣ 0 0 1⎦ ⎣ 0 0 1⎦ ⎣ 0 0 1⎦ ⎣ 0 0 0⎦⎥ + // inv A = ⎯⎯⎯⎯⎯ ⎢ ⎡jx kx tx⎤ ⎡ix kx tx⎤ ⎡ix jx tx⎤ ⎡ix jx kx⎤⎥ + // det A ⎢ det ⎢jy ky ty⎥ −det ⎢iy ky ty⎥ det ⎢iy jy ty⎥ −det ⎢iy jy ky⎥⎥ + // ⎢ ⎣ 0 0 1⎦ ⎣ 0 0 1⎦ ⎣ 0 0 1⎦ ⎣ 0 0 0⎦⎥ + // ⎢ ⎡jx kx tx⎤ ⎡ix kx tx⎤ ⎡ix jx tx⎤ ⎡ix jx kx⎤⎥ + // ⎢−det ⎢jy ky ty⎥ 
det ⎢iy ky ty⎥ −det ⎢iy jy ty⎥ det ⎢iy jy ky⎥⎥ + // ⎣ ⎣jz kz tz⎦ ⎣iz kz tz⎦ ⎣iz jz tz⎦ ⎣iz jz kz⎦⎦ + // + // ⎡ ⎡kx jx tx⎤⎤ + // ⎢jy · kz − ky · jz kx · jz − jx · kz jx · ky − kx · jy det ⎢ky jy ty⎥⎥ + // ⎢ ⎣kz jz tz⎦⎥ + // ⎢ ⎡ix kx tx⎤⎥ + // 1 ⎢ky · iz − iy · kz ix · kz − kx · iz kx · iy − ix · ky det ⎢iy ky ty⎥⎥ + // inv A = ⎯⎯⎯⎯⎯ ⎢ ⎣iz kz tz⎦⎥ + // det A ⎢ ⎡jx ix tx⎤⎥ + // ⎢iy · jz − jy · iz jx · iz − ix · jz ix · jy − jx · iy det ⎢jy iy ty⎥⎥ + // ⎢ ⎣jz iz tz⎦⎥ + // ⎣ 0 0 0 det A ⎦ + // + // After multiplying by 1 / (det A), the fourth row becomes [0 0 0 1]. + // When A is orthonormal, we can assume det A = 1. + + pub fn inverseOrthonormal(self: Matrix4x4) Matrix4x4 { + std.debug.assert(self.iw == 0 and self.jw == 0 and self.kw == 0 and self.tw == 1); + const ix = self.ix; + const iy = self.jx; + const iz = self.kx; + const jx = self.iy; + const jy = self.jy; + const jz = self.ky; + const kx = self.iz; + const ky = self.jz; + const kz = self.kz; + return .{ + .ix = ix, + .iy = iy, + .iz = iz, + .iw = 0, + .jx = jx, + .jy = jy, + .jz = jz, + .jw = 0, + .kx = kx, + .ky = ky, + .kz = kz, + .kw = 0, + .tx = -(self.tx * ix + self.ty * jx + self.tz * kx), + .ty = -(self.tx * iy + self.ty * jy + self.tz * ky), + .tz = -(self.tx * iz + self.ty * jz + self.tz * kz), + .tw = 1, + }; + } + + pub fn inverseAffine(self: Matrix4x4) Matrix4x4 { + std.debug.assert(self.iw == 0 and self.jw == 0 and self.kw == 0 and self.tw == 1); + const inv_det = 1.0 / ( // determinant of the upper-left 3x3 block, expanded along its first row (ix, jx, kx) + // zig fmt: off + self.ix * (self.jy * self.kz - self.ky * self.jz) + + self.jx * (self.ky * self.iz - self.iy * self.kz) + + self.kx * (self.iy * self.jz - self.jy * self.iz) + // zig fmt: on + ); + const ix = self.jy * self.kz - self.ky * self.jz; + const iy = self.ky * self.iz - self.iy * self.kz; + const iz = self.iy * self.jz - self.jy * self.iz; + const jx = self.kx * self.jz - self.jx * self.kz; + const jy = self.ix * self.kz - self.kx * self.iz; + const jz = self.jx * self.iz - self.ix * self.jz; + const kx = self.jx * 
self.ky - self.kx * self.jy; + const ky = self.kx * self.iy - self.ix * self.ky; + const kz = self.ix * self.jy - self.jx * self.iy; + return .{ + .ix = inv_det * ix, + .iy = inv_det * iy, + .iz = inv_det * iz, + .iw = 0, + .jx = inv_det * jx, + .jy = inv_det * jy, + .jz = inv_det * jz, + .jw = 0, + .kx = inv_det * kx, + .ky = inv_det * ky, + .kz = inv_det * kz, + .kw = 0, + .tx = -inv_det * (self.tx * ix + self.ty * jx + self.tz * kx), + .ty = -inv_det * (self.tx * iy + self.ty * jy + self.tz * ky), + .tz = -inv_det * (self.tx * iz + self.ty * jz + self.tz * kz), + .tw = 1, + }; + } + + // DETERMINANT DERIVATION (full case) + // + // ⎡ix jx kx tx⎤ + // ⎢iy jy ky ty⎥ + // det A = det ⎢iz jz kz tz⎥ + // ⎣iw jw kw tw⎦ + // + // ⎡jy ky ty⎤ ⎡iy ky ty⎤ ⎡iy jy ty⎤ ⎡iy jy ky⎤ + // det A = ix · det ⎢jz kz tz⎥ − jx · det ⎢iz kz tz⎥ + kx · det ⎢iz jz tz⎥ − tx · det ⎢iz jz kz⎥ + // ⎣jw kw tw⎦ ⎣iw kw tw⎦ ⎣iw jw tw⎦ ⎣iw jw kw⎦ + // + // ⎛ ⎡kz tz⎤ ⎡jz tz⎤ ⎡jz kz⎤⎞ + // det A = ix · ⎝jy · det ⎣kw tw⎦ − ky · det ⎣jw tw⎦ + ty · det ⎣jw kw⎦⎠ + + // + // ⎛ ⎡kz tz⎤ ⎡iz tz⎤ ⎡iz kz⎤⎞ + // − jx · ⎝iy · det ⎣kw tw⎦ − ky · det ⎣iw tw⎦ + ty · det ⎣iw kw⎦⎠ + + // + // ⎛ ⎡jz tz⎤ ⎡iz tz⎤ ⎡iz jz⎤⎞ + // + kx · ⎝iy · det ⎣jw tw⎦ − jy · det ⎣iw tw⎦ + ty · det ⎣iw jw⎦⎠ + + // + // ⎛ ⎡jz kz⎤ ⎡iz kz⎤ ⎡iz jz⎤⎞ + // − tx · ⎝iy · det ⎣jw kw⎦ − jy · det ⎣iw kw⎦ + ky · det ⎣iw jw⎦⎠ + + pub fn inverseFull(self: Matrix4x4) Matrix4x4 { + const iy_jw = self.iy * self.jw - self.jy * self.iw; + const iy_jz = self.iy * self.jz - self.jy * self.iz; + const iy_kz = self.iy * self.kz - self.ky * self.iz; + const iy_kw = self.iy * self.kw - self.ky * self.iw; + const iy_tz = self.iy * self.tz - self.ty * self.iz; + const iy_tw = self.iy * self.tw - self.ty * self.iw; + const iz_jw = self.iz * self.jw - self.jz * self.iw; + const iz_kw = self.iz * self.kw - self.kz * self.iw; + const iz_tw = self.iz * self.tw - self.tz * self.iw; + const jy_kz = self.jy * self.kz - self.ky * self.jz; + const jy_kw = self.jy * self.kw 
- self.ky * self.jw; + const jy_tw = self.jy * self.tw - self.ty * self.jw; + const jy_tz = self.jy * self.tz - self.ty * self.jz; + const jz_kw = self.jz * self.kw - self.kz * self.jw; + const jz_tw = self.jz * self.tw - self.tz * self.jw; + const ky_tz = self.ky * self.tz - self.ty * self.kz; + const ky_tw = self.ky * self.tw - self.ty * self.kw; + const kz_tw = self.kz * self.tw - self.tz * self.kw; + + const det_ix = self.jy * kz_tw - self.ky * jz_tw + self.ty * jz_kw; + const det_jx = self.iy * kz_tw - self.ky * iz_tw + self.ty * iz_kw; + const det_kx = self.iy * jz_tw - self.jy * iz_tw + self.ty * iz_jw; + const det_tx = self.iy * jz_kw - self.jy * iz_kw + self.ky * iz_jw; + + const det_iy = self.jx * kz_tw - self.kx * jz_tw + self.tx * jz_kw; + const det_jy = self.ix * kz_tw - self.kx * iz_tw + self.tx * iz_kw; + const det_ky = self.ix * jz_tw - self.jx * iz_tw + self.tx * iz_jw; // minor of ky: rows x, z, w and columns i, j, t + const det_ty = self.ix * jz_kw - self.jx * iz_kw + self.kx * iz_jw; + + const det_iz = self.jx * ky_tw - self.kx * jy_tw + self.tx * jy_kw; + const det_jz = self.ix * ky_tw - self.kx * iy_tw + self.tx * iy_kw; + const det_kz = self.ix * jy_tw - self.jx * iy_tw + self.tx * iy_jw; + const det_tz = self.ix * jy_kw - self.jx * iy_kw + self.kx * iy_jw; + + const det_iw = self.jx * ky_tz - self.kx * jy_tz + self.tx * jy_kz; + const det_jw = self.ix * ky_tz - self.kx * iy_tz + self.tx * iy_kz; + const det_kw = self.ix * jy_tz - self.jx * iy_tz + self.tx * iy_jz; + const det_tw = self.ix * jy_kz - self.jx * iy_kz + self.kx * iy_jz; + + const det = self.ix * det_ix - self.jx * det_jx + self.kx * det_kx - self.tx * det_tx; + const inv_det = 1.0 / det; + + return .{ + // zig fmt: off + .ix = inv_det * det_ix, .iy = -inv_det * det_jx, .iz = inv_det * det_kx, .iw = -inv_det * det_tx, + .jx = -inv_det * det_iy, .jy = inv_det * det_jy, .jz = -inv_det * det_ky, .jw = inv_det * det_ty, + .kx = inv_det * det_iz, .ky = -inv_det * det_jz, .kz = inv_det * det_kz, .kw = -inv_det * det_tz, + .tx = 
-inv_det * det_iw, .ty = inv_det * det_jw, .tz = -inv_det * det_kw, .tw = inv_det * det_tw, + // zig fmt: on }; } }; diff --git a/packages/vecmath/src/matrices/Matrix4x4x8.zig b/packages/vecmath/src/matrices/Matrix4x4x8.zig index 6cb3c45..95c74f7 100644 --- a/packages/vecmath/src/matrices/Matrix4x4x8.zig +++ b/packages/vecmath/src/matrices/Matrix4x4x8.zig @@ -1,2 +1,779 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); + +pub const Matrix4x4x8 = extern struct { + // zig fmt: off + ix: vm.f32x8, iy: vm.f32x8, iz: vm.f32x8, iw: vm.f32x8, + jx: vm.f32x8, jy: vm.f32x8, jz: vm.f32x8, jw: vm.f32x8, + kx: vm.f32x8, ky: vm.f32x8, kz: vm.f32x8, kw: vm.f32x8, + tx: vm.f32x8, ty: vm.f32x8, tz: vm.f32x8, tw: vm.f32x8, + // zig fmt: on + + pub const identity = initSingle( + // zig fmt: off + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1, + // zig fmt: on + ); + + // --- INIT ---------------------------------------------------------------- + + pub inline fn init( + // zig fmt: off + ix: vm.f32x8, iy: vm.f32x8, iz: vm.f32x8, iw: vm.f32x8, + jx: vm.f32x8, jy: vm.f32x8, jz: vm.f32x8, jw: vm.f32x8, + kx: vm.f32x8, ky: vm.f32x8, kz: vm.f32x8, kw: vm.f32x8, + tx: vm.f32x8, ty: vm.f32x8, tz: vm.f32x8, tw: vm.f32x8, + // zig fmt: on + ) Matrix4x4x8 { + return .{ + // zig fmt: off + .ix = ix, .iy = iy, .iz = iz, .iw = iw, + .jx = jx, .jy = jy, .jz = jz, .jw = jw, + .kx = kx, .ky = ky, .kz = kz, .kw = kw, + .tx = tx, .ty = ty, .tz = tz, .tw = tw, + // zig fmt: on + }; + } + + pub inline fn initSingle( + // zig fmt: off + ix: f32, iy: f32, iz: f32, iw: f32, + jx: f32, jy: f32, jz: f32, jw: f32, + kx: f32, ky: f32, kz: f32, kw: f32, + tx: f32, ty: f32, tz: f32, tw: f32, + // zig fmt: on + ) Matrix4x4x8 { + return .{ + // zig fmt: off + .ix = vm.ps(ix), .iy = vm.ps(iy), .iz = vm.ps(iz), .iw = vm.ps(iw), + .jx = vm.ps(jx), .jy = vm.ps(jy), .jz = vm.ps(jz), .jw = vm.ps(jw), + .kx = vm.ps(kx), .ky = vm.ps(ky), .kz = vm.ps(kz), .kw = 
vm.ps(kw), + .tx = vm.ps(tx), .ty = vm.ps(ty), .tz = vm.ps(tz), .tw = vm.ps(tw), + // zig fmt: on + }; + } + + pub inline fn initVersors(i: vm.Vector4x8, j: vm.Vector4x8, k: vm.Vector4x8, t: vm.Vector4x8) Matrix4x4x8 { + return .{ + // zig fmt: off + .ix = i.x, .iy = i.y, .iz = i.z, .iw = i.w, + .jx = j.x, .jy = j.y, .jz = j.z, .jw = j.w, + .kx = k.x, .ky = k.y, .kz = k.z, .kw = k.w, + .tx = t.x, .ty = t.y, .tz = t.z, .tw = t.w, + // zig fmt: on + }; + } + + pub inline fn initVersorsSingle(i: vm.Vector4, j: vm.Vector4, k: vm.Vector4, t: vm.Vector4) Matrix4x4x8 { + return .{ + // zig fmt: off + .ix = vm.ps(i.x), .iy = vm.ps(i.y), .iz = vm.ps(i.z), .iw = vm.ps(i.w), + .jx = vm.ps(j.x), .jy = vm.ps(j.y), .jz = vm.ps(j.z), .jw = vm.ps(j.w), + .kx = vm.ps(k.x), .ky = vm.ps(k.y), .kz = vm.ps(k.z), .kw = vm.ps(k.w), + .tx = vm.ps(t.x), .ty = vm.ps(t.y), .tz = vm.ps(t.z), .tw = vm.ps(t.w), + // zig fmt: on + }; + } + + pub inline fn initTranslation(t: vm.Vector3x8) Matrix4x4x8 { + return .{ + // zig fmt: off + .ix = vm.ps(1), .iy = vm.ps(0), .iz = vm.ps(0), .iw = vm.ps(0), + .jx = vm.ps(0), .jy = vm.ps(1), .jz = vm.ps(0), .jw = vm.ps(0), + .kx = vm.ps(0), .ky = vm.ps(0), .kz = vm.ps(1), .kw = vm.ps(0), + .tx = t.x, .ty = t.y, .tz = t.z, .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initTranslationSingle(t: vm.Vector3) Matrix4x4x8 { + return .{ + // zig fmt: off + .ix = vm.ps(1), .iy = vm.ps(0), .iz = vm.ps(0), .iw = vm.ps(0), + .jx = vm.ps(0), .jy = vm.ps(1), .jz = vm.ps(0), .jw = vm.ps(0), + .kx = vm.ps(0), .ky = vm.ps(0), .kz = vm.ps(1), .kw = vm.ps(0), + .tx = vm.ps(t.x), .ty = vm.ps(t.y), .tz = vm.ps(t.z), .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initRotation(q: vm.Quaternion_x8) Matrix4x4x8 { + const xx = q.x * q.x; + const xy = q.x * q.y; + const xz = q.x * q.z; + const xw = q.x * q.w; + const yy = q.y * q.y; + const yz = q.y * q.z; + const yw = q.y * q.w; + const zz = q.z * q.z; + const zw = q.z * q.w; + + return .{ + // zig fmt: off 
+ .ix = vm.ps(1) - vm.ps(2) * (yy + zz), .iy = vm.ps(2) * (xy - zw), .iz = vm.ps(2) * (xz + yw), .iw = vm.ps(0), + .jx = vm.ps(2) * (xy + zw), .jy = vm.ps(1) - vm.ps(2) * (xx + zz), .jz = vm.ps(2) * (yz - xw), .jw = vm.ps(0), + .kx = vm.ps(2) * (xz - yw), .ky = vm.ps(2) * (yz + xw), .kz = vm.ps(1) - vm.ps(2) * (xx + yy), .kw = vm.ps(0), + .tx = vm.ps(0), .ty = vm.ps(0), .tz = vm.ps(0), .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initRotationSingle(q: vm.Quaternion) Matrix4x4x8 { + const xx = q.x * q.x; + const xy = q.x * q.y; + const xz = q.x * q.z; + const xw = q.x * q.w; + const yy = q.y * q.y; + const yz = q.y * q.z; + const yw = q.y * q.w; + const zz = q.z * q.z; + const zw = q.z * q.w; + + return .{ + // zig fmt: off + .ix = vm.ps(1 - 2 * (yy + zz)), .iy = vm.ps(2 * (xy - zw)), .iz = vm.ps(2 * (xz + yw)), .iw = vm.ps(0), + .jx = vm.ps(2 * (xy + zw)), .jy = vm.ps(1 - 2 * (xx + zz)), .jz = vm.ps(2 * (yz - xw)), .jw = vm.ps(0), + .kx = vm.ps(2 * (xz - yw)), .ky = vm.ps(2 * (yz + xw)), .kz = vm.ps(1 - 2 * (xx + yy)), .kw = vm.ps(0), + .tx = vm.ps(0), .ty = vm.ps(0), .tz = vm.ps(0), .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initScale(s: vm.Vector3x8) Matrix4x4x8 { + return .{ + // zig fmt: off + .ix = s.x, .iy = vm.ps(0), .iz = vm.ps(0), .iw = vm.ps(0), + .jx = vm.ps(0), .jy = s.y, .jz = vm.ps(0), .jw = vm.ps(0), + .kx = vm.ps(0), .ky = vm.ps(0), .kz = s.z, .kw = vm.ps(0), + .tx = vm.ps(0), .ty = vm.ps(0), .tz = vm.ps(0), .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initScaleSingle(s: vm.Vector3) Matrix4x4x8 { + return .{ + // zig fmt: off + .ix = vm.ps(s.x), .iy = vm.ps(0), .iz = vm.ps(0), .iw = vm.ps(0), + .jx = vm.ps(0), .jy = vm.ps(s.y), .jz = vm.ps(0), .jw = vm.ps(0), + .kx = vm.ps(0), .ky = vm.ps(0), .kz = vm.ps(s.z), .kw = vm.ps(0), + .tx = vm.ps(0), .ty = vm.ps(0), .tz = vm.ps(0), .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initTranslationRotation(t: vm.Vector3x8, q: vm.Quaternion_x8) 
Matrix4x4x8 { + const xx = q.x * q.x; + const xy = q.x * q.y; + const xz = q.x * q.z; + const xw = q.x * q.w; + const yy = q.y * q.y; + const yz = q.y * q.z; + const yw = q.y * q.w; + const zz = q.z * q.z; + const zw = q.z * q.w; + + return .{ + // zig fmt: off + .ix = vm.ps(1) - vm.ps(2) * (yy + zz), .iy = vm.ps(2) * (xy - zw), .iz = vm.ps(2) * (xz + yw), .iw = vm.ps(0), + .jx = vm.ps(2) * (xy + zw), .jy = vm.ps(1) - vm.ps(2) * (xx + zz), .jz = vm.ps(2) * (yz - xw), .jw = vm.ps(0), + .kx = vm.ps(2) * (xz - yw), .ky = vm.ps(2) * (yz + xw), .kz = vm.ps(1) - vm.ps(2) * (xx + yy), .kw = vm.ps(0), + .tx = t.x, .ty = t.y, .tz = t.z, .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initTranslationRotationSingle(t: vm.Vector3, q: vm.Quaternion) Matrix4x4x8 { + const xx = q.x * q.x; + const xy = q.x * q.y; + const xz = q.x * q.z; + const xw = q.x * q.w; + const yy = q.y * q.y; + const yz = q.y * q.z; + const yw = q.y * q.w; + const zz = q.z * q.z; + const zw = q.z * q.w; + + return .{ + // zig fmt: off + .ix = vm.ps(1 - 2 * (yy + zz)), .iy = vm.ps(2 * (xy - zw)), .iz = vm.ps(2 * (xz + yw)), .iw = vm.ps(0), + .jx = vm.ps(2 * (xy + zw)), .jy = vm.ps(1 - 2 * (xx + zz)), .jz = vm.ps(2 * (yz - xw)), .jw = vm.ps(0), + .kx = vm.ps(2 * (xz - yw)), .ky = vm.ps(2 * (yz + xw)), .kz = vm.ps(1 - 2 * (xx + yy)), .kw = vm.ps(0), + .tx = vm.ps(t.x), .ty = vm.ps(t.y), .tz = vm.ps(t.z), .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initTranslationScale(t: vm.Vector3x8, s: vm.Vector3x8) Matrix4x4x8 { + return .{ + // zig fmt: off + .ix = s.x, .iy = vm.ps(0), .iz = vm.ps(0), .iw = vm.ps(0), + .jx = vm.ps(0), .jy = s.y, .jz = vm.ps(0), .jw = vm.ps(0), + .kx = vm.ps(0), .ky = vm.ps(0), .kz = s.z, .kw = vm.ps(0), + .tx = t.x, .ty = t.y, .tz = t.z, .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initTranslationScaleSingle(t: vm.Vector3, s: vm.Vector3) Matrix4x4x8 { + return .{ + // zig fmt: off + .ix = vm.ps(s.x), .iy = vm.ps(0), .iz = vm.ps(0), .iw = 
vm.ps(0), + .jx = vm.ps(0), .jy = vm.ps(s.y), .jz = vm.ps(0), .jw = vm.ps(0), + .kx = vm.ps(0), .ky = vm.ps(0), .kz = vm.ps(s.z), .kw = vm.ps(0), + .tx = vm.ps(t.x), .ty = vm.ps(t.y), .tz = vm.ps(t.z), .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initTranslationRotationScale(t: vm.Vector3x8, q: vm.Quaternion_x8, s: vm.Vector3x8) Matrix4x4x8 { + const xx = q.x * q.x; + const xy = q.x * q.y; + const xz = q.x * q.z; + const xw = q.x * q.w; + const yy = q.y * q.y; + const yz = q.y * q.z; + const yw = q.y * q.w; + const zz = q.z * q.z; + const zw = q.z * q.w; + + return .{ + // zig fmt: off + .ix = s.x * (vm.ps(1) - vm.ps(2) * (yy + zz)), .iy = s.x * vm.ps(2) * (xy - zw), .iz = s.x * vm.ps(2) * (xz + yw), .iw = vm.ps(0), + .jx = s.y * vm.ps(2) * (xy + zw), .jy = s.y * (vm.ps(1) - vm.ps(2) * (xx + zz)), .jz = s.y * vm.ps(2) * (yz - xw), .jw = vm.ps(0), + .kx = s.z * vm.ps(2) * (xz - yw), .ky = s.z * vm.ps(2) * (yz + xw), .kz = s.z * (vm.ps(1) - vm.ps(2) * (xx + yy)), .kw = vm.ps(0), + .tx = t.x, .ty = t.y, .tz = t.z, .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initTranslationRotationScaleSingle(t: vm.Vector3, q: vm.Quaternion, s: vm.Vector3) Matrix4x4x8 { + const xx = q.x * q.x; + const xy = q.x * q.y; + const xz = q.x * q.z; + const xw = q.x * q.w; + const yy = q.y * q.y; + const yz = q.y * q.z; + const yw = q.y * q.w; + const zz = q.z * q.z; + const zw = q.z * q.w; + + return .{ + // zig fmt: off + .ix = vm.ps(s.x * (1 - 2 * (yy + zz))), .iy = vm.ps(s.x * 2 * (xy - zw)), .iz = vm.ps(s.x * 2 * (xz + yw)), .iw = vm.ps(0), + .jx = vm.ps(s.y * 2 * (xy + zw)), .jy = vm.ps(s.y * (1 - 2 * (xx + zz))), .jz = vm.ps(s.y * 2 * (yz - xw)), .jw = vm.ps(0), + .kx = vm.ps(s.z * 2 * (xz - yw)), .ky = vm.ps(s.z * 2 * (yz + xw)), .kz = vm.ps(s.z * (1 - 2 * (xx + yy))), .kw = vm.ps(0), + .tx = vm.ps(t.x), .ty = vm.ps(t.y), .tz = vm.ps(t.z), .tw = vm.ps(1), + // zig fmt: on + }; + } + + pub inline fn initArrayOfMatrices(matrices: [8]vm.Matrix4x4) Matrix4x4x8 
{ + const vector: @Vector(128, f32) = @as([128]f32, @bitCast(matrices)); + return .{ + .ix = @shuffle(f32, vector, undefined, [_]i32{ 0, 16, 32, 48, 64, 80, 96, 112 }), + .iy = @shuffle(f32, vector, undefined, [_]i32{ 1, 17, 33, 49, 65, 81, 97, 113 }), + .iz = @shuffle(f32, vector, undefined, [_]i32{ 2, 18, 34, 50, 66, 82, 98, 114 }), + .iw = @shuffle(f32, vector, undefined, [_]i32{ 3, 19, 35, 51, 67, 83, 99, 115 }), + .jx = @shuffle(f32, vector, undefined, [_]i32{ 4, 20, 36, 52, 68, 84, 100, 116 }), + .jy = @shuffle(f32, vector, undefined, [_]i32{ 5, 21, 37, 53, 69, 85, 101, 117 }), + .jz = @shuffle(f32, vector, undefined, [_]i32{ 6, 22, 38, 54, 70, 86, 102, 118 }), + .jw = @shuffle(f32, vector, undefined, [_]i32{ 7, 23, 39, 55, 71, 87, 103, 119 }), + .kx = @shuffle(f32, vector, undefined, [_]i32{ 8, 24, 40, 56, 72, 88, 104, 120 }), + .ky = @shuffle(f32, vector, undefined, [_]i32{ 9, 25, 41, 57, 73, 89, 105, 121 }), + .kz = @shuffle(f32, vector, undefined, [_]i32{ 10, 26, 42, 58, 74, 90, 106, 122 }), + .kw = @shuffle(f32, vector, undefined, [_]i32{ 11, 27, 43, 59, 75, 91, 107, 123 }), + .tx = @shuffle(f32, vector, undefined, [_]i32{ 12, 28, 44, 60, 76, 92, 108, 124 }), + .ty = @shuffle(f32, vector, undefined, [_]i32{ 13, 29, 45, 61, 77, 93, 109, 125 }), + .tz = @shuffle(f32, vector, undefined, [_]i32{ 14, 30, 46, 62, 78, 94, 110, 126 }), + .tw = @shuffle(f32, vector, undefined, [_]i32{ 15, 31, 47, 63, 79, 95, 111, 127 }), + }; + } + + pub inline fn splat(matrix: vm.Matrix4x4) Matrix4x4x8 { + return .{ + // zig fmt: off + .ix = vm.ps(matrix.ix), .iy = vm.ps(matrix.iy), .iz = vm.ps(matrix.iz), .iw = vm.ps(matrix.iw), + .jx = vm.ps(matrix.jx), .jy = vm.ps(matrix.jy), .jz = vm.ps(matrix.jz), .jw = vm.ps(matrix.jw), + .kx = vm.ps(matrix.kx), .ky = vm.ps(matrix.ky), .kz = vm.ps(matrix.kz), .kw = vm.ps(matrix.kw), + .tx = vm.ps(matrix.tx), .ty = vm.ps(matrix.ty), .tz = vm.ps(matrix.tz), .tw = vm.ps(matrix.tw), + // zig fmt: on + }; + } + + // --- CONVERSION 
---------------------------------------------------------- + + pub inline fn asArrayOfMatrices(self: Matrix4x4x8) [8]vm.Matrix4x4 { + const vector: @Vector(128, f32) = self.ix ++ self.iy ++ self.iz ++ self.iw ++ self.jx ++ self.jy ++ self.jz ++ self.jw ++ self.kx ++ self.ky ++ self.kz ++ self.kw ++ self.tx ++ self.ty ++ self.tz ++ self.tw; + return @bitCast(@as([128]f32, @shuffle(f32, vector, undefined, [_]i32{ + 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, + 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 113, 121, + 2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122, + 3, 11, 19, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99, 107, 115, 123, + 4, 12, 20, 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124, + 5, 13, 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125, + 6, 14, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126, + 7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127, + }))); + } + + pub inline fn unpack(self: Matrix4x4x8) [16]vm.f32x8 { + return .{ self.ix, self.iy, self.iz, self.iw, self.jx, self.jy, self.jz, self.jw, self.kx, self.ky, self.kz, self.kw, self.tx, self.ty, self.tz, self.tw }; + } + + // --- LOAD AND STORE ------------------------------------------------------ + + pub inline fn loadArrayOfMatrices(self: *Matrix4x4x8, array: *const [8]vm.Matrix4x4) void { + const vector: @Vector(128, f32) = @as(*const [128]f32, @ptrCast(array)).*; + self.ix = @shuffle(f32, vector, undefined, [_]i32{ 0, 16, 32, 48, 64, 80, 96, 112 }); + self.iy = @shuffle(f32, vector, undefined, [_]i32{ 1, 17, 33, 49, 65, 81, 97, 113 }); + self.iz = @shuffle(f32, vector, undefined, [_]i32{ 2, 18, 34, 50, 66, 82, 98, 114 }); + self.iw = @shuffle(f32, vector, undefined, [_]i32{ 3, 19, 35, 51, 67, 83, 99, 115 }); + self.jx = @shuffle(f32, vector, undefined, [_]i32{ 4, 20, 36, 52, 68, 84, 100, 116 }); + self.jy = @shuffle(f32, vector, undefined, [_]i32{ 5, 21, 37, 53, 69, 85, 101, 117 }); + self.jz = 
@shuffle(f32, vector, undefined, [_]i32{ 6, 22, 38, 54, 70, 86, 102, 118 });
+        self.jw = @shuffle(f32, vector, undefined, [_]i32{ 7, 23, 39, 55, 71, 87, 103, 119 });
+        self.kx = @shuffle(f32, vector, undefined, [_]i32{ 8, 24, 40, 56, 72, 88, 104, 120 });
+        self.ky = @shuffle(f32, vector, undefined, [_]i32{ 9, 25, 41, 57, 73, 89, 105, 121 });
+        self.kz = @shuffle(f32, vector, undefined, [_]i32{ 10, 26, 42, 58, 74, 90, 106, 122 });
+        self.kw = @shuffle(f32, vector, undefined, [_]i32{ 11, 27, 43, 59, 75, 91, 107, 123 });
+        self.tx = @shuffle(f32, vector, undefined, [_]i32{ 12, 28, 44, 60, 76, 92, 108, 124 });
+        self.ty = @shuffle(f32, vector, undefined, [_]i32{ 13, 29, 45, 61, 77, 93, 109, 125 });
+        self.tz = @shuffle(f32, vector, undefined, [_]i32{ 14, 30, 46, 62, 78, 94, 110, 126 });
+        self.tw = @shuffle(f32, vector, undefined, [_]i32{ 15, 31, 47, 63, 79, 95, 111, 127 });
+    }
+
+    pub inline fn storeArrayOfMatrices(self: *const Matrix4x4x8, array: *[8]vm.Matrix4x4) void {
+        const vector: @Vector(128, f32) = self.ix ++ self.iy ++ self.iz ++ self.iw ++ self.jx ++ self.jy ++ self.jz ++ self.jw ++ self.kx ++ self.ky ++ self.kz ++ self.kw ++ self.tx ++ self.ty ++ self.tz ++ self.tw;
+        @as(*[128]f32, @ptrCast(array)).* = @shuffle(f32, vector, undefined, [_]i32{
+            0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120,
+            1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 113, 121,
+            2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122,
+            3, 11, 19, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99, 107, 115, 123,
+            4, 12, 20, 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124,
+            5, 13, 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125,
+            6, 14, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126,
+            7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127,
+        });
+    }
+
+    // --- ACCESSORS -----------------------------------------------------------
+
+    /// Returns the I versor (fields ix, iy, iz, iw) of all eight matrices.
+    /// The fields are `vm.f32x8`, so the result is the 8-wide vector type.
+    pub inline fn getIVersor(self: Matrix4x4x8) vm.Vector4x8 {
+        return .{ .x = self.ix, .y = self.iy, .z = self.iz, .w = self.iw };
+    }
+
+    /// Returns the J versor (fields jx, jy, jz, jw) of all eight matrices.
+    pub inline fn getJVersor(self: Matrix4x4x8) vm.Vector4x8 {
+        return .{ .x = self.jx, .y = self.jy, .z = self.jz, .w = self.jw };
+    }
+
+    /// Returns the K versor (fields kx, ky, kz, kw) of all eight matrices.
+    pub inline fn getKVersor(self: Matrix4x4x8) vm.Vector4x8 {
+        return .{ .x = self.kx, .y = self.ky, .z = self.kz, .w = self.kw };
+    }
+
+    /// Returns the translation column (fields tx, ty, tz, tw) of all eight matrices.
+    pub inline fn getTranslationVector(self: Matrix4x4x8) vm.Vector4x8 {
+        return .{ .x = self.tx, .y = self.ty, .z = self.tz, .w = self.tw };
+    }
+
+    // --- COMPONENT-WISE ------------------------------------------------------
+
+    pub inline fn add(self: Matrix4x4x8, other: Matrix4x4x8) Matrix4x4x8 {
+        return .{
+            .ix = self.ix + other.ix,
+            .iy = self.iy + other.iy,
+            .iz = self.iz + other.iz,
+            .iw = self.iw + other.iw,
+            .jx = self.jx + other.jx,
+            .jy = self.jy + other.jy,
+            .jz = self.jz + other.jz,
+            .jw = self.jw + other.jw,
+            .kx = self.kx + other.kx,
+            .ky = self.ky + other.ky,
+            .kz = self.kz + other.kz,
+            .kw = self.kw + other.kw,
+            .tx = self.tx + other.tx,
+            .ty = self.ty + other.ty,
+            .tz = self.tz + other.tz,
+            .tw = self.tw + other.tw,
+        };
+    }
+
+    pub inline fn sub(self: Matrix4x4x8, other: Matrix4x4x8) Matrix4x4x8 {
+        return .{
+            .ix = self.ix - other.ix,
+            .iy = self.iy - other.iy,
+            .iz = self.iz - other.iz,
+            .iw = self.iw - other.iw,
+            .jx = self.jx - other.jx,
+            .jy = self.jy - other.jy,
+            .jz = self.jz - other.jz,
+            .jw = self.jw - other.jw,
+            .kx = self.kx - other.kx,
+            .ky = self.ky - other.ky,
+            .kz = self.kz - other.kz,
+            .kw = self.kw - other.kw,
+            .tx = self.tx - other.tx,
+            .ty = self.ty - other.ty,
+            .tz = self.tz - other.tz,
+            .tw = self.tw - other.tw,
+        };
+    }
+
+    pub inline fn mulScalar(self: Matrix4x4x8, scalar: vm.f32x8) Matrix4x4x8 {
+        return .{
+            .ix = self.ix * scalar,
+            .iy = self.iy * scalar,
+            .iz = self.iz * scalar,
+            .iw = self.iw * scalar,
+            .jx = self.jx * scalar,
+            .jy = self.jy * scalar,
+            .jz = self.jz * scalar,
+            .jw = self.jw * scalar,
+            .kx = self.kx * scalar,
+            .ky = self.ky * scalar,
+            .kz = self.kz
* scalar, + .kw = self.kw * scalar, + .tx = self.tx * scalar, + .ty = self.ty * scalar, + .tz = self.tz * scalar, + .tw = self.tw * scalar, + }; + } + + pub inline fn mulScalarSingle(self: Matrix4x4x8, scalar: f32) Matrix4x4x8 { + return .{ + .ix = self.ix * vm.ps(scalar), + .iy = self.iy * vm.ps(scalar), + .iz = self.iz * vm.ps(scalar), + .iw = self.iw * vm.ps(scalar), + .jx = self.jx * vm.ps(scalar), + .jy = self.jy * vm.ps(scalar), + .jz = self.jz * vm.ps(scalar), + .jw = self.jw * vm.ps(scalar), + .kx = self.kx * vm.ps(scalar), + .ky = self.ky * vm.ps(scalar), + .kz = self.kz * vm.ps(scalar), + .kw = self.kw * vm.ps(scalar), + .tx = self.tx * vm.ps(scalar), + .ty = self.ty * vm.ps(scalar), + .tz = self.tz * vm.ps(scalar), + .tw = self.tw * vm.ps(scalar), + }; + } + + pub inline fn divScalar(self: Matrix4x4x8, scalar: vm.f32x8) Matrix4x4x8 { + return .{ + .ix = self.ix / scalar, + .iy = self.iy / scalar, + .iz = self.iz / scalar, + .iw = self.iw / scalar, + .jx = self.jx / scalar, + .jy = self.jy / scalar, + .jz = self.jz / scalar, + .jw = self.jw / scalar, + .kx = self.kx / scalar, + .ky = self.ky / scalar, + .kz = self.kz / scalar, + .kw = self.kw / scalar, + .tx = self.tx / scalar, + .ty = self.ty / scalar, + .tz = self.tz / scalar, + .tw = self.tw / scalar, + }; + } + + pub inline fn divScalarSingle(self: Matrix4x4x8, scalar: f32) Matrix4x4x8 { + return .{ + .ix = self.ix / vm.ps(scalar), + .iy = self.iy / vm.ps(scalar), + .iz = self.iz / vm.ps(scalar), + .iw = self.iw / vm.ps(scalar), + .jx = self.jx / vm.ps(scalar), + .jy = self.jy / vm.ps(scalar), + .jz = self.jz / vm.ps(scalar), + .jw = self.jw / vm.ps(scalar), + .kx = self.kx / vm.ps(scalar), + .ky = self.ky / vm.ps(scalar), + .kz = self.kz / vm.ps(scalar), + .kw = self.kw / vm.ps(scalar), + .tx = self.tx / vm.ps(scalar), + .ty = self.ty / vm.ps(scalar), + .tz = self.tz / vm.ps(scalar), + .tw = self.tw / vm.ps(scalar), + }; + } + + // --- TRANSFORM 
----------------------------------------------------------- + + // TODO Move to vm.Vector3x8 + pub inline fn transformPoint(self: Matrix4x4x8, p: vm.Vector3x8) vm.Vector3x8 { + return .{ + .x = p.x * self.ix + p.y * self.jx + p.z * self.kx + self.tx, + .y = p.x * self.iy + p.y * self.jy + p.z * self.ky + self.ty, + .z = p.x * self.iz + p.y * self.jz + p.z * self.kz + self.tz, + }; + } + + // TODO Move to vm.Vector3x8 + pub inline fn transformPointSingle(self: Matrix4x4x8, p: vm.Vector3) vm.Vector3x8 { + return .{ + .x = vm.ps(p.x) * self.ix + vm.ps(p.y) * self.jx + vm.ps(p.z) * self.kx + self.tx, + .y = vm.ps(p.x) * self.iy + vm.ps(p.y) * self.jy + vm.ps(p.z) * self.ky + self.ty, + .z = vm.ps(p.x) * self.iz + vm.ps(p.y) * self.jz + vm.ps(p.z) * self.kz + self.tz, + }; + } + + // TODO Move to vm.Vector3x8 + pub inline fn transformVector(self: Matrix4x4x8, v: vm.Vector3x8) vm.Vector3x8 { + return .{ + .x = v.x * self.ix + v.y * self.jx + v.z * self.kx, + .y = v.x * self.iy + v.y * self.jy + v.z * self.ky, + .z = v.x * self.iz + v.y * self.jz + v.z * self.kz, + }; + } + + // TODO Move to vm.Vector3x8 + pub inline fn transformVectorSingle(self: Matrix4x4x8, v: vm.Vector3) vm.Vector3x8 { + return .{ + .x = vm.ps(v.x) * self.ix + vm.ps(v.y) * self.jx + vm.ps(v.z) * self.kx, + .y = vm.ps(v.x) * self.iy + vm.ps(v.y) * self.jy + vm.ps(v.z) * self.ky, + .z = vm.ps(v.x) * self.iz + vm.ps(v.y) * self.jz + vm.ps(v.z) * self.kz, + }; + } + + // TODO Move to vm.Vector4x8 + pub inline fn transformHomogeneous(self: Matrix4x4x8, h: vm.Vector4x8) vm.Vector4x8 { + return .{ + .x = h.x * self.ix + h.y * self.jx + h.z * self.kx + h.w * self.tx, + .y = h.x * self.iy + h.y * self.jy + h.z * self.ky + h.w * self.ty, + .z = h.x * self.iz + h.y * self.jz + h.z * self.kz + h.w * self.tz, + .w = h.x * self.iw + h.y * self.jw + h.z * self.kw + h.w * self.tw, + }; + } + + // TODO Move to vm.Vector4x8 + pub inline fn transformHomogeneousSingle(self: Matrix4x4x8, h: vm.Vector4) vm.Vector4x8 { + 
return .{
+            .x = vm.ps(h.x) * self.ix + vm.ps(h.y) * self.jx + vm.ps(h.z) * self.kx + vm.ps(h.w) * self.tx,
+            .y = vm.ps(h.x) * self.iy + vm.ps(h.y) * self.jy + vm.ps(h.z) * self.ky + vm.ps(h.w) * self.ty,
+            .z = vm.ps(h.x) * self.iz + vm.ps(h.y) * self.jz + vm.ps(h.z) * self.kz + vm.ps(h.w) * self.tz,
+            .w = vm.ps(h.x) * self.iw + vm.ps(h.y) * self.jw + vm.ps(h.z) * self.kw + vm.ps(h.w) * self.tw,
+        };
+    }
+
+    // --- COMPOSE -------------------------------------------------------------
+
+    /// Composes two batches of affine matrices lane by lane.
+    /// Each versor of `other` is transformed by `self` as a vector, and the
+    /// translation of `other` is transformed by `self` as a point, so the
+    /// result applies `other` first and `self` second.
+    /// The caller asserts that W rows of all matrices are equal to [0 0 0 1].
+    pub fn mulMatrixAffine(self: Matrix4x4x8, other: Matrix4x4x8) Matrix4x4x8 {
+        std.debug.assert(@reduce(.And, (self.iw == vm.ps(0)) & (self.jw == vm.ps(0)) & (self.kw == vm.ps(0)) & (self.tw == vm.ps(1))));
+        std.debug.assert(@reduce(.And, (other.iw == vm.ps(0)) & (other.jw == vm.ps(0)) & (other.kw == vm.ps(0)) & (other.tw == vm.ps(1))));
+        return .{
+            .ix = other.ix * self.ix + other.iy * self.jx + other.iz * self.kx,
+            .iy = other.ix * self.iy + other.iy * self.jy + other.iz * self.ky,
+            .iz = other.ix * self.iz + other.iy * self.jz + other.iz * self.kz,
+            .iw = vm.ps(0),
+            .jx = other.jx * self.ix + other.jy * self.jx + other.jz * self.kx,
+            .jy = other.jx * self.iy + other.jy * self.jy + other.jz * self.ky,
+            .jz = other.jx * self.iz + other.jy * self.jz + other.jz * self.kz,
+            .jw = vm.ps(0),
+            .kx = other.kx * self.ix + other.ky * self.jx + other.kz * self.kx,
+            .ky = other.kx * self.iy + other.ky * self.jy + other.kz * self.ky,
+            .kz = other.kx * self.iz + other.ky * self.jz + other.kz * self.kz,
+            .kw = vm.ps(0),
+            // other.t goes through self as a point: the first term must use
+            // self's I versor (ix/iy/iz), not self's translation.
+            .tx = other.tx * self.ix + other.ty * self.jx + other.tz * self.kx + self.tx,
+            .ty = other.tx * self.iy + other.ty * self.jy + other.tz * self.ky + self.ty,
+            .tz = other.tx * self.iz + other.ty * self.jz + other.tz * self.kz + self.tz,
+            .tw = vm.ps(1),
+        };
+    }
+
+    /// The caller asserts that W rows of all matrices are equal to [0 0 0 1].
+    /// Composes each of the eight affine matrices in `self` with one shared
+    /// matrix `other`, whose scalars are broadcast to every lane. The versors
+    /// of `other` are transformed by `self` as vectors and its translation as
+    /// a point, so the result applies `other` first and `self` second.
+    pub fn mulMatrixAffineSingle(self: Matrix4x4x8, other: vm.Matrix4x4) vm.Matrix4x4x8 {
+        std.debug.assert(@reduce(.And, (self.iw == vm.ps(0)) & (self.jw == vm.ps(0)) & (self.kw == vm.ps(0)) & (self.tw == vm.ps(1))));
+        std.debug.assert(other.iw == 0 and other.jw == 0 and other.kw == 0 and other.tw == 1);
+        return .{
+            .ix = vm.ps(other.ix) * self.ix + vm.ps(other.iy) * self.jx + vm.ps(other.iz) * self.kx,
+            .iy = vm.ps(other.ix) * self.iy + vm.ps(other.iy) * self.jy + vm.ps(other.iz) * self.ky,
+            .iz = vm.ps(other.ix) * self.iz + vm.ps(other.iy) * self.jz + vm.ps(other.iz) * self.kz,
+            .iw = vm.ps(0),
+            .jx = vm.ps(other.jx) * self.ix + vm.ps(other.jy) * self.jx + vm.ps(other.jz) * self.kx,
+            .jy = vm.ps(other.jx) * self.iy + vm.ps(other.jy) * self.jy + vm.ps(other.jz) * self.ky,
+            .jz = vm.ps(other.jx) * self.iz + vm.ps(other.jy) * self.jz + vm.ps(other.jz) * self.kz,
+            .jw = vm.ps(0),
+            .kx = vm.ps(other.kx) * self.ix + vm.ps(other.ky) * self.jx + vm.ps(other.kz) * self.kx,
+            .ky = vm.ps(other.kx) * self.iy + vm.ps(other.ky) * self.jy + vm.ps(other.kz) * self.ky,
+            .kz = vm.ps(other.kx) * self.iz + vm.ps(other.ky) * self.jz + vm.ps(other.kz) * self.kz,
+            .kw = vm.ps(0),
+            // other.t goes through self as a point: the first term must use
+            // self's I versor (ix/iy/iz), not self's translation.
+            .tx = vm.ps(other.tx) * self.ix + vm.ps(other.ty) * self.jx + vm.ps(other.tz) * self.kx + self.tx,
+            .ty = vm.ps(other.tx) * self.iy + vm.ps(other.ty) * self.jy + vm.ps(other.tz) * self.ky + self.ty,
+            .tz = vm.ps(other.tx) * self.iz + vm.ps(other.ty) * self.jz + vm.ps(other.tz) * self.kz + self.tz,
+            .tw = vm.ps(1),
+        };
+    }
+
+    pub fn mulMatrixFull(self: Matrix4x4x8, other: Matrix4x4x8) Matrix4x4x8 {
+        return .{
+            .ix = other.ix * self.ix + other.iy * self.jx + other.iz * self.kx + other.iw * self.tx,
+            .iy = other.ix * self.iy + other.iy * self.jy + other.iz * self.ky + other.iw * self.ty,
+            .iz = other.ix * self.iz + other.iy * self.jz + other.iz * self.kz + other.iw * self.tz,
+            .iw = other.ix * self.iw + other.iy * self.jw + other.iz * self.kw + other.iw * self.tw,
+            .jx = other.jx *
self.ix + other.jy * self.jx + other.jz * self.kx + other.jw * self.tx, + .jy = other.jx * self.iy + other.jy * self.jy + other.jz * self.ky + other.jw * self.ty, + .jz = other.jx * self.iz + other.jy * self.jz + other.jz * self.kz + other.jw * self.tz, + .jw = other.jx * self.iw + other.jy * self.jw + other.jz * self.kw + other.jw * self.tw, + .kx = other.kx * self.ix + other.ky * self.jx + other.kz * self.kx + other.kw * self.tx, + .ky = other.kx * self.iy + other.ky * self.jy + other.kz * self.ky + other.kw * self.ty, + .kz = other.kx * self.iz + other.ky * self.jz + other.kz * self.kz + other.kw * self.tz, + .kw = other.kx * self.iw + other.ky * self.jw + other.kz * self.kw + other.kw * self.tw, + .tx = other.tx * self.ix + other.ty * self.jx + other.tz * self.kx + other.tw * self.tx, + .ty = other.tx * self.iy + other.ty * self.jy + other.tz * self.ky + other.tw * self.ty, + .tz = other.tx * self.iz + other.ty * self.jz + other.tz * self.kz + other.tw * self.tz, + .tw = other.tx * self.iw + other.ty * self.jw + other.tz * self.kw + other.tw * self.tw, + }; + } + + pub fn mulMatrixFullSingle(self: Matrix4x4x8, other: vm.Matrix4x4) vm.Matrix4x4x8 { + return .{ + .ix = vm.ps(other.ix) * self.ix + vm.ps(other.iy) * self.jx + vm.ps(other.iz) * self.kx + vm.ps(other.iw) * self.tx, + .iy = vm.ps(other.ix) * self.iy + vm.ps(other.iy) * self.jy + vm.ps(other.iz) * self.ky + vm.ps(other.iw) * self.ty, + .iz = vm.ps(other.ix) * self.iz + vm.ps(other.iy) * self.jz + vm.ps(other.iz) * self.kz + vm.ps(other.iw) * self.tz, + .iw = vm.ps(other.ix) * self.iw + vm.ps(other.iy) * self.jw + vm.ps(other.iz) * self.kw + vm.ps(other.iw) * self.tw, + .jx = vm.ps(other.jx) * self.ix + vm.ps(other.jy) * self.jx + vm.ps(other.jz) * self.kx + vm.ps(other.jw) * self.tx, + .jy = vm.ps(other.jx) * self.iy + vm.ps(other.jy) * self.jy + vm.ps(other.jz) * self.ky + vm.ps(other.jw) * self.ty, + .jz = vm.ps(other.jx) * self.iz + vm.ps(other.jy) * self.jz + vm.ps(other.jz) * self.kz + 
vm.ps(other.jw) * self.tz, + .jw = vm.ps(other.jx) * self.iw + vm.ps(other.jy) * self.jw + vm.ps(other.jz) * self.kw + vm.ps(other.jw) * self.tw, + .kx = vm.ps(other.kx) * self.ix + vm.ps(other.ky) * self.jx + vm.ps(other.kz) * self.kx + vm.ps(other.kw) * self.tx, + .ky = vm.ps(other.kx) * self.iy + vm.ps(other.ky) * self.jy + vm.ps(other.kz) * self.ky + vm.ps(other.kw) * self.ty, + .kz = vm.ps(other.kx) * self.iz + vm.ps(other.ky) * self.jz + vm.ps(other.kz) * self.kz + vm.ps(other.kw) * self.tz, + .kw = vm.ps(other.kx) * self.iw + vm.ps(other.ky) * self.jw + vm.ps(other.kz) * self.kw + vm.ps(other.kw) * self.tw, + .tx = vm.ps(other.tx) * self.ix + vm.ps(other.ty) * self.jx + vm.ps(other.tz) * self.kx + vm.ps(other.tw) * self.tx, + .ty = vm.ps(other.tx) * self.iy + vm.ps(other.ty) * self.jy + vm.ps(other.tz) * self.ky + vm.ps(other.tw) * self.ty, + .tz = vm.ps(other.tx) * self.iz + vm.ps(other.ty) * self.jz + vm.ps(other.tz) * self.kz + vm.ps(other.tw) * self.tz, + .tw = vm.ps(other.tx) * self.iw + vm.ps(other.ty) * self.jw + vm.ps(other.tz) * self.kw + vm.ps(other.tw) * self.tw, + }; + } + + pub fn inverseOrthonormal(self: Matrix4x4x8) Matrix4x4x8 { + std.debug.assert(@reduce(.And, (self.iw == vm.ps(0)) & (self.jw == vm.ps(0)) & (self.kw == vm.ps(0)) & (self.tw == vm.ps(1)))); + const ix = self.ix; + const iy = self.jx; + const iz = self.kx; + const jx = self.iy; + const jy = self.jy; + const jz = self.ky; + const kx = self.iz; + const ky = self.jz; + const kz = self.kz; + return .{ + .ix = ix, + .iy = iy, + .iz = iz, + .iw = vm.ps(0), + .jx = jx, + .jy = jy, + .jz = jz, + .jw = vm.ps(0), + .kx = kx, + .ky = ky, + .kz = kz, + .kw = vm.ps(0), + .tx = -(self.tx * ix + self.ty * jx + self.tz * kx), + .ty = -(self.tx * iy + self.ty * jy + self.tz * ky), + .tz = -(self.tx * iz + self.ty * jz + self.tz * kz), + .tw = vm.ps(1), + }; + } + + pub fn inverseAffine(self: Matrix4x4x8) Matrix4x4x8 { + std.debug.assert(@reduce(.And, (self.iw == vm.ps(0)) & (self.jw == 
vm.ps(0)) & (self.kw == vm.ps(0)) & (self.tw == vm.ps(1))));
+        // Determinant of the 3x3 linear part, expanded along the x row:
+        // ix * cof(ix) + jx * cof(jx) + kx * cof(kx). The three parenthesised
+        // cofactors equal the `ix`, `iy`, `iz` locals computed below.
+        const inv_det = vm.ps(1.0) / (
+            // zig fmt: off
+            self.ix * (self.jy * self.kz - self.ky * self.jz) +
+            self.jx * (self.ky * self.iz - self.iy * self.kz) +
+            self.kx * (self.iy * self.jz - self.jy * self.iz)
+            // zig fmt: on
+        );
+        // Cofactors of the linear part, already laid out as the inverse's versors.
+        const ix = self.jy * self.kz - self.ky * self.jz;
+        const iy = self.ky * self.iz - self.iy * self.kz;
+        const iz = self.iy * self.jz - self.jy * self.iz;
+        const jx = self.kx * self.jz - self.jx * self.kz;
+        const jy = self.ix * self.kz - self.kx * self.iz;
+        const jz = self.jx * self.iz - self.ix * self.jz;
+        const kx = self.jx * self.ky - self.kx * self.jy;
+        const ky = self.kx * self.iy - self.ix * self.ky;
+        const kz = self.ix * self.jy - self.jx * self.iy;
+        return .{
+            .ix = inv_det * ix,
+            .iy = inv_det * iy,
+            .iz = inv_det * iz,
+            .iw = vm.ps(0),
+            .jx = inv_det * jx,
+            .jy = inv_det * jy,
+            .jz = inv_det * jz,
+            .jw = vm.ps(0),
+            .kx = inv_det * kx,
+            .ky = inv_det * ky,
+            .kz = inv_det * kz,
+            .kw = vm.ps(0),
+            // Inverse translation: the original translation pushed through
+            // the inverted linear part, negated.
+            .tx = -inv_det * (self.tx * ix + self.ty * jx + self.tz * kx),
+            .ty = -inv_det * (self.tx * iy + self.ty * jy + self.tz * ky),
+            .tz = -inv_det * (self.tx * iz + self.ty * jz + self.tz * kz),
+            .tw = vm.ps(1),
+        };
+    }
+
+    pub fn inverseFull(self: Matrix4x4x8) Matrix4x4x8 {
+        const iy_jz = self.iy * self.jz - self.jy * self.iz;
+        const iy_jw = self.iy * self.jw - self.jy * self.iw;
+        const iy_kz = self.iy * self.kz - self.ky * self.iz;
+        const iy_kw = self.iy * self.kw - self.ky * self.iw;
+        const iy_tz = self.iy * self.tz - self.ty * self.iz;
+        const iy_tw = self.iy * self.tw - self.ty * self.iw;
+        const iz_jw = self.iz * self.jw - self.jz * self.iw;
+        const iz_kw = self.iz * self.kw - self.kz * self.iw;
+        const iz_tw = self.iz * self.tw - self.tz * self.iw;
+        const jy_kz = self.jy * self.kz - self.ky * self.jz;
+        const jy_kw = self.jy * self.kw - self.ky * self.jw;
+        const jy_tz = self.jy * self.tz - self.ty * self.jz;
+        const jy_tw = self.jy * self.tw -
self.ty * self.jw;
+        const jz_kw = self.jz * self.kw - self.kz * self.jw;
+        const jz_tw = self.jz * self.tw - self.tz * self.jw;
+        const ky_tz = self.ky * self.tz - self.ty * self.kz;
+        const ky_tw = self.ky * self.tw - self.ty * self.kw;
+        const kz_tw = self.kz * self.tw - self.tz * self.kw;
+
+        // 3x3 minors, each built from the 2x2 sub-determinants above.
+        // det_<c><r> is the minor with column <c> and row <r> removed.
+        const det_ix = self.jy * kz_tw - self.ky * jz_tw + self.ty * jz_kw;
+        const det_jx = self.iy * kz_tw - self.ky * iz_tw + self.ty * iz_kw;
+        const det_kx = self.iy * jz_tw - self.jy * iz_tw + self.ty * iz_jw;
+        const det_tx = self.iy * jz_kw - self.jy * iz_kw + self.ky * iz_jw;
+
+        const det_iy = self.jx * kz_tw - self.kx * jz_tw + self.tx * jz_kw;
+        const det_jy = self.ix * kz_tw - self.kx * iz_tw + self.tx * iz_kw;
+        // The middle term takes the x-row element of column j (self.jx),
+        // like every other minor in this group.
+        const det_ky = self.ix * jz_tw - self.jx * iz_tw + self.tx * iz_jw;
+        const det_ty = self.ix * jz_kw - self.jx * iz_kw + self.kx * iz_jw;
+
+        const det_iz = self.jx * ky_tw - self.kx * jy_tw + self.tx * jy_kw;
+        const det_jz = self.ix * ky_tw - self.kx * iy_tw + self.tx * iy_kw;
+        const det_kz = self.ix * jy_tw - self.jx * iy_tw + self.tx * iy_jw;
+        const det_tz = self.ix * jy_kw - self.jx * iy_kw + self.kx * iy_jw;
+
+        const det_iw = self.jx * ky_tz - self.kx * jy_tz + self.tx * jy_kz;
+        const det_jw = self.ix * ky_tz - self.kx * iy_tz + self.tx * iy_kz;
+        const det_kw = self.ix * jy_tz - self.jx * iy_tz + self.tx * iy_jz;
+        const det_tw = self.ix * jy_kz - self.jx * iy_kz + self.kx * iy_jz;
+
+        // Full determinant by expansion along the x row.
+        const det = self.ix * det_ix - self.jx * det_jx + self.kx * det_kx - self.tx * det_tx;
+        const inv_det = vm.ps(1.0) / det;
+
+        // Signed minors (cofactors) scaled by 1/det, placed transposed.
+        return .{
+            // zig fmt: off
+            .ix =  inv_det * det_ix, .iy = -inv_det * det_jx, .iz =  inv_det * det_kx, .iw = -inv_det * det_tx,
+            .jx = -inv_det * det_iy, .jy =  inv_det * det_jy, .jz = -inv_det * det_ky, .jw =  inv_det * det_ty,
+            .kx =  inv_det * det_iz, .ky = -inv_det * det_jz, .kz =  inv_det * det_kz, .kw = -inv_det * det_tz,
+            .tx = -inv_det * det_iw, .ty =  inv_det * det_jw, .tz = -inv_det * det_kw, .tw =  inv_det * det_tw,
+            // zig fmt: on
+
}; + } +}; diff --git a/packages/vecmath/src/root.zig b/packages/vecmath/src/root.zig index 6856ddb..10e49f5 100644 --- a/packages/vecmath/src/root.zig +++ b/packages/vecmath/src/root.zig @@ -59,6 +59,16 @@ pub const epu64 = simd.epu64; const trig = @import("trig.zig"); +pub const rad_per_turn = trig.rad_per_turn; +pub const deg_per_turn = trig.deg_per_turn; +pub const turns_per_rad = trig.turns_per_rad; +pub const turns_per_deg = trig.turns_per_deg; + +pub const turnsToRadians = trig.turnsToRadians; +pub const turnsToDegrees = trig.turnsToDegrees; +pub const radiansToTurns = trig.radiansToTurns; +pub const degreesToTurns = trig.degreesToTurns; + pub const cos = trig.cos; pub const cos_x8 = trig.cos_x8; pub const sin = trig.sin; @@ -68,8 +78,8 @@ pub const cossin_x8 = trig.cossin_x8; // ----------------------------------------------------------------------------- -pub inline fn lerp(a: f32, b: f32, t: f32) f32 { - return @mulAdd(f32, t, b, @mulAdd(f32, -t, a, a)); +pub inline fn lerp(comptime T: type, a: T, b: T, t: T) T { + return @mulAdd(T, t, b, @mulAdd(T, -t, a, a)); } test "refAllDecls" { diff --git a/packages/vecmath/src/rotors/Complex.zig b/packages/vecmath/src/rotors/Complex.zig index 11a1cc6..685e90d 100644 --- a/packages/vecmath/src/rotors/Complex.zig +++ b/packages/vecmath/src/rotors/Complex.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Complex = extern struct { re: f32, @@ -9,20 +9,14 @@ pub const Complex = extern struct { pub const identity = init(1, 0); - pub const @"0" = init(1, 0); - pub const @"90" = init(0, 1); - pub const @"180" = init(-1, 0); - pub const @"270" = init(0, -1); - - // --- INIT --- + // --- INIT ---------------------------------------------------------------- pub inline fn init(re: f32, im: f32) Complex { return .{ .re = re, .im = im }; } pub inline fn initRotation(angle_turns: f32) Complex { - const c, const s = vm.cossin(angle_turns).asArray(); - return 
.{ .re = c, .im = s }; + return vm.cossin(angle_turns); } pub inline fn initArray(array: Array) Complex { @@ -33,7 +27,7 @@ pub const Complex = extern struct { return @bitCast(vector); } - // --- CONVERSION --- + // --- CONVERSION ---------------------------------------------------------- pub inline fn asArray(self: Complex) Array { return @bitCast(self); @@ -42,4 +36,90 @@ pub const Complex = extern struct { pub inline fn asVector2(self: Complex) vm.Vector2 { return @bitCast(self); } + + // --- COMPONENT-WISE ------------------------------------------------------ + + pub inline fn add(self: Complex, other: Complex) Complex { + return .{ + .re = self.re + other.re, + .im = self.im + other.im, + }; + } + + pub inline fn sub(self: Complex, other: Complex) Complex { + return .{ + .re = self.re - other.re, + .im = self.im - other.im, + }; + } + + pub inline fn mulScalar(self: Complex, scalar: f32) Complex { + return .{ + .re = self.re * scalar, + .im = self.im * scalar, + }; + } + + pub inline fn divScalar(self: Complex, scalar: f32) Complex { + return .{ + .re = self.re / scalar, + .im = self.im / scalar, + }; + } + + pub inline fn negate(self: Complex) Complex { + return .{ + .re = -self.re, + .im = -self.im, + }; + } + + pub inline fn conjugate(self: Complex) Complex { + return .{ + .re = self.re, + .im = -self.im, + }; + } + + // --- COMPOSE ------------------------------------------------------------- + + pub inline fn mulComplex(self: Complex, other: Complex) Complex { + return .{ + .re = self.re * other.re - self.im * other.im, + .im = self.re * other.im + self.im * other.re, + }; + } + + pub inline fn inverseUnit(self: Complex) Complex { + std.debug.assert(@abs(self.mag() - 1.0) <= 0x1p-10); + return .{ + .re = self.re, + .im = -self.im, + }; + } + + pub inline fn inverseFull(self: Complex) Complex { + const inv_mag_squared = 1.0 / (self.re * self.re + self.im * self.im); + return .{ + .re = inv_mag_squared * self.re, + .im = -inv_mag_squared * self.im, + }; + 
} + + // --- OTHER --------------------------------------------------------------- + + pub inline fn mag(self: Complex) f32 { + return @sqrt(self.re * self.re + self.im * self.im); + } + + pub inline fn magSquared(self: Complex) f32 { + return self.re * self.re + self.im * self.im; + } + + pub inline fn lerp(a: Complex, b: Complex, t: f32) Complex { + return .{ + .re = @mulAdd(f32, t, b.re, @mulAdd(f32, -t, a.re, a.re)), + .im = @mulAdd(f32, t, b.im, @mulAdd(f32, -t, a.im, a.im)), + }; + } }; diff --git a/packages/vecmath/src/rotors/Complex_x8.zig b/packages/vecmath/src/rotors/Complex_x8.zig index 5e7f165..461e824 100644 --- a/packages/vecmath/src/rotors/Complex_x8.zig +++ b/packages/vecmath/src/rotors/Complex_x8.zig @@ -1,15 +1,13 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Complex_x8 = struct { re: vm.f32x8, im: vm.f32x8, - pub const Array = [16]f32; - pub const identity = initSingle(1, 0); - // --- INIT --- + // --- INIT ---------------------------------------------------------------- pub inline fn init(re: vm.f32x8, im: vm.f32x8) Complex_x8 { return .{ .re = re, .im = im }; @@ -20,36 +18,179 @@ pub const Complex_x8 = struct { } pub inline fn initRotation(angle_turns: vm.f32x8) Complex_x8 { - const c, const s = vm.cossin_x8(angle_turns).asArray(); - return .{ .re = c, .im = s }; + return vm.cossin_x8(angle_turns); } pub inline fn initRotationSingle(angle_turns: f32) Complex_x8 { - const c, const s = vm.cossin(angle_turns).asArray(); - return .{ .re = vm.ps(c), .im = vm.ps(s) }; - } - - pub inline fn initSplat(complex: vm.Complex) Complex_x8 { + const complex = vm.cossin(angle_turns); return .{ .re = vm.ps(complex.re), .im = vm.ps(complex.im) }; } - pub inline fn initArray(array: Array) Complex_x8 { - const re: vm.f32x8 = array[0..8].*; - const im: vm.f32x8 = array[8..16].*; - return .{ .re = re, .im = im }; + pub inline fn initArrayOfComplex(complex: [8]vm.Complex) Complex_x8 { + const vector: 
@Vector(16, f32) = @as([16]f32, @bitCast(complex)); + return .{ + .re = @shuffle(f32, vector, undefined, [_]i32{ 0, 2, 4, 6, 8, 10, 12, 14 }), + .im = @shuffle(f32, vector, undefined, [_]i32{ 1, 3, 5, 7, 9, 11, 13, 15 }), + }; } - pub inline fn initVector2(vector: vm.Vector2x8) Complex_x8 { - return @bitCast(vector); + pub inline fn splat(complex: vm.Complex) Complex_x8 { + return .{ .re = vm.ps(complex.re), .im = vm.ps(complex.im) }; } - // --- CONVERSION --- + // --- CONVERSION ---------------------------------------------------------- - pub inline fn asArray(self: Complex_x8) Array { - return @bitCast(self); + pub inline fn asArrayOfComplex(self: Complex_x8) [8]vm.Complex { + const vector: @Vector(16, f32) = self.re ++ self.im; + return @bitCast(@as([16]f32, @shuffle(f32, vector, undefined, [_]i32{ + 0, 8, + 1, 9, + 2, 10, + 3, 11, + 4, 12, + 5, 13, + 6, 14, + 7, 15, + }))); } - pub inline fn asVector2(self: Complex_x8) vm.Vector2_x8 { - return @bitCast(self); + pub inline fn unpack(self: Complex_x8) [2]vm.f32x8 { + return .{ self.re, self.im }; + } + + // --- LOAD AND STORE ------------------------------------------------------ + + pub inline fn loadArrayOfComplex(self: *Complex_x8, array: *const [8]vm.Complex) void { + const vector: @Vector(16, f32) = @as(*const [16]f32, @ptrCast(array)).*; + self.re = @shuffle(f32, vector, undefined, [_]i32{ 0, 2, 4, 6, 8, 10, 12, 14 }); + self.im = @shuffle(f32, vector, undefined, [_]i32{ 1, 3, 5, 7, 9, 11, 13, 15 }); + } + + pub inline fn storeArrayOfComplex(self: *const Complex_x8, array: *[8]vm.Complex) void { + const vector: @Vector(16, f32) = self.re ++ self.im; + @as(*[16]f32, @ptrCast(array)).* = @shuffle(f32, vector, undefined, [_]i32{ + 0, 8, + 1, 9, + 2, 10, + 3, 11, + 4, 12, + 5, 13, + 6, 14, + 7, 15, + }); + } + + // --- COMPONENT-WISE ------------------------------------------------------ + + pub inline fn add(self: Complex_x8, other: Complex_x8) Complex_x8 { + return .{ + .re = self.re + other.re, + .im = 
self.im + other.im,
+        };
+    }
+
+    pub inline fn sub(self: Complex_x8, other: Complex_x8) Complex_x8 {
+        return .{
+            .re = self.re - other.re,
+            .im = self.im - other.im,
+        };
+    }
+
+    pub inline fn mulScalar(self: Complex_x8, scalar: vm.f32x8) Complex_x8 {
+        return .{
+            .re = self.re * scalar,
+            .im = self.im * scalar,
+        };
+    }
+
+    pub inline fn mulScalarSingle(self: Complex_x8, scalar: f32) Complex_x8 {
+        return .{
+            .re = self.re * vm.ps(scalar),
+            .im = self.im * vm.ps(scalar),
+        };
+    }
+
+    pub inline fn divScalar(self: Complex_x8, scalar: vm.f32x8) Complex_x8 {
+        return .{
+            .re = self.re / scalar,
+            .im = self.im / scalar,
+        };
+    }
+
+    pub inline fn divScalarSingle(self: Complex_x8, scalar: f32) Complex_x8 {
+        return .{
+            .re = self.re / vm.ps(scalar),
+            .im = self.im / vm.ps(scalar),
+        };
+    }
+
+    pub inline fn negate(self: Complex_x8) Complex_x8 {
+        return .{
+            .re = -self.re,
+            .im = -self.im,
+        };
+    }
+
+    pub inline fn conjugate(self: Complex_x8) Complex_x8 {
+        return .{
+            .re = self.re,
+            .im = -self.im,
+        };
+    }
+
+    // --- COMPOSE -------------------------------------------------------------
+
+    pub inline fn mulComplex(self: Complex_x8, other: Complex_x8) Complex_x8 {
+        return .{
+            .re = self.re * other.re - self.im * other.im,
+            .im = self.re * other.im + self.im * other.re,
+        };
+    }
+
+    pub inline fn mulComplexSingle(self: Complex_x8, other: vm.Complex) Complex_x8 {
+        return .{
+            .re = self.re * vm.ps(other.re) - self.im * vm.ps(other.im),
+            .im = self.re * vm.ps(other.im) + self.im * vm.ps(other.re),
+        };
+    }
+
+    pub inline fn inverseUnit(self: Complex_x8) Complex_x8 {
+        std.debug.assert(@reduce(.And, @abs(self.mag() - vm.ps(1.0)) <= vm.ps(0x1p-10)));
+        return .{
+            .re = self.re,
+            .im = -self.im,
+        };
+    }
+
+    /// Returns the multiplicative inverse of each of the eight complex
+    /// numbers: the conjugate divided by the squared magnitude.
+    /// No zero check is made; a zero lane divides by zero.
+    pub inline fn inverseFull(self: Complex_x8) Complex_x8 {
+        // The numerator is splatted with vm.ps so both operands of the
+        // division are vm.f32x8, as in the other 8-wide code.
+        const inv_mag_squared = vm.ps(1.0) / (self.re * self.re + self.im * self.im);
+        return .{
+            .re = inv_mag_squared * self.re,
+            .im = -inv_mag_squared * self.im,
+        };
+    }
+
+    // --- OTHER 
--------------------------------------------------------------- + + pub inline fn mag(self: Complex_x8) vm.f32x8 { + return @sqrt(self.re * self.re + self.im * self.im); + } + + pub inline fn magSquared(self: Complex_x8) vm.f32x8 { + return self.re * self.re + self.im * self.im; + } + + pub inline fn lerp(a: Complex_x8, b: Complex_x8, t: vm.f32x8) Complex_x8 { + return .{ + .re = @mulAdd(vm.f32x8, t, b.re, @mulAdd(vm.f32x8, -t, a.re, a.re)), + .im = @mulAdd(vm.f32x8, t, b.im, @mulAdd(vm.f32x8, -t, a.im, a.im)), + }; + } + + pub inline fn lerpSingle(a: Complex_x8, b: Complex_x8, t: f32) Complex_x8 { + return .{ + .re = @mulAdd(vm.f32x8, vm.ps(t), b.re, @mulAdd(vm.f32x8, -vm.ps(t), a.re, a.re)), + .im = @mulAdd(vm.f32x8, vm.ps(t), b.im, @mulAdd(vm.f32x8, -vm.ps(t), a.im, a.im)), + }; } }; diff --git a/packages/vecmath/src/rotors/Quaternion.zig b/packages/vecmath/src/rotors/Quaternion.zig index 9210e86..d014461 100644 --- a/packages/vecmath/src/rotors/Quaternion.zig +++ b/packages/vecmath/src/rotors/Quaternion.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Quaternion = extern struct { x: f32, @@ -8,49 +8,31 @@ pub const Quaternion = extern struct { w: f32, pub const Array = [4]f32; + pub const Plane = enum { XY, XZ, YX, YZ, ZX, ZY }; pub const identity = init(0, 0, 0, 1); - // --- INIT --- + // --- INIT ---------------------------------------------------------------- pub inline fn init(x: f32, y: f32, z: f32, w: f32) Quaternion { return .{ .x = x, .y = y, .z = z, .w = w }; } - pub inline fn initRotationXY(angle_turns: f32) Quaternion { + pub inline fn initRotation(comptime plane: Plane, angle_turns: f32) Quaternion { const half_angle_turns = 0.5 * angle_turns; - const c, const s = vm.cossin(half_angle_turns).asArray(); - return .{ .x = 0, .y = 0, .z = s, .w = c }; + return initComplex(plane, vm.cossin(half_angle_turns)); } - pub inline fn initRotationXZ(angle_turns: f32) Quaternion { - const 
half_angle_turns = 0.5 * angle_turns; - const c, const s = vm.cossin(half_angle_turns).asArray(); - return .{ .x = 0, .y = -s, .z = 0, .w = c }; - } - - pub inline fn initRotationYX(angle_turns: f32) Quaternion { - const half_angle_turns = 0.5 * angle_turns; - const c, const s = vm.cossin(half_angle_turns).asArray(); - return .{ .x = 0, .y = 0, .z = -s, .w = c }; - } - - pub inline fn initRotationYZ(angle_turns: f32) Quaternion { - const half_angle_turns = 0.5 * angle_turns; - const c, const s = vm.cossin(half_angle_turns).asArray(); - return .{ .x = s, .y = 0, .z = 0, .w = c }; - } - - pub inline fn initRotationZX(angle_turns: f32) Quaternion { - const half_angle_turns = 0.5 * angle_turns; - const c, const s = vm.cossin(half_angle_turns).asArray(); - return .{ .x = 0, .y = s, .z = 0, .w = c }; - } - - pub inline fn initRotationZY(angle_turns: f32) Quaternion { - const half_angle_turns = 0.5 * angle_turns; - const c, const s = vm.cossin(half_angle_turns).asArray(); - return .{ .x = -s, .y = 0, .z = 0, .w = c }; + pub inline fn initComplex(comptime plane: Plane, complex: vm.Complex) Quaternion { + const c, const s = complex.asArray(); + return switch (plane) { + .XY => .{ .x = 0, .y = 0, .z = s, .w = c }, + .XZ => .{ .x = 0, .y = -s, .z = 0, .w = c }, + .YX => .{ .x = 0, .y = 0, .z = -s, .w = c }, + .YZ => .{ .x = s, .y = 0, .z = 0, .w = c }, + .ZX => .{ .x = 0, .y = s, .z = 0, .w = c }, + .ZY => .{ .x = -s, .y = 0, .z = 0, .w = c }, + }; } pub inline fn initArray(array: Array) Quaternion { @@ -61,7 +43,7 @@ pub const Quaternion = extern struct { return @bitCast(vector); } - // --- CONVERSION --- + // --- CONVERSION ---------------------------------------------------------- pub inline fn asArray(self: Quaternion) Array { return @bitCast(self); @@ -71,7 +53,7 @@ pub const Quaternion = extern struct { return @bitCast(self); } - // --- ACCESSORS --- + // --- ACCESSORS ----------------------------------------------------------- pub inline fn getVector(self: Quaternion) 
vm.Vector3 { return .{ .x = self.x, .y = self.y, .z = self.z }; @@ -80,4 +62,110 @@ pub const Quaternion = extern struct { pub inline fn getScalar(self: Quaternion) f32 { return self.w; } + + // --- COMPONENT-WISE ------------------------------------------------------ + + pub inline fn add(self: Quaternion, other: Quaternion) Quaternion { + return .{ + .x = self.x + other.x, + .y = self.y + other.y, + .z = self.z + other.z, + .w = self.w + other.w, + }; + } + + pub inline fn sub(self: Quaternion, other: Quaternion) Quaternion { + return .{ + .x = self.x - other.x, + .y = self.y - other.y, + .z = self.z - other.z, + .w = self.w - other.w, + }; + } + + pub inline fn mulScalar(self: Quaternion, scalar: f32) Quaternion { + return .{ + .x = self.x * scalar, + .y = self.y * scalar, + .z = self.z * scalar, + .w = self.w * scalar, + }; + } + + pub inline fn divScalar(self: Quaternion, scalar: f32) Quaternion { + return .{ + .x = self.x / scalar, + .y = self.y / scalar, + .z = self.z / scalar, + .w = self.w / scalar, + }; + } + + pub inline fn negate(self: Quaternion) Quaternion { + return .{ + .x = -self.x, + .y = -self.y, + .z = -self.z, + .w = -self.w, + }; + } + + pub inline fn conjugate(self: Quaternion) Quaternion { + return .{ + .x = -self.x, + .y = -self.y, + .z = -self.z, + .w = self.w, + }; + } + + // --- COMPOSE ------------------------------------------------------------- + + pub inline fn mulQuaternion(self: Quaternion, other: Quaternion) Quaternion { + return .{ + .x = self.w * other.x + self.x * other.w + self.y * other.z - self.z * other.y, + .y = self.w * other.y - self.x * other.z + self.y * other.w + self.z * other.x, + .z = self.w * other.z + self.x * other.y - self.y * other.x + self.z * other.w, + .w = self.w * other.w - self.x * other.x - self.y * other.y - self.z * other.z, + }; + } + + pub inline fn inverseUnit(self: Quaternion) Quaternion { + std.debug.assert(@abs(self.mag() - 1.0) <= 0x1p-10); + return .{ + .x = -self.x, + .y = -self.y, + .z = 
-self.z, + .w = self.w, + }; + } + + pub inline fn inverseFull(self: Quaternion) Quaternion { + const inv_mag_squared = 1.0 / (self.x * self.x + self.y * self.y + self.z * self.z + self.w * self.w); + return .{ + .x = -inv_mag_squared * self.x, + .y = -inv_mag_squared * self.y, + .z = -inv_mag_squared * self.z, + .w = inv_mag_squared * self.w, + }; + } + + // --- OTHER --------------------------------------------------------------- + + pub inline fn mag(self: Quaternion) f32 { + return @sqrt(self.x * self.x + self.y * self.y + self.z * self.z + self.w * self.w); + } + + pub inline fn magSquared(self: Quaternion) f32 { + return self.x * self.x + self.y * self.y + self.z * self.z + self.w * self.w; + } + + pub inline fn lerp(a: Quaternion, b: Quaternion, t: f32) Quaternion { + return .{ + .x = @mulAdd(f32, t, b.x, @mulAdd(f32, -t, a.x, a.x)), + .y = @mulAdd(f32, t, b.y, @mulAdd(f32, -t, a.y, a.y)), + .z = @mulAdd(f32, t, b.z, @mulAdd(f32, -t, a.z, a.z)), + .w = @mulAdd(f32, t, b.w, @mulAdd(f32, -t, a.w, a.w)), + }; + } }; diff --git a/packages/vecmath/src/rotors/Quaternion_x8.zig b/packages/vecmath/src/rotors/Quaternion_x8.zig index fb780c0..a532bdd 100644 --- a/packages/vecmath/src/rotors/Quaternion_x8.zig +++ b/packages/vecmath/src/rotors/Quaternion_x8.zig @@ -1,17 +1,17 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); -pub const Quaternion_x8 = extern struct { +pub const Quaternion_x8 = struct { x: vm.f32x8, y: vm.f32x8, z: vm.f32x8, w: vm.f32x8, - pub const Array = [32]f32; + pub const Plane = vm.Quaternion.Plane; pub const identity = initSingle(0, 0, 0, 1); - // --- INIT --- + // --- INIT ---------------------------------------------------------------- pub inline fn init(x: vm.f32x8, y: vm.f32x8, z: vm.f32x8, w: vm.f32x8) Quaternion_x8 { return .{ .x = x, .y = y, .z = z, .w = w }; @@ -21,59 +21,104 @@ pub const Quaternion_x8 = extern struct { return .{ .x = vm.ps(x), .y = vm.ps(y), .z = vm.ps(z), .w = vm.ps(w) }; } 
- pub inline fn initRotationXY(angle_turns: vm.f32x8) Quaternion_x8 { + pub inline fn initRotation(comptime plane: Plane, angle_turns: vm.f32x8) Quaternion_x8 { const half_angle_turns = vm.ps(0.5) * angle_turns; - const c, const s = vm.cossin_x8(half_angle_turns).unpack(); - return .{ .x = vm.ps(0), .y = vm.ps(0), .z = s, .w = c }; + return initComplex(plane, vm.cossin_x8(half_angle_turns)); } - pub inline fn initRotationXZ(angle_turns: vm.f32x8) Quaternion_x8 { - const half_angle_turns = vm.ps(0.5) * angle_turns; - const c, const s = vm.cossin_x8(half_angle_turns).unpack(); - return .{ .x = vm.ps(0), .y = -s, .z = vm.ps(0), .w = c }; + pub inline fn initRotationSingle(comptime plane: Plane, angle_turns: f32) Quaternion_x8 { + const half_angle_turns = 0.5 * angle_turns; + return initComplexSingle(plane, vm.cossin(half_angle_turns)); } - pub inline fn initRotationYX(angle_turns: vm.f32x8) Quaternion_x8 { - const half_angle_turns = vm.ps(0.5) * angle_turns; - const c, const s = vm.cossin_x8(half_angle_turns).unpack(); - return .{ .x = vm.ps(0), .y = vm.ps(0), .z = -s, .w = c }; + pub inline fn initComplex(comptime plane: Plane, complex: vm.Complex_x8) Quaternion_x8 { + const c, const s = complex.unpack(); + return switch (plane) { + .XY => .{ .x = vm.ps(0), .y = vm.ps(0), .z = s, .w = c }, + .XZ => .{ .x = vm.ps(0), .y = -s, .z = vm.ps(0), .w = c }, + .YX => .{ .x = vm.ps(0), .y = vm.ps(0), .z = -s, .w = c }, + .YZ => .{ .x = s, .y = vm.ps(0), .z = vm.ps(0), .w = c }, + .ZX => .{ .x = vm.ps(0), .y = s, .z = vm.ps(0), .w = c }, + .ZY => .{ .x = -s, .y = vm.ps(0), .z = vm.ps(0), .w = c }, + }; } - pub inline fn initRotationYZ(angle_turns: vm.f32x8) Quaternion_x8 { - const half_angle_turns = vm.ps(0.5) * angle_turns; - const c, const s = vm.cossin_x8(half_angle_turns).unpack(); - return .{ .x = s, .y = vm.ps(0), .z = vm.ps(0), .w = c }; + pub inline fn initComplexSingle(comptime plane: Plane, complex: vm.Complex) Quaternion_x8 { + const c, const s = complex.asArray(); + 
return switch (plane) { + .XY => .{ .x = vm.ps(0), .y = vm.ps(0), .z = vm.ps(s), .w = vm.ps(c) }, + .XZ => .{ .x = vm.ps(0), .y = vm.ps(-s), .z = vm.ps(0), .w = vm.ps(c) }, + .YX => .{ .x = vm.ps(0), .y = vm.ps(0), .z = vm.ps(-s), .w = vm.ps(c) }, + .YZ => .{ .x = vm.ps(s), .y = vm.ps(0), .z = vm.ps(0), .w = vm.ps(c) }, + .ZX => .{ .x = vm.ps(0), .y = vm.ps(s), .z = vm.ps(0), .w = vm.ps(c) }, + .ZY => .{ .x = vm.ps(-s), .y = vm.ps(0), .z = vm.ps(0), .w = vm.ps(c) }, + }; } - pub inline fn initRotationZX(angle_turns: vm.f32x8) Quaternion_x8 { - const half_angle_turns = vm.ps(0.5) * angle_turns; - const c, const s = vm.cossin_x8(half_angle_turns).unpack(); - return .{ .x = vm.ps(0), .y = s, .z = vm.ps(0), .w = c }; + pub inline fn initArrayOfQuaternions(quaternions: [8]vm.Quaternion) Quaternion_x8 { + const vector: @Vector(32, f32) = @as([32]f32, @bitCast(quaternions)); + return .{ + .x = @shuffle(f32, vector, undefined, [_]i32{ 0, 4, 8, 12, 16, 20, 24, 28 }), + .y = @shuffle(f32, vector, undefined, [_]i32{ 1, 5, 9, 13, 17, 21, 25, 29 }), + .z = @shuffle(f32, vector, undefined, [_]i32{ 2, 6, 10, 14, 18, 22, 26, 30 }), + .w = @shuffle(f32, vector, undefined, [_]i32{ 3, 7, 11, 15, 19, 23, 27, 31 }), + }; } - pub inline fn initRotationZY(angle_turns: vm.f32x8) Quaternion_x8 { - const half_angle_turns = vm.ps(0.5) * angle_turns; - const c, const s = vm.cossin_x8(half_angle_turns).unpack(); - return .{ .x = -s, .y = vm.ps(0), .z = vm.ps(0), .w = c }; + pub inline fn splat(quaternion: vm.Quaternion) Quaternion_x8 { + return .{ + .x = vm.ps(quaternion.x), + .y = vm.ps(quaternion.y), + .z = vm.ps(quaternion.z), + .w = vm.ps(quaternion.w), + }; } - //pub inline fn initArray() Quaternion_x8 {} + // --- CONVERSION ---------------------------------------------------------- - //pub inline fn initArrayTranspose() Quaternion_x8 {} - - //pub inline fn initArrayOfQuaternions() Quaternion_x8 {} - - //pub inline fn initVector4x8(vector: Vector4x8) Quaternion_x8 {} - - pub inline fn 
initSplat(quaternion: vm.Quaternion) Quaternion_x8 { - return .{ .x = vm.ps(quaternion.x), .y = vm.ps(quaternion.y), .z = vm.ps(quaternion.z), .w = vm.ps(quaternion.w) }; + pub inline fn asArrayOfQuaternions(self: Quaternion_x8) [8]vm.Quaternion { + const vector: @Vector(32, f32) = self.x ++ self.y ++ self.z ++ self.w; + return @bitCast(@as([32]f32, @shuffle(f32, vector, undefined, [_]i32{ + 0, 8, 16, 24, + 1, 9, 17, 25, + 2, 10, 18, 26, + 3, 11, 19, 27, + 4, 12, 20, 28, + 5, 13, 21, 29, + 6, 14, 22, 30, + 7, 15, 23, 31, + }))); } - pub inline fn initSplatVector4(vector: vm.Vector4) Quaternion_x8 { - return .{ .x = vm.ps(vector.x), .y = vm.ps(vector.y), .z = vm.ps(vector.z), .w = vm.ps(vector.w) }; + pub inline fn unpack(self: Quaternion_x8) [4]vm.f32x8 { + return .{ self.x, self.y, self.z, self.w }; } - // --- ACCESSORS --- + // --- LOAD AND STORE ------------------------------------------------------ + + pub inline fn loadArrayOfQuaternions(self: *Quaternion_x8, array: *const [8]vm.Quaternion) void { + const vector: @Vector(32, f32) = @as(*const [32]f32, @ptrCast(array)).*; + self.x = @shuffle(f32, vector, undefined, [_]i32{ 0, 4, 8, 12, 16, 20, 24, 28 }); + self.y = @shuffle(f32, vector, undefined, [_]i32{ 1, 5, 9, 13, 17, 21, 25, 29 }); + self.z = @shuffle(f32, vector, undefined, [_]i32{ 2, 6, 10, 14, 18, 22, 26, 30 }); + self.w = @shuffle(f32, vector, undefined, [_]i32{ 3, 7, 11, 15, 19, 23, 27, 31 }); + } + + pub inline fn storeArrayOfQuaternions(self: *const Quaternion_x8, array: *[8]vm.Quaternion) void { + const vector: @Vector(32, f32) = self.x ++ self.y ++ self.z ++ self.w; + @as(*[32]f32, @ptrCast(array)).* = @shuffle(f32, vector, undefined, [_]i32{ + 0, 8, 16, 24, + 1, 9, 17, 25, + 2, 10, 18, 26, + 3, 11, 19, 27, + 4, 12, 20, 28, + 5, 13, 21, 29, + 6, 14, 22, 30, + 7, 15, 23, 31, + }); + } + + // --- ACCESSORS ----------------------------------------------------------- pub inline fn getVector(self: Quaternion_x8) vm.Vector3x8 { return .{ .x = self.x, .y 
= self.y, .z = self.z }; @@ -82,4 +127,155 @@ pub const Quaternion_x8 = extern struct { pub inline fn getScalar(self: Quaternion_x8) vm.f32x8 { return self.w; } + + // --- COMPONENT-WISE ------------------------------------------------------ + + pub inline fn add(self: Quaternion_x8, other: Quaternion_x8) Quaternion_x8 { + return .{ + .x = self.x + other.x, + .y = self.y + other.y, + .z = self.z + other.z, + .w = self.w + other.w, + }; + } + + pub inline fn sub(self: Quaternion_x8, other: Quaternion_x8) Quaternion_x8 { + return .{ + .x = self.x - other.x, + .y = self.y - other.y, + .z = self.z - other.z, + .w = self.w - other.w, + }; + } + + pub inline fn mulScalar(self: Quaternion_x8, scalar: vm.f32x8) Quaternion_x8 { + return .{ + .x = self.x * scalar, + .y = self.y * scalar, + .z = self.z * scalar, + .w = self.w * scalar, + }; + } + + pub inline fn mulScalarSingle(self: Quaternion_x8, scalar: f32) Quaternion_x8 { + return .{ + .x = self.x * vm.ps(scalar), + .y = self.y * vm.ps(scalar), + .z = self.z * vm.ps(scalar), + .w = self.w * vm.ps(scalar), + }; + } + + pub inline fn divScalar(self: Quaternion_x8, scalar: vm.f32x8) Quaternion_x8 { + return .{ + .x = self.x / scalar, + .y = self.y / scalar, + .z = self.z / scalar, + .w = self.w / scalar, + }; + } + + pub inline fn divScalarSingle(self: Quaternion_x8, scalar: f32) Quaternion_x8 { + return .{ + .x = self.x / vm.ps(scalar), + .y = self.y / vm.ps(scalar), + .z = self.z / vm.ps(scalar), + .w = self.w / vm.ps(scalar), + }; + } + + pub inline fn negate(self: Quaternion_x8) Quaternion_x8 { + return .{ + .x = -self.x, + .y = -self.y, + .z = -self.z, + .w = -self.w, + }; + } + + pub inline fn conjugate(self: Quaternion_x8) Quaternion_x8 { + return .{ + .x = -self.x, + .y = -self.y, + .z = -self.z, + .w = self.w, + }; + } + + // --- COMPOSE ------------------------------------------------------------- + + pub inline fn mulQuaternion(self: Quaternion_x8, other: Quaternion_x8) Quaternion_x8 { + return .{ + .x =
self.w * other.x + self.x * other.w + self.y * other.z - self.z * other.y, + .y = self.w * other.y - self.x * other.z + self.y * other.w + self.z * other.x, + .z = self.w * other.z + self.x * other.y - self.y * other.x + self.z * other.w, + .w = self.w * other.w - self.x * other.x - self.y * other.y - self.z * other.z, + }; + } + + pub inline fn mulQuaternionSingle(self: Quaternion_x8, other: vm.Quaternion) Quaternion_x8 { + return .{ + .x = self.w * vm.ps(other.x) + self.x * vm.ps(other.w) + self.y * vm.ps(other.z) - self.z * vm.ps(other.y), + .y = self.w * vm.ps(other.y) - self.x * vm.ps(other.z) + self.y * vm.ps(other.w) + self.z * vm.ps(other.x), + .z = self.w * vm.ps(other.z) + self.x * vm.ps(other.y) - self.y * vm.ps(other.x) + self.z * vm.ps(other.w), + .w = self.w * vm.ps(other.w) - self.x * vm.ps(other.x) - self.y * vm.ps(other.y) - self.z * vm.ps(other.z), + }; + } + + pub inline fn premulQuaternionSingle(self: Quaternion_x8, other: vm.Quaternion) Quaternion_x8 { + return .{ + .x = vm.ps(other.w) * self.x + vm.ps(other.x) * self.w + vm.ps(other.y) * self.z - vm.ps(other.z) * self.y, + .y = vm.ps(other.w) * self.y - vm.ps(other.x) * self.z + vm.ps(other.y) * self.w + vm.ps(other.z) * self.x, + .z = vm.ps(other.w) * self.z + vm.ps(other.x) * self.y - vm.ps(other.y) * self.x + vm.ps(other.z) * self.w, + .w = vm.ps(other.w) * self.w - vm.ps(other.x) * self.x - vm.ps(other.y) * self.y - vm.ps(other.z) * self.z, + }; + } + + pub inline fn inverseUnit(self: Quaternion_x8) Quaternion_x8 { + std.debug.assert(@reduce(.And, @abs(self.mag() - vm.ps(1.0)) <= vm.ps(0x1p-10))); + return .{ + .x = -self.x, + .y = -self.y, + .z = -self.z, + .w = self.w, + }; + } + + pub inline fn inverseFull(self: Quaternion_x8) Quaternion_x8 { + const inv_mag_squared = vm.ps(1.0) / (self.x * self.x + self.y * self.y + self.z * self.z + self.w * self.w); + return .{ + .x = -inv_mag_squared * self.x, + .y = -inv_mag_squared * self.y, + .z = -inv_mag_squared * self.z, + .w = inv_mag_squared 
* self.w, + }; + } + + // --- OTHER --------------------------------------------------------------- + + pub inline fn mag(self: Quaternion_x8) vm.f32x8 { + return @sqrt(self.x * self.x + self.y * self.y + self.z * self.z + self.w * self.w); + } + + pub inline fn magSquared(self: Quaternion_x8) vm.f32x8 { + return self.x * self.x + self.y * self.y + self.z * self.z + self.w * self.w; + } + + pub inline fn lerp(a: Quaternion_x8, b: Quaternion_x8, t: vm.f32x8) Quaternion_x8 { + return .{ + .x = @mulAdd(vm.f32x8, t, b.x, @mulAdd(vm.f32x8, -t, a.x, a.x)), + .y = @mulAdd(vm.f32x8, t, b.y, @mulAdd(vm.f32x8, -t, a.y, a.y)), + .z = @mulAdd(vm.f32x8, t, b.z, @mulAdd(vm.f32x8, -t, a.z, a.z)), + .w = @mulAdd(vm.f32x8, t, b.w, @mulAdd(vm.f32x8, -t, a.w, a.w)), + }; + } + + pub inline fn lerpSingle(a: Quaternion_x8, b: Quaternion_x8, t: f32) Quaternion_x8 { + return .{ + .x = @mulAdd(vm.f32x8, vm.ps(t), b.x, @mulAdd(vm.f32x8, -vm.ps(t), a.x, a.x)), + .y = @mulAdd(vm.f32x8, vm.ps(t), b.y, @mulAdd(vm.f32x8, -vm.ps(t), a.y, a.y)), + .z = @mulAdd(vm.f32x8, vm.ps(t), b.z, @mulAdd(vm.f32x8, -vm.ps(t), a.z, a.z)), + .w = @mulAdd(vm.f32x8, vm.ps(t), b.w, @mulAdd(vm.f32x8, -vm.ps(t), a.w, a.w)), + }; + } }; diff --git a/packages/vecmath/src/simd.zig b/packages/vecmath/src/simd.zig index 8b992ba..a099199 100644 --- a/packages/vecmath/src/simd.zig +++ b/packages/vecmath/src/simd.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("root.zig"); pub const f32x8 = @Vector(8, f32); pub const i32x8 = @Vector(8, i32); diff --git a/packages/vecmath/src/trig.zig b/packages/vecmath/src/trig.zig index d513b56..36c1b72 100644 --- a/packages/vecmath/src/trig.zig +++ b/packages/vecmath/src/trig.zig @@ -1,11 +1,103 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("root.zig"); + +/// The number of radians per one turn (τ). +pub const rad_per_turn = 1.0 * std.math.tau; + +/// The number of degrees per one turn (360). 
+pub const deg_per_turn = 360.0; + +/// The number of turns per one radian (1 / τ). +pub const turns_per_rad = 1.0 / std.math.tau; + +/// The number of turns per one degree (1 / 360). +pub const turns_per_deg = 1.0 / 360.0; + +/// Converts an angle in turns to radians. `@TypeOf(ang)` must be a float or +/// comptime number or a vector of floats. +pub fn turnsToRadians(ang: anytype) if (@TypeOf(ang) == comptime_int) comptime_float else @TypeOf(ang) { + const T = @TypeOf(ang); + switch (@typeInfo(T)) { + .float, .comptime_float, .comptime_int => return ang * rad_per_turn, + .vector => |V| if (@typeInfo(V.child) == .float) return ang * @as(T, @splat(rad_per_turn)), + else => {}, + } + @compileError("Input must be float or a comptime number, or a vector of floats."); +} + +test turnsToRadians { + try std.testing.expectEqual(0, turnsToRadians(@as(f32, 0))); + try std.testing.expectApproxEqAbs(0.25 * std.math.tau, turnsToRadians(@as(f32, 0.25)), 0x1p-10); + try std.testing.expectApproxEqAbs(0.5 * std.math.tau, turnsToRadians(@as(f32, 0.5)), 0x1p-10); + try std.testing.expectApproxEqAbs(0.75 * std.math.tau, turnsToRadians(@as(f32, 0.75)), 0x1p-10); + try std.testing.expectApproxEqAbs(std.math.tau, turnsToRadians(@as(f32, 1)), 0x1p-10); +} + +/// Converts an angle in turns to degrees. `@TypeOf(ang)` must be a float or +/// comptime number or a vector of floats. 
+pub fn turnsToDegrees(ang: anytype) if (@TypeOf(ang) == comptime_int) comptime_float else @TypeOf(ang) { + const T = @TypeOf(ang); + switch (@typeInfo(T)) { + .float, .comptime_float, .comptime_int => return ang * deg_per_turn, + .vector => |V| if (@typeInfo(V.child) == .float) return ang * @as(T, @splat(deg_per_turn)), + else => {}, + } + @compileError("Input must be float or a comptime number, or a vector of floats."); +} + +test turnsToDegrees { + try std.testing.expectEqual(0, turnsToDegrees(@as(f32, 0))); + try std.testing.expectEqual(90, turnsToDegrees(@as(f32, 0.25))); + try std.testing.expectEqual(180, turnsToDegrees(@as(f32, 0.5))); + try std.testing.expectEqual(270, turnsToDegrees(@as(f32, 0.75))); + try std.testing.expectEqual(360, turnsToDegrees(@as(f32, 1))); +} + +/// Converts an angle in radians to turns. `@TypeOf(ang)` must be a float or +/// comptime number or a vector of floats. +pub fn radiansToTurns(ang: anytype) if (@TypeOf(ang) == comptime_int) comptime_float else @TypeOf(ang) { + const T = @TypeOf(ang); + switch (@typeInfo(T)) { + .float, .comptime_float, .comptime_int => return ang * turns_per_rad, + .vector => |V| if (@typeInfo(V.child) == .float) return ang * @as(T, @splat(turns_per_rad)), + else => {}, + } + @compileError("Input must be float or a comptime number, or a vector of floats."); +} + +test radiansToTurns { + try std.testing.expectEqual(0, radiansToTurns(@as(f32, 0))); + try std.testing.expectApproxEqAbs(0.25, radiansToTurns(@as(f32, 0.25 * std.math.tau)), 0x1p-10); + try std.testing.expectApproxEqAbs(0.5, radiansToTurns(@as(f32, 0.5 * std.math.tau)), 0x1p-10); + try std.testing.expectApproxEqAbs(0.75, radiansToTurns(@as(f32, 0.75 * std.math.tau)), 0x1p-10); + try std.testing.expectApproxEqAbs(1, radiansToTurns(@as(f32, std.math.tau)), 0x1p-10); +} + +/// Converts an angle in degrees to turns. `@TypeOf(ang)` must be a float or +/// comptime number or a vector of floats. 
+pub fn degreesToTurns(ang: anytype) if (@TypeOf(ang) == comptime_int) comptime_float else @TypeOf(ang) { + const T = @TypeOf(ang); + switch (@typeInfo(T)) { + .float, .comptime_float, .comptime_int => return ang / deg_per_turn, + .vector => |V| if (@typeInfo(V.child) == .float) return ang / @as(T, @splat(deg_per_turn)), + else => {}, + } + @compileError("Input must be float or a comptime number, or a vector of floats."); +} + +test degreesToTurns { + try std.testing.expectEqual(0, degreesToTurns(@as(f32, 0))); + try std.testing.expectEqual(0.25, degreesToTurns(@as(f32, 90))); + try std.testing.expectEqual(0.5, degreesToTurns(@as(f32, 180))); + try std.testing.expectEqual(0.75, degreesToTurns(@as(f32, 270))); + try std.testing.expectEqual(1, degreesToTurns(@as(f32, 360))); +} pub fn cos(angle_turns: f32) f32 { return cossin(angle_turns).re; } -test "cos" { +test cos { try std.testing.expectEqual(1, cos(-1)); try std.testing.expectEqual(0, cos(-0.75)); try std.testing.expectEqual(-1, cos(-0.5)); @@ -20,7 +112,7 @@ pub fn cos_x8(angle_turns: vm.f32x8) vm.f32x8 { return cossin_x8(angle_turns).re; } -test "cos_x8" { +test cos_x8 { try std.testing.expectEqual( .{ 1, 0, -1, 0, 1, 0, -1, 0 }, cos_x8(.{ -1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75 }), @@ -31,7 +123,7 @@ pub fn sin(angle_turns: f32) f32 { return cossin(angle_turns).im; } -test "sin" { +test sin { try std.testing.expectEqual(0, sin(-1)); try std.testing.expectEqual(1, sin(-0.75)); try std.testing.expectEqual(0, sin(-0.5)); @@ -46,7 +138,7 @@ pub fn sin_x8(angle_turns: vm.f32x8) vm.f32x8 { return cossin_x8(angle_turns).im; } -test "sin_x8" { +test sin_x8 { try std.testing.expectEqual( .{ 0, 1, 0, -1, 0, 1, 0, -1 }, sin_x8(.{ -1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75 }), @@ -69,31 +161,31 @@ pub fn cossin(angle_turns: f32) vm.Complex { const angle_04 = 4.0 * angle_01; const quadrant: u32 = @intFromFloat(angle_04); + const quadrant_odd = (quadrant & 1) * 0xFFFFFFFF; + const sign_mask_cos = ((quadrant + 1) & 
0b10) << 30; + const sign_mask_sin = (quadrant & 0b10) << 30; const x = angle_04 - @floor(angle_04); const x2 = x * x; - const c = ((term_cos_6 * x2 + term_cos_4) * x2 + term_cos_2) * x2 + term_cos_0; - const s = ((term_sin_5 * x2 + term_sin_3) * x2 + term_sin_1) * x; + const c: u32 = @bitCast(((term_cos_6 * x2 + term_cos_4) * x2 + term_cos_2) * x2 + term_cos_0); + const s: u32 = @bitCast(((term_sin_5 * x2 + term_sin_3) * x2 + term_sin_1) * x); - return switch (quadrant) { - 0 => .init(c, s), - 1 => .init(-s, c), - 2 => .init(-c, -s), - 3 => .init(s, -c), - else => unreachable, - }; + const result_cos: f32 = @bitCast(((s & quadrant_odd) | (c & ~quadrant_odd)) ^ sign_mask_cos); + const result_sin: f32 = @bitCast(((c & quadrant_odd) | (s & ~quadrant_odd)) ^ sign_mask_sin); + + return .init(result_cos, result_sin); } -test "cossin" { - try std.testing.expectEqual(vm.Vector2.unit_x, cossin(-1)); - try std.testing.expectEqual(vm.Vector2.unit_y, cossin(-0.75)); - try std.testing.expectEqual(vm.Vector2.unit_nx, cossin(-0.5)); - try std.testing.expectEqual(vm.Vector2.unit_ny, cossin(-0.25)); - try std.testing.expectEqual(vm.Vector2.unit_x, cossin(0)); - try std.testing.expectEqual(vm.Vector2.unit_y, cossin(0.25)); - try std.testing.expectEqual(vm.Vector2.unit_nx, cossin(0.5)); - try std.testing.expectEqual(vm.Vector2.unit_ny, cossin(0.75)); +test cossin { + try std.testing.expectEqual(vm.Complex.initVector2(.unit_x), cossin(-1)); + try std.testing.expectEqual(vm.Complex.initVector2(.unit_y), cossin(-0.75)); + try std.testing.expectEqual(vm.Complex.initVector2(.unit_nx), cossin(-0.5)); + try std.testing.expectEqual(vm.Complex.initVector2(.unit_ny), cossin(-0.25)); + try std.testing.expectEqual(vm.Complex.initVector2(.unit_x), cossin(0)); + try std.testing.expectEqual(vm.Complex.initVector2(.unit_y), cossin(0.25)); + try std.testing.expectEqual(vm.Complex.initVector2(.unit_nx), cossin(0.5)); + try std.testing.expectEqual(vm.Complex.initVector2(.unit_ny), cossin(0.75)); } pub 
fn cossin_x8(angle_turns: vm.f32x8) vm.Complex_x8 { @@ -119,39 +211,27 @@ pub fn cossin_x8(angle_turns: vm.f32x8) vm.Complex_x8 { const x = angle_04 - @floor(angle_04); const x2 = x * x; - const c = ((term_cos_6 * x2 + term_cos_4) * x2 + term_cos_2) * x2 + term_cos_0; - const s = ((term_sin_5 * x2 + term_sin_3) * x2 + term_sin_1) * x; + const c: vm.u32x8 = @bitCast(((term_cos_6 * x2 + term_cos_4) * x2 + term_cos_2) * x2 + term_cos_0); + const s: vm.u32x8 = @bitCast(((term_sin_5 * x2 + term_sin_3) * x2 + term_sin_1) * x); - var result_cos = @select(f32, quadrant_odd, s, c); - var result_sin = @select(f32, quadrant_odd, c, s); - - result_cos = @bitCast(@as(vm.u32x8, @bitCast(result_cos)) ^ sign_mask_cos); - result_sin = @bitCast(@as(vm.u32x8, @bitCast(result_sin)) ^ sign_mask_sin); + const result_cos: vm.f32x8 = @bitCast(@select(u32, quadrant_odd, s, c) ^ sign_mask_cos); + const result_sin: vm.f32x8 = @bitCast(@select(u32, quadrant_odd, c, s) ^ sign_mask_sin); return .init(result_cos, result_sin); } -test "cossin_x8" { +test cossin_x8 { try std.testing.expectEqual( - vm.Vector2x8.initArrayOfVectors(.{ - vm.Vector2.unit_x, - vm.Vector2.unit_y, - vm.Vector2.unit_nx, - vm.Vector2.unit_ny, - vm.Vector2.unit_x, - vm.Vector2.unit_y, - vm.Vector2.unit_nx, - vm.Vector2.unit_ny, - }), - cossin_x8(.{ - -1, - -0.75, - -0.5, - -0.25, - 0, - 0.25, - 0.5, - 0.75, + vm.Complex_x8.initArrayOfComplex(.{ + .initVector2(.unit_x), + .initVector2(.unit_y), + .initVector2(.unit_nx), + .initVector2(.unit_ny), + .initVector2(.unit_x), + .initVector2(.unit_y), + .initVector2(.unit_nx), + .initVector2(.unit_ny), }), + cossin_x8(.{ -1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75 }), ); } diff --git a/packages/vecmath/src/vectors/Vector2.zig b/packages/vecmath/src/vectors/Vector2.zig index 945295c..d58f984 100644 --- a/packages/vecmath/src/vectors/Vector2.zig +++ b/packages/vecmath/src/vectors/Vector2.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const vm = @import("root"); +const vm = 
@import("../root.zig"); pub const Vector2 = extern struct { x: f32, @@ -14,7 +14,7 @@ pub const Vector2 = extern struct { pub const unit_nx = init(-1, 0); pub const unit_ny = init(0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- pub inline fn init(x: f32, y: f32) Vector2 { return .{ .x = x, .y = y }; @@ -28,21 +28,13 @@ pub const Vector2 = extern struct { return @bitCast(array); } - // --- CONVERSION --- + // --- CONVERSION ---------------------------------------------------------- pub inline fn asArray(self: Vector2) Array { return @bitCast(self); } - pub inline fn asArrayPtr(self: *Vector2) *Array { - return @ptrCast(self); - } - - pub inline fn asArrayConstPtr(self: *const Vector2) *const Array { - return @ptrCast(self); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn add(self: Vector2, other: Vector2) Vector2 { return .{ .x = self.x + other.x, .y = self.y + other.y }; @@ -96,7 +88,7 @@ pub const Vector2 = extern struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- pub inline fn len(self: Vector2) f32 { return @sqrt(self.x * self.x + self.y * self.y); @@ -121,11 +113,10 @@ pub const Vector2 = extern struct { }; } - pub inline fn rotate(self: Vector2, angle_turns: f32) Vector2 { - const c, const s = cossin(angle_turns).asArray(); + pub inline fn rotate(self: Vector2, complex: vm.Complex) Vector2 { return .{ - .x = self.x * c - self.y * s, - .y = self.x * s + self.y * c, + .x = self.x * complex.re - self.y * complex.im, + .y = self.x * complex.im + self.y * complex.re, }; } }; diff --git a/packages/vecmath/src/vectors/Vector2Int.zig b/packages/vecmath/src/vectors/Vector2Int.zig index 2b60b98..71d59ac 100644 --- a/packages/vecmath/src/vectors/Vector2Int.zig +++ b/packages/vecmath/src/vectors/Vector2Int.zig @@ 
-1,5 +1,5 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Vector2Int = extern struct { x: i32, @@ -14,7 +14,7 @@ pub const Vector2Int = extern struct { pub const unit_nx = init(-1, 0); pub const unit_ny = init(0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- pub inline fn init(x: i32, y: i32) Vector2Int { return .{ .x = x, .y = y }; @@ -28,21 +28,13 @@ pub const Vector2Int = extern struct { return @bitCast(array); } - // --- CONVERSION --- + // --- CONVERSION ---------------------------------------------------------- pub inline fn asArray(self: Vector2Int) Array { return @bitCast(self); } - pub inline fn asArrayPtr(self: *Vector2Int) *Array { - return @ptrCast(self); - } - - pub inline fn asArrayConstPtr(self: *const Vector2Int) *const Array { - return @ptrCast(self); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn add(self: Vector2Int, other: Vector2Int) Vector2Int { return .{ .x = self.x + other.x, .y = self.y + other.y }; @@ -56,7 +48,7 @@ pub const Vector2Int = extern struct { return .{ .x = self.x * other.x, .y = self.y * other.y }; } - pub inline fn mulScalar(self: Vector2Int, scalar: f32) Vector2Int { + pub inline fn mulScalar(self: Vector2Int, scalar: i32) Vector2Int { return .{ .x = self.x * scalar, .y = self.y * scalar }; } @@ -64,7 +56,7 @@ pub const Vector2Int = extern struct { return .{ .x = @divFloor(self.x, other.x), .y = @divFloor(self.y, other.y) }; } - pub inline fn divScalar(self: Vector2Int, scalar: f32) Vector2Int { + pub inline fn divScalar(self: Vector2Int, scalar: i32) Vector2Int { return .{ .x = @divFloor(self.x, scalar), .y = @divFloor(self.y, scalar) }; } @@ -72,7 +64,7 @@ pub const Vector2Int = extern struct { return .{ .x = @mod(self.x, other.x), .y = @mod(self.y, other.y) }; } - pub inline fn modScalar(self: Vector2Int, scalar: f32) Vector2Int { 
+ pub inline fn modScalar(self: Vector2Int, scalar: i32) Vector2Int { return .{ .x = @mod(self.x, scalar), .y = @mod(self.y, scalar) }; } @@ -92,7 +84,7 @@ pub const Vector2Int = extern struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- pub inline fn lenSquared(self: Vector2Int) i32 { return self.x * self.x + self.y * self.y; diff --git a/packages/vecmath/src/vectors/Vector2Int_x8.zig b/packages/vecmath/src/vectors/Vector2Int_x8.zig index 9894cd7..9387385 100644 --- a/packages/vecmath/src/vectors/Vector2Int_x8.zig +++ b/packages/vecmath/src/vectors/Vector2Int_x8.zig @@ -1,11 +1,9 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Vector2Int_x8 = struct { - x: i32x8, - y: i32x8, - - pub const Array = [16]i32; + x: vm.i32x8, + y: vm.i32x8, pub const zero = initScalarSingle(0); pub const one = initScalarSingle(1); @@ -14,101 +12,79 @@ pub const Vector2Int_x8 = struct { pub const unit_nx = initSingle(-1, 0); pub const unit_ny = initSingle(0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- - pub inline fn init(x: i32x8, y: i32x8) Vector2Int_x8 { + pub inline fn init(x: vm.i32x8, y: vm.i32x8) Vector2Int_x8 { return .{ .x = x, .y = y }; } pub inline fn initSingle(x: i32, y: i32) Vector2Int_x8 { - return .{ .x = epi32(x), .y = epi32(y) }; + return .{ .x = vm.epi32(x), .y = vm.epi32(y) }; } - pub inline fn initScalar(scalar: i32x8) Vector2Int_x8 { + pub inline fn initScalar(scalar: vm.i32x8) Vector2Int_x8 { return .{ .x = scalar, .y = scalar }; } pub inline fn initScalarSingle(scalar: i32) Vector2Int_x8 { - return .{ .x = epi32(scalar), .y = epi32(scalar) }; + return .{ .x = vm.epi32(scalar), .y = vm.epi32(scalar) }; } - pub inline fn initSplat(vector: Vector2Int) Vector2Int_x8 { - return .{ .x = epi32(vector.x), .y = epi32(vector.y) }; + pub 
inline fn initArrayOfVectors(vectors: [8]vm.Vector2Int) Vector2Int_x8 { + const vector: @Vector(16, i32) = @as([16]i32, @bitCast(vectors)); + return .{ + .x = @shuffle(i32, vector, undefined, [_]i32{ 0, 2, 4, 6, 8, 10, 12, 14 }), + .y = @shuffle(i32, vector, undefined, [_]i32{ 1, 3, 5, 7, 9, 11, 13, 15 }), + }; } - pub inline fn initArray(array: Array) Vector2Int_x8 { - const x: i32x8 = array[0..8].*; - const y: i32x8 = array[8..16].*; - return .{ .x = x, .y = y }; + pub inline fn splat(vector: vm.Vector2Int) Vector2Int_x8 { + return .{ .x = vm.epi32(vector.x), .y = vm.epi32(vector.y) }; } - pub inline fn initArrayTranspose(array: Array) Vector2Int_x8 { - const a: i32x8 = array[0..8].*; - const b: i32x8 = array[8..16].*; - const x: i32x8 = @shuffle(i32, a, b, [_]i32{ 0, 2, 4, 6, ~@as(i32, 0), ~@as(i32, 2), ~@as(i32, 4), ~@as(i32, 6) }); - const y: i32x8 = @shuffle(i32, a, b, [_]i32{ 1, 3, 5, 7, ~@as(i32, 1), ~@as(i32, 3), ~@as(i32, 5), ~@as(i32, 7) }); - return .{ .x = x, .y = y }; + // --- CONVERSION ---------------------------------------------------------- + + pub inline fn asArrayOfVectors(self: Vector2Int_x8) [8]vm.Vector2Int { + const vector: @Vector(16, i32) = self.x ++ self.y; + return @bitCast(@as([16]i32, @shuffle(i32, vector, undefined, [_]i32{ + 0, 8, + 1, 9, + 2, 10, + 3, 11, + 4, 12, + 5, 13, + 6, 14, + 7, 15, + }))); } - pub inline fn initArrayOfVectors(vectors: [8]Vector2Int) Vector2Int_x8 { - return initArrayTranspose(@bitCast(vectors)); - } - - // --- CONVERSION --- - - pub inline fn asArray(self: Vector2Int_x8) Array { - const x: [8]i32 = self.x; - const y: [8]i32 = self.y; - return x ++ y; - } - - pub inline fn asArrayTranspose(self: Vector2Int_x8) Array { - const a = @shuffle(i32, self.x, self.y, [_]i32{ 0, ~@as(i32, 0), 1, ~@as(i32, 1), 2, ~@as(i32, 2), 3, ~@as(i32, 3) }); - const b = @shuffle(i32, self.x, self.y, [_]i32{ 4, ~@as(i32, 4), 5, ~@as(i32, 5), 6, ~@as(i32, 6), 7, ~@as(i32, 7) }); - return a ++ b; - } - - pub inline fn 
asArrayOfVectors(self: Vector2Int_x8) [8]Vector2Int { - return @bitCast(self.asArrayTranspose()); - } - - pub inline fn unpack(self: Vector2Int_x8) [2]i32x8 { + pub inline fn unpack(self: Vector2Int_x8) [2]vm.i32x8 { return .{ self.x, self.y }; } - // --- LOAD AND STORE --- + // --- LOAD AND STORE ------------------------------------------------------ - pub inline fn loadArray(self: *Vector2Int_x8, array: *const Array) void { - self.x = array[0..8].*; - self.y = array[8..16].*; + pub inline fn loadArrayOfVectors(self: *Vector2Int_x8, array: *const [8]vm.Vector2Int) void { + const vector: @Vector(16, i32) = @as(*const [16]i32, @ptrCast(array)).*; + self.x = @shuffle(i32, vector, undefined, [_]i32{ 0, 2, 4, 6, 8, 10, 12, 14 }); + self.y = @shuffle(i32, vector, undefined, [_]i32{ 1, 3, 5, 7, 9, 11, 13, 15 }); } - pub inline fn loadArrayTranspose(self: *Vector2Int_x8, array: *const Array) void { - const a: i32x8 = array[0..8].*; - const b: i32x8 = array[8..16].*; - self.x = @shuffle(i32, a, b, [_]i32{ 0, 2, 4, 6, ~@as(i32, 0), ~@as(i32, 2), ~@as(i32, 4), ~@as(i32, 6) }); - self.y = @shuffle(i32, a, b, [_]i32{ 1, 3, 5, 7, ~@as(i32, 1), ~@as(i32, 3), ~@as(i32, 5), ~@as(i32, 7) }); + pub inline fn storeArrayOfVectors(self: *const Vector2Int_x8, array: *[8]vm.Vector2Int) void { + const vector: @Vector(16, i32) = self.x ++ self.y; + @as(*[16]i32, @ptrCast(array)).* = @shuffle(i32, vector, undefined, [_]i32{ + 0, 8, + 1, 9, + 2, 10, + 3, 11, + 4, 12, + 5, 13, + 6, 14, + 7, 15, + }); } - pub inline fn loadArrayOfVectors(self: *Vector2Int_x8, vectors: *const [8]Vector2Int) void { - self.loadArrayTranspose(@ptrCast(vectors)); - } - - pub inline fn storeArray(self: *const Vector2Int_x8, array: *Array) void { - array[0..8].* = self.x; - array[8..16].* = self.y; - } - - pub inline fn storeArrayTranspose(self: *const Vector2Int_x8, array: *Array) void { - array[0..8].* = @shuffle(i32, self.x, self.y, [_]i32{ 0, ~@as(i32, 0), 1, ~@as(i32, 1), 2, ~@as(i32, 2), 3, ~@as(i32, 3) }); - 
array[8..16].* = @shuffle(i32, self.x, self.y, [_]i32{ 4, ~@as(i32, 4), 5, ~@as(i32, 5), 6, ~@as(i32, 6), 7, ~@as(i32, 7) }); - } - - pub inline fn storeArrayOfVectors(self: *const Vector2Int_x8, vectors: *[8]Vector2Int) void { - self.storeArrayTranspose(@ptrCast(vectors)); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn add(self: Vector2Int_x8, other: Vector2Int_x8) Vector2Int_x8 { return .{ .x = self.x + other.x, .y = self.y + other.y }; @@ -122,36 +98,36 @@ pub const Vector2Int_x8 = struct { return .{ .x = self.x * other.x, .y = self.y * other.y }; } - pub inline fn mulScalar(self: Vector2Int_x8, scalar: i32x8) Vector2Int_x8 { + pub inline fn mulScalar(self: Vector2Int_x8, scalar: vm.i32x8) Vector2Int_x8 { return .{ .x = self.x * scalar, .y = self.y * scalar }; } pub inline fn mulScalarSingle(self: Vector2Int_x8, scalar: i32) Vector2Int_x8 { - return .{ .x = self.x * epi32(scalar), .y = self.y * epi32(scalar) }; + return .{ .x = self.x * vm.epi32(scalar), .y = self.y * vm.epi32(scalar) }; } pub inline fn div(self: Vector2Int_x8, other: Vector2Int_x8) Vector2Int_x8 { return .{ .x = @divFloor(self.x, other.x), .y = @divFloor(self.y, other.y) }; } - pub inline fn divScalar(self: Vector2Int_x8, scalar: i32x8) Vector2Int_x8 { + pub inline fn divScalar(self: Vector2Int_x8, scalar: vm.i32x8) Vector2Int_x8 { return .{ .x = @divFloor(self.x, scalar), .y = @divFloor(self.y, scalar) }; } pub inline fn divScalarSingle(self: Vector2Int_x8, scalar: i32) Vector2Int_x8 { - return .{ .x = @divFloor(self.x, epi32(scalar)), .y = @divFloor(self.y, epi32(scalar)) }; + return .{ .x = @divFloor(self.x, vm.epi32(scalar)), .y = @divFloor(self.y, vm.epi32(scalar)) }; } pub inline fn mod(self: Vector2Int_x8, other: Vector2Int_x8) Vector2Int_x8 { return .{ .x = @mod(self.x, other.x), .y = @mod(self.y, other.y) }; } - pub inline fn modScalar(self: Vector2Int_x8, scalar: i32x8) Vector2Int_x8 { + pub inline fn 
modScalar(self: Vector2Int_x8, scalar: vm.i32x8) Vector2Int_x8 { return .{ .x = @mod(self.x, scalar), .y = @mod(self.y, scalar) }; } pub inline fn modScalarSingle(self: Vector2Int_x8, scalar: i32) Vector2Int_x8 { - return .{ .x = @mod(self.x, epi32(scalar)), .y = @mod(self.y, epi32(scalar)) }; + return .{ .x = @mod(self.x, vm.epi32(scalar)), .y = @mod(self.y, vm.epi32(scalar)) }; } pub inline fn negate(self: Vector2Int_x8) Vector2Int_x8 { @@ -170,17 +146,17 @@ pub const Vector2Int_x8 = struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- - pub inline fn lenSquared(self: Vector2Int_x8) i32x8 { + pub inline fn lenSquared(self: Vector2Int_x8) vm.i32x8 { return self.x * self.x + self.y * self.y; } - pub inline fn dot(self: Vector2Int_x8, other: Vector2Int_x8) i32x8 { + pub inline fn dot(self: Vector2Int_x8, other: Vector2Int_x8) vm.i32x8 { return self.x * other.x + self.y * other.y; } - pub inline fn cross(self: Vector2Int_x8, other: Vector2Int_x8) i32x8 { + pub inline fn cross(self: Vector2Int_x8, other: Vector2Int_x8) vm.i32x8 { return self.x * other.y - self.y * other.x; } }; diff --git a/packages/vecmath/src/vectors/Vector2x8.zig b/packages/vecmath/src/vectors/Vector2x8.zig index 7bc6f5d..82b2698 100644 --- a/packages/vecmath/src/vectors/Vector2x8.zig +++ b/packages/vecmath/src/vectors/Vector2x8.zig @@ -1,11 +1,9 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Vector2x8 = struct { - x: f32x8, - y: f32x8, - - pub const Array = [16]f32; + x: vm.f32x8, + y: vm.f32x8, pub const zero = initScalarSingle(0); pub const one = initScalarSingle(1); @@ -14,101 +12,79 @@ pub const Vector2x8 = struct { pub const unit_nx = initSingle(-1, 0); pub const unit_ny = initSingle(0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- - pub inline fn init(x: 
f32x8, y: f32x8) Vector2x8 { + pub inline fn init(x: vm.f32x8, y: vm.f32x8) Vector2x8 { return .{ .x = x, .y = y }; } pub inline fn initSingle(x: f32, y: f32) Vector2x8 { - return .{ .x = ps(x), .y = ps(y) }; + return .{ .x = vm.ps(x), .y = vm.ps(y) }; } - pub inline fn initScalar(scalar: f32x8) Vector2x8 { + pub inline fn initScalar(scalar: vm.f32x8) Vector2x8 { return .{ .x = scalar, .y = scalar }; } pub inline fn initScalarSingle(scalar: f32) Vector2x8 { - return .{ .x = ps(scalar), .y = ps(scalar) }; + return .{ .x = vm.ps(scalar), .y = vm.ps(scalar) }; } - pub inline fn initSplat(vector: Vector2) Vector2x8 { - return .{ .x = ps(vector.x), .y = ps(vector.y) }; + pub inline fn initArrayOfVectors(vectors: [8]vm.Vector2) Vector2x8 { + const vector: @Vector(16, f32) = @as([16]f32, @bitCast(vectors)); + return .{ + .x = @shuffle(f32, vector, undefined, [_]i32{ 0, 2, 4, 6, 8, 10, 12, 14 }), + .y = @shuffle(f32, vector, undefined, [_]i32{ 1, 3, 5, 7, 9, 11, 13, 15 }), + }; } - pub inline fn initArray(array: Array) Vector2x8 { - const x: f32x8 = array[0..8].*; - const y: f32x8 = array[8..16].*; - return .{ .x = x, .y = y }; + pub inline fn splat(vector: vm.Vector2) Vector2x8 { + return .{ .x = vm.ps(vector.x), .y = vm.ps(vector.y) }; } - pub inline fn initArrayTranspose(array: Array) Vector2x8 { - const a: f32x8 = array[0..8].*; - const b: f32x8 = array[8..16].*; - const x: f32x8 = @shuffle(f32, a, b, [_]i32{ 0, 2, 4, 6, ~@as(i32, 0), ~@as(i32, 2), ~@as(i32, 4), ~@as(i32, 6) }); - const y: f32x8 = @shuffle(f32, a, b, [_]i32{ 1, 3, 5, 7, ~@as(i32, 1), ~@as(i32, 3), ~@as(i32, 5), ~@as(i32, 7) }); - return .{ .x = x, .y = y }; + // --- CONVERSION ---------------------------------------------------------- + + pub inline fn asArrayOfVectors(self: Vector2x8) [8]vm.Vector2 { + const vector: @Vector(16, f32) = self.x ++ self.y; + return @bitCast(@as([16]f32, @shuffle(f32, vector, undefined, [_]i32{ + 0, 8, + 1, 9, + 2, 10, + 3, 11, + 4, 12, + 5, 13, + 6, 14, + 7, 15, + }))); } 
- pub inline fn initArrayOfVectors(vectors: [8]Vector2) Vector2x8 { - return initArrayTranspose(@bitCast(vectors)); - } - - // --- CONVERSION --- - - pub inline fn asArray(self: Vector2x8) Array { - const x: [8]f32 = self.x; - const y: [8]f32 = self.y; - return x ++ y; - } - - pub inline fn asArrayTranspose(self: Vector2x8) Array { - const a = @shuffle(f32, self.x, self.y, [_]i32{ 0, ~@as(i32, 0), 1, ~@as(i32, 1), 2, ~@as(i32, 2), 3, ~@as(i32, 3) }); - const b = @shuffle(f32, self.x, self.y, [_]i32{ 4, ~@as(i32, 4), 5, ~@as(i32, 5), 6, ~@as(i32, 6), 7, ~@as(i32, 7) }); - return a ++ b; - } - - pub inline fn asArrayOfVectors(self: Vector2x8) [8]Vector2 { - return @bitCast(self.asArrayTranspose()); - } - - pub inline fn unpack(self: Vector2x8) [2]f32x8 { + pub inline fn unpack(self: Vector2x8) [2]vm.f32x8 { return .{ self.x, self.y }; } - // --- LOAD AND STORE --- + // --- LOAD AND STORE ------------------------------------------------------ - pub inline fn loadArray(self: *Vector2x8, array: *const Array) void { - self.x = array[0..8].*; - self.y = array[8..16].*; + pub inline fn loadArrayOfVectors(self: *Vector2x8, array: *const [8]vm.Vector2) void { + const vector: @Vector(16, f32) = @as(*const [16]f32, @ptrCast(array)).*; + self.x = @shuffle(f32, vector, undefined, [_]i32{ 0, 2, 4, 6, 8, 10, 12, 14 }); + self.y = @shuffle(f32, vector, undefined, [_]i32{ 1, 3, 5, 7, 9, 11, 13, 15 }); } - pub inline fn loadArrayTranspose(self: *Vector2x8, array: *const Array) void { - const a: f32x8 = array[0..8].*; - const b: f32x8 = array[8..16].*; - self.x = @shuffle(f32, a, b, [_]i32{ 0, 2, 4, 6, ~@as(i32, 0), ~@as(i32, 2), ~@as(i32, 4), ~@as(i32, 6) }); - self.y = @shuffle(f32, a, b, [_]i32{ 1, 3, 5, 7, ~@as(i32, 1), ~@as(i32, 3), ~@as(i32, 5), ~@as(i32, 7) }); + pub inline fn storeArrayOfVectors(self: *const Vector2x8, array: *[8]vm.Vector2) void { + const vector: @Vector(16, f32) = self.x ++ self.y; + @as(*[16]f32, @ptrCast(array)).* = @shuffle(f32, vector, undefined, [_]i32{ 
+ 0, 8, + 1, 9, + 2, 10, + 3, 11, + 4, 12, + 5, 13, + 6, 14, + 7, 15, + }); } - pub inline fn loadArrayOfVectors(self: *Vector2x8, vectors: *const [8]Vector2) void { - self.loadArrayTranspose(@ptrCast(vectors)); - } - - pub inline fn storeArray(self: *const Vector2x8, array: *Array) void { - array[0..8].* = self.x; - array[8..16].* = self.y; - } - - pub inline fn storeArrayTranspose(self: *const Vector2x8, array: *Array) void { - array[0..8].* = @shuffle(f32, self.x, self.y, [_]i32{ 0, ~@as(i32, 0), 1, ~@as(i32, 1), 2, ~@as(i32, 2), 3, ~@as(i32, 3) }); - array[8..16].* = @shuffle(f32, self.x, self.y, [_]i32{ 4, ~@as(i32, 4), 5, ~@as(i32, 5), 6, ~@as(i32, 6), 7, ~@as(i32, 7) }); - } - - pub inline fn storeArrayOfVectors(self: *const Vector2x8, vectors: *[8]Vector2) void { - self.storeArrayTranspose(@ptrCast(vectors)); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn add(self: Vector2x8, other: Vector2x8) Vector2x8 { return .{ .x = self.x + other.x, .y = self.y + other.y }; @@ -122,24 +98,24 @@ pub const Vector2x8 = struct { return .{ .x = self.x * other.x, .y = self.y * other.y }; } - pub inline fn mulScalar(self: Vector2x8, scalar: f32x8) Vector2x8 { + pub inline fn mulScalar(self: Vector2x8, scalar: vm.f32x8) Vector2x8 { return .{ .x = self.x * scalar, .y = self.y * scalar }; } pub inline fn mulScalarSingle(self: Vector2x8, scalar: f32) Vector2x8 { - return .{ .x = self.x * ps(scalar), .y = self.y * ps(scalar) }; + return .{ .x = self.x * vm.ps(scalar), .y = self.y * vm.ps(scalar) }; } pub inline fn div(self: Vector2x8, other: Vector2x8) Vector2x8 { return .{ .x = self.x / other.x, .y = self.y / other.y }; } - pub inline fn divScalar(self: Vector2x8, scalar: f32x8) Vector2x8 { + pub inline fn divScalar(self: Vector2x8, scalar: vm.f32x8) Vector2x8 { return .{ .x = self.x / scalar, .y = self.y / scalar }; } pub inline fn divScalarSingle(self: Vector2x8, scalar: f32) Vector2x8 { - return .{ 
.x = self.x / ps(scalar), .y = self.y / ps(scalar) }; + return .{ .x = self.x / vm.ps(scalar), .y = self.y / vm.ps(scalar) }; } pub inline fn negate(self: Vector2x8) Vector2x8 { @@ -170,36 +146,49 @@ pub const Vector2x8 = struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- - pub inline fn len(self: Vector2x8) f32x8 { + pub inline fn len(self: Vector2x8) vm.f32x8 { return @sqrt(self.x * self.x + self.y * self.y); } - pub inline fn lenSquared(self: Vector2x8) f32x8 { + pub inline fn lenSquared(self: Vector2x8) vm.f32x8 { return self.x * self.x + self.y * self.y; } - pub inline fn dot(self: Vector2x8, other: Vector2x8) f32x8 { + pub inline fn dot(self: Vector2x8, other: Vector2x8) vm.f32x8 { return self.x * other.x + self.y * other.y; } - pub inline fn cross(self: Vector2x8, other: Vector2x8) f32x8 { + pub inline fn cross(self: Vector2x8, other: Vector2x8) vm.f32x8 { return self.x * other.y - self.y * other.x; } - pub inline fn lerp(a: Vector2x8, b: Vector2x8, t: f32x8) Vector2x8 { + pub inline fn lerp(a: Vector2x8, b: Vector2x8, t: vm.f32x8) Vector2x8 { return .{ - .x = @mulAdd(f32x8, t, b.x, @mulAdd(f32x8, -t, a.x, a.x)), - .y = @mulAdd(f32x8, t, b.y, @mulAdd(f32x8, -t, a.y, a.y)), + .x = @mulAdd(vm.f32x8, t, b.x, @mulAdd(vm.f32x8, -t, a.x, a.x)), + .y = @mulAdd(vm.f32x8, t, b.y, @mulAdd(vm.f32x8, -t, a.y, a.y)), }; } - pub inline fn rotate(self: Vector2x8, angle_turns: f32x8) Vector2x8 { - const c, const s = cossin_x8(angle_turns).unpack(); + pub inline fn lerpSingle(a: Vector2x8, b: Vector2x8, t: f32) Vector2x8 { return .{ - .x = self.x * c - self.y * s, - .y = self.x * s + self.y * c, + .x = @mulAdd(vm.f32x8, vm.ps(t), b.x, @mulAdd(vm.f32x8, -vm.ps(t), a.x, a.x)), + .y = @mulAdd(vm.f32x8, vm.ps(t), b.y, @mulAdd(vm.f32x8, -vm.ps(t), a.y, a.y)), + }; + } + + pub inline fn rotate(self: Vector2x8, complex: vm.Complex_x8) Vector2x8 { + return .{ 
+ .x = self.x * complex.re - self.y * complex.im, + .y = self.x * complex.im + self.y * complex.re, + }; + } + + pub inline fn rotateSingle(self: Vector2x8, complex: vm.Complex) Vector2x8 { + return .{ + .x = self.x * vm.ps(complex.re) - self.y * vm.ps(complex.im), + .y = self.x * vm.ps(complex.im) + self.y * vm.ps(complex.re), }; } }; diff --git a/packages/vecmath/src/vectors/Vector3.zig b/packages/vecmath/src/vectors/Vector3.zig index 1d656ee..a12a8f4 100644 --- a/packages/vecmath/src/vectors/Vector3.zig +++ b/packages/vecmath/src/vectors/Vector3.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Vector3 = extern struct { x: f32, @@ -17,7 +17,7 @@ pub const Vector3 = extern struct { pub const unit_ny = init(0, -1, 0); pub const unit_nz = init(0, 0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- pub inline fn init(x: f32, y: f32, z: f32) Vector3 { return .{ .x = x, .y = y, .z = z }; @@ -31,21 +31,13 @@ pub const Vector3 = extern struct { return @bitCast(array); } - // --- CONVERSION --- + // --- CONVERSION ---------------------------------------------------------- pub inline fn asArray(self: Vector3) Array { return @bitCast(self); } - pub inline fn asArrayPtr(self: *Vector3) *Array { - return @ptrCast(self); - } - - pub inline fn asArrayConstPtr(self: *const Vector3) *const Array { - return @ptrCast(self); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn add(self: Vector3, other: Vector3) Vector3 { return .{ .x = self.x + other.x, .y = self.y + other.y, .z = self.z + other.z }; @@ -99,7 +91,7 @@ pub const Vector3 = extern struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y), .z = @max(self.z, other.z) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- pub inline fn len(self: Vector3) f32 { 
return @sqrt(self.x * self.x + self.y * self.y + self.z * self.z); @@ -129,13 +121,16 @@ pub const Vector3 = extern struct { }; } - pub inline fn rotate(self: Vector3, quaternion: Quaternion) Vector3 { - const quaternion_scalar = quaternion.getScalar(); - const quaternion_vector = quaternion.getVector(); + pub inline fn rotate(self: Vector3, quaternion: vm.Quaternion) Vector3 { + const w = quaternion.getScalar(); + const xyz = quaternion.getVector(); - return .add(self, .cross( - .add(quaternion_vector, quaternion_vector), - .add(.cross(quaternion_vector, self), .mulScalar(self, quaternion_scalar)), - )); + return add( + self, + cross( + add(xyz, xyz), + add(cross(xyz, self), self.mulScalar(w)), + ), + ); } }; diff --git a/packages/vecmath/src/vectors/Vector3Int.zig b/packages/vecmath/src/vectors/Vector3Int.zig index fd015c1..2d4bfb4 100644 --- a/packages/vecmath/src/vectors/Vector3Int.zig +++ b/packages/vecmath/src/vectors/Vector3Int.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Vector3Int = extern struct { x: i32, @@ -17,7 +17,7 @@ pub const Vector3Int = extern struct { pub const unit_ny = init(0, -1, 0); pub const unit_nz = init(0, 0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- pub inline fn init(x: i32, y: i32, z: i32) Vector3Int { return .{ .x = x, .y = y, .z = z }; @@ -31,21 +31,13 @@ pub const Vector3Int = extern struct { return @bitCast(array); } - // --- CONVERSION --- + // --- CONVERSION ---------------------------------------------------------- pub inline fn asArray(self: Vector3Int) Array { return @bitCast(self); } - pub inline fn asArrayPtr(self: *Vector3Int) *Array { - return @ptrCast(self); - } - - pub inline fn asArrayConstPtr(self: *const Vector3Int) *const Array { - return @ptrCast(self); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn 
add(self: Vector3Int, other: Vector3Int) Vector3Int { return .{ .x = self.x + other.x, .y = self.y + other.y, .z = self.z + other.z }; @@ -95,7 +87,7 @@ pub const Vector3Int = extern struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y), .z = @max(self.z, other.z) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- pub inline fn lenSquared(self: Vector3Int) i32 { return self.x * self.x + self.y * self.y + self.z * self.z; diff --git a/packages/vecmath/src/vectors/Vector3Int_x8.zig b/packages/vecmath/src/vectors/Vector3Int_x8.zig index 4e5d69b..0a8e181 100644 --- a/packages/vecmath/src/vectors/Vector3Int_x8.zig +++ b/packages/vecmath/src/vectors/Vector3Int_x8.zig @@ -1,12 +1,10 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Vector3Int_x8 = struct { - x: i32x8, - y: i32x8, - z: i32x8, - - pub const Array = [24]i32; + x: vm.i32x8, + y: vm.i32x8, + z: vm.i32x8, pub const zero = initScalarSingle(0); pub const one = initScalarSingle(1); @@ -17,59 +15,42 @@ pub const Vector3Int_x8 = struct { pub const unit_ny = initSingle(0, -1, 0); pub const unit_nz = initSingle(0, 0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- - pub inline fn init(x: i32x8, y: i32x8, z: i32x8) Vector3Int_x8 { + pub inline fn init(x: vm.i32x8, y: vm.i32x8, z: vm.i32x8) Vector3Int_x8 { return .{ .x = x, .y = y, .z = z }; } pub inline fn initSingle(x: i32, y: i32, z: i32) Vector3Int_x8 { - return .{ .x = epi32(x), .y = epi32(y), .z = epi32(z) }; + return .{ .x = vm.epi32(x), .y = vm.epi32(y), .z = vm.epi32(z) }; } - pub inline fn initScalar(scalar: i32x8) Vector3Int_x8 { + pub inline fn initScalar(scalar: vm.i32x8) Vector3Int_x8 { return .{ .x = scalar, .y = scalar, .z = scalar }; } pub inline fn initScalarSingle(scalar: i32) Vector3Int_x8 { - return .{ .x = epi32(scalar), .y = epi32(scalar), .z = epi32(scalar) }; + 
return .{ .x = vm.epi32(scalar), .y = vm.epi32(scalar), .z = vm.epi32(scalar) }; } - pub inline fn initSplat(vector: Vector3Int) Vector3Int_x8 { - return .{ .x = epi32(vector.x), .y = epi32(vector.y), .z = epi32(vector.z) }; + pub inline fn initArrayOfVectors(vectors: [8]vm.Vector3Int) Vector3Int_x8 { + const vector: @Vector(24, i32) = @as([24]i32, @bitCast(vectors)); + return .{ + .x = @shuffle(i32, vector, undefined, [_]i32{ 0, 3, 6, 9, 12, 15, 18, 21 }), + .y = @shuffle(i32, vector, undefined, [_]i32{ 1, 4, 7, 10, 13, 16, 19, 22 }), + .z = @shuffle(i32, vector, undefined, [_]i32{ 2, 5, 8, 11, 14, 17, 20, 23 }), + }; } - pub inline fn initArray(array: Array) Vector3Int_x8 { - const x: i32x8 = array[0..8].*; - const y: i32x8 = array[8..16].*; - const z: i32x8 = array[16..24].*; - return .{ .x = x, .y = y, .z = z }; + pub inline fn splat(vector: vm.Vector3Int) Vector3Int_x8 { + return .{ .x = vm.epi32(vector.x), .y = vm.epi32(vector.y), .z = vm.epi32(vector.z) }; } - pub inline fn initArrayTranspose(array: Array) Vector3Int_x8 { - const vector: @Vector(24, i32) = array; - const x: i32x8 = @shuffle(i32, vector, undefined, [_]i32{ 0, 3, 6, 9, 12, 15, 18, 21 }); - const y: i32x8 = @shuffle(i32, vector, undefined, [_]i32{ 1, 4, 7, 10, 13, 16, 19, 22 }); - const z: i32x8 = @shuffle(i32, vector, undefined, [_]i32{ 2, 5, 8, 11, 14, 17, 20, 23 }); - return .{ .x = x, .y = y, .z = z }; - } + // --- CONVERSION ---------------------------------------------------------- - pub inline fn initArrayOfVectors(vectors: [8]Vector3Int) Vector3Int_x8 { - return initArrayTranspose(@bitCast(vectors)); - } - - // --- CONVERSION --- - - pub inline fn asArray(self: Vector3Int_x8) Array { - const x: [8]i32 = self.x; - const y: [8]i32 = self.y; - const z: [8]i32 = self.z; - return x ++ y ++ z; - } - - pub inline fn asArrayTranspose(self: Vector3Int_x8) Array { - const vector: @Vector(24, i32) = self.asArray(); - const transposed: @Vector(24, i32) = @shuffle(i32, vector, undefined, [_]i32{ + 
pub inline fn asArrayOfVectors(self: Vector3Int_x8) [8]vm.Vector3Int { + const vector: @Vector(24, i32) = self.x ++ self.y ++ self.z; + return @bitCast(@as([24]i32, @shuffle(i32, vector, undefined, [_]i32{ 0, 8, 16, 1, 9, 17, 2, 10, 18, @@ -78,46 +59,25 @@ pub const Vector3Int_x8 = struct { 5, 13, 21, 6, 14, 22, 7, 15, 23, - }); - return transposed; + }))); } - pub inline fn asArrayOfVectors(self: Vector3Int_x8) [8]Vector3Int { - return @bitCast(self.asArrayTranspose()); - } - - pub inline fn unpack(self: Vector3Int_x8) [3]i32x8 { + pub inline fn unpack(self: Vector3Int_x8) [3]vm.i32x8 { return .{ self.x, self.y, self.z }; } - // --- LOAD AND STORE --- + // --- LOAD AND STORE ------------------------------------------------------ - pub inline fn loadArray(self: *Vector3Int_x8, array: *const Array) void { - self.x = array[0..8].*; - self.y = array[8..16].*; - self.z = array[16..24].*; - } - - pub inline fn loadArrayTranspose(self: *Vector3Int_x8, array: *const Array) void { - const vector: @Vector(24, i32) = array; + pub inline fn loadArrayOfVectors(self: *Vector3Int_x8, array: *const [8]vm.Vector3Int) void { + const vector: @Vector(24, i32) = @as(*const [24]i32, @ptrCast(array)).*; self.x = @shuffle(i32, vector, undefined, [_]i32{ 0, 3, 6, 9, 12, 15, 18, 21 }); self.y = @shuffle(i32, vector, undefined, [_]i32{ 1, 4, 7, 10, 13, 16, 19, 22 }); self.z = @shuffle(i32, vector, undefined, [_]i32{ 2, 5, 8, 11, 14, 17, 20, 23 }); } - pub inline fn loadArrayOfVectors(self: *Vector3Int_x8, vectors: *const [8]Vector3Int) void { - self.loadArrayTranspose(@ptrCast(vectors)); - } - - pub inline fn storeArray(self: *const Vector3Int_x8, array: *Array) void { - array[0..8].* = self.x; - array[8..16].* = self.y; - array[16..24].* = self.z; - } - - pub inline fn storeArrayTranspose(self: *const Vector3Int_x8, array: *Array) void { - const vector: @Vector(24, i32) = self.asArray(); - const transposed: @Vector(24, i32) = @shuffle(i32, vector, undefined, [_]i32{ + pub inline fn
storeArrayOfVectors(self: *const Vector3Int_x8, array: *[8]vm.Vector3Int) void { + const vector: @Vector(24, i32) = self.x ++ self.y ++ self.z; + @as(*[24]i32, @ptrCast(array)).* = @shuffle(i32, vector, undefined, [_]i32{ 0, 8, 16, 1, 9, 17, 2, 10, 18, @@ -127,14 +87,9 @@ pub const Vector3Int_x8 = struct { 6, 14, 22, 7, 15, 23, }); - array.* = transposed; } - pub inline fn storeArrayOfVectors(self: *const Vector3Int_x8, vectors: *[8]Vector3Int) void { - self.storeArrayTranspose(@ptrCast(vectors)); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn add(self: Vector3Int_x8, other: Vector3Int_x8) Vector3Int_x8 { return .{ .x = self.x + other.x, .y = self.y + other.y, .z = self.z + other.z }; @@ -148,36 +103,36 @@ pub const Vector3Int_x8 = struct { return .{ .x = self.x * other.x, .y = self.y * other.y, .z = self.z * other.z }; } - pub inline fn mulScalar(self: Vector3Int_x8, scalar: i32x8) Vector3Int_x8 { + pub inline fn mulScalar(self: Vector3Int_x8, scalar: vm.i32x8) Vector3Int_x8 { return .{ .x = self.x * scalar, .y = self.y * scalar, .z = self.z * scalar }; } pub inline fn mulScalarSingle(self: Vector3Int_x8, scalar: i32) Vector3Int_x8 { - return .{ .x = self.x * epi32(scalar), .y = self.y * epi32(scalar), .z = self.z * epi32(scalar) }; + return .{ .x = self.x * vm.epi32(scalar), .y = self.y * vm.epi32(scalar), .z = self.z * vm.epi32(scalar) }; } pub inline fn div(self: Vector3Int_x8, other: Vector3Int_x8) Vector3Int_x8 { return .{ .x = @divFloor(self.x, other.x), .y = @divFloor(self.y, other.y), .z = @divFloor(self.z, other.z) }; } - pub inline fn divScalar(self: Vector3Int_x8, scalar: i32x8) Vector3Int_x8 { + pub inline fn divScalar(self: Vector3Int_x8, scalar: vm.i32x8) Vector3Int_x8 { return .{ .x = @divFloor(self.x, scalar), .y = @divFloor(self.y, scalar), .z = @divFloor(self.z, scalar) }; } pub inline fn divScalarSingle(self: Vector3Int_x8, scalar: i32) Vector3Int_x8 { - return .{ .x
= @divFloor(self.x, epi32(scalar)), .y = @divFloor(self.y, epi32(scalar)), .z = @divFloor(self.z, epi32(scalar)) }; + return .{ .x = @divFloor(self.x, vm.epi32(scalar)), .y = @divFloor(self.y, vm.epi32(scalar)), .z = @divFloor(self.z, vm.epi32(scalar)) }; } pub inline fn mod(self: Vector3Int_x8, other: Vector3Int_x8) Vector3Int_x8 { return .{ .x = @mod(self.x, other.x), .y = @mod(self.y, other.y), .z = @mod(self.z, other.z) }; } - pub inline fn modScalar(self: Vector3Int_x8, scalar: i32x8) Vector3Int_x8 { + pub inline fn modScalar(self: Vector3Int_x8, scalar: vm.i32x8) Vector3Int_x8 { return .{ .x = @mod(self.x, scalar), .y = @mod(self.y, scalar), .z = @mod(self.z, scalar) }; } pub inline fn modScalarSingle(self: Vector3Int_x8, scalar: i32) Vector3Int_x8 { - return .{ .x = @mod(self.x, epi32(scalar)), .y = @mod(self.y, epi32(scalar)), .z = @mod(self.z, epi32(scalar)) }; + return .{ .x = @mod(self.x, vm.epi32(scalar)), .y = @mod(self.y, vm.epi32(scalar)), .z = @mod(self.z, vm.epi32(scalar)) }; } pub inline fn negate(self: Vector3Int_x8) Vector3Int_x8 { @@ -196,17 +151,17 @@ pub const Vector3Int_x8 = struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y), .z = @max(self.z, other.z) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- - pub inline fn lenSquared(self: Vector3Int_x8) i32x8 { + pub inline fn lenSquared(self: Vector3Int_x8) vm.i32x8 { return self.x * self.x + self.y * self.y + self.z * self.z; } - pub inline fn dot(self: Vector3Int_x8, other: Vector3Int_x8) i32x8 { + pub inline fn dot(self: Vector3Int_x8, other: Vector3Int_x8) vm.i32x8 { return self.x * other.x + self.y * other.y + self.z * other.z; } - pub inline fn cross(self: Vector3Int_x8, other: Vector3Int_x8) i32x8 { + pub inline fn cross(self: Vector3Int_x8, other: Vector3Int_x8) Vector3Int_x8 { return .{ .x = self.y * other.z - self.z * other.y, .y = self.z * other.x - self.x * other.z, diff --git
a/packages/vecmath/src/vectors/Vector3x8.zig b/packages/vecmath/src/vectors/Vector3x8.zig index 8ff6942..f66bcf2 100644 --- a/packages/vecmath/src/vectors/Vector3x8.zig +++ b/packages/vecmath/src/vectors/Vector3x8.zig @@ -1,12 +1,10 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Vector3x8 = struct { - x: f32x8, - y: f32x8, - z: f32x8, - - pub const Array = [24]f32; + x: vm.f32x8, + y: vm.f32x8, + z: vm.f32x8, pub const zero = initScalarSingle(0); pub const one = initScalarSingle(1); @@ -17,59 +15,42 @@ pub const Vector3x8 = struct { pub const unit_ny = initSingle(0, -1, 0); pub const unit_nz = initSingle(0, 0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- - pub inline fn init(x: f32x8, y: f32x8, z: f32x8) Vector3x8 { + pub inline fn init(x: vm.f32x8, y: vm.f32x8, z: vm.f32x8) Vector3x8 { return .{ .x = x, .y = y, .z = z }; } pub inline fn initSingle(x: f32, y: f32, z: f32) Vector3x8 { - return .{ .x = ps(x), .y = ps(y), .z = ps(z) }; + return .{ .x = vm.ps(x), .y = vm.ps(y), .z = vm.ps(z) }; } - pub inline fn initScalar(scalar: f32x8) Vector3x8 { + pub inline fn initScalar(scalar: vm.f32x8) Vector3x8 { return .{ .x = scalar, .y = scalar, .z = scalar }; } pub inline fn initScalarSingle(scalar: f32) Vector3x8 { - return .{ .x = ps(scalar), .y = ps(scalar), .z = ps(scalar) }; + return .{ .x = vm.ps(scalar), .y = vm.ps(scalar), .z = vm.ps(scalar) }; } - pub inline fn initSplat(vector: Vector3) Vector3x8 { - return .{ .x = ps(vector.x), .y = ps(vector.y), .z = ps(vector.z) }; + pub inline fn initArrayOfVectors(vectors: [8]vm.Vector3) Vector3x8 { + const vector: @Vector(24, f32) = @as([24]f32, @bitCast(vectors)); + return .{ + .x = @shuffle(f32, vector, undefined, [_]i32{ 0, 3, 6, 9, 12, 15, 18, 21 }), + .y = @shuffle(f32, vector, undefined, [_]i32{ 1, 4, 7, 10, 13, 16, 19, 22 }), + .z = @shuffle(f32, vector, undefined, [_]i32{ 2, 5, 8, 11, 14, 17, 20, 23 
}), + }; } - pub inline fn initArray(array: Array) Vector3x8 { - const x: f32x8 = array[0..8].*; - const y: f32x8 = array[8..16].*; - const z: f32x8 = array[16..24].*; - return .{ .x = x, .y = y, .z = z }; + pub inline fn splat(vector: vm.Vector3) Vector3x8 { + return .{ .x = vm.ps(vector.x), .y = vm.ps(vector.y), .z = vm.ps(vector.z) }; } - pub inline fn initArrayTranspose(array: Array) Vector3x8 { - const vector: @Vector(24, f32) = array; - const x: f32x8 = @shuffle(f32, vector, undefined, [_]i32{ 0, 3, 6, 9, 12, 15, 18, 21 }); - const y: f32x8 = @shuffle(f32, vector, undefined, [_]i32{ 1, 4, 7, 10, 13, 16, 19, 22 }); - const z: f32x8 = @shuffle(f32, vector, undefined, [_]i32{ 2, 5, 8, 11, 14, 17, 20, 23 }); - return .{ .x = x, .y = y, .z = z }; - } + // --- CONVERSION ---------------------------------------------------------- - pub inline fn initArrayOfVectors(vectors: [8]Vector3) Vector3x8 { - return initArrayTranspose(@bitCast(vectors)); - } - - // --- CONVERSION --- - - pub inline fn asArray(self: Vector3x8) Array { - const x: [8]f32 = self.x; - const y: [8]f32 = self.y; - const z: [8]f32 = self.z; - return x ++ y ++ z; - } - - pub inline fn asArrayTranspose(self: Vector3x8) Array { - const vector: @Vector(24, f32) = self.asArray(); - const transposed: @Vector(24, f32) = @shuffle(f32, vector, undefined, [_]i32{ + pub inline fn asArrayOfVectors(self: Vector3x8) [8]vm.Vector3 { + const vector: @Vector(24, f32) = self.x ++ self.y ++ self.z; + return @bitCast(@as([24]f32, @shuffle(f32, vector, undefined, [_]i32{ 0, 8, 16, 1, 9, 17, 2, 10, 18, @@ -78,46 +59,25 @@ pub const Vector3x8 = struct { 5, 13, 21, 6, 14, 22, 7, 15, 23, - }); - return transposed; + }))); } - pub inline fn asArrayOfVectors(self: Vector3x8) [8]Vector3 { - return @bitCast(self.asArrayTranspose()); - } - - pub inline fn unpack(self: Vector3x8) [3]f32x8 { + pub inline fn unpack(self: Vector3x8) [3]vm.f32x8 { return .{ self.x, self.y, self.z }; } - // --- LOAD AND STORE --- + // --- LOAD AND STORE
------------------------------------------------------ - pub inline fn loadArray(self: *Vector3x8, array: *const Array) void { - self.x = array[0..8].*; - self.y = array[8..16].*; - self.z = array[16..24].*; - } - - pub inline fn loadArrayTranspose(self: *Vector3x8, array: *const Array) void { - const vector: @Vector(24, f32) = array; + pub inline fn loadArrayOfVectors(self: *Vector3x8, array: *const [8]vm.Vector3) void { + const vector: @Vector(24, f32) = @as(*const [24]f32, @ptrCast(array)).*; self.x = @shuffle(f32, vector, undefined, [_]i32{ 0, 3, 6, 9, 12, 15, 18, 21 }); self.y = @shuffle(f32, vector, undefined, [_]i32{ 1, 4, 7, 10, 13, 16, 19, 22 }); self.z = @shuffle(f32, vector, undefined, [_]i32{ 2, 5, 8, 11, 14, 17, 20, 23 }); } - pub inline fn loadArrayOfVectors(self: *Vector3x8, vectors: *const [8]Vector3) void { - self.loadArrayTranspose(@ptrCast(vectors)); - } - - pub inline fn storeArray(self: *const Vector3x8, array: *Array) void { - array[0..8].* = self.x; - array[8..16].* = self.y; - array[16..24].* = self.z; - } - - pub inline fn storeArrayTranspose(self: *const Vector3x8, array: *Array) void { - const vector: @Vector(24, f32) = self.asArray(); - const transposed: @Vector(24, f32) = @shuffle(f32, vector, undefined, [_]i32{ + pub inline fn storeArrayOfVectors(self: *const Vector3x8, array: *[8]vm.Vector3) void { + const vector: @Vector(24, f32) = self.x ++ self.y ++ self.z; + @as(*[24]f32, @ptrCast(array)).* = @shuffle(f32, vector, undefined, [_]i32{ 0, 8, 16, 1, 9, 17, 2, 10, 18, @@ -127,14 +87,9 @@ pub const Vector3x8 = struct { 6, 14, 22, 7, 15, 23, }); - array.* = transposed; } - pub inline fn storeArrayOfVectors(self: *const Vector3x8, vectors: *[8]Vector3) void { - self.storeArrayTranspose(@ptrCast(vectors)); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn add(self: Vector3x8, other: Vector3x8) Vector3x8 { return .{ .x = self.x + other.x, .y = self.y + other.y, .z
= self.z + other.z }; @@ -148,24 +103,24 @@ pub const Vector3x8 = struct { return .{ .x = self.x * other.x, .y = self.y * other.y, .z = self.z * other.z }; } - pub inline fn mulScalar(self: Vector3x8, scalar: f32x8) Vector3x8 { + pub inline fn mulScalar(self: Vector3x8, scalar: vm.f32x8) Vector3x8 { return .{ .x = self.x * scalar, .y = self.y * scalar, .z = self.z * scalar }; } pub inline fn mulScalarSingle(self: Vector3x8, scalar: f32) Vector3x8 { - return .{ .x = self.x * ps(scalar), .y = self.y * ps(scalar), .z = self.z * ps(scalar) }; + return .{ .x = self.x * vm.ps(scalar), .y = self.y * vm.ps(scalar), .z = self.z * vm.ps(scalar) }; } pub inline fn div(self: Vector3x8, other: Vector3x8) Vector3x8 { return .{ .x = self.x / other.x, .y = self.y / other.y, .z = self.z / other.z }; } - pub inline fn divScalar(self: Vector3x8, scalar: f32x8) Vector3x8 { + pub inline fn divScalar(self: Vector3x8, scalar: vm.f32x8) Vector3x8 { return .{ .x = self.x / scalar, .y = self.y / scalar, .z = self.z / scalar }; } pub inline fn divScalarSingle(self: Vector3x8, scalar: f32) Vector3x8 { - return .{ .x = self.x / ps(scalar), .y = self.y / ps(scalar), .z = self.z / ps(scalar) }; + return .{ .x = self.x / vm.ps(scalar), .y = self.y / vm.ps(scalar), .z = self.z / vm.ps(scalar) }; } pub inline fn negate(self: Vector3x8) Vector3x8 { @@ -196,17 +151,17 @@ pub const Vector3x8 = struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y), .z = @max(self.z, other.z) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- - pub inline fn len(self: Vector3x8) f32x8 { + pub inline fn len(self: Vector3x8) vm.f32x8 { return @sqrt(self.x * self.x + self.y * self.y + self.z * self.z); } - pub inline fn lenSquared(self: Vector3x8) f32x8 { + pub inline fn lenSquared(self: Vector3x8) vm.f32x8 { return self.x * self.x + self.y * self.y + self.z * self.z; } - pub inline fn dot(self: Vector3x8, other: Vector3x8) f32x8 { + pub inline fn 
dot(self: Vector3x8, other: Vector3x8) vm.f32x8 { return self.x * other.x + self.y * other.y + self.z * other.z; } @@ -218,21 +173,45 @@ pub const Vector3x8 = struct { }; } - pub inline fn lerp(a: Vector3x8, b: Vector3x8, t: f32x8) Vector3x8 { + pub inline fn lerp(a: Vector3x8, b: Vector3x8, t: vm.f32x8) Vector3x8 { return .{ - .x = @mulAdd(f32x8, t, b.x, @mulAdd(f32x8, -t, a.x, a.x)), - .y = @mulAdd(f32x8, t, b.y, @mulAdd(f32x8, -t, a.y, a.y)), - .z = @mulAdd(f32x8, t, b.z, @mulAdd(f32x8, -t, a.z, a.z)), + .x = @mulAdd(vm.f32x8, t, b.x, @mulAdd(vm.f32x8, -t, a.x, a.x)), + .y = @mulAdd(vm.f32x8, t, b.y, @mulAdd(vm.f32x8, -t, a.y, a.y)), + .z = @mulAdd(vm.f32x8, t, b.z, @mulAdd(vm.f32x8, -t, a.z, a.z)), }; } - pub inline fn rotate(self: Vector3x8, quaternion: Quaternion_x8) Vector3x8 { - const quaternion_scalar = quaternion.getScalar(); - const quaternion_vector = quaternion.getVector(); + pub inline fn lerpSingle(a: Vector3x8, b: Vector3x8, t: f32) Vector3x8 { + return .{ + .x = @mulAdd(vm.f32x8, vm.ps(t), b.x, @mulAdd(vm.f32x8, -vm.ps(t), a.x, a.x)), + .y = @mulAdd(vm.f32x8, vm.ps(t), b.y, @mulAdd(vm.f32x8, -vm.ps(t), a.y, a.y)), + .z = @mulAdd(vm.f32x8, vm.ps(t), b.z, @mulAdd(vm.f32x8, -vm.ps(t), a.z, a.z)), + }; + } - return .add(self, .cross( - .add(quaternion_vector, quaternion_vector), - .add(.cross(quaternion_vector, self), .mulScalar(self, quaternion_scalar)), - )); + pub inline fn rotate(self: Vector3x8, quaternion: vm.Quaternion_x8) Vector3x8 { + const w = quaternion.getScalar(); + const xyz = quaternion.getVector(); + + return add( + self, + cross( + add(xyz, xyz), + add(cross(xyz, self), self.mulScalar(w)), + ), + ); + } + + pub inline fn rotateSingle(self: Vector3x8, quaternion: vm.Quaternion) Vector3x8 { + const w = quaternion.getScalar(); + const xyz = quaternion.getVector(); + + return add( + self, + cross( + splat(add(xyz, xyz)), + add(cross(splat(xyz), self), self.mulScalarSingle(w)), + ), + ); } }; diff --git 
a/packages/vecmath/src/vectors/Vector4.zig b/packages/vecmath/src/vectors/Vector4.zig index f3506b3..96f0e95 100644 --- a/packages/vecmath/src/vectors/Vector4.zig +++ b/packages/vecmath/src/vectors/Vector4.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Vector4 = extern struct { x: f32, @@ -20,7 +20,7 @@ pub const Vector4 = extern struct { pub const unit_nz = init(0, 0, -1, 0); pub const unit_nw = init(0, 0, 0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- pub inline fn init(x: f32, y: f32, z: f32, w: f32) Vector4 { return .{ .x = x, .y = y, .z = z, .w = w }; @@ -34,21 +34,13 @@ pub const Vector4 = extern struct { return @bitCast(array); } - // --- CONVERSION --- + // --- CONVERSION ---------------------------------------------------------- pub inline fn asArray(self: Vector4) Array { return @bitCast(self); } - pub inline fn asArrayPtr(self: *Vector4) *Array { - return @ptrCast(self); - } - - pub inline fn asArrayConstPtr(self: *const Vector4) *const Array { - return @ptrCast(self); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn add(self: Vector4, other: Vector4) Vector4 { return .{ .x = self.x + other.x, .y = self.y + other.y, .z = self.z + other.z, .w = self.w + other.w }; @@ -102,7 +94,7 @@ pub const Vector4 = extern struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y), .z = @max(self.z, other.z), .w = @max(self.w, other.w) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- pub inline fn len(self: Vector4) f32 { return @sqrt(self.x * self.x + self.y * self.y + self.z * self.z + self.w * self.w); diff --git a/packages/vecmath/src/vectors/Vector4Int.zig b/packages/vecmath/src/vectors/Vector4Int.zig index 6a2f86d..2faad11 100644 --- a/packages/vecmath/src/vectors/Vector4Int.zig +++ 
b/packages/vecmath/src/vectors/Vector4Int.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Vector4Int = extern struct { x: i32, @@ -20,7 +20,7 @@ pub const Vector4Int = extern struct { pub const unit_nz = init(0, 0, -1, 0); pub const unit_nw = init(0, 0, 0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- pub inline fn init(x: i32, y: i32, z: i32, w: i32) Vector4Int { return .{ .x = x, .y = y, .z = z, .w = w }; @@ -34,21 +34,13 @@ pub const Vector4Int = extern struct { return @bitCast(array); } - // --- CONVERSION --- + // --- CONVERSION ---------------------------------------------------------- pub inline fn asArray(self: Vector4Int) Array { return @bitCast(self); } - pub inline fn asArrayPtr(self: *Vector4Int) *Array { - return @ptrCast(self); - } - - pub inline fn asArrayConstPtr(self: *const Vector4Int) *const Array { - return @ptrCast(self); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn add(self: Vector4Int, other: Vector4Int) Vector4Int { return .{ .x = self.x + other.x, .y = self.y + other.y, .z = self.z + other.z, .w = self.w + other.w }; @@ -98,7 +90,7 @@ pub const Vector4Int = extern struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y), .z = @max(self.z, other.z), .w = @max(self.w, other.w) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- pub inline fn lenSquared(self: Vector4Int) i32 { return self.x * self.x + self.y * self.y + self.z * self.z + self.w * self.w; diff --git a/packages/vecmath/src/vectors/Vector4Int_x8.zig b/packages/vecmath/src/vectors/Vector4Int_x8.zig index 135a478..e1a58b0 100644 --- a/packages/vecmath/src/vectors/Vector4Int_x8.zig +++ b/packages/vecmath/src/vectors/Vector4Int_x8.zig @@ -1,13 +1,11 @@ const std = @import("std"); -const vm = @import("root"); 
+const vm = @import("../root.zig"); pub const Vector4Int_x8 = struct { - x: i32x8, - y: i32x8, - z: i32x8, - w: i32x8, - - pub const Array = [32]i32; + x: vm.i32x8, + y: vm.i32x8, + z: vm.i32x8, + w: vm.i32x8, pub const zero = initScalarSingle(0); pub const one = initScalarSingle(1); @@ -20,62 +18,43 @@ pub const Vector4Int_x8 = struct { pub const unit_nz = initSingle(0, 0, -1, 0); pub const unit_nw = initSingle(0, 0, 0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- - pub inline fn init(x: i32x8, y: i32x8, z: i32x8, w: i32x8) Vector4Int_x8 { + pub inline fn init(x: vm.i32x8, y: vm.i32x8, z: vm.i32x8, w: vm.i32x8) Vector4Int_x8 { return .{ .x = x, .y = y, .z = z, .w = w }; } pub inline fn initSingle(x: i32, y: i32, z: i32, w: i32) Vector4Int_x8 { - return .{ .x = epi32(x), .y = epi32(y), .z = epi32(z), .w = epi32(w) }; + return .{ .x = vm.epi32(x), .y = vm.epi32(y), .z = vm.epi32(z), .w = vm.epi32(w) }; } - pub inline fn initScalar(scalar: i32x8) Vector4Int_x8 { + pub inline fn initScalar(scalar: vm.i32x8) Vector4Int_x8 { return .{ .x = scalar, .y = scalar, .z = scalar, .w = scalar }; } pub inline fn initScalarSingle(scalar: i32) Vector4Int_x8 { - return .{ .x = epi32(scalar), .y = epi32(scalar), .z = epi32(scalar), .w = epi32(scalar) }; + return .{ .x = vm.epi32(scalar), .y = vm.epi32(scalar), .z = vm.epi32(scalar), .w = vm.epi32(scalar) }; } - pub inline fn initSplat(vector: Vector4Int) Vector4Int_x8 { - return .{ .x = epi32(vector.x), .y = epi32(vector.y), .z = epi32(vector.z), .w = epi32(vector.w) }; + pub inline fn initArrayOfVectors(vectors: [8]vm.Vector4Int) Vector4Int_x8 { + const vector: @Vector(32, i32) = @as([32]i32, @bitCast(vectors)); + return .{ + .x = @shuffle(i32, vector, undefined, [_]i32{ 0, 4, 8, 12, 16, 20, 24, 28 }), + .y = @shuffle(i32, vector, undefined, [_]i32{ 1, 5, 9, 13, 17, 21, 25, 29 }), + .z = @shuffle(i32, vector, undefined, [_]i32{ 2, 6, 10, 14, 18, 22, 26, 30 }), + .w = 
@shuffle(i32, vector, undefined, [_]i32{ 3, 7, 11, 15, 19, 23, 27, 31 }), + }; } - pub inline fn initArray(array: Array) Vector4Int_x8 { - const x: i32x8 = array[0..8].*; - const y: i32x8 = array[8..16].*; - const z: i32x8 = array[16..24].*; - const w: i32x8 = array[24..32].*; - return .{ .x = x, .y = y, .z = z, .w = w }; + pub inline fn initSplat(vector: vm.Vector4Int) Vector4Int_x8 { + return .{ .x = vm.epi32(vector.x), .y = vm.epi32(vector.y), .z = vm.epi32(vector.z), .w = vm.epi32(vector.w) }; } - pub inline fn initArrayTranspose(array: Array) Vector4Int_x8 { - const vector: @Vector(32, i32) = array; - const x: i32x8 = @shuffle(i32, vector, undefined, [_]i32{ 0, 4, 8, 12, 16, 20, 24, 28 }); - const y: i32x8 = @shuffle(i32, vector, undefined, [_]i32{ 1, 5, 9, 13, 17, 21, 25, 29 }); - const z: i32x8 = @shuffle(i32, vector, undefined, [_]i32{ 2, 6, 10, 14, 18, 22, 26, 30 }); - const w: i32x8 = @shuffle(i32, vector, undefined, [_]i32{ 3, 7, 11, 15, 19, 23, 27, 31 }); - return .{ .x = x, .y = y, .z = z, .w = w }; - } + // --- CONVERSION ---------------------------------------------------------- - pub inline fn initArrayOfVectors(vectors: [8]Vector4Int) Vector4Int_x8 { - return initArrayTranspose(@bitCast(vectors)); - } - - // --- CONVERSION --- - - pub inline fn asArray(self: Vector4Int_x8) Array { - const x: [8]i32 = self.x; - const y: [8]i32 = self.y; - const z: [8]i32 = self.z; - const w: [8]i32 = self.w; - return x ++ y ++ z ++ w; - } - - pub inline fn asArrayTranspose(self: Vector4Int_x8) Array { - const vector: @Vector(32, i32) = self.asArray(); - const transposed: @Vector(32, i32) = @shuffle(i32, vector, undefined, [_]i32{ + pub inline fn asArrayOfVectors(self: Vector4Int_x8) [8]vm.Vector4Int { + const vector: @Vector(32, i32) = self.x ++ self.y ++ self.z ++ self.w; + return @bitCast(@as([32]i32, @shuffle(i32, vector, undefined, [_]i32{ 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, @@ -84,49 +63,26 @@ pub const Vector4Int_x8 = struct { 5, 13, 21, 29, 6, 14, 22, 30,
7, 15, 23, 31, - }); - return transposed; + }))); } - pub inline fn asArrayOfVectors(self: Vector4Int_x8) [8]Vector4Int { - return @bitCast(self.asArrayTranspose()); - } - - pub inline fn unpack(self: Vector4Int_x8) [4]i32x8 { + pub inline fn unpack(self: Vector4Int_x8) [4]vm.i32x8 { return .{ self.x, self.y, self.z, self.w }; } - // --- LOAD AND STORE --- + // --- LOAD AND STORE ------------------------------------------------------ - pub inline fn loadArray(self: *Vector4Int_x8, array: *const Array) void { - self.x = array[0..8].*; - self.y = array[8..16].*; - self.z = array[16..24].*; - self.w = array[24..32].*; - } - - pub inline fn loadArrayTranspose(self: *Vector4Int_x8, array: *const Array) void { - const vector: @Vector(24, i32) = array; + pub inline fn loadArrayOfVectors(self: *Vector4Int_x8, array: *const [8]vm.Vector4Int) void { + const vector: @Vector(32, i32) = @as(*const [32]i32, @ptrCast(array)).*; self.x = @shuffle(i32, vector, undefined, [_]i32{ 0, 4, 8, 12, 16, 20, 24, 28 }); self.y = @shuffle(i32, vector, undefined, [_]i32{ 1, 5, 9, 13, 17, 21, 25, 29 }); self.z = @shuffle(i32, vector, undefined, [_]i32{ 2, 6, 10, 14, 18, 22, 26, 30 }); self.w = @shuffle(i32, vector, undefined, [_]i32{ 3, 7, 11, 15, 19, 23, 27, 31 }); } - pub inline fn loadArrayOfVectors(self: *Vector4Int_x8, vectors: *const [8]Vector4Int) void { - self.loadArrayTranspose(@ptrCast(vectors)); - } - - pub inline fn storeArray(self: *const Vector4Int_x8, array: *Array) void { - array[0..8].* = self.x; - array[8..16].* = self.y; - array[16..24].* = self.z; - array[24..32].* = self.w; - } - - pub inline fn storeArrayTranspose(self: *const Vector4Int_x8, array: *Array) void { - const vector: @Vector(32, i32) = self.asArray(); - const transposed: @Vector(32, i32) = @shuffle(i32, vector, undefined, [_]i32{ + pub inline fn storeArrayOfVectors(self: *const Vector4Int_x8, array: *[8]vm.Vector4Int) void { + const vector: @Vector(32, i32) = self.x ++ self.y ++ self.z ++ self.w; + 
@as(*[32]i32, @ptrCast(array)).* = @shuffle(i32, vector, undefined, [_]i32{ 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, @@ -136,14 +92,9 @@ pub const Vector4Int_x8 = struct { 6, 14, 22, 30, 7, 15, 23, 31, }); - array.* = transposed; } - pub inline fn storeArrayOfVectors(self: *const Vector4Int_x8, vectors: *[8]Vector4Int) void { - self.storeArrayTranspose(@ptrCast(vectors)); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn add(self: Vector4Int_x8, other: Vector4Int_x8) Vector4Int_x8 { return .{ .x = self.x + other.x, .y = self.y + other.y, .z = self.z + other.z, .w = self.w + other.w }; @@ -157,36 +108,36 @@ pub const Vector4Int_x8 = struct { return .{ .x = self.x * other.x, .y = self.y * other.y, .z = self.z * other.z, .w = self.w * other.w }; } - pub inline fn mulScalar(self: Vector4Int_x8, scalar: i32x8) Vector4Int_x8 { + pub inline fn mulScalar(self: Vector4Int_x8, scalar: vm.i32x8) Vector4Int_x8 { return .{ .x = self.x * scalar, .y = self.y * scalar, .z = self.z * scalar, .w = self.w * scalar }; } pub inline fn mulScalarSingle(self: Vector4Int_x8, scalar: i32) Vector4Int_x8 { - return .{ .x = self.x * epi32(scalar), .y = self.y * epi32(scalar), .z = self.z * epi32(scalar), .w = self.w * epi32(scalar) }; + return .{ .x = self.x * vm.epi32(scalar), .y = self.y * vm.epi32(scalar), .z = self.z * vm.epi32(scalar), .w = self.w * vm.epi32(scalar) }; } pub inline fn div(self: Vector4Int_x8, other: Vector4Int_x8) Vector4Int_x8 { return .{ .x = @divFloor(self.x, other.x), .y = @divFloor(self.y, other.y), .z = @divFloor(self.z, other.z), .w = @divFloor(self.w, other.w) }; } - pub inline fn divScalar(self: Vector4Int_x8, scalar: i32x8) Vector4Int_x8 { + pub inline fn divScalar(self: Vector4Int_x8, scalar: vm.i32x8) Vector4Int_x8 { return .{ .x = @divFloor(self.x, scalar), .y = @divFloor(self.y, scalar), .z = @divFloor(self.z, scalar), .w = @divFloor(self.w, scalar) }; } pub inline fn 
divScalarSingle(self: Vector4Int_x8, scalar: i32) Vector4Int_x8 { - return .{ .x = @divFloor(self.x, epi32(scalar)), .y = @divFloor(self.y, epi32(scalar)), .z = @divFloor(self.z, epi32(scalar)), .w = @divFloor(self.w, epi32(scalar)) }; + return .{ .x = @divFloor(self.x, vm.epi32(scalar)), .y = @divFloor(self.y, vm.epi32(scalar)), .z = @divFloor(self.z, vm.epi32(scalar)), .w = @divFloor(self.w, vm.epi32(scalar)) }; } pub inline fn mod(self: Vector4Int_x8, other: Vector4Int_x8) Vector4Int_x8 { return .{ .x = @mod(self.x, other.x), .y = @mod(self.y, other.y), .z = @mod(self.z, other.z), .w = @mod(self.w, other.w) }; } - pub inline fn modScalar(self: Vector4Int_x8, scalar: i32x8) Vector4Int_x8 { + pub inline fn modScalar(self: Vector4Int_x8, scalar: vm.i32x8) Vector4Int_x8 { return .{ .x = @mod(self.x, scalar), .y = @mod(self.y, scalar), .z = @mod(self.z, scalar), .w = @mod(self.w, scalar) }; } pub inline fn modScalarSingle(self: Vector4Int_x8, scalar: i32) Vector4Int_x8 { - return .{ .x = @mod(self.x, epi32(scalar)), .y = @mod(self.y, epi32(scalar)), .z = @mod(self.z, epi32(scalar)), .w = @mod(self.w, epi32(scalar)) }; + return .{ .x = @mod(self.x, vm.epi32(scalar)), .y = @mod(self.y, vm.epi32(scalar)), .z = @mod(self.z, vm.epi32(scalar)), .w = @mod(self.w, vm.epi32(scalar)) }; } pub inline fn negate(self: Vector4Int_x8) Vector4Int_x8 { @@ -205,13 +156,13 @@ pub const Vector4Int_x8 = struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y), .z = @max(self.z, other.z), .w = @max(self.w, other.w) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- - pub inline fn lenSquared(self: Vector4Int_x8) i32x8 { + pub inline fn lenSquared(self: Vector4Int_x8) vm.i32x8 { return self.x * self.x + self.y * self.y + self.z * self.z + self.w * self.w; } - pub inline fn dot(self: Vector4Int_x8, other: Vector4Int_x8) i32x8 { + pub inline fn dot(self: Vector4Int_x8, other: Vector4Int_x8) vm.i32x8 { return self.x * 
other.x + self.y * other.y + self.z * other.z + self.w * other.w; } }; diff --git a/packages/vecmath/src/vectors/Vector4x8.zig b/packages/vecmath/src/vectors/Vector4x8.zig index 301827b..2f889b5 100644 --- a/packages/vecmath/src/vectors/Vector4x8.zig +++ b/packages/vecmath/src/vectors/Vector4x8.zig @@ -1,13 +1,11 @@ const std = @import("std"); -const vm = @import("root"); +const vm = @import("../root.zig"); pub const Vector4x8 = struct { - x: f32x8, - y: f32x8, - z: f32x8, - w: f32x8, - - pub const Array = [32]f32; + x: vm.f32x8, + y: vm.f32x8, + z: vm.f32x8, + w: vm.f32x8, pub const zero = initScalarSingle(0); pub const one = initScalarSingle(1); @@ -20,62 +18,43 @@ pub const Vector4x8 = struct { pub const unit_nz = initSingle(0, 0, -1, 0); pub const unit_nw = initSingle(0, 0, 0, -1); - // --- INIT ---- + // --- INIT ---------------------------------------------------------------- - pub inline fn init(x: f32x8, y: f32x8, z: f32x8, w: f32x8) Vector4x8 { + pub inline fn init(x: vm.f32x8, y: vm.f32x8, z: vm.f32x8, w: vm.f32x8) Vector4x8 { return .{ .x = x, .y = y, .z = z, .w = w }; } pub inline fn initSingle(x: f32, y: f32, z: f32, w: f32) Vector4x8 { - return .{ .x = ps(x), .y = ps(y), .z = ps(z), .w = ps(w) }; + return .{ .x = vm.ps(x), .y = vm.ps(y), .z = vm.ps(z), .w = vm.ps(w) }; } - pub inline fn initScalar(scalar: f32x8) Vector4x8 { + pub inline fn initScalar(scalar: vm.f32x8) Vector4x8 { return .{ .x = scalar, .y = scalar, .z = scalar, .w = scalar }; } pub inline fn initScalarSingle(scalar: f32) Vector4x8 { - return .{ .x = ps(scalar), .y = ps(scalar), .z = ps(scalar), .w = ps(scalar) }; + return .{ .x = vm.ps(scalar), .y = vm.ps(scalar), .z = vm.ps(scalar), .w = vm.ps(scalar) }; } - pub inline fn initSplat(vector: Vector4) Vector4x8 { - return .{ .x = ps(vector.x), .y = ps(vector.y), .z = ps(vector.z), .w = ps(vector.w) }; + pub inline fn initArrayOfVectors(vectors: [8]vm.Vector4) Vector4x8 { + const vector: @Vector(32, f32) = @as([32]f32, 
@bitCast(vectors)); + return .{ + .x = @shuffle(f32, vector, undefined, [_]i32{ 0, 4, 8, 12, 16, 20, 24, 28 }), + .y = @shuffle(f32, vector, undefined, [_]i32{ 1, 5, 9, 13, 17, 21, 25, 29 }), + .z = @shuffle(f32, vector, undefined, [_]i32{ 2, 6, 10, 14, 18, 22, 26, 30 }), + .w = @shuffle(f32, vector, undefined, [_]i32{ 3, 7, 11, 15, 19, 23, 27, 31 }), + }; } - pub inline fn initArray(array: Array) Vector4x8 { - const x: f32x8 = array[0..8].*; - const y: f32x8 = array[8..16].*; - const z: f32x8 = array[16..24].*; - const w: f32x8 = array[24..32].*; - return .{ .x = x, .y = y, .z = z, .w = w }; + pub inline fn splat(vector: vm.Vector4) Vector4x8 { + return .{ .x = vm.ps(vector.x), .y = vm.ps(vector.y), .z = vm.ps(vector.z), .w = vm.ps(vector.w) }; } - pub inline fn initArrayTranspose(array: Array) Vector4x8 { - const vector: @Vector(32, f32) = array; - const x: f32x8 = @shuffle(f32, vector, undefined, [_]i32{ 0, 4, 8, 12, 16, 20, 24, 28 }); - const y: f32x8 = @shuffle(f32, vector, undefined, [_]i32{ 1, 5, 9, 13, 17, 21, 25, 29 }); - const z: f32x8 = @shuffle(f32, vector, undefined, [_]i32{ 2, 6, 10, 14, 18, 22, 26, 30 }); - const w: f32x8 = @shuffle(f32, vector, undefined, [_]i32{ 3, 7, 11, 15, 19, 23, 27, 31 }); - return .{ .x = x, .y = y, .z = z, .w = w }; - } + // --- CONVERSION ---------------------------------------------------------- - pub inline fn initArrayOfVectors(vectors: [8]Vector4) Vector4x8 { - return initArrayTranspose(@bitCast(vectors)); - } - - // --- CONVERSION --- - - pub inline fn asArray(self: Vector4x8) Array { - const x: [8]f32 = self.x; - const y: [8]f32 = self.y; - const z: [8]f32 = self.z; - const w: [8]f32 = self.w; - return x ++ y ++ z ++ w; - } - - pub inline fn asArrayTranspose(self: Vector4x8) Array { - const vector: @Vector(32, f32) = self.asArray(); - const transposed: @Vector(32, f32) = @shuffle(f32, vector, undefined, [_]i32{ + pub inline fn asArrayOfVectors(self: Vector4x8) [8]vm.Vector4 { + const vector: @Vector(32, f32) = self.x 
++ self.y ++ self.z ++ self.w; + return @bitCast(@as([32]f32, @shuffle(f32, vector, undefined, [_]i32{ 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, @@ -84,49 +63,26 @@ pub const Vector4x8 = struct { 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31, - }); - return transposed; + }))); } - pub inline fn asArrayOfVectors(self: Vector4x8) [8]Vector4 { - return @bitCast(self.asArrayTranspose()); - } - - pub inline fn unpack(self: Vector4x8) [4]f32x8 { + pub inline fn unpack(self: Vector4x8) [4]vm.f32x8 { return .{ self.x, self.y, self.z, self.w }; } - // --- LOAD AND STORE --- + // --- LOAD AND STORE ------------------------------------------------------ - pub inline fn loadArray(self: *Vector4x8, array: *const Array) void { - self.x = array[0..8].*; - self.y = array[8..16].*; - self.z = array[16..24].*; - self.w = array[24..32].*; - } - - pub inline fn loadArrayTranspose(self: *Vector4x8, array: *const Array) void { - const vector: @Vector(32, f32) = array; + pub inline fn loadArrayOfVectors(self: *Vector4x8, array: *const [8]vm.Vector4) void { + const vector: @Vector(32, f32) = @as(*const [32]f32, @ptrCast(array)).*; self.x = @shuffle(f32, vector, undefined, [_]i32{ 0, 4, 8, 12, 16, 20, 24, 28 }); self.y = @shuffle(f32, vector, undefined, [_]i32{ 1, 5, 9, 13, 17, 21, 25, 29 }); self.z = @shuffle(f32, vector, undefined, [_]i32{ 2, 6, 10, 14, 18, 22, 26, 30 }); self.w = @shuffle(f32, vector, undefined, [_]i32{ 3, 7, 11, 15, 19, 23, 27, 31 }); } - pub inline fn loadArrayOfVectors(self: *Vector4x8, vectors: *const [8]Vector4) void { - self.loadArrayTranspose(@ptrCast(vectors)); - } - - pub inline fn storeArray(self: *const Vector4x8, array: *Array) void { - array[0..8].* = self.x; - array[8..16].* = self.y; - array[16..24].* = self.z; - array[24..32].* = self.w; - } - - pub inline fn storeArrayTranspose(self: *const Vector4x8, array: *Array) void { - const vector: @Vector(32, f32) = self.asArray(); - const transposed: @Vector(32, f32) = @shuffle(f32, vector, undefined, [_]i32{ + 
pub inline fn storeArrayOfVectors(self: *const Vector4x8, array: *[8]vm.Vector4) void { + const vector: @Vector(32, f32) = self.x ++ self.y ++ self.z ++ self.w; + @as(*[32]f32, @ptrCast(array)).* = @shuffle(f32, vector, undefined, [_]i32{ 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, @@ -136,14 +92,9 @@ pub const Vector4x8 = struct { 6, 14, 22, 30, 7, 15, 23, 31, }); - array.* = transposed; } - pub inline fn storeArrayOfVectors(self: *const Vector4x8, vectors: *[8]Vector4) void { - self.storeArrayTranspose(@ptrCast(vectors)); - } - - // --- COMPONENT-WISE --- + // --- COMPONENT-WISE ------------------------------------------------------ pub inline fn add(self: Vector4x8, other: Vector4x8) Vector4x8 { return .{ .x = self.x + other.x, .y = self.y + other.y, .z = self.z + other.z, .w = self.w + other.w }; @@ -157,24 +108,24 @@ pub const Vector4x8 = struct { return .{ .x = self.x * other.x, .y = self.y * other.y, .z = self.z * other.z, .w = self.w * other.w }; } - pub inline fn mulScalar(self: Vector4x8, scalar: f32x8) Vector4x8 { + pub inline fn mulScalar(self: Vector4x8, scalar: vm.f32x8) Vector4x8 { return .{ .x = self.x * scalar, .y = self.y * scalar, .z = self.z * scalar, .w = self.w * scalar }; } pub inline fn mulScalarSingle(self: Vector4x8, scalar: f32) Vector4x8 { - return .{ .x = self.x * ps(scalar), .y = self.y * ps(scalar), .z = self.z * ps(scalar), .w = self.w * ps(scalar) }; + return .{ .x = self.x * vm.ps(scalar), .y = self.y * vm.ps(scalar), .z = self.z * vm.ps(scalar), .w = self.w * vm.ps(scalar) }; } pub inline fn div(self: Vector4x8, other: Vector4x8) Vector4x8 { return .{ .x = self.x / other.x, .y = self.y / other.y, .z = self.z / other.z, .w = self.w / other.w }; } - pub inline fn divScalar(self: Vector4x8, scalar: f32x8) Vector4x8 { + pub inline fn divScalar(self: Vector4x8, scalar: vm.f32x8) Vector4x8 { return .{ .x = self.x / scalar, .y = self.y / scalar, .z = self.z / scalar, .w = self.w / scalar }; } pub inline fn divScalarSingle(self: Vector4x8, 
scalar: f32) Vector4x8 { - return .{ .x = self.x / ps(scalar), .y = self.y / ps(scalar), .z = self.z / ps(scalar), .w = self.w / ps(scalar) }; + return .{ .x = self.x / vm.ps(scalar), .y = self.y / vm.ps(scalar), .z = self.z / vm.ps(scalar), .w = self.w / vm.ps(scalar) }; } pub inline fn negate(self: Vector4x8) Vector4x8 { @@ -205,26 +156,35 @@ pub const Vector4x8 = struct { return .{ .x = @max(self.x, other.x), .y = @max(self.y, other.y), .z = @max(self.z, other.z), .w = @max(self.w, other.w) }; } - // --- OTHER --- + // --- OTHER --------------------------------------------------------------- - pub inline fn len(self: Vector4x8) f32x8 { + pub inline fn len(self: Vector4x8) vm.f32x8 { return @sqrt(self.x * self.x + self.y * self.y + self.z * self.z + self.w * self.w); } - pub inline fn lenSquared(self: Vector4x8) f32x8 { + pub inline fn lenSquared(self: Vector4x8) vm.f32x8 { return self.x * self.x + self.y * self.y + self.z * self.z + self.w * self.w; } - pub inline fn dot(self: Vector4x8, other: Vector4x8) f32x8 { + pub inline fn dot(self: Vector4x8, other: Vector4x8) vm.f32x8 { return self.x * other.x + self.y * other.y + self.z * other.z + self.w * other.w; } - pub inline fn lerp(a: Vector4x8, b: Vector4x8, t: f32x8) Vector4x8 { + pub inline fn lerp(a: Vector4x8, b: Vector4x8, t: vm.f32x8) Vector4x8 { return .{ - .x = @mulAdd(f32x8, t, b.x, @mulAdd(f32x8, -t, a.x, a.x)), - .y = @mulAdd(f32x8, t, b.y, @mulAdd(f32x8, -t, a.y, a.y)), - .z = @mulAdd(f32x8, t, b.z, @mulAdd(f32x8, -t, a.z, a.z)), - .w = @mulAdd(f32x8, t, b.w, @mulAdd(f32x8, -t, a.w, a.w)), + .x = @mulAdd(vm.f32x8, t, b.x, @mulAdd(vm.f32x8, -t, a.x, a.x)), + .y = @mulAdd(vm.f32x8, t, b.y, @mulAdd(vm.f32x8, -t, a.y, a.y)), + .z = @mulAdd(vm.f32x8, t, b.z, @mulAdd(vm.f32x8, -t, a.z, a.z)), + .w = @mulAdd(vm.f32x8, t, b.w, @mulAdd(vm.f32x8, -t, a.w, a.w)), + }; + } + + pub inline fn lerpSingle(a: Vector4x8, b: Vector4x8, t: f32) Vector4x8 { + return .{ + .x = @mulAdd(vm.f32x8, vm.ps(t), b.x, 
@mulAdd(vm.f32x8, -vm.ps(t), a.x, a.x)), + .y = @mulAdd(vm.f32x8, vm.ps(t), b.y, @mulAdd(vm.f32x8, -vm.ps(t), a.y, a.y)), + .z = @mulAdd(vm.f32x8, vm.ps(t), b.z, @mulAdd(vm.f32x8, -vm.ps(t), a.z, a.z)), + .w = @mulAdd(vm.f32x8, vm.ps(t), b.w, @mulAdd(vm.f32x8, -vm.ps(t), a.w, a.w)), }; } };