From faddb1f35ebcd6ede6b411310c78c82135c8bdab Mon Sep 17 00:00:00 2001 From: Szymon Nowakowski Date: Tue, 2 Dec 2025 01:57:21 +0100 Subject: [PATCH] Vectorized noise (but was it a bottleneck?) --- src/Game.zig | 49 +++++++----- src/math.zig | 23 ++++++ src/math/Vector2.zig | 2 +- src/math/Vector2x8.zig | 165 +++++++++++++++++++++++++++++++++++++++++ src/math/Vector3.zig | 2 +- src/math/Vector4.zig | 2 +- src/math/noise.zig | 48 ++++++------ src/worldgen.zig | 20 +++++ 8 files changed, 262 insertions(+), 49 deletions(-) create mode 100644 src/math/Vector2x8.zig diff --git a/src/Game.zig b/src/Game.zig index 87689b2..a5cbd01 100644 --- a/src/Game.zig +++ b/src/Game.zig @@ -21,8 +21,10 @@ const Interator2 = math.Interator2; const Matrix4x4 = math.Matrix4x4; const Quaternion = math.Quaternion; const Vector2 = math.Vector2; +const Vector2x8 = math.Vector2x8; const Vector3 = math.Vector3; const Vector4 = math.Vector4; +const ps = math.ps; const PointLight = extern struct { positionWS: [3]f32, @@ -628,27 +630,36 @@ pub fn init(allocator: std.mem.Allocator, engine: *Engine, swapchain: *Swapchain ); const chunk = chunks.getPtr(chunk_coords3).?; - var it2 = Interator2(usize).init(0, 0, 15, 15); - while (it2.next()) |pos| { - const x, const y = pos; - const fpos = Vector2.init( - @floatFromInt(pos[0]), - @floatFromInt(pos[1]), - ).add(origin.asVector2()); + var y: usize = 0; + while (y < 16) : (y += 1) { + const fpos0 = Vector2x8.initScalars( + .{ 0, 1, 2, 3, 4, 5, 6, 7 }, + @splat(@floatFromInt(y)), + ).add(.initVector(origin.asVector2())); + const fpos1 = Vector2x8.initScalars( + .{ 8, 9, 10, 11, 12, 13, 14, 15 }, + @splat(@floatFromInt(y)), + ).add(.initVector(origin.asVector2())); - const iheight = worldgen.heightI(world_seed, fpos); + const iheight0 = worldgen.heightIv(world_seed, fpos0); + const iheight1 = worldgen.heightIv(world_seed, fpos1); - chunk.blocks[0][y][x] = block_bedrock; - var i: i32 = 0; - while (i < iheight) : (i += 1) { - const iz = i + 1; - const block = if (i + 1 == iheight) - block_grass - else if (i + 4 >= iheight) - block_dirt - else - block_stone; - chunk.blocks[@intCast(iz)][y][x] = block; + const iheightv: [16]i32 = @as([8]i32, iheight0) ++ @as([8]i32, iheight1); + var x: usize = 0; + while (x < 16) : (x += 1) { + chunk.blocks[0][y][x] = block_bedrock; + const iheight = iheightv[x]; + var i: i32 = 0; + while (i < iheight) : (i += 1) { + const iz = i + 1; + const block = if (i + 1 == iheight) + block_grass + else if (i + 4 >= iheight) + block_dirt + else + block_stone; + chunk.blocks[@intCast(iz)][y][x] = block; + } } } } diff --git a/src/math.zig b/src/math.zig index fb77496..d868b6f 100644 --- a/src/math.zig +++ b/src/math.zig @@ -4,12 +4,35 @@ pub const Iterator3 = @import("math/Iterator3.zig"); pub const Matrix4x4 = @import("math/Matrix4x4.zig").Matrix4x4; pub const Quaternion = @import("math/Quaternion.zig").Quaternion; pub const Vector2 = @import("math/Vector2.zig").Vector2; +pub const Vector2x8 = @import("math/Vector2x8.zig").Vector2x8; pub const Vector3 = @import("math/Vector3.zig").Vector3; pub const Vector4 = @import("math/Vector4.zig").Vector4; +pub const f32x8 = @Vector(8, f32); +pub const i32x8 = @Vector(8, i32); +pub const u32x8 = @Vector(8, u32); +pub const u64x8 = @Vector(8, u64); + +pub inline fn ps(value: f32) f32x8 { + return @splat(value); +} + +pub inline fn epi32(value: i32) i32x8 { + return @splat(value); +} + +pub inline fn epu32(value: u32) u32x8 { + return @splat(value); +} + +pub inline fn epu64x2(value: u64) u64x8 { + return @splat(value); +} + pub inline fn lerp(a: f32, b: f32, t: f32) f32 { const s = 1.0 - t; return a * t + b * s; } pub const noise2 = @import("math/noise.zig").noise2; +pub const noise2x8 = @import("math/noise.zig").noise2x8; diff --git a/src/math/Vector2.zig b/src/math/Vector2.zig index dd32912..879add3 100644 --- a/src/math/Vector2.zig +++ b/src/math/Vector2.zig @@ -138,7 +138,7 @@ pub const Vector2 = extern struct { const s = 1.0 - t; const t_vector: Vector = @splat(t); const s_vector: Vector = @splat(s); - return .{ .vector = self * t_vector + other * s_vector }; + return .{ .vector = self * s_vector + other * t_vector }; } pub inline fn rotate(self: Vector2, angle_rad: f32) Vector2 { diff --git a/src/math/Vector2x8.zig b/src/math/Vector2x8.zig new file mode 100644 index 0000000..af929af --- /dev/null +++ b/src/math/Vector2x8.zig @@ -0,0 +1,165 @@ +const std = @import("std"); +const math = @import("../math.zig"); + +const Vector2 = math.Vector2; + +const f32x8 = math.f32x8; + +const ps = math.ps; + +pub const Vector2x8 = extern struct { + x: f32x8, + y: f32x8, + + pub const zero = Vector2x8.init(ps(0), ps(0)); + pub const one = Vector2x8.init(ps(1), ps(1)); + pub const unit_x = Vector2x8.init(ps(1), ps(0)); + pub const unit_y = Vector2x8.init(ps(0), ps(1)); + + // --- INIT --- + + pub inline fn initScalar(x: f32, y: f32) Vector2x8 { + return .{ .x = ps(x), .y = ps(y) }; + } + + pub inline fn initScalars(x: f32x8, y: f32x8) Vector2x8 { + return .{ .x = x, .y = y }; + } + + pub inline fn initVector(vector: Vector2) Vector2x8 { + return .{ .x = ps(vector.getX()), .y = ps(vector.getY()) }; + } + + pub inline fn initVectors(vectors: [8]Vector2) Vector2x8 { + const v0: f32x8 = @as([8]f32, @bitCast(vectors[0..4].*)); + const v1: f32x8 = @as([8]f32, @bitCast(vectors[4..8].*)); + const x: f32x8 = @shuffle(f32, v0, v1, [_]i32{ 0, 2, 4, 6, ~@as(i32, 0), ~@as(i32, 2), ~@as(i32, 4), ~@as(i32, 6) }); + const y: f32x8 = @shuffle(f32, v0, v1, [_]i32{ 1, 3, 5, 7, ~@as(i32, 1), ~@as(i32, 3), ~@as(i32, 5), ~@as(i32, 7) }); + return .{ .x = x, .y = y }; + } + + // --- CONVERSION --- + + pub inline fn asVectors(self: Vector2x8) [8]Vector2 { + const v0: f32x8 = @shuffle(f32, self.x, self.y, [_]i32{ 0, ~@as(i32, 0), 1, ~@as(i32, 1), 2, ~@as(i32, 2), 3, ~@as(i32, 3) }); + const v1: f32x8 = @shuffle(f32, self.x, self.y, [_]i32{ 4, ~@as(i32, 4), 5, ~@as(i32, 5), 6, ~@as(i32, 6), 7, ~@as(i32, 7) }); + return @as([4]Vector2, @bitCast(@as([8]f32, v0))) ++ @as([4]Vector2, @bitCast(@as([8]f32, v1))); + } + + // --- COMPONENT-WISE --- + + pub inline fn add(self: Vector2x8, other: Vector2x8) Vector2x8 { + return .{ + .x = self.x + other.x, + .y = self.y + other.y, + }; + } + + pub inline fn sub(self: Vector2x8, other: Vector2x8) Vector2x8 { + return .{ + .x = self.x - other.x, + .y = self.y - other.y, + }; + } + + pub inline fn mul(self: Vector2x8, other: Vector2x8) Vector2x8 { + return .{ + .x = self.x * other.x, + .y = self.y * other.y, + }; + } + + pub inline fn div(self: Vector2x8, other: Vector2x8) Vector2x8 { + return .{ + .x = self.x / other.x, + .y = self.y / other.y, + }; + } + + pub inline fn negate(self: Vector2x8) Vector2x8 { + return .{ + .x = -self.x, + .y = -self.y, + }; + } + + pub inline fn mulScalar(self: Vector2x8, scalar: f32) Vector2x8 { + return .{ + .x = self.x * ps(scalar), + .y = self.y * ps(scalar), + }; + } + + pub inline fn mulScalars(self: Vector2x8, scalar: f32x8) Vector2x8 { + return .{ + .x = self.x * scalar, + .y = self.y * scalar, + }; + } + + pub inline fn divScalar(self: Vector2x8, scalar: f32) Vector2x8 { + return .{ + .x = self.x / ps(scalar), + .y = self.y / ps(scalar), + }; + } + + pub inline fn divScalars(self: Vector2x8, scalar: f32x8) Vector2x8 { + return .{ + .x = self.x / scalar, + .y = self.y / scalar, + }; + } + + // --- OTHER --- + + pub inline fn len(self: Vector2x8) f32x8 { + return @sqrt(self.x * self.x + self.y * self.y); + } + + pub inline fn lenSquared(self: Vector2x8) f32x8 { + return self.x * self.x + self.y * self.y; + } + + pub inline fn normalize(self: Vector2x8) Vector2x8 { + const len_vector: f32x8 = @sqrt(self.x * self.x + self.y * self.y); + return .{ + .x = self.x / len_vector, + .y = self.y / len_vector, + }; + } + + pub inline fn dot(self: Vector2x8, other: Vector2x8) f32x8 { + return self.x * other.x + self.y * other.y; + } + + pub inline fn cross(self: Vector2x8, other: Vector2x8) f32x8 { + return self.x * other.y - self.y * other.x; + } + + pub inline fn lerp(self: Vector2x8, other: Vector2x8, t: f32x8) Vector2x8 { + const s: f32x8 = ps(1.0) - t; + return .{ + .x = self.x * s + other.x * t, + .y = self.y * s + other.y * t, + }; + } + + pub inline fn rotate(self: Vector2x8, angle_rad: f32) Vector2x8 { + const c = @cos(angle_rad); + const s = @sin(angle_rad); + return .{ + .x = self.x * ps(c) - self.y * ps(s), + .y = self.x * ps(s) + self.y * ps(c), + }; + } + + pub inline fn rotatev(self: Vector2x8, angle_rad: f32x8) Vector2x8 { + const c = @cos(angle_rad); + const s = @sin(angle_rad); + return .{ + .x = self.x * c - self.y * s, + .y = self.x * s + self.y * c, + }; + } +}; diff --git a/src/math/Vector3.zig b/src/math/Vector3.zig index 0fb81fe..4711a24 100644 --- a/src/math/Vector3.zig +++ b/src/math/Vector3.zig @@ -154,7 +154,7 @@ pub const Vector3 = extern struct { const s = 1.0 - t; const t_vector: Vector = @splat(t); const s_vector: Vector = @splat(s); - return .{ .vector = self * t_vector + other * s_vector }; + return .{ .vector = self * s_vector + other * t_vector }; } pub inline fn rotate(self: Vector3, quaternion: Quaternion) Vector2 { diff --git a/src/math/Vector4.zig b/src/math/Vector4.zig index ff7c9dc..f84f85f 100644 --- a/src/math/Vector4.zig +++ b/src/math/Vector4.zig @@ -152,6 +152,6 @@ pub const Vector4 = extern struct { const s = 1.0 - t; const t_vector: Vector = @splat(t); const s_vector: Vector = @splat(s); - return .{ .vector = self * t_vector + other * s_vector }; + return .{ .vector = self * s_vector + other * t_vector }; } }; diff --git a/src/math/noise.zig b/src/math/noise.zig index 2ecbae7..94699ce 100644 --- a/src/math/noise.zig +++ b/src/math/noise.zig @@ -1,27 +1,18 @@ const std = @import("std"); +const math = @import("../math.zig"); -const Vector2 = @import("Vector2.zig").Vector2; +const Vector2 = math.Vector2; +const Vector2x8 = math.Vector2x8; -const f32x8 = @Vector(8, f32); -const i32x8 = @Vector(8, i32); -const u32x8 = @Vector(8, u32); -const u64x8 = @Vector(8, u64); +const f32x8 = math.f32x8; +const i32x8 = math.i32x8; +const u32x8 = math.u32x8; +const u64x8 = math.u64x8; -inline fn ps(value: f32) f32x8 { - return @splat(value); -} - -inline fn epi32(value: i32) i32x8 { - return @splat(value); -} - -inline fn epu32(value: u32) u32x8 { - return @splat(value); -} - -inline fn epu64x2(value: u64) u64x8 { - return @splat(value); -} +const ps = math.ps; +const epi32 = math.epi32; +const epu32 = math.epu32; +const epu64x2 = math.epu64x2; const prime_x: u64 = 17061574742423305691; const prime_y: u64 = 10555943830568207707; @@ -56,7 +47,7 @@ fn grad2(hash: i32, x: f32, y: f32) f32 { return (if (h & 0b001 != 0) -u else u) + (if (h & 0b010 != 0) -2 * v else 2 * v); } -fn grad2v(hash: i32x8, x: f32x8, y: f32x8) f32x8 { +fn grad2x8(hash: i32x8, x: f32x8, y: f32x8) f32x8 { const h: i32x8 = hash & epi32(0b111); const u: f32x8 = @select(f32, h < epi32(4), x, y); const v: f32x8 = @select(f32, h < epi32(4), y, x); @@ -73,7 +64,7 @@ const G2v: f32x8 = ps(G2); // NOTE No idea why this value, derived experimentally const noise2_scale: f32 = 34.11; -const noise2v_scale: f32x8 = ps(noise2_scale); +const noise2x8_scale: f32x8 = ps(noise2_scale); pub fn noise2(seed: u64, v: Vector2) f32 { const x: f32, const y: f32 = v.asArray(); @@ -115,7 +106,10 @@ pub fn noise2(seed: u64, v: Vector2) f32 { return ret; } -pub fn noise2v(seed: u64, x: f32x8, y: f32x8) f32x8 { +pub fn noise2x8(seed: u64, v: Vector2x8) f32x8 { + const x: f32x8 = v.x; + const y: f32x8 = v.y; + const s: f32x8 = (x + y) * F2v; const xs: f32x8 = x + s; const ys: f32x8 = y + s; @@ -144,11 +138,11 @@ pub fn noise2v(seed: u64, x: f32x8, y: f32x8) f32x8 { const gi1: i32x8 = permv(seed, i + _i1, j + _j1); const gi2: i32x8 = permv(seed, i + epi32(1), j + epi32(1)); - const n0: f32x8 = @select(f32, t0 < ps(0), ps(0), (t0 * t0) * (t0 * t0) * grad2(gi0, x0, y0)); - const n1: f32x8 = @select(f32, t1 < ps(0), ps(0), (t1 * t1) * (t1 * t1) * grad2(gi1, x1, y1)); - const n2: f32x8 = @select(f32, t2 < ps(0), ps(0), (t2 * t2) * (t2 * t2) * grad2(gi2, x2, y2)); + const n0: f32x8 = @select(f32, t0 < ps(0), ps(0), (t0 * t0) * (t0 * t0) * grad2x8(gi0, x0, y0)); + const n1: f32x8 = @select(f32, t1 < ps(0), ps(0), (t1 * t1) * (t1 * t1) * grad2x8(gi1, x1, y1)); + const n2: f32x8 = @select(f32, t2 < ps(0), ps(0), (t2 * t2) * (t2 * t2) * grad2x8(gi2, x2, y2)); - const ret: f32x8 = noise2v_scale * (n0 + n1 + n2); + const ret: f32x8 = noise2x8_scale * (n0 + n1 + n2); std.debug.assert(@reduce(.And, ret >= ps(-1)) and @reduce(.And, ret <= ps(1))); return ret; } diff --git a/src/worldgen.zig b/src/worldgen.zig index 486224b..e4351a3 100644 --- a/src/worldgen.zig +++ b/src/worldgen.zig @@ -2,6 +2,12 @@ const std = @import("std"); const math = @import("math.zig"); const Vector2 = math.Vector2; +const Vector2x8 = math.Vector2x8; + +const f32x8 = math.f32x8; +const i32x8 = math.i32x8; + +const ps = math.ps; const Noise = struct { horizontal_scale: f32, @@ -11,6 +17,10 @@ const Noise = struct { pub fn sample(self: Noise, seed: u64, pos: Vector2) f32 { return math.noise2(seed, pos.divScalar(self.horizontal_scale)) * self.value_amplitude + self.value_median; } + + pub fn samplev(self: Noise, seed: u64, pos: Vector2x8) f32x8 { + return math.noise2x8(seed, pos.divScalar(self.horizontal_scale)) * ps(self.value_amplitude) + ps(self.value_median); + } }; const noise_main: Noise = .{ @@ -31,6 +41,16 @@ pub fn heightF(seed: u64, pos: Vector2) f32 { return @round(main + secondary); } +pub fn heightFv(seed: u64, pos: Vector2x8) f32x8 { + const main = noise_main.samplev(seed, pos); + const secondary = noise_secondary.samplev(seed, pos); + return @round(main + secondary); +} + pub fn heightI(seed: u64, pos: Vector2) i32 { return @intFromFloat(heightF(seed, pos)); } + +pub fn heightIv(seed: u64, pos: Vector2x8) i32x8 { + return @intFromFloat(heightFv(seed, pos)); +}