Vectorized noise (but was it a bottleneck?)

This commit is contained in:
2025-12-02 01:57:21 +01:00
parent 320273b053
commit faddb1f35e
8 changed files with 262 additions and 49 deletions

View File

@@ -21,8 +21,10 @@ const Interator2 = math.Interator2;
const Matrix4x4 = math.Matrix4x4;
const Quaternion = math.Quaternion;
const Vector2 = math.Vector2;
const Vector2x8 = math.Vector2x8;
const Vector3 = math.Vector3;
const Vector4 = math.Vector4;
const ps = math.ps;
const PointLight = extern struct {
positionWS: [3]f32,
@@ -628,27 +630,36 @@ pub fn init(allocator: std.mem.Allocator, engine: *Engine, swapchain: *Swapchain
);
const chunk = chunks.getPtr(chunk_coords3).?;
var it2 = Interator2(usize).init(0, 0, 15, 15);
while (it2.next()) |pos| {
const x, const y = pos;
const fpos = Vector2.init(
@floatFromInt(pos[0]),
@floatFromInt(pos[1]),
).add(origin.asVector2());
var y: usize = 0;
while (y < 16) : (y += 1) {
const fpos0 = Vector2x8.initScalars(
.{ 0, 1, 2, 3, 4, 5, 6, 7 },
@splat(@floatFromInt(y)),
).add(.initVector(origin.asVector2()));
const fpos1 = Vector2x8.initScalars(
.{ 8, 9, 10, 11, 12, 13, 14, 15 },
@splat(@floatFromInt(y)),
).add(.initVector(origin.asVector2()));
const iheight = worldgen.heightI(world_seed, fpos);
const iheight0 = worldgen.heightIv(world_seed, fpos0);
const iheight1 = worldgen.heightIv(world_seed, fpos1);
chunk.blocks[0][y][x] = block_bedrock;
var i: i32 = 0;
while (i < iheight) : (i += 1) {
const iz = i + 1;
const block = if (i + 1 == iheight)
block_grass
else if (i + 4 >= iheight)
block_dirt
else
block_stone;
chunk.blocks[@intCast(iz)][y][x] = block;
const iheightv: [16]i32 = @as([8]i32, iheight0) ++ @as([8]i32, iheight1);
var x: usize = 0;
while (x < 16) : (x += 1) {
chunk.blocks[0][y][x] = block_bedrock;
const iheight = iheightv[x];
var i: i32 = 0;
while (i < iheight) : (i += 1) {
const iz = i + 1;
const block = if (i + 1 == iheight)
block_grass
else if (i + 4 >= iheight)
block_dirt
else
block_stone;
chunk.blocks[@intCast(iz)][y][x] = block;
}
}
}
}

View File

@@ -4,12 +4,35 @@ pub const Iterator3 = @import("math/Iterator3.zig");
pub const Matrix4x4 = @import("math/Matrix4x4.zig").Matrix4x4;
pub const Quaternion = @import("math/Quaternion.zig").Quaternion;
pub const Vector2 = @import("math/Vector2.zig").Vector2;
pub const Vector2x8 = @import("math/Vector2x8.zig").Vector2x8;
pub const Vector3 = @import("math/Vector3.zig").Vector3;
pub const Vector4 = @import("math/Vector4.zig").Vector4;
pub const f32x8 = @Vector(8, f32);
pub const i32x8 = @Vector(8, i32);
pub const u32x8 = @Vector(8, u32);
pub const u64x8 = @Vector(8, u64);
/// Broadcasts a single `f32` into every lane of an `f32x8`.
pub inline fn ps(value: f32) f32x8 {
    const lanes: f32x8 = @splat(value);
    return lanes;
}
/// Broadcasts a single `i32` into every lane of an `i32x8`.
pub inline fn epi32(value: i32) i32x8 {
    const lanes: i32x8 = @splat(value);
    return lanes;
}
/// Broadcasts a single `u32` into every lane of a `u32x8`.
pub inline fn epu32(value: u32) u32x8 {
    const lanes: u32x8 = @splat(value);
    return lanes;
}
/// Broadcasts a single `u64` into every lane of a `u64x8`.
///
/// Despite the `x2` suffix in the name, the result has eight lanes.
pub inline fn epu64x2(value: u64) u64x8 {
    const lanes: u64x8 = @splat(value);
    return lanes;
}
/// Linearly interpolates between `a` and `b`.
///
/// Returns `a` when `t == 0` and `b` when `t == 1`. `t` is not clamped, so
/// values outside `[0, 1]` extrapolate along the same line.
///
/// The weights follow the same convention as the `lerp` methods on the
/// vector types (`self * (1 - t) + other * t`).
pub inline fn lerp(a: f32, b: f32, t: f32) f32 {
    const s = 1.0 - t;
    // `a` is weighted by (1 - t) and `b` by t; the previous form had the
    // weights swapped, which returned `b` at t == 0.
    return a * s + b * t;
}
pub const noise2 = @import("math/noise.zig").noise2;
pub const noise2x8 = @import("math/noise.zig").noise2x8;

View File

@@ -138,7 +138,7 @@ pub const Vector2 = extern struct {
const s = 1.0 - t;
const t_vector: Vector = @splat(t);
const s_vector: Vector = @splat(s);
return .{ .vector = self * t_vector + other * s_vector };
return .{ .vector = self * s_vector + other * t_vector };
}
pub inline fn rotate(self: Vector2, angle_rad: f32) Vector2 {

165
src/math/Vector2x8.zig Normal file
View File

@@ -0,0 +1,165 @@
const std = @import("std");
const math = @import("../math.zig");
const Vector2 = math.Vector2;
const f32x8 = math.f32x8;
const ps = math.ps;
/// Eight 2D vectors stored in structure-of-arrays form: lane `i` of `x` and
/// lane `i` of `y` together make up the i-th vector. All operations act on the
/// eight vectors independently, lane by lane.
pub const Vector2x8 = extern struct {
    /// X components of the eight vectors.
    x: f32x8,
    /// Y components of the eight vectors.
    y: f32x8,

    // NOTE: these constants previously called `Vector2x8.init`, which this
    // struct does not declare; because declarations are analyzed lazily, that
    // only failed to compile once a constant was first referenced.

    /// All eight vectors are (0, 0).
    pub const zero = Vector2x8.initScalar(0, 0);
    /// All eight vectors are (1, 1).
    pub const one = Vector2x8.initScalar(1, 1);
    /// All eight vectors are (1, 0).
    pub const unit_x = Vector2x8.initScalar(1, 0);
    /// All eight vectors are (0, 1).
    pub const unit_y = Vector2x8.initScalar(0, 1);

    // --- INIT ---

    /// Builds eight copies of the vector (`x`, `y`).
    pub inline fn initScalar(x: f32, y: f32) Vector2x8 {
        return .{ .x = ps(x), .y = ps(y) };
    }

    /// Builds the pack from per-lane X and Y components.
    pub inline fn initScalars(x: f32x8, y: f32x8) Vector2x8 {
        return .{ .x = x, .y = y };
    }

    /// Builds eight copies of `vector`.
    pub inline fn initVector(vector: Vector2) Vector2x8 {
        return .{ .x = ps(vector.getX()), .y = ps(vector.getY()) };
    }

    /// Converts eight `Vector2` values (array-of-structures) into the
    /// structure-of-arrays layout; lane `i` holds `vectors[i]`.
    ///
    /// NOTE(review): the bit casts assume each `Vector2` is exactly two
    /// consecutive `f32` values in (x, y) order — confirm against Vector2.zig.
    pub inline fn initVectors(vectors: [8]Vector2) Vector2x8 {
        // v0 = x0 y0 x1 y1 x2 y2 x3 y3, v1 = x4 y4 x5 y5 x6 y6 x7 y7.
        const v0: f32x8 = @as([8]f32, @bitCast(vectors[0..4].*));
        const v1: f32x8 = @as([8]f32, @bitCast(vectors[4..8].*));
        // In a @shuffle mask a non-negative index selects from the first
        // operand and `~i` selects element `i` of the second operand.
        // Even elements are the X components, odd elements the Y components.
        const x: f32x8 = @shuffle(f32, v0, v1, [_]i32{ 0, 2, 4, 6, ~@as(i32, 0), ~@as(i32, 2), ~@as(i32, 4), ~@as(i32, 6) });
        const y: f32x8 = @shuffle(f32, v0, v1, [_]i32{ 1, 3, 5, 7, ~@as(i32, 1), ~@as(i32, 3), ~@as(i32, 5), ~@as(i32, 7) });
        return .{ .x = x, .y = y };
    }

    // --- CONVERSION ---

    /// Converts back to eight `Vector2` values; element `i` is built from
    /// lane `i` of `x` and `y`. Inverse of `initVectors`.
    ///
    /// NOTE(review): relies on the same `Vector2` layout assumption as
    /// `initVectors`.
    pub inline fn asVectors(self: Vector2x8) [8]Vector2 {
        // Interleave x/y pairs: lanes 0..3 go to v0, lanes 4..7 go to v1.
        const v0: f32x8 = @shuffle(f32, self.x, self.y, [_]i32{ 0, ~@as(i32, 0), 1, ~@as(i32, 1), 2, ~@as(i32, 2), 3, ~@as(i32, 3) });
        const v1: f32x8 = @shuffle(f32, self.x, self.y, [_]i32{ 4, ~@as(i32, 4), 5, ~@as(i32, 5), 6, ~@as(i32, 6), 7, ~@as(i32, 7) });
        return @as([4]Vector2, @bitCast(@as([8]f32, v0))) ++ @as([4]Vector2, @bitCast(@as([8]f32, v1)));
    }

    // --- COMPONENT-WISE ---

    /// Lane-wise `self + other`.
    pub inline fn add(self: Vector2x8, other: Vector2x8) Vector2x8 {
        return .{
            .x = self.x + other.x,
            .y = self.y + other.y,
        };
    }

    /// Lane-wise `self - other`.
    pub inline fn sub(self: Vector2x8, other: Vector2x8) Vector2x8 {
        return .{
            .x = self.x - other.x,
            .y = self.y - other.y,
        };
    }

    /// Lane-wise, component-wise product `self * other`.
    pub inline fn mul(self: Vector2x8, other: Vector2x8) Vector2x8 {
        return .{
            .x = self.x * other.x,
            .y = self.y * other.y,
        };
    }

    /// Lane-wise, component-wise quotient `self / other`.
    pub inline fn div(self: Vector2x8, other: Vector2x8) Vector2x8 {
        return .{
            .x = self.x / other.x,
            .y = self.y / other.y,
        };
    }

    /// Negates both components of every vector.
    pub inline fn negate(self: Vector2x8) Vector2x8 {
        return .{
            .x = -self.x,
            .y = -self.y,
        };
    }

    /// Multiplies every vector by the same scalar.
    pub inline fn mulScalar(self: Vector2x8, scalar: f32) Vector2x8 {
        return .{
            .x = self.x * ps(scalar),
            .y = self.y * ps(scalar),
        };
    }

    /// Multiplies vector `i` by `scalar[i]`.
    pub inline fn mulScalars(self: Vector2x8, scalar: f32x8) Vector2x8 {
        return .{
            .x = self.x * scalar,
            .y = self.y * scalar,
        };
    }

    /// Divides every vector by the same scalar.
    pub inline fn divScalar(self: Vector2x8, scalar: f32) Vector2x8 {
        return .{
            .x = self.x / ps(scalar),
            .y = self.y / ps(scalar),
        };
    }

    /// Divides vector `i` by `scalar[i]`.
    pub inline fn divScalars(self: Vector2x8, scalar: f32x8) Vector2x8 {
        return .{
            .x = self.x / scalar,
            .y = self.y / scalar,
        };
    }

    // --- OTHER ---

    /// Euclidean length of each vector.
    pub inline fn len(self: Vector2x8) f32x8 {
        return @sqrt(self.lenSquared());
    }

    /// Squared Euclidean length of each vector (`x*x + y*y`).
    pub inline fn lenSquared(self: Vector2x8) f32x8 {
        return self.x * self.x + self.y * self.y;
    }

    /// Divides each vector by its own length.
    ///
    /// There is no zero-length guard: a lane whose length is zero is divided
    /// by zero.
    pub inline fn normalize(self: Vector2x8) Vector2x8 {
        const len_vector: f32x8 = self.len();
        return .{
            .x = self.x / len_vector,
            .y = self.y / len_vector,
        };
    }

    /// Per-lane dot product (`x1*x2 + y1*y2`).
    pub inline fn dot(self: Vector2x8, other: Vector2x8) f32x8 {
        return self.x * other.x + self.y * other.y;
    }

    /// Per-lane 2D cross product (`x1*y2 - y1*x2`).
    pub inline fn cross(self: Vector2x8, other: Vector2x8) f32x8 {
        return self.x * other.y - self.y * other.x;
    }

    /// Per-lane linear interpolation: yields `self` where `t == 0` and
    /// `other` where `t == 1`. `t` is not clamped.
    pub inline fn lerp(self: Vector2x8, other: Vector2x8, t: f32x8) Vector2x8 {
        const s: f32x8 = ps(1.0) - t;
        return .{
            .x = self.x * s + other.x * t,
            .y = self.y * s + other.y * t,
        };
    }

    /// Rotates every vector by the same angle:
    /// `(x*cos - y*sin, x*sin + y*cos)`.
    pub inline fn rotate(self: Vector2x8, angle_rad: f32) Vector2x8 {
        const c = @cos(angle_rad);
        const s = @sin(angle_rad);
        return .{
            .x = self.x * ps(c) - self.y * ps(s),
            .y = self.x * ps(s) + self.y * ps(c),
        };
    }

    /// Rotates vector `i` by `angle_rad[i]`, using the same formula as
    /// `rotate`.
    pub inline fn rotatev(self: Vector2x8, angle_rad: f32x8) Vector2x8 {
        const c = @cos(angle_rad);
        const s = @sin(angle_rad);
        return .{
            .x = self.x * c - self.y * s,
            .y = self.x * s + self.y * c,
        };
    }
};

View File

@@ -154,7 +154,7 @@ pub const Vector3 = extern struct {
const s = 1.0 - t;
const t_vector: Vector = @splat(t);
const s_vector: Vector = @splat(s);
return .{ .vector = self * t_vector + other * s_vector };
return .{ .vector = self * s_vector + other * t_vector };
}
pub inline fn rotate(self: Vector3, quaternion: Quaternion) Vector2 {

View File

@@ -152,6 +152,6 @@ pub const Vector4 = extern struct {
const s = 1.0 - t;
const t_vector: Vector = @splat(t);
const s_vector: Vector = @splat(s);
return .{ .vector = self * t_vector + other * s_vector };
return .{ .vector = self * s_vector + other * t_vector };
}
};

View File

@@ -1,27 +1,18 @@
const std = @import("std");
const math = @import("../math.zig");
const Vector2 = @import("Vector2.zig").Vector2;
const Vector2 = math.Vector2;
const Vector2x8 = math.Vector2x8;
const f32x8 = @Vector(8, f32);
const i32x8 = @Vector(8, i32);
const u32x8 = @Vector(8, u32);
const u64x8 = @Vector(8, u64);
const f32x8 = math.f32x8;
const i32x8 = math.i32x8;
const u32x8 = math.u32x8;
const u64x8 = math.u64x8;
inline fn ps(value: f32) f32x8 {
return @splat(value);
}
inline fn epi32(value: i32) i32x8 {
return @splat(value);
}
inline fn epu32(value: u32) u32x8 {
return @splat(value);
}
inline fn epu64x2(value: u64) u64x8 {
return @splat(value);
}
const ps = math.ps;
const epi32 = math.epi32;
const epu32 = math.epu32;
const epu64x2 = math.epu64x2;
const prime_x: u64 = 17061574742423305691;
const prime_y: u64 = 10555943830568207707;
@@ -56,7 +47,7 @@ fn grad2(hash: i32, x: f32, y: f32) f32 {
return (if (h & 0b001 != 0) -u else u) + (if (h & 0b010 != 0) -2 * v else 2 * v);
}
fn grad2v(hash: i32x8, x: f32x8, y: f32x8) f32x8 {
fn grad2x8(hash: i32x8, x: f32x8, y: f32x8) f32x8 {
const h: i32x8 = hash & epi32(0b111);
const u: f32x8 = @select(f32, h < epi32(4), x, y);
const v: f32x8 = @select(f32, h < epi32(4), y, x);
@@ -73,7 +64,7 @@ const G2v: f32x8 = ps(G2);
// NOTE No idea why this value, derived experimentally
const noise2_scale: f32 = 34.11;
const noise2v_scale: f32x8 = ps(noise2_scale);
const noise2x8_scale: f32x8 = ps(noise2_scale);
pub fn noise2(seed: u64, v: Vector2) f32 {
const x: f32, const y: f32 = v.asArray();
@@ -115,7 +106,10 @@ pub fn noise2(seed: u64, v: Vector2) f32 {
return ret;
}
pub fn noise2v(seed: u64, x: f32x8, y: f32x8) f32x8 {
pub fn noise2x8(seed: u64, v: Vector2x8) f32x8 {
const x: f32x8 = v.x;
const y: f32x8 = v.y;
const s: f32x8 = (x + y) * F2v;
const xs: f32x8 = x + s;
const ys: f32x8 = y + s;
@@ -144,11 +138,11 @@ pub fn noise2v(seed: u64, x: f32x8, y: f32x8) f32x8 {
const gi1: i32x8 = permv(seed, i + _i1, j + _j1);
const gi2: i32x8 = permv(seed, i + epi32(1), j + epi32(1));
const n0: f32x8 = @select(f32, t0 < ps(0), ps(0), (t0 * t0) * (t0 * t0) * grad2(gi0, x0, y0));
const n1: f32x8 = @select(f32, t1 < ps(0), ps(0), (t1 * t1) * (t1 * t1) * grad2(gi1, x1, y1));
const n2: f32x8 = @select(f32, t2 < ps(0), ps(0), (t2 * t2) * (t2 * t2) * grad2(gi2, x2, y2));
const n0: f32x8 = @select(f32, t0 < ps(0), ps(0), (t0 * t0) * (t0 * t0) * grad2x8(gi0, x0, y0));
const n1: f32x8 = @select(f32, t1 < ps(0), ps(0), (t1 * t1) * (t1 * t1) * grad2x8(gi1, x1, y1));
const n2: f32x8 = @select(f32, t2 < ps(0), ps(0), (t2 * t2) * (t2 * t2) * grad2x8(gi2, x2, y2));
const ret: f32x8 = noise2v_scale * (n0 + n1 + n2);
const ret: f32x8 = noise2x8_scale * (n0 + n1 + n2);
std.debug.assert(@reduce(.And, ret >= ps(-1)) and @reduce(.And, ret <= ps(1)));
return ret;
}

View File

@@ -2,6 +2,12 @@ const std = @import("std");
const math = @import("math.zig");
const Vector2 = math.Vector2;
const Vector2x8 = math.Vector2x8;
const f32x8 = math.f32x8;
const i32x8 = math.i32x8;
const ps = math.ps;
const Noise = struct {
horizontal_scale: f32,
@@ -11,6 +17,10 @@ const Noise = struct {
/// Samples this noise layer at `pos`.
///
/// The position is divided by `horizontal_scale` before the noise lookup, and
/// the raw noise value is mapped to `raw * value_amplitude + value_median`.
pub fn sample(self: Noise, seed: u64, pos: Vector2) f32 {
    const scaled_pos = pos.divScalar(self.horizontal_scale);
    const raw: f32 = math.noise2(seed, scaled_pos);
    return raw * self.value_amplitude + self.value_median;
}
/// Eight-lane variant of `sample`: samples this noise layer at eight
/// positions at once.
///
/// Each position is divided by `horizontal_scale` before the noise lookup, and
/// each raw noise value is mapped to `raw * value_amplitude + value_median`.
pub fn samplev(self: Noise, seed: u64, pos: Vector2x8) f32x8 {
    const scaled_pos = pos.divScalar(self.horizontal_scale);
    const raw: f32x8 = math.noise2x8(seed, scaled_pos);
    return raw * ps(self.value_amplitude) + ps(self.value_median);
}
};
const noise_main: Noise = .{
@@ -31,6 +41,16 @@ pub fn heightF(seed: u64, pos: Vector2) f32 {
return @round(main + secondary);
}
/// Computes terrain heights for eight positions at once.
///
/// Sums the `noise_main` and `noise_secondary` samples for each lane and
/// rounds the sum to the nearest whole number (still as a float).
pub fn heightFv(seed: u64, pos: Vector2x8) f32x8 {
    const combined: f32x8 = noise_main.samplev(seed, pos) + noise_secondary.samplev(seed, pos);
    return @round(combined);
}
/// Integer form of `heightF`: evaluates the height at `pos` and converts it
/// to `i32`.
///
/// The conversion uses `@intFromFloat`, so the float height must be
/// representable as an `i32`.
pub fn heightI(seed: u64, pos: Vector2) i32 {
    const height: f32 = heightF(seed, pos);
    return @intFromFloat(height);
}
/// Integer form of `heightFv`: evaluates eight heights and converts each lane
/// to `i32`.
///
/// The conversion uses `@intFromFloat`, so every lane must be representable
/// as an `i32`.
pub fn heightIv(seed: u64, pos: Vector2x8) i32x8 {
    const heights: f32x8 = heightFv(seed, pos);
    return @intFromFloat(heights);
}