Vectorized noise (but was it a bottleneck?)
This commit is contained in:
49
src/Game.zig
49
src/Game.zig
@@ -21,8 +21,10 @@ const Interator2 = math.Interator2;
|
||||
const Matrix4x4 = math.Matrix4x4;
|
||||
const Quaternion = math.Quaternion;
|
||||
const Vector2 = math.Vector2;
|
||||
const Vector2x8 = math.Vector2x8;
|
||||
const Vector3 = math.Vector3;
|
||||
const Vector4 = math.Vector4;
|
||||
const ps = math.ps;
|
||||
|
||||
const PointLight = extern struct {
|
||||
positionWS: [3]f32,
|
||||
@@ -628,27 +630,36 @@ pub fn init(allocator: std.mem.Allocator, engine: *Engine, swapchain: *Swapchain
|
||||
);
|
||||
const chunk = chunks.getPtr(chunk_coords3).?;
|
||||
|
||||
var it2 = Interator2(usize).init(0, 0, 15, 15);
|
||||
while (it2.next()) |pos| {
|
||||
const x, const y = pos;
|
||||
const fpos = Vector2.init(
|
||||
@floatFromInt(pos[0]),
|
||||
@floatFromInt(pos[1]),
|
||||
).add(origin.asVector2());
|
||||
var y: usize = 0;
|
||||
while (y < 16) : (y += 1) {
|
||||
const fpos0 = Vector2x8.initScalars(
|
||||
.{ 0, 1, 2, 3, 4, 5, 6, 7 },
|
||||
@splat(@floatFromInt(y)),
|
||||
).add(.initVector(origin.asVector2()));
|
||||
const fpos1 = Vector2x8.initScalars(
|
||||
.{ 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
@splat(@floatFromInt(y)),
|
||||
).add(.initVector(origin.asVector2()));
|
||||
|
||||
const iheight = worldgen.heightI(world_seed, fpos);
|
||||
const iheight0 = worldgen.heightIv(world_seed, fpos0);
|
||||
const iheight1 = worldgen.heightIv(world_seed, fpos1);
|
||||
|
||||
chunk.blocks[0][y][x] = block_bedrock;
|
||||
var i: i32 = 0;
|
||||
while (i < iheight) : (i += 1) {
|
||||
const iz = i + 1;
|
||||
const block = if (i + 1 == iheight)
|
||||
block_grass
|
||||
else if (i + 4 >= iheight)
|
||||
block_dirt
|
||||
else
|
||||
block_stone;
|
||||
chunk.blocks[@intCast(iz)][y][x] = block;
|
||||
const iheightv: [16]i32 = @as([8]i32, iheight0) ++ @as([8]i32, iheight1);
|
||||
var x: usize = 0;
|
||||
while (x < 16) : (x += 1) {
|
||||
chunk.blocks[0][y][x] = block_bedrock;
|
||||
const iheight = iheightv[x];
|
||||
var i: i32 = 0;
|
||||
while (i < iheight) : (i += 1) {
|
||||
const iz = i + 1;
|
||||
const block = if (i + 1 == iheight)
|
||||
block_grass
|
||||
else if (i + 4 >= iheight)
|
||||
block_dirt
|
||||
else
|
||||
block_stone;
|
||||
chunk.blocks[@intCast(iz)][y][x] = block;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
23
src/math.zig
23
src/math.zig
@@ -4,12 +4,35 @@ pub const Iterator3 = @import("math/Iterator3.zig");
|
||||
pub const Matrix4x4 = @import("math/Matrix4x4.zig").Matrix4x4;
|
||||
pub const Quaternion = @import("math/Quaternion.zig").Quaternion;
|
||||
pub const Vector2 = @import("math/Vector2.zig").Vector2;
|
||||
pub const Vector2x8 = @import("math/Vector2x8.zig").Vector2x8;
|
||||
pub const Vector3 = @import("math/Vector3.zig").Vector3;
|
||||
pub const Vector4 = @import("math/Vector4.zig").Vector4;
|
||||
|
||||
pub const f32x8 = @Vector(8, f32);
|
||||
pub const i32x8 = @Vector(8, i32);
|
||||
pub const u32x8 = @Vector(8, u32);
|
||||
pub const u64x8 = @Vector(8, u64);
|
||||
|
||||
/// Broadcasts a single `f32` into every lane of an `f32x8`.
pub inline fn ps(value: f32) f32x8 {
    const lanes: f32x8 = @splat(value);
    return lanes;
}
|
||||
|
||||
/// Broadcasts a single `i32` into every lane of an `i32x8`.
pub inline fn epi32(value: i32) i32x8 {
    const lanes: i32x8 = @splat(value);
    return lanes;
}
|
||||
|
||||
/// Broadcasts a single `u32` into every lane of a `u32x8`.
pub inline fn epu32(value: u32) u32x8 {
    const lanes: u32x8 = @splat(value);
    return lanes;
}
|
||||
|
||||
/// Broadcasts a single `u64` into every lane of a `u64x8`.
/// NOTE(review): despite the `x2` suffix in the name, the result has eight lanes.
pub inline fn epu64x2(value: u64) u64x8 {
    const lanes: u64x8 = @splat(value);
    return lanes;
}
|
||||
|
||||
/// Linearly interpolates between `a` and `b`.
///
/// Returns `a` when `t == 0` and `b` when `t == 1`. The weights were previously
/// swapped (`a * t + b * s`), which returned `b` at `t == 0`; the form used here
/// matches `Vector2x8.lerp` (`self * s + other * t`).
/// `t` is not clamped, so values outside `[0, 1]` extrapolate.
pub inline fn lerp(a: f32, b: f32, t: f32) f32 {
    const s = 1.0 - t;
    return a * s + b * t;
}
|
||||
|
||||
pub const noise2 = @import("math/noise.zig").noise2;
|
||||
pub const noise2x8 = @import("math/noise.zig").noise2x8;
|
||||
|
||||
@@ -138,7 +138,7 @@ pub const Vector2 = extern struct {
|
||||
const s = 1.0 - t;
|
||||
const t_vector: Vector = @splat(t);
|
||||
const s_vector: Vector = @splat(s);
|
||||
return .{ .vector = self * t_vector + other * s_vector };
|
||||
return .{ .vector = self * s_vector + other * t_vector };
|
||||
}
|
||||
|
||||
pub inline fn rotate(self: Vector2, angle_rad: f32) Vector2 {
|
||||
|
||||
165
src/math/Vector2x8.zig
Normal file
165
src/math/Vector2x8.zig
Normal file
@@ -0,0 +1,165 @@
|
||||
const std = @import("std");
|
||||
const math = @import("../math.zig");
|
||||
|
||||
const Vector2 = math.Vector2;
|
||||
|
||||
const f32x8 = math.f32x8;
|
||||
|
||||
const ps = math.ps;
|
||||
|
||||
/// Eight 2D vectors stored in structure-of-arrays form: lane `i` of `x` and
/// lane `i` of `y` together form the i-th vector.
pub const Vector2x8 = extern struct {
    x: f32x8,
    y: f32x8,

    // Built with `initScalar`: this struct declares no plain `init`, so the
    // previous `Vector2x8.init(...)` initializers could not compile once any
    // of these constants was referenced.
    pub const zero: Vector2x8 = Vector2x8.initScalar(0, 0);
    pub const one: Vector2x8 = Vector2x8.initScalar(1, 1);
    pub const unit_x: Vector2x8 = Vector2x8.initScalar(1, 0);
    pub const unit_y: Vector2x8 = Vector2x8.initScalar(0, 1);

    // --- INIT ---

    /// Builds eight identical vectors `(x, y)`.
    pub inline fn initScalar(x: f32, y: f32) Vector2x8 {
        return .{ .x = ps(x), .y = ps(y) };
    }

    /// Builds eight vectors from per-lane x and y components.
    pub inline fn initScalars(x: f32x8, y: f32x8) Vector2x8 {
        return .{ .x = x, .y = y };
    }

    /// Broadcasts one `Vector2` into all eight lanes.
    pub inline fn initVector(vector: Vector2) Vector2x8 {
        return .{ .x = ps(vector.getX()), .y = ps(vector.getY()) };
    }

    /// Converts eight `Vector2` values (array-of-structures) into lane form.
    /// The bit casts treat each `Vector2` as two consecutive `f32` values,
    /// x then y — NOTE(review): `Vector2` is defined elsewhere; confirm its layout.
    pub inline fn initVectors(vectors: [8]Vector2) Vector2x8 {
        // lo = x0 y0 x1 y1 x2 y2 x3 y3, hi = x4 y4 x5 y5 x6 y6 x7 y7
        const lo: f32x8 = @as([8]f32, @bitCast(vectors[0..4].*));
        const hi: f32x8 = @as([8]f32, @bitCast(vectors[4..8].*));
        // In a @shuffle mask, `n` picks element n of the first operand and
        // `~n` picks element n of the second operand.
        const x_mask = [_]i32{ 0, 2, 4, 6, ~@as(i32, 0), ~@as(i32, 2), ~@as(i32, 4), ~@as(i32, 6) };
        const y_mask = [_]i32{ 1, 3, 5, 7, ~@as(i32, 1), ~@as(i32, 3), ~@as(i32, 5), ~@as(i32, 7) };
        return .{
            .x = @shuffle(f32, lo, hi, x_mask),
            .y = @shuffle(f32, lo, hi, y_mask),
        };
    }

    // --- CONVERSION ---

    /// Converts back to eight separate `Vector2` values (inverse of `initVectors`).
    pub inline fn asVectors(self: Vector2x8) [8]Vector2 {
        // Interleave x (first operand) and y (second operand) lanes:
        // lo = x0 y0 x1 y1 x2 y2 x3 y3, hi = x4 y4 x5 y5 x6 y6 x7 y7
        const lo_mask = [_]i32{ 0, ~@as(i32, 0), 1, ~@as(i32, 1), 2, ~@as(i32, 2), 3, ~@as(i32, 3) };
        const hi_mask = [_]i32{ 4, ~@as(i32, 4), 5, ~@as(i32, 5), 6, ~@as(i32, 6), 7, ~@as(i32, 7) };
        const lo: f32x8 = @shuffle(f32, self.x, self.y, lo_mask);
        const hi: f32x8 = @shuffle(f32, self.x, self.y, hi_mask);
        return @as([4]Vector2, @bitCast(@as([8]f32, lo))) ++ @as([4]Vector2, @bitCast(@as([8]f32, hi)));
    }

    // --- COMPONENT-WISE ---

    /// Lane-wise `self + other`.
    pub inline fn add(self: Vector2x8, other: Vector2x8) Vector2x8 {
        return .{
            .x = self.x + other.x,
            .y = self.y + other.y,
        };
    }

    /// Lane-wise `self - other`.
    pub inline fn sub(self: Vector2x8, other: Vector2x8) Vector2x8 {
        return .{
            .x = self.x - other.x,
            .y = self.y - other.y,
        };
    }

    /// Lane-wise component product `self * other`.
    pub inline fn mul(self: Vector2x8, other: Vector2x8) Vector2x8 {
        return .{
            .x = self.x * other.x,
            .y = self.y * other.y,
        };
    }

    /// Lane-wise component quotient `self / other`.
    pub inline fn div(self: Vector2x8, other: Vector2x8) Vector2x8 {
        return .{
            .x = self.x / other.x,
            .y = self.y / other.y,
        };
    }

    /// Negates both components of every lane.
    pub inline fn negate(self: Vector2x8) Vector2x8 {
        return .{
            .x = -self.x,
            .y = -self.y,
        };
    }

    /// Multiplies every vector by the same scalar.
    pub inline fn mulScalar(self: Vector2x8, scalar: f32) Vector2x8 {
        return self.mulScalars(ps(scalar));
    }

    /// Multiplies the vector in lane `i` by `scalar[i]`.
    pub inline fn mulScalars(self: Vector2x8, scalar: f32x8) Vector2x8 {
        return .{
            .x = self.x * scalar,
            .y = self.y * scalar,
        };
    }

    /// Divides every vector by the same scalar.
    pub inline fn divScalar(self: Vector2x8, scalar: f32) Vector2x8 {
        return self.divScalars(ps(scalar));
    }

    /// Divides the vector in lane `i` by `scalar[i]`.
    pub inline fn divScalars(self: Vector2x8, scalar: f32x8) Vector2x8 {
        return .{
            .x = self.x / scalar,
            .y = self.y / scalar,
        };
    }

    // --- OTHER ---

    /// Per-lane Euclidean length.
    pub inline fn len(self: Vector2x8) f32x8 {
        return @sqrt(self.lenSquared());
    }

    /// Per-lane squared length (`x * x + y * y`).
    pub inline fn lenSquared(self: Vector2x8) f32x8 {
        return self.dot(self);
    }

    /// Divides every vector by its own length.
    /// There is no zero-length guard: such a lane is divided by zero.
    pub inline fn normalize(self: Vector2x8) Vector2x8 {
        return self.divScalars(self.len());
    }

    /// Per-lane dot product.
    pub inline fn dot(self: Vector2x8, other: Vector2x8) f32x8 {
        return self.x * other.x + self.y * other.y;
    }

    /// Per-lane 2D cross product (`x0 * y1 - y0 * x1`).
    pub inline fn cross(self: Vector2x8, other: Vector2x8) f32x8 {
        return self.x * other.y - self.y * other.x;
    }

    /// Per-lane linear interpolation: yields `self` where `t == 0` and `other`
    /// where `t == 1`. `t` is not clamped.
    pub inline fn lerp(self: Vector2x8, other: Vector2x8, t: f32x8) Vector2x8 {
        const s: f32x8 = ps(1.0) - t;
        return .{
            .x = self.x * s + other.x * t,
            .y = self.y * s + other.y * t,
        };
    }

    /// Rotates every vector by the same angle (radians) using
    /// `x' = x*cos - y*sin`, `y' = x*sin + y*cos`.
    pub inline fn rotate(self: Vector2x8, angle_rad: f32) Vector2x8 {
        const c = @cos(angle_rad);
        const s = @sin(angle_rad);
        return .{
            .x = self.x * ps(c) - self.y * ps(s),
            .y = self.x * ps(s) + self.y * ps(c),
        };
    }

    /// Rotates the vector in lane `i` by `angle_rad[i]` (radians).
    pub inline fn rotatev(self: Vector2x8, angle_rad: f32x8) Vector2x8 {
        const c = @cos(angle_rad);
        const s = @sin(angle_rad);
        return .{
            .x = self.x * c - self.y * s,
            .y = self.x * s + self.y * c,
        };
    }
};
|
||||
@@ -154,7 +154,7 @@ pub const Vector3 = extern struct {
|
||||
const s = 1.0 - t;
|
||||
const t_vector: Vector = @splat(t);
|
||||
const s_vector: Vector = @splat(s);
|
||||
return .{ .vector = self * t_vector + other * s_vector };
|
||||
return .{ .vector = self * s_vector + other * t_vector };
|
||||
}
|
||||
|
||||
pub inline fn rotate(self: Vector3, quaternion: Quaternion) Vector2 {
|
||||
|
||||
@@ -152,6 +152,6 @@ pub const Vector4 = extern struct {
|
||||
const s = 1.0 - t;
|
||||
const t_vector: Vector = @splat(t);
|
||||
const s_vector: Vector = @splat(s);
|
||||
return .{ .vector = self * t_vector + other * s_vector };
|
||||
return .{ .vector = self * s_vector + other * t_vector };
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,27 +1,18 @@
|
||||
const std = @import("std");
|
||||
const math = @import("../math.zig");
|
||||
|
||||
const Vector2 = @import("Vector2.zig").Vector2;
|
||||
const Vector2 = math.Vector2;
|
||||
const Vector2x8 = math.Vector2x8;
|
||||
|
||||
const f32x8 = @Vector(8, f32);
|
||||
const i32x8 = @Vector(8, i32);
|
||||
const u32x8 = @Vector(8, u32);
|
||||
const u64x8 = @Vector(8, u64);
|
||||
const f32x8 = math.f32x8;
|
||||
const i32x8 = math.i32x8;
|
||||
const u32x8 = math.u32x8;
|
||||
const u64x8 = math.u64x8;
|
||||
|
||||
inline fn ps(value: f32) f32x8 {
|
||||
return @splat(value);
|
||||
}
|
||||
|
||||
inline fn epi32(value: i32) i32x8 {
|
||||
return @splat(value);
|
||||
}
|
||||
|
||||
inline fn epu32(value: u32) u32x8 {
|
||||
return @splat(value);
|
||||
}
|
||||
|
||||
inline fn epu64x2(value: u64) u64x8 {
|
||||
return @splat(value);
|
||||
}
|
||||
const ps = math.ps;
|
||||
const epi32 = math.epi32;
|
||||
const epu32 = math.epu32;
|
||||
const epu64x2 = math.epu64x2;
|
||||
|
||||
const prime_x: u64 = 17061574742423305691;
|
||||
const prime_y: u64 = 10555943830568207707;
|
||||
@@ -56,7 +47,7 @@ fn grad2(hash: i32, x: f32, y: f32) f32 {
|
||||
return (if (h & 0b001 != 0) -u else u) + (if (h & 0b010 != 0) -2 * v else 2 * v);
|
||||
}
|
||||
|
||||
fn grad2v(hash: i32x8, x: f32x8, y: f32x8) f32x8 {
|
||||
fn grad2x8(hash: i32x8, x: f32x8, y: f32x8) f32x8 {
|
||||
const h: i32x8 = hash & epi32(0b111);
|
||||
const u: f32x8 = @select(f32, h < epi32(4), x, y);
|
||||
const v: f32x8 = @select(f32, h < epi32(4), y, x);
|
||||
@@ -73,7 +64,7 @@ const G2v: f32x8 = ps(G2);
|
||||
|
||||
// NOTE No idea why this value, derived experimentally
|
||||
const noise2_scale: f32 = 34.11;
|
||||
const noise2v_scale: f32x8 = ps(noise2_scale);
|
||||
const noise2x8_scale: f32x8 = ps(noise2_scale);
|
||||
|
||||
pub fn noise2(seed: u64, v: Vector2) f32 {
|
||||
const x: f32, const y: f32 = v.asArray();
|
||||
@@ -115,7 +106,10 @@ pub fn noise2(seed: u64, v: Vector2) f32 {
|
||||
return ret;
|
||||
}
|
||||
|
||||
pub fn noise2v(seed: u64, x: f32x8, y: f32x8) f32x8 {
|
||||
pub fn noise2x8(seed: u64, v: Vector2x8) f32x8 {
|
||||
const x: f32x8 = v.x;
|
||||
const y: f32x8 = v.y;
|
||||
|
||||
const s: f32x8 = (x + y) * F2v;
|
||||
const xs: f32x8 = x + s;
|
||||
const ys: f32x8 = y + s;
|
||||
@@ -144,11 +138,11 @@ pub fn noise2v(seed: u64, x: f32x8, y: f32x8) f32x8 {
|
||||
const gi1: i32x8 = permv(seed, i + _i1, j + _j1);
|
||||
const gi2: i32x8 = permv(seed, i + epi32(1), j + epi32(1));
|
||||
|
||||
const n0: f32x8 = @select(f32, t0 < ps(0), ps(0), (t0 * t0) * (t0 * t0) * grad2(gi0, x0, y0));
|
||||
const n1: f32x8 = @select(f32, t1 < ps(0), ps(0), (t1 * t1) * (t1 * t1) * grad2(gi1, x1, y1));
|
||||
const n2: f32x8 = @select(f32, t2 < ps(0), ps(0), (t2 * t2) * (t2 * t2) * grad2(gi2, x2, y2));
|
||||
const n0: f32x8 = @select(f32, t0 < ps(0), ps(0), (t0 * t0) * (t0 * t0) * grad2x8(gi0, x0, y0));
|
||||
const n1: f32x8 = @select(f32, t1 < ps(0), ps(0), (t1 * t1) * (t1 * t1) * grad2x8(gi1, x1, y1));
|
||||
const n2: f32x8 = @select(f32, t2 < ps(0), ps(0), (t2 * t2) * (t2 * t2) * grad2x8(gi2, x2, y2));
|
||||
|
||||
const ret: f32x8 = noise2v_scale * (n0 + n1 + n2);
|
||||
const ret: f32x8 = noise2x8_scale * (n0 + n1 + n2);
|
||||
std.debug.assert(@reduce(.And, ret >= ps(-1)) and @reduce(.And, ret <= ps(1)));
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,12 @@ const std = @import("std");
|
||||
const math = @import("math.zig");
|
||||
|
||||
const Vector2 = math.Vector2;
|
||||
const Vector2x8 = math.Vector2x8;
|
||||
|
||||
const f32x8 = math.f32x8;
|
||||
const i32x8 = math.i32x8;
|
||||
|
||||
const ps = math.ps;
|
||||
|
||||
const Noise = struct {
|
||||
horizontal_scale: f32,
|
||||
@@ -11,6 +17,10 @@ const Noise = struct {
|
||||
/// Samples `math.noise2` at `pos` divided by `horizontal_scale`, then scales
/// the result by `value_amplitude` and offsets it by `value_median`.
pub fn sample(self: Noise, seed: u64, pos: Vector2) f32 {
    const scaled_pos = pos.divScalar(self.horizontal_scale);
    const raw = math.noise2(seed, scaled_pos);
    return raw * self.value_amplitude + self.value_median;
}
|
||||
|
||||
/// Eight-lane counterpart of `sample`: samples `math.noise2x8` at `pos`
/// divided by `horizontal_scale`, then applies `value_amplitude` and
/// `value_median` to every lane.
pub fn samplev(self: Noise, seed: u64, pos: Vector2x8) f32x8 {
    const scaled_pos = pos.divScalar(self.horizontal_scale);
    const raw = math.noise2x8(seed, scaled_pos);
    return raw * ps(self.value_amplitude) + ps(self.value_median);
}
|
||||
};
|
||||
|
||||
const noise_main: Noise = .{
|
||||
@@ -31,6 +41,16 @@ pub fn heightF(seed: u64, pos: Vector2) f32 {
|
||||
return @round(main + secondary);
|
||||
}
|
||||
|
||||
/// Eight-lane terrain height as floats: the sum of the main and secondary
/// noise samples at `pos`, rounded per lane.
pub fn heightFv(seed: u64, pos: Vector2x8) f32x8 {
    const combined: f32x8 = noise_main.samplev(seed, pos) + noise_secondary.samplev(seed, pos);
    return @round(combined);
}
|
||||
|
||||
/// Terrain height at `pos` as an integer: `heightF` converted to `i32`.
pub fn heightI(seed: u64, pos: Vector2) i32 {
    const rounded: f32 = heightF(seed, pos);
    return @intFromFloat(rounded);
}
|
||||
|
||||
/// Eight-lane terrain height as integers: `heightFv` converted lane-wise to `i32`.
pub fn heightIv(seed: u64, pos: Vector2x8) i32x8 {
    const rounded: f32x8 = heightFv(seed, pos);
    return @intFromFloat(rounded);
}
|
||||
|
||||
Reference in New Issue
Block a user