Thanks for using Compiler Explorer
Sponsors
C with Coccinelle
C++ with Coccinelle
Jakt
C++
Ada
Algol68
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
GLSL
Go
Haskell
HLSL
Hook
Hylo
IL
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Nim
Numba
Objective-C
Objective-C++
OCaml
Odin
OpenCL C
Pascal
Pony
Python
Racket
Ruby
Rust
Snowball
Scala
Slang
Solidity
Spice
SPIR-V
Swift
LLVM TableGen
Toit
TypeScript Native
V
Vala
Visual Basic
Vyper
WASM
Zig
Javascript
GIMPLE
Ygen
sway
zig source #3
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
zig 0.10.0
zig 0.11.0
zig 0.12.0
zig 0.12.1
zig 0.13.0
zig 0.14.0
zig 0.2.0
zig 0.3.0
zig 0.4.0
zig 0.5.0
zig 0.6.0
zig 0.7.0
zig 0.7.1
zig 0.8.0
zig 0.9.0
zig trunk
Options
Source code
const std = @import("std");
const builtin = @import("builtin");

const WIDTH = 16; // How many elements to operate on at once

/// Compresses the elements in `data` corresponding to the `condition` vector.
///
/// `data` and `condition` are expected in 4-way deinterlaced layout (the layout
/// produced by `std.simd.deinterlace(4, bytes)` bitcast to a 64-byte vector, as
/// done in `main`). An element is kept when its `condition` byte is nonzero.
/// Kept elements are written to `dest` contiguously in their original order.
///
/// Writes to `dest`, including a number of undefined bytes.
/// In total, this expression gives the number of bytes written past `dest`:
/// switch (WIDTH) {
///    8, 16, 32 => (64 - WIDTH) + 32,
///    64 => 64,
/// }
///
/// Returns the last byte of the running per-group totals, which for the current
/// `WIDTH` of 16 is the total number of kept elements.
/// NOTE(review): the lane reinterpretation via `@bitCast` assumes a
/// little-endian target.
export fn compress(data: @Vector(64, u8), condition: @Vector(64, u8), dest: [*]u8) u8 {
    // Each group of WIDTH elements is spread over 4 vectors, so one group occupies
    // WIDTH / 4 bytes (= WIDTH * 2 bits) of each 16-byte vector.
    const U = std.meta.Int(.unsigned, WIDTH * 2);

    // Position of every element inside its WIDTH-sized group, in deinterlaced layout.
    const indices = comptime @as(@Vector(64, u8), @bitCast(std.simd.deinterlace(4, std.simd.iota(u8, 64) & @as(@Vector(64, u8), @splat(WIDTH - 1)))));

    // 255 (i.e. -1) for every kept element, 0 otherwise.
    var prefix_sums = @select(
        u8,
        condition != @as(@Vector(64, u8), @splat(0)),
        @as(@Vector(64, u8), @splat(255)),
        @as(@Vector(64, u8), @splat(0)),
    );

    // Next, shift elements right by 1, 2, 4, ... WIDTH / 2 within each group and
    // accumulate at each step. This yields the inclusive prefix sum of the -1s,
    // i.e. the negated count of kept elements up to and including each position.
    inline for (0..std.math.log2(WIDTH)) |i| {
        prefix_sums +%= shiftInterleavedElementsRight(prefix_sums, 1 << i, U);
    }

    // A multiplier with one bit set per WIDTH-bit step; multiplying by it turns the
    // per-group counts into running totals across groups.
    comptime var prefix_sum_multiplier = 0;
    inline for (0..64 / WIDTH) |i| prefix_sum_multiplier |= 1 << i * WIDTH;

    // The 4th deinterlaced vector holds elements 3, 7, 11, 15, ... so its odd bytes
    // (selected by `uzp2`) hold the negated counts at positions 7 and 15 of each
    // 16-element group. Negating gives positive counts.
    const last_vector = @as([4]@Vector(16, u8), @bitCast(prefix_sums))[3];
    const group_counts: [2]u64 = @bitCast(uzp2(neg(last_vector)));
    const prefix_sum_of_offsets: [8]u8 = @bitCast(group_counts[0] *% prefix_sum_multiplier);

    // Now take the identity indices and add it to the (exclusive) prefix_sums.
    // This value tells us how far each value should be left-shifted
    var travel_distances = indices +% shiftInterleavedElementsRight(prefix_sums, 1, U);
    var compressed_data = data;

    // Move elements left one power of two at a time: an element moves by `i`
    // when bit `i` of its travel distance is set.
    inline for (0..std.math.log2(WIDTH)) |x| {
        const i = 1 << x;
        const shifted_left = shiftInterleavedElementsLeft(travel_distances, i, U);
        const shifted_compressed_data = shiftInterleavedElementsLeft(compressed_data, i, U);
        const selector = cmtst(shifted_left, @splat(i));
        travel_distances = bsl(selector, shifted_left, travel_distances);
        compressed_data = bsl(selector, shifted_compressed_data, compressed_data);
    }

    // Store each group at the running total of the groups before it. Later stores
    // overwrite the trailing garbage of earlier ones.
    inline for (0..64 / WIDTH) |i| {
        (if (WIDTH == 64) st4 else st4_first_32)(
            dest[if (i == 0) 0 else prefix_sum_of_offsets[i * (WIDTH / 8) - 1]..],
            shiftInterleavedElementsLeft(compressed_data, WIDTH * i, u128),
        );
    }

    return prefix_sum_of_offsets[7];
}

/// Bitwise select: takes bits from `a` where `selector` is 1 and from `b` where it is 0.
fn bsl(selector: anytype, a: @TypeOf(selector), b: @TypeOf(selector)) @TypeOf(selector) {
    return (a & selector) | (b & ~selector);
}

/// Per-byte "test bits": yields 0xff where `a & b` is nonzero and 0 elsewhere.
fn cmtst(a: anytype, comptime b: @TypeOf(a)) @TypeOf(a) {
    return @select(u8, (a & b) != @as(@TypeOf(a), @splat(0)), @as(@TypeOf(a), @splat(0xff)), @as(@TypeOf(a), @splat(0)));
}

/// Gathers the odd-indexed bytes of `x`; the 8 results are repeated in the upper half.
fn uzp2(x: @Vector(16, u8)) @Vector(16, u8) {
    return @shuffle(u8, x, undefined, [_]i32{ 1, 3, 5, 7, 9, 11, 13, 15, 1, 3, 5, 7, 9, 11, 13, 15 });
}

/// Two's-complement negation with wrapping (`1 +% ~x`), element-wise for vectors.
fn neg(x: anytype) @TypeOf(x) {
    return @as(@TypeOf(x), @splat(1)) +% ~x;
}

const HAS_ARM_NEON = switch (builtin.cpu.arch) {
    .aarch64, .aarch64_be => std.Target.aarch64.featureSetHas(builtin.cpu.features, .neon),
    .arm, .armeb => std.Target.arm.featureSetHas(builtin.cpu.features, .neon),
    else => false,
};

// The `llvm.aarch64.neon.*` intrinsics used below only exist on AArch64, so
// 32-bit ARM (even with NEON) must take the portable path.
const HAS_AARCH64_NEON = switch (builtin.cpu.arch) {
    .aarch64, .aarch64_be => HAS_ARM_NEON,
    else => false,
};

/// Returns the first 8 bytes of a 16-byte vector.
fn lowHalf(chunk: @Vector(16, u8)) @Vector(8, u8) {
    return @as([2]@Vector(8, u8), @bitCast(chunk))[0];
}

/// Interleaves the four 16-byte chunks of `vec` and stores the resulting 64 bytes at `ptr`.
/// Uses the AArch64 NEON `st4` intrinsic at runtime when available, otherwise
/// (and always at comptime) `std.simd.interlace`.
fn st4(ptr: [*]u8, vec: @Vector(64, u8)) void {
    const chunks: [4]@Vector(16, u8) = @bitCast(vec);
    if (!HAS_AARCH64_NEON or @inComptime()) {
        ptr[0..64].* = std.simd.interlace(chunks);
    } else struct {
        extern fn @"llvm.aarch64.neon.st4.v16i8.p0"(@Vector(16, u8), @Vector(16, u8), @Vector(16, u8), @Vector(16, u8), [*]u8) void;
    }.@"llvm.aarch64.neon.st4.v16i8.p0"(chunks[0], chunks[1], chunks[2], chunks[3], ptr);
}

/// Interleaves the first 8 bytes of each of the four 16-byte chunks of `vec` and
/// stores the resulting 32 bytes at `ptr`.
fn st4_first_32(ptr: [*]u8, vec: @Vector(64, u8)) void {
    const chunks: [4]@Vector(16, u8) = @bitCast(vec);
    const halves = [4]@Vector(8, u8){
        lowHalf(chunks[0]),
        lowHalf(chunks[1]),
        lowHalf(chunks[2]),
        lowHalf(chunks[3]),
    };
    if (!HAS_AARCH64_NEON or @inComptime()) {
        ptr[0..32].* = std.simd.interlace(halves);
    } else struct {
        // The overload suffix must match the operand type: 8 x i8 => v8i8.
        extern fn @"llvm.aarch64.neon.st4.v8i8.p0"(@Vector(8, u8), @Vector(8, u8), @Vector(8, u8), @Vector(8, u8), [*]u8) void;
    }.@"llvm.aarch64.neon.st4.v8i8.p0"(halves[0], halves[1], halves[2], halves[3], ptr);
}

/// Shifts the bytes of `vec` towards lower indices by `amount`, filling with zeros.
/// With `boundary == u128` the whole vector is shifted; otherwise the vector is
/// reinterpreted as lanes of `boundary` and each lane is shifted independently
/// (a logical right shift by `8 * amount` bits, which on a little-endian target
/// moves bytes to lower indices within the lane).
fn shiftElementsLeft(vec: @Vector(16, u8), comptime amount: std.simd.VectorCount(@Vector(64, u8)), comptime boundary: type) @Vector(16, u8) {
    return if (boundary == u128)
        std.simd.shiftElementsLeft(vec, amount, 0)
    else
        @bitCast(@as(@Vector(16 / @sizeOf(boundary), boundary), @bitCast(vec)) >> @splat(8 * amount));
}

/// Shifts the logical (interleaved) element sequence held in 4-way deinterlaced
/// `vecs` left by `amount` elements within each `boundary`-sized lane.
/// A shift by 1 or 2 rotates the four vectors and shifts the wrapped-around
/// ones by one byte; the remaining multiple of 4 is a plain byte shift of
/// every vector.
fn shiftInterleavedElementsLeft(vecs: @Vector(64, u8), comptime amount: std.simd.VectorCount(@Vector(64, u8)), boundary: type) @Vector(64, u8) {
    var new_vecs: [4]@Vector(16, u8) = @bitCast(vecs);

    if ((amount & 1) == 1) {
        const n = shiftElementsLeft(new_vecs[0], 1, boundary);
        new_vecs[0] = new_vecs[1];
        new_vecs[1] = new_vecs[2];
        new_vecs[2] = new_vecs[3];
        new_vecs[3] = n;
    }

    if ((amount & 2) == 2) {
        const n0 = shiftElementsLeft(new_vecs[0], 1, boundary);
        const n1 = shiftElementsLeft(new_vecs[1], 1, boundary);
        new_vecs[0] = new_vecs[2];
        new_vecs[1] = new_vecs[3];
        new_vecs[2] = n0;
        new_vecs[3] = n1;
    }

    const remaining_amt = amount >> 2;
    if (remaining_amt > 0) {
        new_vecs = .{
            shiftElementsLeft(new_vecs[0], remaining_amt, boundary),
            shiftElementsLeft(new_vecs[1], remaining_amt, boundary),
            shiftElementsLeft(new_vecs[2], remaining_amt, boundary),
            shiftElementsLeft(new_vecs[3], remaining_amt, boundary),
        };
    }

    return @bitCast(new_vecs);
}

/// Shifts the bytes of `vec` towards higher indices by `amount`, filling with zeros.
/// With `boundary == u128` the whole vector is shifted; otherwise the vector is
/// reinterpreted as lanes of `boundary` and each lane is shifted independently
/// (a left shift by `8 * amount` bits, which on a little-endian target moves
/// bytes to higher indices within the lane).
fn shiftElementsRight(vec: @Vector(16, u8), comptime amount: std.simd.VectorCount(@Vector(64, u8)), comptime boundary: type) @Vector(16, u8) {
    return if (boundary == u128)
        std.simd.shiftElementsRight(vec, amount, 0)
    else
        @bitCast(@as(@Vector(16 / @sizeOf(boundary), boundary), @bitCast(vec)) << @splat(8 * amount));
}

/// Shifts the logical (interleaved) element sequence held in 4-way deinterlaced
/// `vecs` right by `amount` elements within each `boundary`-sized lane.
/// Mirror image of `shiftInterleavedElementsLeft`.
fn shiftInterleavedElementsRight(vecs: @Vector(64, u8), comptime amount: std.simd.VectorCount(@Vector(64, u8)), boundary: type) @Vector(64, u8) {
    var new_vecs: [4]@Vector(16, u8) = @bitCast(vecs);

    if ((amount & 1) == 1) {
        const n = shiftElementsRight(new_vecs[3], 1, boundary);
        new_vecs[3] = new_vecs[2];
        new_vecs[2] = new_vecs[1];
        new_vecs[1] = new_vecs[0];
        new_vecs[0] = n;
    }

    if ((amount & 2) == 2) {
        const n1 = shiftElementsRight(new_vecs[3], 1, boundary);
        const n0 = shiftElementsRight(new_vecs[2], 1, boundary);
        new_vecs[3] = new_vecs[1];
        new_vecs[2] = new_vecs[0];
        new_vecs[1] = n1;
        new_vecs[0] = n0;
    }

    const remaining_amt = amount >> 2;
    if (remaining_amt > 0) {
        new_vecs = .{
            shiftElementsRight(new_vecs[0], remaining_amt, boundary),
            shiftElementsRight(new_vecs[1], remaining_amt, boundary),
            shiftElementsRight(new_vecs[2], remaining_amt, boundary),
            shiftElementsRight(new_vecs[3], remaining_amt, boundary),
        };
    }

    return @bitCast(new_vecs);
}

/// Compile-time scratch harness: runs `compress` on a fixed 64-byte alphabet
/// inside a `comptime` block and reports the result through `@compileLog`.
/// NOTE(review): `dest` is only 64 bytes while `compress` documents writing up
/// to (64 - WIDTH) + 32 bytes; this input selects few enough elements to stay
/// in bounds, but other selections may not.
fn main() void {
    comptime {
        @setEvalBranchQuota(100000);
        const ptr = "0123456789abcdefghijklmnopqrstuvwxyz%$ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        for (ptr, 0..) |c, i| {
            if (c == ' ') @compileLog(i);
        }

        const vec: @Vector(64, u8) = @bitCast(std.simd.deinterlace(4, ptr[0..64].*));
        var selected: @Vector(64, u8) = @splat(0);

        for (.{
            '3', '1', '2', 'a', 'd', 'r', 'D', '$', '%',
            'I', 'T', 'C', 'B', 'A', 'Q', 'Z', 'V', 'U',
        }) |c| {
            selected |= @select(u8, vec == @as(@Vector(64, u8), @splat(c)), @as(@Vector(64, u8), @splat(0xFF)), @as(@Vector(64, u8), @splat(0)));
        }

        var dest = std.mem.zeroes([64]u8);
        _ = compress(vec, selected, &dest);
        @compileLog(std.fmt.comptimePrint("{s}", .{dest}));
    }
}
Become a Patron
Sponsor on GitHub
Donate via PayPal
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
CE on Bluesky
About the author
Statistics
Changelog
Version tree