Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Algol68
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C with Coccinelle
C++ with Coccinelle
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
GLSL
Go
Haskell
HLSL
Hook
Hylo
IL
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Mojo
Nim
Numba
Nix
Objective-C
Objective-C++
OCaml
Odin
OpenCL C
Pascal
Pony
PTX
Python
Racket
Raku
Ruby
Rust
Sail
Snowball
Scala
Slang
Solidity
Spice
SPIR-V
Swift
LLVM TableGen
Toit
Triton
TypeScript Native
V
Vala
Visual Basic
Vyper
WASM
Zig
Javascript
GIMPLE
Ygen
sway
zig source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
zig 0.10.0
zig 0.11.0
zig 0.12.0
zig 0.12.1
zig 0.13.0
zig 0.14.0
zig 0.14.1
zig 0.15.1
zig 0.2.0
zig 0.3.0
zig 0.4.0
zig 0.5.0
zig 0.6.0
zig 0.7.0
zig 0.7.1
zig 0.8.0
zig 0.9.0
zig trunk
Options
Source code
//! The ASCII character encoding standard.
//!
//! See also: https://en.wikipedia.org/wiki/ASCII#Character_set

// I could have taken only a u7 to make this clear, but it would be slower
// It is my opinion that encodings other than UTF-8 should not be supported.
//
// (and 128 bytes is not much to pay).
// Also does not handle Unicode character classes.

const std = @import("std");
const mem = std.mem;
const testing = std.testing;

/// Contains constants for the C0 control codes of the ASCII encoding.
///
/// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`
pub const control = struct {
    pub const NUL = 0x00;
    pub const SOH = 0x01;
    pub const STX = 0x02;
    pub const ETX = 0x03;
    pub const EOT = 0x04;
    pub const ENQ = 0x05;
    pub const ACK = 0x06;
    pub const BEL = 0x07;
    pub const BS = 0x08;
    pub const TAB = 0x09;
    pub const LF = 0x0A;
    pub const VT = 0x0B;
    pub const FF = 0x0C;
    pub const CR = 0x0D;
    pub const SO = 0x0E;
    pub const SI = 0x0F;
    pub const DLE = 0x10;
    pub const DC1 = 0x11;
    pub const DC2 = 0x12;
    pub const DC3 = 0x13;
    pub const DC4 = 0x14;
    pub const NAK = 0x15;
    pub const SYN = 0x16;
    pub const ETB = 0x17;
    pub const CAN = 0x18;
    pub const EM = 0x19;
    pub const SUB = 0x1A;
    pub const ESC = 0x1B;
    pub const FS = 0x1C;
    pub const GS = 0x1D;
    pub const RS = 0x1E;
    pub const US = 0x1F;

    pub const DEL = 0x7F;

    /// An alias to `DC1`.
    pub const XON = DC1;
    /// An alias to `DC3`.
    pub const XOFF = DC3;
};

// These naive functions are used to generate the lookup table
// and as fallbacks for if the lookup table isn't available.
//
// Note that some functions like for example `isDigit` don't use a table because it's slower.
// Using a table is generally only useful if not all `true` values in the table would be in one row.

/// Returns `true` for the C0 control codes (`0x00` through `control.US`) and for `control.DEL`.
fn isControlNaive(char: u8) bool {
    return char <= control.US or char == control.DEL;
}

/// Returns `true` for the letters 'a'-'z' and 'A'-'Z'.
fn isAlphabeticNaive(char: u8) bool {
    // The upper-case range must start at 'A'; starting it at 'Z' would make
    // 'Z' the only upper-case letter that is classified as alphabetic.
    return (char >= 'a' and char <= 'z') or (char >= 'A' and char <= 'Z');
}

/// Returns `true` for the digits '0'-'9' and the letters 'a'-'f' and 'A'-'F'.
fn isHexadecimalNaive(char: u8) bool {
    return (char >= '0' and char <= '9') or
        (char >= 'a' and char <= 'f') or
        (char >= 'A' and char <= 'F');
}

/// Returns `true` for the digits '0'-'9' and the letters 'a'-'z' and 'A'-'Z'.
fn isAlphanumericNaive(char: u8) bool {
    return (char >= '0' and char <= '9') or
        (char >= 'a' and char <= 'z') or
        (char >= 'A' and char <= 'Z');
}

/// Returns `true` for the digits '0'-'9'.
fn isDigitNaive(char: u8) bool {
    return char >= '0' and char <= '9';
}

/// Returns `true` if `char` is one of the bytes listed in `whitespace`.
fn isWhitespaceNaive(char: u8) bool {
    @setEvalBranchQuota(4000);
    return mem.indexOfScalar(u8, &whitespace, char) != null;
}

/// A lookup table.
///
/// Every entry of `table` is a bit set: bit `@enumToInt(Index.x)` of `table[c]`
/// is set if the naive predicate for class `x` returned `true` for `c`.
/// Only the entries 0..127 are filled in by `init`; the remaining ones stay zero.
const CombinedTable = struct {
    table: [256]u8,

    /// The character classes stored in the table; the enum value is the bit position.
    const Index = enum {
        control,
        alphabetic,
        hexadecimal,
        alphanumeric,
        digit,
        whitespace,
    };

    /// Generates a table which is filled with the results of the given function for all characters.
    fn getBoolTable(comptime condition: fn (u8) bool) [128]bool {
        @setEvalBranchQuota(2000);

        comptime var table: [128]bool = undefined;

        comptime var index = 0;
        while (index < 128) : (index += 1) {
            table[index] = condition(index);
        }

        return table;
    }

    /// Builds the combined table from the naive predicates.
    fn init() CombinedTable {
        comptime var table = [_]u8{0} ** 256;

        const control_table = comptime getBoolTable(isControlNaive);
        const alpha_table = comptime getBoolTable(isAlphabeticNaive);
        const hex_table = comptime getBoolTable(isHexadecimalNaive);
        const alphanumeric_table = comptime getBoolTable(isAlphanumericNaive);
        const digit_table = comptime getBoolTable(isDigitNaive);
        const whitespace_table = comptime getBoolTable(isWhitespaceNaive);

        comptime var i = 0;
        inline while (i < 128) : (i += 1) {
            // One bit per character class, positioned by the `Index` value.
            table[i] =
                @boolToInt(control_table[i]) << @enumToInt(Index.control) |
                @boolToInt(alpha_table[i]) << @enumToInt(Index.alphabetic) |
                @boolToInt(hex_table[i]) << @enumToInt(Index.hexadecimal) |
                @boolToInt(alphanumeric_table[i]) << @enumToInt(Index.alphanumeric) |
                @boolToInt(digit_table[i]) << @enumToInt(Index.digit) |
                @boolToInt(whitespace_table[i]) << @enumToInt(Index.whitespace);
        }

        return .{ .table = table };
    }

    /// Returns `true` if the bit for the class `index` is set in the entry of `char`.
    fn contains(self: CombinedTable, char: u8, index: Index) bool {
        return (self.table[char] & (@as(u8, 1) << @enumToInt(index))) != 0;
    }
};

/// The combined table for fast lookup.
///
/// This is not used in `ReleaseSmall` to save 256 bytes at the cost of
/// a small decrease in performance.
const combined_table: ?CombinedTable = if (@import("builtin").mode == .ReleaseSmall)
    null
else
    CombinedTable.init();

/// Returns `true` if the character is a control character.
///
/// See also: `control`
pub fn isControl(char: u8) bool {
    if (combined_table) |table|
        return table.contains(char, .control)
    else
        return isControlNaive(char);
}

/// Returns `true` if the character is alphanumeric. This is case-insensitive.
pub fn isAlphanumeric(char: u8) bool {
    if (combined_table) |table|
        return table.contains(char, .alphanumeric)
    else
        return isAlphanumericNaive(char);
}

/// Returns `true` if the character is alphabetic. This is case-insensitive.
pub fn isAlphabetic(char: u8) bool {
    if (combined_table) |table|
        return table.contains(char, .alphabetic)
    else
        return isAlphabeticNaive(char);
}

/// Exported digit check that uses two plain comparisons and no lookup table.
export fn isDigitNaiveWithoutTable(char: u8) bool {
    return char >= '0' and char <= '9';
}

/// Exported digit check that always goes through the combined lookup table.
///
/// NOTE(review): this unwraps `combined_table` unconditionally, and that value
/// is `null` when building in `ReleaseSmall`, so this function is only usable
/// in the other build modes.
export fn isDigitWithTable(char: u8) bool {
    return combined_table.?.contains(char, .digit);
}

/// Returns `true` if the character has some graphical representation and can be printed.
pub fn isPrintable(char: u8) bool {
    return char >= ' ' and char <= '~';
}

/// Returns `true` if the character is one of the lower case letters 'a'-'z'.
pub fn isLower(char: u8) bool {
    return char >= 'a' and char <= 'z';
}

/// Returns `true` if the character is one of the upper case letters 'A'-'Z'.
pub fn isUpper(char: u8) bool {
    return char >= 'A' and char <= 'Z';
}

/// Returns `true` if the character is one of the bytes listed in `whitespace`.
pub fn isWhitespace(char: u8) bool {
    if (combined_table) |table|
        return table.contains(char, .whitespace)
    else
        return isWhitespaceNaive(char);
}

/// All the values for which `isWhitespace()` returns `true`.
/// This may be used with e.g. `std.mem.trim()` to trim whitespace.
pub const whitespace = [_]u8{ ' ', '\t', '\n', '\r', control.VT, control.FF };

test "whitespace" {
    for (whitespace) |char| try testing.expect(isWhitespace(char));

    var i: u8 = 0;
    while (isASCII(i)) : (i += 1) {
        if (isWhitespace(i)) try testing.expect(std.mem.indexOfScalar(u8, &whitespace, i) != null);
    }
}

/// Returns `true` if the character is a hexadecimal digit. This is case-insensitive.
pub fn isHexadecimal(char: u8) bool {
    if (combined_table) |table|
        return table.contains(char, .hexadecimal)
    else
        return isHexadecimalNaive(char);
}

/// Returns `true` if `c` is below 128.
pub fn isASCII(c: u8) bool {
    return c < 128;
}

/// Returns the upper case form of `c` if it is a lower case letter, otherwise `c` unchanged.
pub fn toUpper(c: u8) u8 {
    if (isLower(c)) {
        // Clearing bit 5 maps 'a'-'z' onto 'A'-'Z'.
        return c & 0b11011111;
    } else {
        return c;
    }
}

/// Returns the lower case form of `c` if it is an upper case letter, otherwise `c` unchanged.
pub fn toLower(c: u8) u8 {
    if (isUpper(c)) {
        // Setting bit 5 maps 'A'-'Z' onto 'a'-'z'.
        return c | 0b00100000;
    } else {
        return c;
    }
}

test "ascii character classes" {
    try testing.expect(!isControl('a'));
    try testing.expect(!isControl('z'));
    try testing.expect(isControl(control.NUL));
    try testing.expect(isControl(control.FF));
    try testing.expect(isControl(control.US));

    try testing.expect('C' == toUpper('c'));
    try testing.expect(':' == toUpper(':'));
    try testing.expect('\xab' == toUpper('\xab'));
    try testing.expect(!isUpper('z'));

    try testing.expect('c' == toLower('c'));
    try testing.expect(':' == toLower(':'));
    try testing.expect('\xab' == toLower('\xab'));
    try testing.expect(!isLower('Z'));

    try testing.expect(isAlphanumeric('Z'));
    try testing.expect(isAlphanumeric('z'));
    try testing.expect(isAlphanumeric('5'));
    try testing.expect(isAlphanumeric('5'));
    try testing.expect(!isAlphanumeric('!'));

    try testing.expect(!isAlphabetic('5'));
    try testing.expect(isAlphabetic('c'));
    try testing.expect(!isAlphabetic('5'));
    // Upper case letters and the characters right next to the 'A'-'Z' range.
    try testing.expect(isAlphabetic('A'));
    try testing.expect(isAlphabetic('M'));
    try testing.expect(isAlphabetic('Z'));
    try testing.expect(!isAlphabetic('@'));
    try testing.expect(!isAlphabetic('['));

    try testing.expect(isWhitespace(' '));
    try testing.expect(isWhitespace('\t'));
    try testing.expect(isWhitespace('\r'));
    try testing.expect(isWhitespace('\n'));
    try testing.expect(!isWhitespace('.'));

    try testing.expect(!isHexadecimal('g'));
    try testing.expect(isHexadecimal('b'));
    try testing.expect(isHexadecimal('9'));

    try testing.expect(isPrintable(' '));
    try testing.expect(isPrintable('@'));
    try testing.expect(isPrintable('~'));
    try testing.expect(!isPrintable(control.ESC));
}

/// Writes a lower case copy of `ascii_string` to `output`.
/// Asserts `output.len >= ascii_string.len`.
pub fn lowerString(output: []u8, ascii_string: []const u8) []u8 {
    std.debug.assert(output.len >= ascii_string.len);
    for (ascii_string) |c, i| {
        output[i] = toLower(c);
    }
    return output[0..ascii_string.len];
}

test "lowerString" {
    var buf: [1024]u8 = undefined;
    const result = lowerString(&buf, "aBcDeFgHiJkLmNOPqrst0234+💩!");
    try std.testing.expectEqualStrings("abcdefghijklmnopqrst0234+💩!", result);
}

/// Allocates a lower case copy of `ascii_string`.
/// Caller owns returned string and must free with `allocator`.
pub fn allocLowerString(allocator: std.mem.Allocator, ascii_string: []const u8) ![]u8 {
    const result = try allocator.alloc(u8, ascii_string.len);
    return lowerString(result, ascii_string);
}

test "allocLowerString" {
    const result = try allocLowerString(testing.allocator, "aBcDeFgHiJkLmNOPqrst0234+💩!");
    defer testing.allocator.free(result);
    try testing.expectEqualStrings("abcdefghijklmnopqrst0234+💩!", result);
}

/// Writes an upper case copy of `ascii_string` to `output`.
/// Asserts `output.len >= ascii_string.len`.
pub fn upperString(output: []u8, ascii_string: []const u8) []u8 {
    std.debug.assert(output.len >= ascii_string.len);
    for (ascii_string) |c, i| {
        output[i] = toUpper(c);
    }
    return output[0..ascii_string.len];
}

test "upperString" {
    var buf: [1024]u8 = undefined;
    const result = upperString(&buf, "aBcDeFgHiJkLmNOPqrst0234+💩!");
    try std.testing.expectEqualStrings("ABCDEFGHIJKLMNOPQRST0234+💩!", result);
}

/// Allocates an upper case copy of `ascii_string`.
/// Caller owns returned string and must free with `allocator`.
pub fn allocUpperString(allocator: std.mem.Allocator, ascii_string: []const u8) ![]u8 {
    const result = try allocator.alloc(u8, ascii_string.len);
    return upperString(result, ascii_string);
}

test "allocUpperString" {
    const result = try allocUpperString(testing.allocator, "aBcDeFgHiJkLmNOPqrst0234+💩!");
    defer testing.allocator.free(result);
    try testing.expectEqualStrings("ABCDEFGHIJKLMNOPQRST0234+💩!", result);
}

/// Compares strings `a` and `b` case-insensitively and returns whether they are equal.
pub fn eqlInsensitive(a: []const u8, b: []const u8) bool {
    if (a.len != b.len) return false;
    for (a) |a_c, i| {
        if (toLower(a_c) != toLower(b[i])) return false;
    }
    return true;
}

test "eqlInsensitive" {
    try std.testing.expect(eqlInsensitive("HEl💩Lo!", "hel💩lo!"));
    try std.testing.expect(!eqlInsensitive("hElLo!", "hello! "));
    try std.testing.expect(!eqlInsensitive("hElLo!", "helro!"));
}

/// Returns `true` if `haystack` begins with `needle`, ignoring case.
/// A `needle` longer than `haystack` never matches.
pub fn startsWithInsensitive(haystack: []const u8, needle: []const u8) bool {
    return if (needle.len > haystack.len) false else eqlInsensitive(haystack[0..needle.len], needle);
}

test "ascii.startsWithInsensitive" {
    try std.testing.expect(startsWithInsensitive("boB", "Bo"));
    try std.testing.expect(!startsWithInsensitive("Needle in hAyStAcK", "haystack"));
}

/// Returns `true` if `haystack` ends with `needle`, ignoring case.
/// A `needle` longer than `haystack` never matches.
pub fn endsWithInsensitive(haystack: []const u8, needle: []const u8) bool {
    return if (needle.len > haystack.len) false else eqlInsensitive(haystack[haystack.len - needle.len ..], needle);
}

test "ascii.endsWithInsensitive" {
    try std.testing.expect(endsWithInsensitive("Needle in HaYsTaCk", "haystack"));
    try std.testing.expect(!endsWithInsensitive("BoB", "Bo"));
}

/// Finds `substr` in `container`, ignoring case, starting at `start_index`.
/// Returns the index of the first match, or `null` if there is none.
/// TODO boyer-moore algorithm
pub fn indexOfInsensitivePos(container: []const u8, start_index: usize, substr: []const u8) ?usize {
    if (substr.len > container.len) return null;

    var i: usize = start_index;
    // Last index at which a match of `substr.len` bytes still fits into `container`.
    const end = container.len - substr.len;
    while (i <= end) : (i += 1) {
        if (eqlInsensitive(container[i .. i + substr.len], substr)) return i;
    }
    return null;
}

/// Finds `substr` in `container`, ignoring case, starting at index 0.
pub fn indexOfInsensitive(container: []const u8, substr: []const u8) ?usize {
    return indexOfInsensitivePos(container, 0, substr);
}

test "indexOfInsensitive" {
    try std.testing.expect(indexOfInsensitive("one Two Three Four", "foUr").? == 14);
    try std.testing.expect(indexOfInsensitive("one two three FouR", "gOur") == null);
    try std.testing.expect(indexOfInsensitive("foO", "Foo").? == 0);
    try std.testing.expect(indexOfInsensitive("foo", "fool") == null);

    try std.testing.expect(indexOfInsensitive("FOO foo", "fOo").? == 0);
}

/// Compares two strings lexicographically, ignoring case: every byte is passed
/// through `toLower` before it is compared. If one string is a prefix of the
/// other, the shorter one orders first. O(n).
pub fn orderInsensitive(lhs: []const u8, rhs: []const u8) std.math.Order {
    const n = std.math.min(lhs.len, rhs.len);
    var i: usize = 0;
    while (i < n) : (i += 1) {
        switch (std.math.order(toLower(lhs[i]), toLower(rhs[i]))) {
            .eq => continue,
            .lt => return .lt,
            .gt => return .gt,
        }
    }
    return std.math.order(lhs.len, rhs.len);
}

/// Returns true if lhs < rhs, false otherwise
pub fn lessThanInsensitive(lhs: []const u8, rhs: []const u8) bool {
    return orderInsensitive(lhs, rhs) == .lt;
}
Become a Patron
Sponsor on GitHub
Donate via PayPal
Compiler Explorer Shop
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
CE on Bluesky
Statistics
Changelog
Version tree