From: Ayo Reis
Date: Fri, 13 Feb 2026 01:08:29 +0000 (+0000)
Subject: Implement scanner
X-Git-Url: https://git.ayoreis.com/zlox.git/commitdiff_plain/bebff07499e6ddad8a7b028e38db539372afc2f3?ds=inline;hp=c24edf577000aeb94a80189751501e9b35fbaa6b

Implement scanner
---

diff --git a/Lox.zig b/Lox.zig
new file mode 100644
index 0000000..56a1572
--- /dev/null
+++ b/Lox.zig
@@ -0,0 +1,92 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const Scanner = @import("Scanner.zig");
+
+/// Set by `report` whenever an error is reported. `runFile` reads it to pick
+/// the exit code; `runPrompt` clears it after every line.
+var had_error = false;
+
+/// Runs the script named by the single argument, or the interactive prompt
+/// when there is none. Returns 64 after printing usage for extra arguments.
+pub fn main() !u8 {
+    var gpa: std.heap.DebugAllocator(.{}) = .init;
+    defer _ = gpa.deinit();
+    const allocator = gpa.allocator();
+    const args = try std.process.argsAlloc(allocator);
+    defer std.process.argsFree(allocator, args);
+
+    if (args.len > 2) {
+        var stdout_writer = std.fs.File.stdout().writer(&.{});
+        const stdout = &stdout_writer.interface;
+        try stdout.writeAll("Usage: zlox [script]\n");
+        return 64;
+    } else if (args.len == 2) {
+        return runFile(allocator, args[1]);
+    } else {
+        try runPrompt(allocator);
+    }
+
+    return 0;
+}
+
+/// Reads the whole file at `path` and runs it. Returns 65 if any error was
+/// reported while running it, 0 otherwise.
+fn runFile(allocator: Allocator, path: []const u8) !u8 {
+    const bytes = try std.fs.cwd().readFileAlloc(allocator, path, std.math.maxInt(usize));
+    defer allocator.free(bytes);
+    try run(allocator, bytes);
+    // Indicate an error in the exit code.
+    if (had_error) return 65;
+    return 0;
+}
+
+/// Prompts with "> " and runs standard input one line at a time until the
+/// end of input.
+fn runPrompt(allocator: Allocator) !void {
+    var stdin_buffer: [1024]u8 = undefined;
+    var stdin_reader = std.fs.File.stdin().reader(&stdin_buffer);
+    const stdin = &stdin_reader.interface;
+    var stdout_writer = std.fs.File.stdout().writer(&.{});
+    const stdout = &stdout_writer.interface;
+
+    while (true) {
+        try stdout.writeAll("> ");
+        const line = try stdin.takeDelimiter('\n') orelse break;
+        try run(allocator, line);
+        // An error on one line must not carry over to the next one.
+        had_error = false;
+    }
+}
+
+/// Scans `source` and prints every token.
+fn run(allocator: Allocator, source: []const u8) !void {
+    var scanner = Scanner.init(source);
+    const tokens = try scanner.scanTokens(allocator);
+    defer allocator.free(tokens);
+
+    // For now, just print the tokens.
+    for (tokens) |token| {
+        std.debug.print("{f}\n", .{token});
+    }
+}
+
+/// Reports `message` as an error on `line`.
+pub fn @"error"(line: u32, message: []const u8) !void {
+    try report(line, "", message);
+}
+
+/// Writes "[line N] Error<where>: <message>" to standard error and records
+/// that an error occurred.
+fn report(line: u32, where: []const u8, message: []const u8) !void {
+    // Record the error first so the exit code is right even if the write
+    // below fails.
+    had_error = true;
+
+    // Created here rather than in a container-level initializer, which must
+    // be comptime-evaluable.
+    var stderr_buffer: [1024]u8 = undefined;
+    var stderr_writer = std.fs.File.stderr().writer(&stderr_buffer);
+    const stderr = &stderr_writer.interface;
+    try stderr.print("[line {}] Error{s}: {s}\n", .{ line, where, message });
+    try stderr.flush();
+}
diff --git a/README.md b/README.md
index 84b40e2..c2b0cfa 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,7 @@
 # Zlox
 
 An implementation of the [Lox language](https://craftinginterpreters.com/the-lox-language.html) in Zig.
+
+```shell
+zig run Lox.zig
+```
diff --git a/Scanner.zig b/Scanner.zig
new file mode 100644
index 0000000..d971ec5
--- /dev/null
+++ b/Scanner.zig
@@ -0,0 +1,201 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const Token = @import("Token.zig");
+const Literal = Token.Literal;
+const Scanner = @This();
+const TokenType = @import("token-type.zig").TokenType;
+const Lox = @import("Lox.zig");
+
+/// The text being scanned; token lexemes and string literals are slices of it.
+source: []const u8,
+/// Tokens collected so far; handed to the caller by `scanTokens`.
+tokens: std.ArrayList(Token) = .empty,
+/// Index of the first byte of the lexeme being scanned.
+start: u32 = 0,
+/// Index of the next byte to consume.
+current: u32 = 0,
+/// Current line number, starting at 1.
+line: u32 = 1,
+
+/// Maps each reserved word to its token type.
+const keyword: std.StaticStringMap(TokenType) = .initComptime(.{
+    .{ "and", .@"and" },
+    .{ "class", .class },
+    .{ "else", .@"else" },
+    .{ "false", .false },
+    .{ "for", .@"for" },
+    .{ "fun", .fun },
+    .{ "if", .@"if" },
+    .{ "nil", .nil },
+    .{ "or", .@"or" },
+    .{ "print", .print },
+    .{ "return", .@"return" },
+    .{ "super", .super },
+    .{ "this", .this },
+    .{ "true", .true },
+    .{ "var", .@"var" },
+    .{ "while", .@"while" },
+});
+
+/// Creates a scanner positioned at the start of `source`.
+pub fn init(source: []const u8) Scanner {
+    return .{
+        .source = source,
+    };
+}
+
+/// Scans the whole source and returns its tokens, terminated by an `eof`
+/// token. The caller owns the returned slice and frees it with `allocator`.
+pub fn scanTokens(self: *Scanner, allocator: Allocator) ![]Token {
+    // Release the tokens collected so far if scanning fails part-way.
+    errdefer self.tokens.deinit(allocator);
+
+    while (!self.isAtEnd()) {
+        // We are at the beginning of the next lexeme.
+        self.start = self.current;
+        try self.scanToken(allocator);
+    }
+
+    try self.tokens.append(allocator, .init(.eof, "", null, self.line));
+    return try self.tokens.toOwnedSlice(allocator);
+}
+
+/// Consumes one lexeme starting at `start` and appends its token, if any.
+/// Whitespace and `//` comments produce no token; an unrecognized character
+/// is reported through `Lox.@"error"`.
+fn scanToken(self: *Scanner, allocator: Allocator) !void {
+    const c = self.advance();
+
+    switch (c) {
+        '(' => try self.addToken(allocator, .left_paren, null),
+        ')' => try self.addToken(allocator, .right_paren, null),
+        '{' => try self.addToken(allocator, .left_brace, null),
+        '}' => try self.addToken(allocator, .right_brace, null),
+        ',' => try self.addToken(allocator, .comma, null),
+        '.' => try self.addToken(allocator, .dot, null),
+        '-' => try self.addToken(allocator, .minus, null),
+        '+' => try self.addToken(allocator, .plus, null),
+        ';' => try self.addToken(allocator, .semicolon, null),
+        '*' => try self.addToken(allocator, .star, null),
+        '!' => try self.addToken(allocator, if (self.match('=')) .bang_equal else .bang, null),
+        '=' => try self.addToken(allocator, if (self.match('=')) .equal_equal else .equal, null),
+        '<' => try self.addToken(allocator, if (self.match('=')) .less_equal else .less, null),
+        '>' => try self.addToken(allocator, if (self.match('=')) .greater_equal else .greater, null),
+
+        // A second slash starts a comment that runs to the end of the line.
+        '/' => if (self.match('/')) {
+            while (self.peek() != '\n' and !self.isAtEnd()) _ = self.advance();
+        } else {
+            try self.addToken(allocator, .slash, null);
+        },
+
+        ' ', '\r', '\t' => {},
+        '\n' => self.line += 1,
+        '"' => try self.string(allocator),
+
+        else => if (isDigit(c)) {
+            try self.number(allocator);
+        } else if (isAlpha(c)) {
+            try self.identifier(allocator);
+        } else {
+            try Lox.@"error"(self.line, "Unexpected character.");
+        },
+    }
+}
+
+/// Scans an identifier or reserved word.
+fn identifier(self: *Scanner, allocator: Allocator) !void {
+    while (isAlphanumeric(self.peek())) _ = self.advance();
+    const text = self.source[self.start..self.current];
+    // Reserved words have their own token type; anything else is an identifier.
+    const @"type": TokenType = keyword.get(text) orelse .identifier;
+    try self.addToken(allocator, @"type", null);
+}
+
+/// Scans a number literal: digits, optionally followed by "." and more digits.
+fn number(self: *Scanner, allocator: Allocator) !void {
+    while (isDigit(self.peek())) _ = self.advance();
+
+    // Look for a fractional part.
+    if (self.peek() == '.' and isDigit(self.peekNext())) {
+        // Consume the "."
+        _ = self.advance();
+
+        while (isDigit(self.peek())) _ = self.advance();
+    }
+
+    const text = self.source[self.start..self.current];
+    // The lexeme is digits with an optional fraction, so parsing cannot fail.
+    const value = std.fmt.parseFloat(f64, text) catch unreachable;
+    try self.addToken(allocator, .number, .{ .number = value });
+}
+
+/// Scans a string literal, which may span lines. The literal value excludes
+/// the surrounding quotes. An unterminated string is reported as an error.
+fn string(self: *Scanner, allocator: Allocator) !void {
+    while (self.peek() != '"' and !self.isAtEnd()) {
+        if (self.peek() == '\n') self.line += 1;
+        _ = self.advance();
+    }
+
+    if (self.isAtEnd()) {
+        try Lox.@"error"(self.line, "Unterminated string.");
+        return;
+    }
+
+    // The closing ".
+    _ = self.advance();
+
+    const value = self.source[self.start + 1 .. self.current - 1];
+    try self.addToken(allocator, .string, .{ .string = value });
+}
+
+/// Consumes the next byte only if it equals `expected`.
+fn match(self: *Scanner, expected: u8) bool {
+    if (self.isAtEnd()) return false;
+    if (self.source[self.current] != expected) return false;
+    self.current += 1;
+    return true;
+}
+
+/// Returns the next byte without consuming it, or 0 at the end of the source.
+fn peek(self: *Scanner) u8 {
+    if (self.isAtEnd()) return 0;
+    return self.source[self.current];
+}
+
+/// Returns the byte after the next one, or 0 if there is none.
+fn peekNext(self: *Scanner) u8 {
+    if (self.current + 1 >= self.source.len) return 0;
+    return self.source[self.current + 1];
+}
+
+fn isAlpha(c: u8) bool {
+    return (c >= 'a' and c <= 'z') or
+        (c >= 'A' and c <= 'Z') or
+        c == '_';
+}
+
+fn isAlphanumeric(c: u8) bool {
+    return isAlpha(c) or isDigit(c);
+}
+
+fn isDigit(c: u8) bool {
+    return c >= '0' and c <= '9';
+}
+
+fn isAtEnd(self: *Scanner) bool {
+    return self.current >= self.source.len;
+}
+
+/// Consumes and returns the next byte. The caller must not be at the end.
+fn advance(self: *Scanner) u8 {
+    defer self.current += 1;
+    return self.source[self.current];
+}
+
+/// Appends a token whose lexeme is the source text from `start` to `current`.
+fn addToken(self: *Scanner, allocator: Allocator, @"type": TokenType, literal: ?Literal) !void {
+    const text = self.source[self.start..self.current];
+    try self.tokens.append(allocator, .init(@"type", text, literal, self.line));
+}
diff --git a/Token.zig b/Token.zig
new file mode 100644
index 0000000..bf988d8
--- /dev/null
+++ b/Token.zig
@@ -0,0 +1,32 @@
+const std = @import("std");
+const TokenType = @import("token-type.zig").TokenType;
+const Token = @This();
+
+type: TokenType,
+/// The source text of the token.
+lexeme: []const u8,
+/// The value of a string or number token; null when there is none.
+literal: ?Literal,
+/// The line number recorded when the token was created.
+line: u32,
+
+/// Tagged so the active field is known and can be printed.
+pub const Literal = union(enum) {
+    string: []const u8,
+    number: f64,
+};
+
+/// Creates a token from its parts.
+pub fn init(@"type": TokenType, lexeme: []const u8, literal: ?Literal, line: u32) Token {
+    return .{
+        .type = @"type",
+        .lexeme = lexeme,
+        .literal = literal,
+        .line = line,
+    };
+}
+
+/// Writes the token's type, lexeme and literal separated by spaces.
+pub fn format(self: Token, writer: *std.io.Writer) !void {
+    try writer.print("{} {s} {any}", .{ self.type, self.lexeme, self.literal });
+}
diff --git a/main.zig b/main.zig
deleted file mode 100644
index f0966fc..0000000
--- a/main.zig
+++ /dev/null
@@ -1,7 +0,0 @@
-const std = @import("std");
-
-pub fn main() !void {
-    var stdout_writer = std.fs.File.stdout().writer(&.{});
-    const stdout = &stdout_writer.interface;
-    try stdout.writeAll("Hello, Zlox!\n");
-}
diff --git a/token-type.zig b/token-type.zig
new file mode 100644
index 0000000..8972dfd
--- /dev/null
+++ b/token-type.zig
@@ -0,0 +1,49 @@
+pub const TokenType = enum {
+    // Single-character tokens.
+    left_paren,
+    right_paren,
+    left_brace,
+    right_brace,
+    comma,
+    dot,
+    minus,
+    plus,
+    semicolon,
+    slash,
+    star,
+
+    // One or two character tokens.
+    bang,
+    bang_equal,
+    equal,
+    equal_equal,
+    greater,
+    greater_equal,
+    less,
+    less_equal,
+
+    // Literals.
+    identifier,
+    string,
+    number,
+
+    // Keywords.
+    @"and",
+    class,
+    @"else",
+    false,
+    fun,
+    @"for",
+    @"if",
+    nil,
+    @"or",
+    print,
+    @"return",
+    super,
+    this,
+    true,
+    @"var",
+    @"while",
+
+    eof,
+};