From 3cb132517649893392be7cb9785d568693900648 Mon Sep 17 00:00:00 2001 From: Jeeves Date: Fri, 14 Feb 2025 16:56:40 -0700 Subject: [PATCH] init --- .gitattributes | 1 + .gitignore | 19 ++ build.zig | 36 +++ build.zig.zon | 11 + flake.lock | 78 ++++++ flake.nix | 41 ++++ src/main.zig | 280 +++++++++++++++++++++ src/xml.zig | 646 +++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 1112 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 build.zig create mode 100644 build.zig.zon create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 src/main.zig create mode 100644 src/xml.zig diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6313b56 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f27e682 --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +# This file is for zig-specific build artifacts. +# If you have OS-specific or editor-specific files to ignore, +# such as *.swp or .DS_Store, put those in your global +# ~/.gitignore and put this in your ~/.gitconfig: +# +# [core] +# excludesfile = ~/.gitignore +# +# Cheers! 
+# -andrewrk
+
+.zig-cache/
+zig-cache/
+zig-out/
+/release/
+/debug/
+/build/
+/build-*/
+/docgen_tmp/
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..31c3f52
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,36 @@
+const std = @import("std");
+
+/// Declares the build graph for the `streamboy` executable.
+///
+/// Steps registered here:
+/// - the default install step, which installs the executable
+/// - `run`, which installs and then runs it, forwarding any arguments
+///   given after `--`
+/// - `test`, which runs the unit tests of the root module
+pub fn build(b: *std.Build) void {
+    // One module backs both the executable and its unit tests. Target and
+    // optimisation mode come from the standard command-line options.
+    const root_module = b.createModule(.{
+        .root_source_file = b.path("src/main.zig"),
+        .target = b.standardTargetOptions(.{}),
+        .optimize = b.standardOptimizeOption(.{}),
+    });
+
+    const streamboy = b.addExecutable(.{
+        .name = "streamboy",
+        .root_module = root_module,
+    });
+    b.installArtifact(streamboy);
+
+    // `run` depends on the install step so the artifact is installed first.
+    const run_streamboy = b.addRunArtifact(streamboy);
+    run_streamboy.step.dependOn(b.getInstallStep());
+    if (b.args) |forwarded| run_streamboy.addArgs(forwarded);
+    b.step("run", "Run the app").dependOn(&run_streamboy.step);
+
+    // `test` compiles the same module in test mode and runs the result.
+    const unit_tests = b.addTest(.{ .root_module = root_module });
+    const run_unit_tests = b.addRunArtifact(unit_tests);
+    b.step("test", "Run unit tests").dependOn(&run_unit_tests.step);
+}
+
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..e2f4d43
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,11 @@
+.{
+    .name = "streamboy",
+    .version = "0.0.0",
+    .minimum_zig_version = "0.14.0",
+    .dependencies = .{},
+    .paths = .{
+        "build.zig",
+        "build.zig.zon",
+        "src",
+    },
+}
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..2cdb40e
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,78 @@
+{
+  "nodes": {
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1731533236,
+        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
"lastModified": 1736817698, + "narHash": "sha256-1m+JP9RUsbeLVv/tF1DX3Ew9Vl/fatXnlh/g5k3jcSk=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "2b1fca3296ddd1602d2c4f104a4050e006f4b0cb", + "type": "github" + }, + "original": { + "owner": "nixos", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "zig2nix": "zig2nix" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + }, + "zig2nix": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + }, + "locked": { + "lastModified": 1739496552, + "narHash": "sha256-if34rjhH/CXZQTnAl629tVdz/mAx/fifjTPRPQsJ1tg=", + "owner": "Cloudef", + "repo": "zig2nix", + "rev": "0dae566efe9a0ed18c07b76a5ed8ff2c546bdd56", + "type": "github" + }, + "original": { + "owner": "Cloudef", + "repo": "zig2nix", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..5b36224 --- /dev/null +++ b/flake.nix @@ -0,0 +1,41 @@ +{ + inputs = { + zig2nix.url = "github:Cloudef/zig2nix"; + }; + + outputs = { zig2nix, ... 
}: let + flake-utils = zig2nix.inputs.flake-utils; + in (flake-utils.lib.eachDefaultSystem (system: let + env = zig2nix.outputs.zig-env.${system} { zig = zig2nix.outputs.packages.${system}.zig.master.bin; }; + system-triple = env.lib.zigTripleFromString system; + in with builtins; with env.lib; with env.pkgs.lib; rec { + packages.target = genAttrs allTargetTriples (target: env.packageForTarget target ({ + src = cleanSource ./.; + + nativeBuildInputs = with env.pkgs; []; + buildInputs = with env.pkgsForTarget target; []; + + zigPreferMusl = true; + zigDisableWrap = true; + })); + + packages.default = packages.target.${system-triple}.override { + zigPreferMusl = false; + zigDisableWrap = false; + }; + + apps.bundle.default = apps.bundle.target.${system-triple}; + + apps.default = env.app [] "zig build run -- \"$@\""; + apps.build = env.app [] "zig build \"$@\""; + apps.test = env.app [] "zig build test -- \"$@\""; + apps.docs = env.app [] "zig build docs -- \"$@\""; + apps.deps = env.showExternalDeps; + + apps.zon2json = env.app [env.zon2json] "zon2json \"$@\""; + apps.zon2json-lock = env.app [env.zon2json-lock] "zon2json-lock \"$@\""; + apps.zon2nix = env.app [env.zon2nix] "zon2nix \"$@\""; + + devShells.default = env.mkShell {}; + })); +} diff --git a/src/main.zig b/src/main.zig new file mode 100644 index 0000000..40e22ff --- /dev/null +++ b/src/main.zig @@ -0,0 +1,280 @@ +const std = @import("std"); +const xml = @import("./xml.zig"); + +// Music Player: +// - Launch VLC and control it via HTTP interface +// - Play music randomly from a VLC playlist +// - Allow media control (play pause, next, prev) +// - Expose OBS Browser Source to display current track information. +// + Add and remove songs from playlist +// + Allow voting on songs (web interface? chat bot?) +// +// Chat bot: +// - Support Twitch chat +// + Support YouTube +// +// A - means immediately, a + means eventually. 
+
+/// Entry point: once per second, rewrites `/tmp/time` and `/tmp/streaminfo`.
+///
+/// The loop only ends when an update returns an error (for example when the
+/// VLC HTTP interface cannot be reached); that error is then returned.
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    defer _ = gpa.deinit();
+    const allocator = gpa.allocator();
+
+    // TODO: launch VLC from here instead of expecting it to be running:
+    //   vlc --intf http --http-host localhost --http-password 1234
+
+    var tz_file = try std.fs.openFileAbsolute("/etc/localtime", .{});
+    defer tz_file.close();
+    var tz = try std.tz.Tz.parse(allocator, tz_file.reader());
+    defer tz.deinit();
+
+    while (true) : (std.time.sleep(std.time.ns_per_s)) { // one tick per second
+        try updateTime(tz);
+        try updateStatus(allocator);
+        scroll += 1;
+    }
+}
+
+const base64_encoder = std.base64.standard.Encoder;
+
+/// Password of the VLC HTTP interface; VLC expects an empty user name.
+const vlc_password = "1234";
+
+const topic = "Something Fun For Everyone With Streamboy!!!!!!!!";
+
+/// Byte offset at which the ticker text currently starts. Advanced by `main`
+/// once per tick and normalised by `updateStatus`.
+var scroll: usize = 0;
+
+/// Writes the scrolling ticker line to `/tmp/streaminfo`.
+///
+/// The line has the form
+///   `<topic> | ♪ <title> - <artist> | `
+/// e.g. `... | ♪ Red Soul (Super Metroid) - Nostalvania, The OC Jazz Collective | `
+/// and is rotated left by `scroll` bytes. The offset is moved forward to the
+/// next UTF-8 character boundary so a multi-byte character such as `♪` is
+/// never split in the output.
+fn updateStatus(allocator: std.mem.Allocator) !void {
+    var song_info = try getSongInfo(allocator);
+    defer song_info.deinit(allocator);
+
+    const string = try std.fmt.allocPrint(allocator, "{s} | ♪ {s} - {s} | ", .{
+        topic,
+        song_info.title orelse "Unknown Title",
+        song_info.artist orelse "Unknown Artist",
+    });
+    defer allocator.free(string);
+
+    // Wrap around, then skip UTF-8 continuation bytes (0b10xxxxxx) so the
+    // rotation point always sits at the start of a character.
+    if (scroll >= string.len) scroll = 0;
+    while (scroll < string.len and (string[scroll] & 0xC0) == 0x80) scroll += 1;
+    if (scroll >= string.len) scroll = 0;
+
+    // The file is only truncated once the new text is ready to be written.
+    var stream_info_file = try std.fs.createFileAbsolute("/tmp/streaminfo", .{ .truncate = true });
+    defer stream_info_file.close();
+
+    try stream_info_file.writeAll(string[scroll..]);
+    try stream_info_file.writeAll(string[0..scroll]);
+}
+
+/// Metadata of the track VLC is currently playing. Every non-null field is
+/// owned by the struct and released by `deinit`.
+const SongInfo = struct {
+    title: ?[]const u8,
+    album: ?[]const u8,
+    artist: ?[]const u8,
+
+    /// Frees every field that is set, using the allocator that produced them.
+    pub fn deinit(self: *SongInfo, allocator: std.mem.Allocator) void {
+        if (self.title) |b| allocator.free(b);
+        if (self.album) |b| allocator.free(b);
+        if (self.artist) |b| allocator.free(b);
+    }
+};
+
+/// Fetches `status.xml` from the local VLC HTTP interface and extracts the
+/// title, album and artist from its `meta` category.
+///
+/// Fields that are absent or empty in the response stay `null`. The caller
+/// owns the result and must call `SongInfo.deinit` with the same allocator.
+/// Returns `error.VlcRequestFailed` when VLC answers with a non-200 status,
+/// and propagates HTTP, XML and allocation errors.
+fn getSongInfo(allocator: std.mem.Allocator) !SongInfo {
+    var http = std.http.Client{ .allocator = allocator };
+    defer http.deinit();
+
+    // HTTP basic auth with an empty user name: base64(":" ++ password).
+    const userpass = try std.fmt.allocPrint(allocator, ":{s}", .{vlc_password});
+    defer allocator.free(userpass);
+
+    const base64_userpass = try allocator.alloc(u8, base64_encoder.calcSize(userpass.len));
+    defer allocator.free(base64_userpass);
+
+    const final_userpass = try std.fmt.allocPrint(allocator, "Basic {s}", .{base64_encoder.encode(base64_userpass, userpass)});
+    defer allocator.free(final_userpass);
+
+    var response = std.ArrayList(u8).init(allocator);
+    defer response.deinit();
+
+    const result = try http.fetch(.{
+        .location = .{ .url = "http://localhost:8080/requests/status.xml" },
+        .headers = .{ .authorization = .{ .override = final_userpass } },
+        .response_storage = .{ .dynamic = &response },
+    });
+
+    std.log.debug("{any}\n{s}", .{ result, response.items });
+    if (result.status != .ok) return error.VlcRequestFailed;
+
+    const document = try xml.parse(allocator, response.items);
+    defer document.deinit();
+
+    var song_info: SongInfo = .{ .title = null, .album = null, .artist = null };
+    // Anything decoded so far is released if a later step fails.
+    errdefer song_info.deinit(allocator);
+
+    const information = document.root.findChildByTag("information") orelse return song_info;
+
+    var categories_it = information.findChildrenByTag("category");
+    while (categories_it.next()) |category| {
+        const category_name = category.getAttribute("name") orelse continue;
+        if (!std.mem.eql(u8, category_name, "meta")) continue;
+
+        var info_it = category.findChildrenByTag("info");
+        while (info_it.next()) |info| {
+            const info_name = info.getAttribute("name") orelse continue;
+            const slot: *?[]const u8 = if (std.mem.eql(u8, info_name, "title"))
+                &song_info.title
+            else if (std.mem.eql(u8, info_name, "album"))
+                &song_info.album
+            else if (std.mem.eql(u8, info_name, "artist"))
+                &song_info.artist
+            else
+                continue;
+
+            // An empty element has no children, and the first child may be
+            // something other than text; both cases are skipped.
+            if (info.children.len == 0) continue;
+            const text = switch (info.children[0]) {
+                .char_data => |char_data| char_data,
+                else => continue,
+            };
+
+            const processed = try processHtmlString(allocator, text);
+            // A repeated entry replaces the earlier value instead of leaking it.
+            if (slot.*) |previous| allocator.free(previous);
+            slot.* = processed;
+        }
+    }
+
+    return song_info;
+}
+
+/// A numeric character reference that has been decoded to UTF-8.
+const DecodedEntity = struct {
+    /// Input bytes occupied by the reference, including `&#` and `;`.
+    consumed: usize,
+    /// UTF-8 encoding of the referenced code point; `len` bytes are valid.
+    bytes: [4]u8,
+    len: u3,
+};
+
+/// Decodes a reference of the form `&#NNN;` or `&#xHHH;` at the very start
+/// of `text`. Returns null when `text` does not start with a well-formed
+/// reference or the value cannot be encoded as UTF-8.
+fn decodeNumericEntity(text: []const u8) ?DecodedEntity {
+    if (!std.mem.startsWith(u8, text, "&#")) return null;
+    const semi = std.mem.indexOfScalarPos(u8, text, 2, ';') orelse return null;
+
+    var digits = text[2..semi];
+    var base: u8 = 10;
+    if (digits.len > 0 and (digits[0] == 'x' or digits[0] == 'X')) {
+        base = 16;
+        digits = digits[1..];
+    }
+    const codepoint = std.fmt.parseInt(u21, digits, base) catch return null;
+
+    var decoded: DecodedEntity = .{ .consumed = semi + 1, .bytes = undefined, .len = 0 };
+    decoded.len = std.unicode.utf8Encode(codepoint, &decoded.bytes) catch return null;
+    return decoded;
+}
+
+/// Returns a copy of `string` with numeric character references (`&#39;`,
+/// `&#x27;`, ...) replaced by the UTF-8 encoding of their code point.
+///
+/// The input is scanned once, so decoded text is never decoded again.
+/// Malformed or unterminated references are copied through unchanged.
+/// The caller owns the returned slice and frees it with `allocator`.
+fn processHtmlString(allocator: std.mem.Allocator, string: []const u8) ![]const u8 {
+    var out = try std.ArrayList(u8).initCapacity(allocator, string.len);
+    errdefer out.deinit();
+
+    var i: usize = 0;
+    while (i < string.len) {
+        if (decodeNumericEntity(string[i..])) |entity| {
+            try out.appendSlice(entity.bytes[0..entity.len]);
+            i += entity.consumed;
+        } else {
+            try out.append(string[i]);
+            i += 1;
+        }
+    }
+
+    return try out.toOwnedSlice();
+}
+
+/// Writes the current local date and time to `/tmp/time`, e.g.
+///   `Friday, February 14 2025, 10:32:48 AM`
+///
+/// The UTC offset comes from the latest transition in `tz` that is not in
+/// the future. When there is none, the first time type of `tz` is used, or
+/// UTC if the zone data has no time types. Returns `error.TimeBeforeEpoch`
+/// if the local time lies before 1970.
+fn updateTime(tz: std.Tz) !void {
+    const now = std.time.timestamp();
+
+    var utc_offset: i64 = if (tz.timetypes.len > 0) tz.timetypes[0].offset else 0;
+    // NOTE(review): assumes transitions are sorted by ascending timestamp,
+    // as in TZif files - confirm against std.tz.
+    for (tz.transitions) |transition| {
+        if (transition.ts > now) break;
+        utc_offset = transition.timetype.offset;
+    }
+
+    const local_secs = std.math.cast(u64, now + utc_offset) orelse return error.TimeBeforeEpoch;
+
+    const epoch_seconds = std.time.epoch.EpochSeconds{ .secs = local_secs };
+    const day_seconds = epoch_seconds.getDaySeconds();
+    const epoch_day = epoch_seconds.getEpochDay();
+    const year_day = epoch_day.calculateYearDay();
+    const month_day = year_day.calculateMonthDay();
+
+    const ampm = getAmPm(day_seconds);
+
+    var file = try std.fs.createFileAbsolute("/tmp/time", .{ .truncate = true });
+    defer file.close();
+
+    try file.writer().print("{s}, {s} {d} {d}, {d:0>2}:{d:0>2}:{d:0>2} {s}", .{
+        getDayOfWeekName(getDayOfWeek(epoch_day)),
+        getMonthName(month_day.month),
+        // day_index counts from 0; people count days of the month from 1.
+        @as(u8, month_day.day_index) + 1,
+        year_day.year,
+        ampm.hour,
+        day_seconds.getMinutesIntoHour(),
+        day_seconds.getSecondsIntoMinute(),
+        if (ampm.is_pm) "PM" else "AM",
+    });
+}
+
+/// Returns the English name of `month`.
+fn getMonthName(month: std.time.epoch.Month) []const u8 {
+    return switch (month) {
+        .jan => "January",
+        .feb => "February",
+        .mar => "March",
+        .apr => "April",
+        .may => "May",
+        .jun => "June",
+        .jul => "July",
+        .aug => "August",
+        .sep => "September",
+        .oct => "October",
+        .nov => "November",
+        .dec => "December",
+    };
+}
+
+/// Day of the week, numbered so that the value equals `epoch_day % 7`.
+/// Jan 1 1970 (epoch day 0) was a Thursday, so Thursday is 0.
+const DayOfWeek = enum(u3) {
+    thu = 0,
+    fri,
+    sat,
+    sun,
+    mon,
+    tue,
+    wed,
+};
+
+/// Returns the day of the week on which `epoch_day` falls.
+fn getDayOfWeek(epoch_day: std.time.epoch.EpochDay) DayOfWeek {
+    return @enumFromInt(@mod(epoch_day.day, 7));
+}
+
+/// Returns the English name of `day_of_week`.
+fn getDayOfWeekName(day_of_week: DayOfWeek) []const u8 {
+    return switch (day_of_week) {
+        .sun => "Sunday",
+        .mon => "Monday",
+        .tue => "Tuesday",
+        .wed => "Wednesday",
+        .thu => "Thursday",
+        .fri => "Friday",
+        .sat => "Saturday",
+    };
+}
+
+/// An hour on the 12-hour clock: `hour` is in 1..12.
+const AmPm = struct {
+    hour: u4,
+    is_pm: bool,
+};
+
+/// Converts the hour of `day_seconds` to the 12-hour clock.
+/// Midnight is 12 AM and noon is 12 PM.
+fn getAmPm(day_seconds: std.time.epoch.DaySeconds) AmPm {
+    const hour = day_seconds.getHoursIntoDay();
+    const hour_of_half_day = hour % 12;
+    return .{
+        .hour = if (hour_of_half_day == 0) 12 else @intCast(hour_of_half_day),
+        .is_pm = hour >= 12,
+    };
+}
diff --git a/src/xml.zig b/src/xml.zig
new file mode 100644
index 0000000..94f8719
--- /dev/null
+++ b/src/xml.zig
@@ -0,0 +1,646 @@
+// Copyright © 2020-2022 Robin Voetter
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+const std = @import("std");
+const mem = std.mem;
+const testing = std.testing;
+const Allocator = mem.Allocator;
+const ArenaAllocator = std.heap.ArenaAllocator;
+
+/// A `name="value"` pair from a start tag or from the XML declaration.
+pub const Attribute = struct {
+    name: []const u8,
+    value: []const u8,
+};
+
+/// One child of an element: character data, a comment, or a nested element.
+pub const Content = union(enum) {
+    char_data: []const u8,
+    comment: []const u8,
+    element: *Element,
+};
+
+/// A parsed element with its attributes and children in document order.
+pub const Element = struct {
+    tag: []const u8,
+    attributes: []Attribute = &.{},
+    children: []Content = &.{},
+
+    /// Returns the value of the first attribute named `attrib_name`, or null.
+    pub fn getAttribute(self: Element, attrib_name: []const u8) ?[]const u8 {
+        for (self.attributes) |child| {
+            if (mem.eql(u8, child.name, attrib_name)) {
+                return child.value;
+            }
+        }
+
+        return null;
+    }
+
+    /// Returns the text of the first child element tagged `child_tag`.
+    /// Null when there is no such child or when that child does not consist
+    /// of exactly one piece of character data.
+    pub fn getCharData(self: Element, child_tag: []const u8) ?[]const u8 {
+        const child = self.findChildByTag(child_tag) orelse return null;
+        if (child.children.len != 1) {
+            return null;
+        }
+
+        return switch (child.children[0]) {
+            .char_data => |char_data| char_data,
+            else => null,
+        };
+    }
+
+    /// Iterates over all children, whatever their kind.
+    pub fn iterator(self: Element) ChildIterator {
+        return .{
+            .items = self.children,
+            .i = 0,
+        };
+    }
+
+    /// Iterates over the children that are elements.
+    pub fn elements(self: Element) ChildElementIterator {
+        return .{
+            .inner = self.iterator(),
+        };
+    }
+
+    /// Returns the first child element whose tag equals `tag`, or null.
+    pub fn findChildByTag(self: Element, tag: []const u8) ?*Element {
+        var it = self.findChildrenByTag(tag);
+        return it.next();
+    }
+
+    /// Iterates over the child elements whose tag equals `tag`.
+    pub fn findChildrenByTag(self: Element, tag: []const u8) FindChildrenByTagIterator {
+        return .{
+            .inner = self.elements(),
+            .tag = tag,
+        };
+    }
+
+    pub const ChildIterator = struct {
+        items: []Content,
+        i: usize,
+
+        pub fn next(self: *ChildIterator) ?*Content {
+            if (self.i < self.items.len) {
+                self.i += 1;
+                return &self.items[self.i - 1];
+            }
+
+            return null;
+        }
+    };
+
+    pub const ChildElementIterator = struct {
+        inner: ChildIterator,
+
+        pub fn next(self: *ChildElementIterator) ?*Element {
+            while (self.inner.next()) |child| {
+                if (child.* != .element) {
+                    continue;
+                }
+
+                return child.*.element;
+            }
+
+            return null;
+        }
+    };
+
+    pub const FindChildrenByTagIterator = struct {
+        inner: ChildElementIterator,
+        tag: []const u8,
+
+        pub fn next(self: *FindChildrenByTagIterator) ?*Element {
+            while (self.inner.next()) |child| {
+                if (!mem.eql(u8, child.tag, self.tag)) {
+                    continue;
+                }
+
+                return child;
+            }
+
+            return null;
+        }
+    };
+};
+
+/// A parsed document. Everything reachable from `xml_decl` and `root` is
+/// allocated in `arena` and is released by `deinit`.
+pub const Document = struct {
+    arena: ArenaAllocator,
+    xml_decl: ?*Element,
+    root: *Element,
+
+    pub fn deinit(self: Document) void {
+        var arena = self.arena; // Copy to stack so self can be taken by value.
+        arena.deinit();
+    }
+};
+
+/// Cursor over the source text that tracks the current line and column.
+const Parser = struct {
+    source: []const u8,
+    offset: usize,
+    line: usize,
+    column: usize,
+
+    fn init(source: []const u8) Parser {
+        return .{
+            .source = source,
+            .offset = 0,
+            .line = 0,
+            .column = 0,
+        };
+    }
+
+    /// Returns the next byte without consuming it, or null at end of input.
+    fn peek(self: *Parser) ?u8 {
+        return if (self.offset < self.source.len) self.source[self.offset] else null;
+    }
+
+    /// Consumes and returns the next byte; `error.UnexpectedEof` at the end.
+    fn consume(self: *Parser) !u8 {
+        if (self.offset < self.source.len) {
+            return self.consumeNoEof();
+        }
+
+        return error.UnexpectedEof;
+    }
+
+    /// Like `consume`, but asserts that input remains.
+    fn consumeNoEof(self: *Parser) u8 {
+        std.debug.assert(self.offset < self.source.len);
+        const c = self.source[self.offset];
+        self.offset += 1;
+
+        if (c == '\n') {
+            self.line += 1;
+            self.column = 0;
+        } else {
+            self.column += 1;
+        }
+
+        return c;
+    }
+
+    /// Consumes `char` if it is next and reports whether it did.
+    fn eat(self: *Parser, char: u8) bool {
+        self.expect(char) catch return false;
+        return true;
+    }
+
+    /// Consumes `expected`, or fails without consuming anything.
+    fn expect(self: *Parser, expected: u8) !void {
+        if (self.peek()) |actual| {
+            if (expected != actual) {
+                return error.UnexpectedCharacter;
+            }
+
+            _ = self.consumeNoEof();
+            return;
+        }
+
+        return error.UnexpectedEof;
+    }
+
+    /// Consumes `text` if it is next and reports whether it did.
+    fn eatStr(self: *Parser, text: []const u8) bool {
+        self.expectStr(text) catch return false;
+        return true;
+    }
+
+    /// Consumes `text`, or fails without consuming anything.
+    fn expectStr(self: *Parser, text: []const u8) !void {
+        if (self.source.len < self.offset + text.len) {
+            return error.UnexpectedEof;
+        } else if (mem.startsWith(u8, self.source[self.offset..], text)) {
+            var i: usize = 0;
+            while (i < text.len) : (i += 1) {
+                _ = self.consumeNoEof();
+            }
+
+            return;
+        }
+
+        return error.UnexpectedCharacter;
+    }
+
+    /// Skips spaces, tabs and line breaks; true if anything was skipped.
+    fn eatWs(self: *Parser) bool {
+        var ws = false;
+
+        while (self.peek()) |ch| {
+            switch (ch) {
+                ' ', '\t', '\n', '\r' => {
+                    ws = true;
+                    _ = self.consumeNoEof();
+                },
+                else => break,
+            }
+        }
+
+        return ws;
+    }
+
+    fn expectWs(self: *Parser) !void {
+        if (!self.eatWs()) return error.UnexpectedCharacter;
+    }
+
+    /// Returns the source line containing the current offset, without its
+    /// line break.
+    fn currentLine(self: Parser) []const u8 {
+        var begin: usize = 0;
+        if (mem.lastIndexOfScalar(u8, self.source[0..self.offset], '\n')) |prev_nl| {
+            begin = prev_nl + 1;
+        }
+
+        const end = mem.indexOfScalarPos(u8, self.source, self.offset, '\n') orelse self.source.len;
+        return self.source[begin..end];
+    }
+};
+
+test "xml: Parser" {
+    {
+        var parser = Parser.init("I like pythons");
+        try testing.expectEqual(@as(?u8, 'I'), parser.peek());
+        try testing.expectEqual(@as(u8, 'I'), parser.consumeNoEof());
+        try testing.expectEqual(@as(?u8, ' '), parser.peek());
+        try testing.expectEqual(@as(u8, ' '), try parser.consume());
+
+        try testing.expect(parser.eat('l'));
+        try testing.expectEqual(@as(?u8, 'i'), parser.peek());
+        try testing.expectEqual(false, parser.eat('a'));
+        try testing.expectEqual(@as(?u8, 'i'), parser.peek());
+
+        try parser.expect('i');
+        try testing.expectEqual(@as(?u8, 'k'), parser.peek());
+        try testing.expectError(error.UnexpectedCharacter, parser.expect('a'));
+        try testing.expectEqual(@as(?u8, 'k'), parser.peek());
+
+        try testing.expect(parser.eatStr("ke"));
+        try testing.expectEqual(@as(?u8, ' '), parser.peek());
+
+        try testing.expect(parser.eatWs());
+        try testing.expectEqual(@as(?u8, 'p'), parser.peek());
+        try testing.expectEqual(false, parser.eatWs());
+        try testing.expectEqual(@as(?u8, 'p'), parser.peek());
+
+        try testing.expectEqual(false, parser.eatStr("aaaaaaaaa"));
+        try testing.expectEqual(@as(?u8, 'p'), parser.peek());
+
+        try testing.expectError(error.UnexpectedEof, parser.expectStr("aaaaaaaaa"));
+        try testing.expectEqual(@as(?u8, 'p'), parser.peek());
+        try testing.expectError(error.UnexpectedCharacter, parser.expectStr("pytn"));
+        try testing.expectEqual(@as(?u8, 'p'), parser.peek());
+        try parser.expectStr("python");
+        try testing.expectEqual(@as(?u8, 's'), parser.peek());
+    }
+
+    {
+        var parser = Parser.init("");
+        try testing.expectEqual(parser.peek(), null);
+        try testing.expectError(error.UnexpectedEof, parser.consume());
+        try testing.expectEqual(parser.eat('p'), false);
+        try testing.expectError(error.UnexpectedEof, parser.expect('p'));
+    }
+}
+
+pub const ParseError = error{
+    IllegalCharacter,
+    UnexpectedEof,
+    UnexpectedCharacter,
+    UnclosedValue,
+    UnclosedComment,
+    InvalidName,
+    InvalidEntity,
+    InvalidStandaloneValue,
+    NonMatchingClosingTag,
+    InvalidDocument,
+    OutOfMemory,
+};
+
+/// Parses `source` into a `Document` whose memory lives in an arena created
+/// on top of `backing_allocator`. The caller releases it with
+/// `Document.deinit`.
+pub fn parse(backing_allocator: Allocator, source: []const u8) !Document {
+    var parser = Parser.init(source);
+    return try parseDocument(&parser, backing_allocator);
+}
+
+/// Parses an optional XML declaration followed by exactly one root element.
+/// Comments are allowed before, between and after them; anything else left
+/// over at the end is `error.InvalidDocument`.
+fn parseDocument(parser: *Parser, backing_allocator: Allocator) !Document {
+    var doc = Document{
+        .arena = ArenaAllocator.init(backing_allocator),
+        .xml_decl = null,
+        .root = undefined,
+    };
+
+    errdefer doc.deinit();
+
+    const allocator = doc.arena.allocator();
+
+    try skipComments(parser, allocator);
+
+    doc.xml_decl = try parseElement(parser, allocator, .xml_decl);
+    _ = parser.eatWs();
+    try skipComments(parser, allocator);
+
+    doc.root = (try parseElement(parser, allocator, .element)) orelse return error.InvalidDocument;
+    _ = parser.eatWs();
+    try skipComments(parser, allocator);
+
+    if (parser.peek() != null) return error.InvalidDocument;
+
+    return doc;
+}
+
+/// Parses a single- or double-quoted attribute value and unescapes it.
+fn parseAttrValue(parser: *Parser, alloc: Allocator) ![]const u8 {
+    const quote = try parser.consume();
+    if (quote != '"' and quote != '\'') return error.UnexpectedCharacter;
+
+    const begin = parser.offset;
+
+    while (true) {
+        const c = parser.consume() catch return error.UnclosedValue;
+        if (c == quote) break;
+    }
+
+    const end = parser.offset - 1;
+
+    return try unescape(alloc, parser.source[begin..end]);
+}
+
+fn parseEqAttrValue(parser: *Parser, alloc: Allocator) ![]const u8 {
+    _ = parser.eatWs();
+    try parser.expect('=');
+    _ = parser.eatWs();
+
+    return try parseAttrValue(parser, alloc);
+}
+
+/// Parses a name and returns it as a slice of the source (no copy is made).
+fn parseNameNoDupe(parser: *Parser) ![]const u8 {
+    // XML's spec on names is very long, so to make this easier
+    // we just take any character that is not special and not whitespace
+    const begin = parser.offset;
+
+    while (parser.peek()) |ch| {
+        switch (ch) {
+            ' ', '\t', '\n', '\r' => break,
+            '&', '"', '\'', '<', '>', '?', '=', '/' => break,
+            else => _ = parser.consumeNoEof(),
+        }
+    }
+
+    const end = parser.offset;
+    if (begin == end) return error.InvalidName;
+
+    return parser.source[begin..end];
+}
+
+/// Parses text up to the next `<` and unescapes it. Returns null when the
+/// parser is already at a `<` or at the end of input.
+fn parseCharData(parser: *Parser, alloc: Allocator) !?[]const u8 {
+    const begin = parser.offset;
+
+    while (parser.peek()) |ch| {
+        switch (ch) {
+            '<' => break,
+            else => _ = parser.consumeNoEof(),
+        }
+    }
+
+    const end = parser.offset;
+    if (begin == end) return null;
+
+    return try unescape(alloc, parser.source[begin..end]);
+}
+
+/// Parses one child: character data, then a comment, then an element are
+/// tried in that order.
+fn parseContent(parser: *Parser, alloc: Allocator) ParseError!Content {
+    if (try parseCharData(parser, alloc)) |cd| {
+        return Content{ .char_data = cd };
+    } else if (try parseComment(parser, alloc)) |comment| {
+        return Content{ .comment = comment };
+    } else if (try parseElement(parser, alloc, .element)) |elem| {
+        return Content{ .element = elem };
+    } else {
+        return error.UnexpectedCharacter;
+    }
+}
+
+/// Parses `name = value`. Returns null when no name starts here.
+fn parseAttr(parser: *Parser, alloc: Allocator) !?Attribute {
+    const name = parseNameNoDupe(parser) catch return null;
+    _ = parser.eatWs();
+    try parser.expect('=');
+    _ = parser.eatWs();
+    const value = try parseAttrValue(parser, alloc);
+
+    const attr = Attribute{
+        .name = try alloc.dupe(u8, name),
+        .value = value,
+    };
+    return attr;
+}
+
+const ElementKind = enum {
+    xml_decl,
+    element,
+};
+
+/// Parses either the XML declaration (`.xml_decl`) or a regular element
+/// (`.element`) including its attributes and children.
+///
+/// Returns null, with the parser offset restored, when the input does not
+/// start with the requested construct.
+///
+/// NOTE(review): the `<?` / `</` literals and the closing-tag handling were
+/// reconstructed after text extraction stripped them from this file;
+/// confirm against the upstream source.
+fn parseElement(parser: *Parser, alloc: Allocator, comptime kind: ElementKind) !?*Element {
+    const start = parser.offset;
+
+    const tag = switch (kind) {
+        .xml_decl => blk: {
+            if (!parser.eatStr("<?") or !mem.eql(u8, try parseNameNoDupe(parser), "xml")) {
+                parser.offset = start;
+                return null;
+            }
+            break :blk "xml";
+        },
+        .element => blk: {
+            if (!parser.eat('<')) return null;
+            const tag = parseNameNoDupe(parser) catch {
+                parser.offset = start;
+                return null;
+            };
+            break :blk tag;
+        },
+    };
+
+    var attributes = std.ArrayList(Attribute).init(alloc);
+    defer attributes.deinit();
+
+    var children = std.ArrayList(Content).init(alloc);
+    defer children.deinit();
+
+    while (parser.eatWs()) {
+        const attr = (try parseAttr(parser, alloc)) orelse break;
+        try attributes.append(attr);
+    }
+
+    switch (kind) {
+        .xml_decl => try parser.expectStr("?>"),
+        .element => {
+            if (!parser.eatStr("/>")) {
+                try parser.expect('>');
+
+                while (true) {
+                    if (parser.peek() == null) {
+                        return error.UnexpectedEof;
+                    } else if (parser.eatStr("</")) {
+                        break;
+                    }
+
+                    const content = try parseContent(parser, alloc);
+                    try children.append(content);
+                }
+
+                const closing_tag = try parseNameNoDupe(parser);
+                if (!mem.eql(u8, tag, closing_tag)) {
+                    return error.NonMatchingClosingTag;
+                }
+
+                _ = parser.eatWs();
+                try parser.expect('>');
+            }
+        },
+    }
+
+    const element = try alloc.create(Element);
+    element.* = .{
+        .tag = try alloc.dupe(u8, tag),
+        .attributes = try attributes.toOwnedSlice(),
+        .children = try children.toOwnedSlice(),
+    };
+    return element;
+}
+
+// NOTE(review): the XML input literals in the tests below were reconstructed
+// from their assertions after text extraction stripped them; confirm against
+// the upstream source.
+test "xml: parseElement" {
+    var arena = ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const alloc = arena.allocator();
+
+    {
+        var parser = Parser.init("<= a='b'/>");
+        try testing.expectEqual(@as(?*Element, null), try parseElement(&parser, alloc, .element));
+        try testing.expectEqual(@as(?u8, '<'), parser.peek());
+    }
+
+    {
+        var parser = Parser.init("<python size='15' color = \"green\"/>");
+        const elem = try parseElement(&parser, alloc, .element);
+        try testing.expectEqualSlices(u8, elem.?.tag, "python");
+
+        const size_attr = elem.?.attributes[0];
+        try testing.expectEqualSlices(u8, size_attr.name, "size");
+        try testing.expectEqualSlices(u8, size_attr.value, "15");
+
+        const color_attr = elem.?.attributes[1];
+        try testing.expectEqualSlices(u8, color_attr.name, "color");
+        try testing.expectEqualSlices(u8, color_attr.value, "green");
+    }
+
+    {
+        var parser = Parser.init("<python>test</python>");
+        const elem = try parseElement(&parser, alloc, .element);
+        try testing.expectEqualSlices(u8, elem.?.tag, "python");
+        try testing.expectEqualSlices(u8, elem.?.children[0].char_data, "test");
+    }
+
+    {
+        var parser = Parser.init("<a>b<c/>d<e/>f<!--g--></a>");
+        const elem = try parseElement(&parser, alloc, .element);
+        try testing.expectEqualSlices(u8, elem.?.tag, "a");
+        try testing.expectEqualSlices(u8, elem.?.children[0].char_data, "b");
+        try testing.expectEqualSlices(u8, elem.?.children[1].element.tag, "c");
+        try testing.expectEqualSlices(u8, elem.?.children[2].char_data, "d");
+        try testing.expectEqualSlices(u8, elem.?.children[3].element.tag, "e");
+        try testing.expectEqualSlices(u8, elem.?.children[4].char_data, "f");
+        try testing.expectEqualSlices(u8, elem.?.children[5].comment, "g");
+    }
+}
+
+test "xml: parse prolog" {
+    var arena = ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    {
+        var parser = Parser.init("<?xmla version='aa'?>");
+        try testing.expectEqual(@as(?*Element, null), try parseElement(&parser, a, .xml_decl));
+        try testing.expectEqual(@as(?u8, '<'), parser.peek());
+    }
+
+    {
+        var parser = Parser.init("<?xml version='aa'?>");
+        const decl = try parseElement(&parser, a, .xml_decl);
+        try testing.expectEqualSlices(u8, "aa", decl.?.getAttribute("version").?);
+        try testing.expectEqual(@as(?[]const u8, null), decl.?.getAttribute("encoding"));
+        try testing.expectEqual(@as(?[]const u8, null), decl.?.getAttribute("standalone"));
+    }
+
+    {
+        var parser = Parser.init("<?xml version=\"ccc\" encoding = 'bbb' standalone   \t =   'yes'?>");
+        const decl = try parseElement(&parser, a, .xml_decl);
+        try testing.expectEqualSlices(u8, "ccc", decl.?.getAttribute("version").?);
+        try testing.expectEqualSlices(u8, "bbb", decl.?.getAttribute("encoding").?);
+        try testing.expectEqualSlices(u8, "yes", decl.?.getAttribute("standalone").?);
+    }
+}
+
+/// Skips any run of comments, together with the whitespace after each one.
+fn skipComments(parser: *Parser, alloc: Allocator) !void {
+    while ((try parseComment(parser, alloc)) != null) {
+        _ = parser.eatWs();
+    }
+}
+
+/// Parses a comment and returns a copy of its text without the delimiters.
+/// Returns null when no comment starts here, and `error.UnclosedComment`
+/// when the input ends before the closing delimiter.
+///
+/// NOTE(review): the comment delimiters were reconstructed after text
+/// extraction stripped them; confirm against the upstream source.
+fn parseComment(parser: *Parser, alloc: Allocator) !?[]const u8 {
+    if (!parser.eatStr("<!--")) return null;
+
+    const begin = parser.offset;
+    while (!parser.eatStr("-->")) {
+        _ = parser.consume() catch return error.UnclosedComment;
+    }
+
+    const end = parser.offset - "-->".len;
+    return try alloc.dupe(u8, parser.source[begin..end]);
+}
+
+/// Maps one of the five predefined entities, written in full (`&` through
+/// `;`), to the character it stands for. Anything else, including numeric
+/// character references, is `error.InvalidEntity`.
+fn unescapeEntity(text: []const u8) !u8 {
+    const EntitySubstition = struct { text: []const u8, replacement: u8 };
+
+    const entities = [_]EntitySubstition{
+        .{ .text = "&lt;", .replacement = '<' },
+        .{ .text = "&gt;", .replacement = '>' },
+        .{ .text = "&amp;", .replacement = '&' },
+        .{ .text = "&apos;", .replacement = '\'' },
+        .{ .text = "&quot;", .replacement = '"' },
+    };
+
+    for (entities) |entity| {
+        if (mem.eql(u8, text, entity.text)) return entity.replacement;
+    }
+
+    return error.InvalidEntity;
+}
+
+/// Returns a copy of `text`, allocated in `arena`, with every predefined
+/// entity replaced by its character. The result is a prefix of a buffer of
+/// `text.len` bytes, so it is meant for arena allocators.
+fn unescape(arena: Allocator, text: []const u8) ![]const u8 {
+    const unescaped = try arena.alloc(u8, text.len);
+
+    var j: usize = 0;
+    var i: usize = 0;
+    while (i < text.len) : (j += 1) {
+        if (text[i] == '&') {
+            const entity_end = 1 + (mem.indexOfScalarPos(u8, text, i, ';') orelse return error.InvalidEntity);
+            unescaped[j] = try unescapeEntity(text[i..entity_end]);
+            i = entity_end;
+        } else {
+            unescaped[j] = text[i];
+            i += 1;
+        }
+    }
+
+    return unescaped[0..j];
+}
+
+test "xml: unescape" {
+    var arena = ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    try testing.expectEqualSlices(u8, "test", try unescape(a, "test"));
+    try testing.expectEqualSlices(u8, "a<b&c>d\"e'f<", try unescape(a, "a&lt;b&amp;c&gt;d&quot;e&apos;f&lt;"));
+    try testing.expectError(error.InvalidEntity, unescape(a, "python&"));
+    try testing.expectError(error.InvalidEntity, unescape(a, "python&&"));
+    try testing.expectError(error.InvalidEntity, unescape(a, "python&test;"));
+    try testing.expectError(error.InvalidEntity, unescape(a, "python&boa"));
+}
+
+test "xml: top level comments" {
+    var arena = ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    const doc = try parse(a, "<?xml version='aa'?><!--comment--><python color='green'/><!--another comment-->");
+    try testing.expectEqualSlices(u8, "python", doc.root.tag);
+}