first commit

This commit is contained in:
2025-08-15 13:34:17 +02:00
commit 104b32694b
8 changed files with 11993 additions and 0 deletions

92
src/lib.zig Normal file
View File

@@ -0,0 +1,92 @@
const std = @import("std");
const rand = std.crypto.random;
/// A candidate next byte paired with the probability of choosing it.
pub const DataPoint = struct {
    /// The candidate byte.
    char: u8,
    /// Probability attached to `char`.
    prob: f32,

    /// Ordering predicate with the shape expected by `std.mem.sort`: returns
    /// true when `lhs` has a strictly greater probability than `rhs`, which
    /// sorts data points from most to least probable. The context argument
    /// is unused.
    pub fn desc(_: void, lhs: DataPoint, rhs: DataPoint) bool {
        return rhs.prob < lhs.prob;
    }
};
/// Byte-level, first-order Markov chain loaded from a binary transition table.
///
/// The table file contains, for every previous byte 0..255 in order:
///   - one `u8` holding the number of successor records, then
///   - that many records made of one successor byte followed by its
///     probability, stored as the little-endian bit pattern of an `f32`.
pub const MarkovChain = struct {
    /// Allocator that owns every successor slice stored in `map`.
    allocator: std.mem.Allocator,
    /// Maps a previous byte to its list of possible next bytes.
    map: std.AutoHashMap(u8, []DataPoint),

    /// Loads the transition table stored at `path` (relative to the current
    /// working directory).
    ///
    /// Returns any error raised while opening or reading the file
    /// (including `error.EndOfStream` on a truncated table) or while
    /// allocating. On failure nothing is leaked. On success the caller must
    /// call `deinit`.
    pub fn init(path: []const u8, allocator: std.mem.Allocator) !MarkovChain {
        var self = MarkovChain{
            .allocator = allocator,
            .map = std.AutoHashMap(u8, []DataPoint).init(allocator),
        };
        // Releases the map and every slice already stored in it if a later
        // read or allocation fails.
        errdefer self.deinit();

        const file = try std.fs.cwd().openFile(path, .{ .mode = .read_only });
        defer file.close();
        // The table is consumed in 1- and 4-byte pieces; buffer the reads.
        var buffered = std.io.bufferedReader(file.reader());
        const reader = buffered.reader();

        for (0..256) |prev| {
            const count = try reader.readByte();
            const successors = try allocator.alloc(DataPoint, count);
            // Not yet owned by the map, so `deinit` would not free it.
            errdefer allocator.free(successors);
            for (successors) |*successor| {
                const next_byte = try reader.readByte();
                const prob: f32 = @bitCast(try reader.readInt(u32, .little));
                successor.* = .{ .char = next_byte, .prob = prob };
            }
            try self.map.put(@intCast(prev), successors);
        }
        return self;
    }

    /// Frees every successor slice and the map itself.
    pub fn deinit(self: *MarkovChain) void {
        var values = self.map.valueIterator();
        while (values.next()) |successors| {
            self.allocator.free(successors.*);
        }
        self.map.deinit();
    }

    /// Generates `size` bytes by walking the chain, starting from state 0.
    ///
    /// The returned slice is allocated with `allocator` and owned by the
    /// caller. When the current byte has no recorded successor, the walk
    /// restarts from state 0 instead of leaving output bytes uninitialized.
    /// Returns `error.EmptyModel` if state 0 has no successor either.
    pub fn generate(self: *MarkovChain, size: u8, allocator: std.mem.Allocator) ![]u8 {
        const result = try allocator.alloc(u8, size);
        errdefer allocator.free(result);

        var previous: u8 = 0;
        for (result) |*out| {
            var choices = self.successorsOf(previous);
            // Dead end: fall back to the start-state distribution.
            if (choices.len == 0) choices = self.successorsOf(0);
            if (choices.len == 0) return error.EmptyModel;

            const next = pick(choices, rand.float(f32));
            out.* = next;
            previous = next;
        }
        return result;
    }

    /// Returns the successors recorded for `byte`, or an empty slice.
    fn successorsOf(self: *const MarkovChain, byte: u8) []const DataPoint {
        if (self.map.get(byte)) |successors| return successors;
        return &.{};
    }

    /// Picks the first choice whose cumulative probability exceeds `draw`.
    /// `choices` must not be empty. If the probabilities sum to less than
    /// `draw` (for example through float rounding), the last choice is used.
    fn pick(choices: []const DataPoint, draw: f32) u8 {
        var cumulative: f32 = 0;
        for (choices) |choice| {
            cumulative += choice.prob;
            if (draw < cumulative) return choice.char;
        }
        return choices[choices.len - 1].char;
    }
};
// Smoke test: loads "markov.bin" from the current working directory and
// prints 24 generated 8-byte names.
test "basic test" {
    // std.testing.allocator makes the test fail if any allocation is leaked.
    const allocator = std.testing.allocator;

    var markov = try MarkovChain.init("markov.bin", allocator);
    defer markov.deinit();

    for (0..24) |_| {
        const rand_name = try markov.generate(8, allocator);
        defer allocator.free(rand_name);
        std.debug.print("generated : {s}\n", .{rand_name});
    }
}

69
src/main.zig Normal file
View File

@@ -0,0 +1,69 @@
const std = @import("std");
const DataPoint = @import("markov").DataPoint;
/// Builds the binary transition table "markov.bin" from "prenoms.csv".
///
/// For every line of the CSV, the bytes before the first ';' are walked as a
/// chain starting from state 0, counting each (previous byte, next byte)
/// pair. Each row of counts is then turned into probabilities, sorted from
/// most to least probable, and written as: one `u8` record count followed by
/// that many (byte, little-endian `f32` bit pattern) records.
pub fn main() !void {
    const path = "prenoms.csv";
    const file = try std.fs.cwd().openFile(path, .{});
    defer file.close();
    var buf_reader = std.io.bufferedReader(file.reader());
    const in_stream = buf_reader.reader();

    // counts[previous][next] = number of times `next` followed `previous`.
    var counts = std.mem.zeroes([256][256]u32);
    var line_buf: [1024]u8 = undefined;
    while (try in_stream.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        var previous: u8 = 0;
        for (line) |char| {
            if (char == ';') break;
            counts[previous][char] += 1;
            previous = char;
        }
    }

    // Convert each row of counts into probabilities, most probable first.
    var computed: [256][256]DataPoint = undefined;
    for (&computed, &counts) |*row, *row_counts| {
        var total: u64 = 0;
        for (row_counts) |count| total += count;

        for (row, row_counts, 0..) |*point, count, next| {
            // A byte that was never followed by anything gets an all-zero
            // row instead of 0/0 = NaN.
            const prob: f32 = if (total == 0)
                0
            else
                @as(f32, @floatFromInt(count)) / @as(f32, @floatFromInt(total));
            point.* = .{ .char = @intCast(next), .prob = prob };
        }
        std.mem.sort(DataPoint, row, {}, DataPoint.desc);
    }

    const out_file = try std.fs.cwd().createFile("markov.bin", .{ .truncate = true });
    defer out_file.close();
    var buffered = std.io.bufferedWriter(out_file.writer());
    const writer = buffered.writer();

    // The record count is stored in a single byte, so at most 255 records
    // can be written per row; the least probable one is dropped if all 256
    // successors are non-zero.
    const max_records = std.math.maxInt(u8);
    for (&computed) |*row| {
        var nonzero: u8 = 0;
        for (row) |point| {
            if (!(point.prob > 0) or nonzero == max_records) break;
            nonzero += 1;
        }
        try writer.writeByte(nonzero);
        for (row[0..nonzero]) |point| {
            try writer.writeByte(point.char);
            try writer.writeInt(u32, @bitCast(point.prob), .little);
        }
    }
    try buffered.flush();
}