first commit
This commit is contained in:
92
src/lib.zig
Normal file
92
src/lib.zig
Normal file
@@ -0,0 +1,92 @@
|
||||
const std = @import("std");
|
||||
const rand = std.crypto.random;
|
||||
|
||||
/// One weighted successor in a transition table: a byte together with the
/// probability attached to it.
pub const DataPoint = struct {
    /// The successor byte.
    char: u8,
    /// Probability associated with `char`.
    prob: f32,

    /// "Less than" predicate for sorting by descending probability:
    /// returns true when `a` must be ordered before `b`, i.e. when `a` is
    /// strictly more probable. `context` is unused and only exists to match
    /// the comparator signature expected by `std.mem.sort`.
    pub fn desc(context: void, a: DataPoint, b: DataPoint) bool {
        _ = context;
        return b.prob < a.prob;
    }
};
|
||||
|
||||
/// First-order, byte-level Markov chain loaded from a binary transition file.
///
/// For every possible previous byte the chain stores a slice of `DataPoint`
/// (successor byte + probability). State `0` is the state generation starts
/// from.
pub const MarkovChain = struct {
    /// Allocator that owns every slice stored in `map`.
    allocator: std.mem.Allocator,
    /// Previous byte -> list of weighted successor bytes.
    map: std.AutoHashMap(u8, []DataPoint),

    /// Loads a chain from the file at `path` (resolved against the current
    /// working directory).
    ///
    /// File layout: 256 consecutive records, one per previous byte 0..255 in
    /// order. Each record is a `u8` entry count followed by that many
    /// entries; an entry is one successor byte followed by a little-endian
    /// `u32` holding the bit pattern of an `f32` probability.
    ///
    /// Returns any error from opening/reading the file or from allocation.
    /// On error nothing is leaked. On success the caller must call `deinit`.
    pub fn init(path: []const u8, allocator: std.mem.Allocator) !MarkovChain {
        var self = MarkovChain{
            .allocator = allocator,
            .map = std.AutoHashMap(u8, []DataPoint).init(allocator),
        };
        // Releases the map and every slice already stored in it if anything
        // below fails.
        errdefer self.deinit();

        const markovBinFile = try std.fs.cwd().openFile(path, .{ .mode = .read_only });
        defer markovBinFile.close();

        const reader = markovBinFile.reader();

        for (0..256) |prevChar| {
            const cnt = try reader.readInt(u8, .little);
            const nextChars = try self.allocator.alloc(DataPoint, cnt);
            // Until `put` succeeds the slice is not owned by the map, so the
            // outer `errdefer` would not free it.
            errdefer self.allocator.free(nextChars);

            for (nextChars) |*slot| {
                const nextByte = try reader.readByte();
                const prob: f32 = @bitCast(try reader.readInt(u32, .little));
                slot.* = .{ .char = nextByte, .prob = prob };
            }
            try self.map.put(@as(u8, @intCast(prevChar)), nextChars);
        }

        return self;
    }

    /// Frees every successor slice and the map itself.
    pub fn deinit(self: *MarkovChain) void {
        var iter = self.map.valueIterator();
        while (iter.next()) |choices| {
            self.allocator.free(choices.*);
        }
        self.map.deinit();
    }

    /// Generates `size` bytes by walking the chain from state `0`, drawing
    /// each step from `std.crypto.random`.
    ///
    /// When the current state has no successors the walk restarts from
    /// state `0` instead of leaving the output byte uninitialised. If state
    /// `0` has no successors either, `error.EmptyModel` is returned.
    ///
    /// The returned slice is allocated with `allocator`; the caller owns it
    /// and must free it with the same allocator.
    pub fn generate(self: *MarkovChain, size: u8, allocator: std.mem.Allocator) ![]u8 {
        const result = try allocator.alloc(u8, size);
        errdefer allocator.free(result);

        var previous: u8 = 0;
        for (result) |*out| {
            var choices = self.successors(previous);
            if (choices.len == 0) {
                // Dead end: fall back to the start state.
                choices = self.successors(0);
                if (choices.len == 0) return error.EmptyModel;
            }
            const next = pick(choices, rand.float(f32));
            out.* = next;
            previous = next;
        }
        return result;
    }

    /// Returns the successor list for `state`, or an empty slice when the
    /// state is not present in the map.
    fn successors(self: *const MarkovChain, state: u8) []const DataPoint {
        if (self.map.get(state)) |found| return found;
        return &.{};
    }

    /// Selects the first entry whose cumulative probability exceeds `roll`.
    /// `choices` must be non-empty. If the probabilities sum to less than
    /// `roll` (float rounding, or a truncated table) the last entry is used
    /// so that a byte is always produced.
    fn pick(choices: []const DataPoint, roll: f32) u8 {
        std.debug.assert(choices.len > 0);
        var cumul: f32 = 0;
        for (choices) |choice| {
            cumul += choice.prob;
            if (roll < cumul) return choice.char;
        }
        return choices[choices.len - 1].char;
    }
};
|
||||
|
||||
// Smoke test: loads "markov.bin" from the current working directory and
// prints 24 generated 8-byte names. Uses `std.testing.allocator` so that any
// leaked allocation fails the test instead of being silently ignored.
test "basic test" {
    const allocator = std.testing.allocator;

    var markov = try MarkovChain.init("markov.bin", allocator);
    defer markov.deinit();

    for (0..24) |_| {
        const randName = try markov.generate(8, allocator);
        defer allocator.free(randName);

        try std.testing.expectEqual(@as(usize, 8), randName.len);
        std.debug.print("generated : {s}\n", .{randName});
    }
}
|
||||
69
src/main.zig
Normal file
69
src/main.zig
Normal file
@@ -0,0 +1,69 @@
|
||||
const std = @import("std");
|
||||
const DataPoint = @import("markov").DataPoint;
|
||||
|
||||
/// Builds the transition file "markov.bin" from "prenoms.csv" (both resolved
/// against the current working directory).
///
/// For every input line, the bytes up to the first ';' are treated as one
/// sequence and each (previous byte -> byte) transition is counted, starting
/// from previous byte 0. Counts are turned into per-row probabilities,
/// sorted by descending probability, and written as 256 records: a `u8`
/// entry count followed by that many (byte, little-endian `u32` bit pattern
/// of the `f32` probability) entries.
pub fn main() !void {
    const path = "prenoms.csv";
    // markovCnt[previous][next] = number of observed transitions.
    var markovCnt = std.mem.zeroes([256][256]u32);
    var computed: [256][256]DataPoint = undefined;

    var file = try std.fs.cwd().openFile(path, .{});
    defer file.close();

    var buf_reader = std.io.bufferedReader(file.reader());
    const in_stream = buf_reader.reader();

    // stats
    var buf: [1024]u8 = undefined;
    while (try in_stream.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        var previous: u8 = 0;
        for (line) |char| {
            if (char == ';') break;
            markovCnt[previous][char] += 1;
            previous = char;
        }
    }

    // Convert counts to probabilities, row by row.
    for (0..256) |i| {
        var acc: u32 = 0;
        for (markovCnt[i]) |count| {
            acc += count;
        }
        const total: f32 = @floatFromInt(acc);
        for (0..256) |j| {
            // A row without any observation gets probability 0 everywhere
            // instead of 0/0 = NaN.
            const ratio: f32 = if (acc == 0)
                0
            else
                @as(f32, @floatFromInt(markovCnt[i][j])) / total;
            computed[i][j] = DataPoint{ .char = @intCast(j), .prob = ratio };
        }
    }

    // Most probable successors first, so the non-zero entries form a prefix.
    for (&computed) |*row| {
        std.mem.sort(DataPoint, row, {}, DataPoint.desc);
    }

    var outFile = try std.fs.cwd().createFile("markov.bin", .{ .truncate = true });
    defer outFile.close();

    // Buffer the many 1- and 4-byte writes; flushed explicitly at the end.
    var buf_writer = std.io.bufferedWriter(outFile.writer());
    const writer = buf_writer.writer();

    for (0..256) |i| {
        // The on-disk count is a u8, so at most 255 entries can be stored;
        // stopping at the limit avoids overflowing the counter when all 256
        // successors are non-zero (only the least probable one is dropped).
        var cntnonzero: u8 = 0;
        for (0..256) |j| {
            if (!(computed[i][j].prob > 0) or cntnonzero == std.math.maxInt(u8)) break;
            cntnonzero += 1;
        }

        try writer.writeInt(u8, cntnonzero, .little);
        for (computed[i][0..cntnonzero]) |point| {
            try writer.writeByte(point.char);
            try writer.writeInt(u32, @bitCast(point.prob), .little);
        }
    }

    try buf_writer.flush();
}
|
||||
Reference in New Issue
Block a user