first commit
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
zig-out
|
||||
.zig-cache
|
||||
@@ -0,0 +1,116 @@
|
||||
const std = @import("std");
|
||||
|
||||
// Although this function looks imperative, note that its job is to
|
||||
// declaratively construct a build graph that will be executed by an external
|
||||
// runner.
|
||||
/// Declares the build graph for the package: a static `markov` library, the
/// `markov_prenoms` executable that imports it, plus `run` and `test` steps.
/// Nothing is compiled here; the external build runner executes the graph.
pub fn build(b: *std.Build) void {
    // Target and optimization mode are taken from the `zig build` command
    // line. No defaults are overridden, so any target is allowed (native by
    // default) and the user picks Debug/ReleaseSafe/ReleaseFast/ReleaseSmall.
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // One module per entry point: the library root and the executable root.
    const markov_module = b.createModule(.{
        .root_source_file = b.path("src/lib.zig"),
        .target = target,
        .optimize = optimize,
    });
    const app_module = b.createModule(.{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });

    // Lets the executable's sources write `@import("markov")`.
    app_module.addImport("markov", markov_module);

    // Static library artifact, installed by the default "install" step.
    const static_lib = b.addLibrary(.{
        .linkage = .static,
        .name = "markov",
        .root_module = markov_module,
    });
    b.installArtifact(static_lib);

    // Executable artifact, also installed by the default "install" step.
    const app = b.addExecutable(.{
        .name = "markov_prenoms",
        .root_module = app_module,
    });
    b.installArtifact(app);

    // `zig build run -- arg1 arg2` runs the executable after installation and
    // forwards any extra command-line arguments to it.
    const run_app = b.addRunArtifact(app);
    run_app.step.dependOn(b.getInstallStep());
    if (b.args) |extra_args| {
        run_app.addArgs(extra_args);
    }
    b.step("run", "Run the app").dependOn(&run_app.step);

    // `zig build test` builds and runs the unit tests of both modules.
    const test_step = b.step("test", "Run unit tests");
    for ([_]*std.Build.Module{ markov_module, app_module }) |module| {
        const unit_tests = b.addTest(.{
            .root_module = module,
        });
        test_step.dependOn(&b.addRunArtifact(unit_tests).step);
    }
}
|
||||
@@ -0,0 +1,86 @@
|
||||
.{
    // Default key that dependents get in their `dependencies` table when they
    // add this package (e.g. with `zig fetch --save <url>`). "zig" is left out
    // of the name because the Zig package namespace already implies it.
    .name = .markov,

    // Semantic version (https://semver.org/) of this package.
    .version = "0.1.0",

    // Generated once by the Zig toolchain; together with `name` it forms the
    // globally unique package identity and must never change afterwards.
    // A fork of a still-maintained upstream should regenerate it (delete the
    // field and run `zig build`). Keep the trailing comment intact so that
    // any modification stands out in code review.
    .fingerprint = 0x7f4d0c6d23b24010, // Changing this has security and trust implications.

    // Earliest Zig version this package considers supported.
    .minimum_zig_version = "0.14.1",

    // No dependencies. Entries are added with `zig fetch --save <url>` and
    // must provide either `url` + `hash` or a `path` relative to the build
    // root; `.lazy = true` defers fetching until the dependency is used.
    .dependencies = .{},

    // Files and directories (relative to the build root, recursive for
    // directories) that are part of the package and of its computed hash.
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "src",
    },
}
|
||||
BIN
Binary file not shown.
+11627
File diff suppressed because it is too large
Load Diff
+92
@@ -0,0 +1,92 @@
|
||||
const std = @import("std");
|
||||
const rand = std.crypto.random;
|
||||
|
||||
/// One candidate transition: a following byte and the probability of it.
pub const DataPoint = struct {
    /// Byte that may follow the current state.
    char: u8,
    /// Probability attached to `char`.
    prob: f32,

    /// "Less than" callback for `std.mem.sort` that orders data points by
    /// descending probability; the sort context is unused.
    pub fn desc(_: void, a: DataPoint, b: DataPoint) bool {
        return b.prob < a.prob;
    }
};
|
||||
|
||||
/// First-order Markov chain over bytes, loaded from a binary model file.
///
/// Model layout, as consumed by `init`: for each of the 256 possible previous
/// bytes, in ascending order, one `u8` entry count followed by that many
/// records of (`u8` next byte, little-endian `u32` holding the bit pattern of
/// an `f32` probability).
pub const MarkovChain = struct {
    /// Allocator that owns every slice stored in `map`.
    allocator: std.mem.Allocator,
    /// Previous byte -> candidate next bytes with their probabilities.
    map: std.AutoHashMap(u8, []DataPoint),

    /// Loads the model stored at `path` (resolved against the current working
    /// directory). On success the caller must call `deinit`. On failure
    /// (open/read error, truncated file, out of memory) everything allocated
    /// so far is released before the error is returned.
    pub fn init(path: []const u8, allocator: std.mem.Allocator) !MarkovChain {
        var self = MarkovChain{
            .allocator = allocator,
            .map = std.AutoHashMap(u8, []DataPoint).init(allocator),
        };
        // Frees the slices already stored in the map, and the map itself, if
        // anything below fails.
        errdefer self.deinit();

        const model_file = try std.fs.cwd().openFile(path, .{ .mode = .read_only });
        defer model_file.close();

        // The format is read one small field at a time; buffer it so that
        // each field does not turn into its own read on the file.
        var buffered = std.io.bufferedReader(model_file.reader());
        const reader = buffered.reader();

        for (0..256) |prev_char| {
            const cnt = try reader.readInt(u8, .little);
            const next_chars = try allocator.alloc(DataPoint, cnt);
            // Not yet owned by the map: release it ourselves on failure.
            errdefer allocator.free(next_chars);

            for (next_chars) |*entry| {
                const next_byte = try reader.readByte();
                const prob: f32 = @bitCast(try reader.readInt(u32, .little));
                entry.* = DataPoint{
                    .char = next_byte,
                    .prob = prob,
                };
            }
            try self.map.put(@as(u8, @intCast(prev_char)), next_chars);
        }

        return self;
    }

    /// Releases every transition slice and the map itself.
    pub fn deinit(self: *MarkovChain) void {
        var iter = self.map.iterator();
        while (iter.next()) |entry| {
            self.allocator.free(entry.value_ptr.*);
        }
        self.map.deinit();
    }

    /// Generates up to `size` bytes by walking the chain from start state 0,
    /// drawing each step from `rand`.
    ///
    /// The result is shorter than `size` only when the walk reaches a state
    /// that has no recorded successor; it never contains uninitialized bytes.
    /// The caller owns the returned slice and frees it with `allocator`.
    pub fn generate(self: *MarkovChain, size: u8, allocator: std.mem.Allocator) ![]u8 {
        const result = try allocator.alloc(u8, size);
        errdefer allocator.free(result);

        var previous: u8 = 0;
        var len: usize = 0;
        while (len < size) : (len += 1) {
            const choices = self.map.get(previous) orelse break;
            if (choices.len == 0) break; // dead end: nothing can follow

            const next = pickNext(choices, rand.float(f32));
            result[len] = next;
            previous = next;
        }

        if (len == size) return result;
        // Dead end reached early: hand back only the bytes actually written.
        return allocator.realloc(result, len);
    }

    /// Selects the entry whose cumulative probability interval contains
    /// `roll`. `choices` must not be empty. When accumulated rounding leaves
    /// the total at or below `roll`, the last entry is returned so a byte is
    /// always produced.
    fn pickNext(choices: []const DataPoint, roll: f32) u8 {
        var cumul: f32 = 0;
        for (choices) |choice| {
            cumul += choice.prob;
            if (roll < cumul) return choice.char;
        }
        return choices[choices.len - 1].char;
    }
};
|
||||
|
||||
// Smoke test: loads the generated model and prints a few sample names.
// Requires `markov.bin` in the current working directory; the test is
// reported as skipped when that file has not been generated.
test "basic test" {
    // The testing allocator fails the test on leaks instead of discarding
    // the leak report.
    const allocator = std.testing.allocator;

    var markov = MarkovChain.init("markov.bin", allocator) catch |err| switch (err) {
        error.FileNotFound => return error.SkipZigTest,
        else => |other| return other,
    };
    defer markov.deinit();

    for (0..24) |_| {
        const rand_name = try markov.generate(8, allocator);
        defer allocator.free(rand_name);
        try std.testing.expect(rand_name.len <= 8);
        std.debug.print("generated : {s}\n", .{rand_name});
    }
}
|
||||
@@ -0,0 +1,69 @@
|
||||
const std = @import("std");
|
||||
const DataPoint = @import("markov").DataPoint;
|
||||
|
||||
/// Builds the Markov model: reads `prenoms.csv` from the current working
/// directory, counts byte-to-byte transitions in the first `;`-separated
/// field of every line, normalizes each row to probabilities and writes the
/// result to `markov.bin`.
///
/// Output layout: for each previous byte 0..255, a `u8` entry count followed
/// by that many (`u8` byte, little-endian `u32` bit pattern of the `f32`
/// probability) records, most probable first.
pub fn main() !void {
    const path = "prenoms.csv";

    const file = try std.fs.cwd().openFile(path, .{});
    defer file.close();

    var buf_reader = std.io.bufferedReader(file.reader());
    const in_stream = buf_reader.reader();

    // counts[prev][next] = number of times `next` directly followed `prev`.
    // State 0 stands for "start of the field".
    var counts: [256][256]u32 = undefined;
    for (&counts) |*row| {
        @memset(row, 0);
    }

    // Gather statistics; lines longer than the buffer abort with an error.
    var buf: [1024]u8 = undefined;
    while (try in_stream.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        var previous: u8 = 0;
        for (line) |char| {
            if (char == ';') break; // only the first field is used
            counts[previous][char] += 1;
            previous = char;
        }
    }

    // Normalize every row and order it by descending probability.
    var computed: [256][256]DataPoint = undefined;
    for (0..256) |i| {
        var total: u32 = 0;
        for (counts[i]) |count| {
            total += count;
        }
        for (0..256) |j| {
            // A state that was never observed has no successors; give it an
            // explicit probability of 0 instead of dividing 0 by 0.
            const prob: f32 = if (total == 0)
                0
            else
                @as(f32, @floatFromInt(counts[i][j])) / @as(f32, @floatFromInt(total));
            computed[i][j] = DataPoint{ .char = @intCast(j), .prob = prob };
        }
        std.mem.sort(DataPoint, &computed[i], {}, DataPoint.desc);
    }

    const out_file = try std.fs.cwd().createFile("markov.bin", .{ .truncate = true });
    defer out_file.close();

    var buffered_out = std.io.bufferedWriter(out_file.writer());
    const writer = buffered_out.writer();

    // The entry count is stored in a single byte, so at most 255 entries per
    // row can be written; rows are sorted, so the least probable are dropped.
    const max_entries = std.math.maxInt(u8);
    for (&computed) |*row| {
        var kept: u8 = 0;
        while (kept < max_entries and row[kept].prob > 0) {
            kept += 1;
        }
        try writer.writeInt(u8, kept, .little);
        for (row[0..kept]) |point| {
            try writer.writeByte(point.char);
            try writer.writeInt(u32, @bitCast(point.prob), .little);
        }
    }

    // Buffered data must reach the file before it is closed.
    try buffered_out.flush();
}
|
||||
Reference in New Issue
Block a user