-
Notifications
You must be signed in to change notification settings - Fork 0
/
trigrams.exs
64 lines (53 loc) · 1.69 KB
/
trigrams.exs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Reads text from standard input and generates a random stream of words in a similar style.
#
# For example:
# curl -s http://www.gutenberg.org/cache/epub/610/pg610.txt \
# | awk '/These to His Memory/,/Where all of high and holy dies away./' \
# | elixir trigrams.exs --count=12 And there
defmodule Trigrams do
  @moduledoc """
  Generates pseudo-random text from a trigram model of the input.

  The model maps every pair of consecutive words in the source to the list of
  words that followed that pair. Generation repeatedly samples a follower of
  the current pair and then slides the pair forward by one word.
  """

  @doc """
  Trains a trigram model on `source` and returns a lazy stream of output lines.

  `source` is an enumerable of strings (for example, lines read from standard
  input) and `initial` is the `{first, second}` pair of words the generated
  text starts with. All of `source` is consumed when this function is called;
  the returned stream is infinite and yields strings of ten space-separated
  words each.

  Enumerating the result raises `Enum.EmptyError` when `source` contains fewer
  than three words, because there is nothing to sample from.
  """
  @spec transform(Enumerable.t(), {String.t(), String.t()}) :: Enumerable.t()
  def transform(source, initial) do
    source
    |> to_words()
    |> train()
    |> generate(initial)
    |> format()
  end

  # Splits every line into its words, lazily.
  defp to_words(lines) do
    Stream.flat_map(lines, fn line ->
      # Regex.scan/3 returns one list per match; flatten so that words are
      # plain strings and model keys compare equal to the caller's start pair.
      ~r/\w+/
      |> Regex.scan(line, capture: :first)
      |> List.flatten()
    end)
  end

  # Builds the model: %{{first, second} => [every third word seen after them]}.
  # Followers keep their duplicates, so sampling is frequency-weighted.
  defp train(words) do
    words
    |> Stream.chunk_every(3, 1, :discard)
    |> Enum.group_by(
      fn [first, second, _third] -> {first, second} end,
      fn [_first, _second, third] -> third end
    )
  end

  # Infinite stream of words, beginning with the two words of `start`.
  defp generate(model, start) do
    start
    |> Stream.iterate(&sample_next(model, &1))
    |> Stream.map(fn {first, _second} -> first end)
  end

  # Advances the pair by one word, sampled from the followers of `pair`.
  defp sample_next(model, {_first, second} = pair) do
    # A pair never seen in training falls back to the followers of a randomly
    # chosen known pair, so generation never gets stuck.
    followers =
      Map.get_lazy(model, pair, fn -> model |> Map.values() |> Enum.random() end)

    {second, Enum.random(followers)}
  end

  # Groups the word stream into lines of ten words joined by single spaces.
  defp format(words) do
    words
    |> Stream.chunk_every(10, 10, :discard)
    |> Stream.map(&Enum.join(&1, " "))
  end
end
# Command-line entry point:
#
#     elixir trigrams.exs --count=N FIRST SECOND < text
#
# Prints N lines of generated text that start with the words FIRST and SECOND.
#
# No explicit seeding is done: Enum.random/1 draws from Erlang's :rand module,
# which seeds itself on first use in a process. The former :random.seed call
# seeded a different, deprecated generator and did not affect the output.
{count, first, second} =
  case OptionParser.parse(System.argv(), strict: [count: :integer]) do
    # A negative count would make Enum.take/2 take from the end of an infinite
    # stream and never return, so it is rejected here.
    {[count: count], [first, second], []} when count >= 0 ->
      {count, first, second}

    _invalid ->
      IO.puts(:stderr, "usage: elixir trigrams.exs --count=N FIRST SECOND < text")
      System.halt(1)
  end

:stdio
|> IO.stream(:line)
|> Trigrams.transform({first, second})
|> Enum.take(count)
|> Enum.each(&IO.puts/1)