Skip to content

Commit

Permalink
API surface has been drastically simplified, still retaining its functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Aizen committed Oct 6, 2024
1 parent b3041c4 commit f78b6f9
Show file tree
Hide file tree
Showing 3 changed files with 229 additions and 246 deletions.
275 changes: 35 additions & 240 deletions cryptipass.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,113 +15,6 @@ import (
"strings"
)

// Transition is the sampling table for one context of the character model.
//
// Runes and Counts are parallel slices: Counts[i] is the running (cumulative)
// sum of the frequencies of Runes[0..i], so a uniform draw N in [0, Total)
// selects the first index i with N < Counts[i].
type Transition struct {
	// Runes lists every rune observed after the context.
	Runes []rune
	// Counts holds the cumulative frequency up to and including Runes[i].
	Counts []int
	// Total is the sum of all frequencies (equal to the last Counts entry).
	Total int
	// Entropy is the Shannon entropy, in bits, of the successor distribution.
	Entropy float64
}

// distill learns a character-level transition model from tokens.
//
// Every token is lower-cased and split into runes; empty tokens are skipped.
// The returned map is keyed by context:
//
//   - "LENGTHS": the distribution of token lengths in runes, each length
//     stored as a rune value;
//   - "": the distribution of first runes;
//   - a one-rune string: what follows that rune at the start of a token;
//   - a two-rune string: what follows that pair anywhere in a token.
//
// The order of Runes within a Transition follows Go map iteration and is
// therefore unspecified; Counts is always cumulative in that same order.
func distill(tokens []string) map[string]Transition {
	freqs := make(map[string]map[rune]int)
	record := func(ctx string, next rune) {
		bucket, seen := freqs[ctx]
		if !seen {
			bucket = make(map[rune]int)
			freqs[ctx] = bucket
		}
		bucket[next]++
	}

	for _, tok := range tokens {
		letters := []rune(strings.ToLower(tok))
		n := len(letters)
		if n == 0 {
			continue
		}
		record("LENGTHS", rune(n))
		record("", letters[0])
		// The context is the (up to) two runes that precede position pos.
		for pos := 1; pos < n; pos++ {
			from := pos - 2
			if from < 0 {
				from = 0
			}
			record(string(letters[from:pos]), letters[pos])
		}
	}

	model := make(map[string]Transition, len(freqs))
	for ctx, bucket := range freqs {
		total := 0
		for _, count := range bucket {
			total += count
		}

		tr := Transition{
			Runes:  make([]rune, 0, len(bucket)),
			Counts: make([]int, 0, len(bucket)),
			Total:  total,
		}
		running := 0
		for r, count := range bucket {
			p := float64(count) / float64(total)
			tr.Entropy -= math.Log2(p) * p
			running += count
			tr.Counts = append(tr.Counts, running)
			tr.Runes = append(tr.Runes, r)
		}
		model[ctx] = tr
	}
	return model
}

// generator bundles the state read by the generation methods in this file:
// a random source and the transition table built by distill.
type generator struct {
// Rng is the random source; the methods draw integers from it via IntN.
Rng *rand.Rand
// JumpTable points to the context -> Transition map. Lookups use the
// lower-cased trailing characters of the text generated so far, plus the
// special "LENGTHS" key for word lengths.
JumpTable *map[string]Transition
}

// NewInstanceFromList creates a new instance of the cryptipass password generator
// using a custom word list. The word list should consist of tokens (words) that
// will be used to construct pronounceable passphrases.
Expand Down Expand Up @@ -154,7 +47,7 @@ func NewInstanceFromList(tokens []string) *generator {
g := new(generator)
g.Rng = rng
jtbl := distill(tokens)
g.JumpTable = &jtbl
g.jump_table = &jtbl

return g
}
Expand Down Expand Up @@ -255,9 +148,18 @@ func (g *generator) GenFromPattern(pattern string) (string, float64) {
pushnext = true
continue
case 'w', 'W':
head, h_head := g.genword(c)
head, h_head := g.GenNextToken("")
leng, h_leng := g.GenWordLength()
if c == 'W' {
head = strings.ToUpper(head)
}
for len(head) < leng {
nc, nh := g.GenNextToken(strings.ToLower(head))
head += nc
h_head += nh
}
passphrase = passphrase + head
entropy = entropy + h_head
entropy = entropy + h_head + h_leng
case 'd':
d := g.Rng.IntN(10)
H := math.Log2(10.0)
Expand All @@ -270,11 +172,11 @@ func (g *generator) GenFromPattern(pattern string) (string, float64) {
passphrase += string(symbols[d])
entropy += H
case 'c', 'C':
runc, dH := g.PickNext(strings.ToLower(passphrase))
tok, dH := g.GenNextToken(strings.ToLower(passphrase))
if c == 'C' {
runc = strings.ToUpper(runc)
tok = strings.ToUpper(tok)
}
passphrase += string(runc)
passphrase += string(tok)
entropy += dH
default:
passphrase += string(c)
Expand All @@ -284,153 +186,46 @@ func (g *generator) GenFromPattern(pattern string) (string, float64) {
return passphrase, entropy
}

// genword builds one pseudo-word from the transition model.
//
// The first character is drawn with an empty context and a target length is
// drawn with PickLength. Further characters are then appended, each drawn
// with PickNext from the lower-cased word so far, until the word reaches the
// target length.
//
// Parameters:
//   - c: 'W' upper-cases the first character; any other value leaves the
//     word in the case returned by PickNext.
//
// Returns:
//   - the generated word;
//   - the sum of the entropies reported for every draw, including the
//     length draw.
func (g *generator) genword(c rune) (string, float64) {
	word, entropy := g.PickNext("")
	length, lengthEntropy := g.PickLength()
	if c == 'W' {
		word = strings.ToUpper(word)
	}
	// The length model is measured in runes (distill records len([]rune(w))),
	// so the loop must compare rune counts. Comparing len(word), which counts
	// bytes, ends the loop early for multi-byte characters and yields words
	// shorter than the sampled length.
	for len([]rune(word)) < length {
		next, h := g.PickNext(strings.ToLower(word))
		word += next
		entropy += h
	}
	return word, entropy + lengthEntropy
}

type CertifyResult struct {
NominalH float64
Gap float64
StdDev float64
}

// Certify evaluates the entropy and randomness of passphrases generated by a given function.
//
// The Certify function runs a comprehensive statistical analysis on the provided password generator
// function `Gen` by simulating trials of passphrase generation. It computes the average entropy
// and monitors the gap between the expected entropy and the actual entropy based on the frequency
// distribution of passphrases generated.
//
// It returns a CertifyResult struct, which includes:
// - NominalH: The nominal entropy of the passphrases, averaged over all trials.
// - Gap: The difference between the nominal entropy and the actual observed entropy.
// - StdDev: The standard deviation of the nominal entropy across trials, giving a measure of variability.
//
// This function is useful for verifying the strength and unpredictability of passphrases generated by
// custom implementations of password generators.
//
// Parameters:
// - Gen: A function that generates a passphrase and returns it alongside its entropy.
//
// Returns:
//
// - CertifyResult: A struct containing the analysis of the generator's entropy.
//
// This process continues until the gap between nominal and actual entropy is small enough,
// or a sufficient number of trials has been conducted.
func Certify(Gen func() (string, float64)) CertifyResult {
nominal_H := 0.0
nominal_H2 := 0.0
cnt_nom_H := 0.0
for range 1000 {
_, nh := Gen()
nominal_H += nh
nominal_H2 += nh * nh
cnt_nom_H++
}
cnt := make(map[string]int)
n := float64(0)
Q := 64
for {
for range Q {
w, nh := Gen()
nominal_H += nh
nominal_H2 += nh * nh
cnt_nom_H++
cnt[w]++
n++
}
Q += Q / 16
m := float64(len(cnt))
H := 0.0
for _, iC := range cnt {
c := float64(iC)
p := (c / n)
H -= p * math.Log2(p)
}
H += (m - 1) / (2 * n)
nomH := nominal_H / cnt_nom_H
nomH2 := nominal_H2 / cnt_nom_H
stddev := math.Sqrt(max(nomH2-nomH*nomH, 1e-16))
gap := nomH - H
if math.Abs(gap) < 0.05 || math.Log2(n) > 3*nomH {
return CertifyResult{NominalH: nomH, Gap: gap, StdDev: stddev}
}
}

}

// PickNext selects the next rune based on the current seed (context) and
// GenNextToken selects the next token based on the current seed (context) and
// a probabilistic model derived from the transition matrix. It generates a
// rune that is most likely to follow the given string context `seed`,
// token that is most likely to follow the given string context `seed`,
// where `seed` can be up to two characters long.
//
// If the `seed` is too short or does not match any known transitions in the
// matrix, it falls back to using shorter prefixes until a match is found.
// The function retries with successively smaller parts of the `seed`
// until a suitable transition is discovered.
//
// The function returns the selected rune as a string and its entropy value.
// The function returns the selected token as a string and its entropy value.
//
// Parameters:
// - seed: A string representing the current context (up to 2 characters).
//
// Returns:
//
// string: The next rune, represented as a string.
// float64: The entropy value associated with the selected rune.
// string: The next token.
// float64: The entropy value associated with the selected token.
//
// Example:
//
// seed := "th"
// nextRune, entropy := generator.PickNext(seed)
// fmt.Printf("Next rune: %s, Entropy: %.2f\n", nextRune, entropy)
// nextTok, entropy := generator.GenNextToken(seed)
// fmt.Printf("Next token: %s, Entropy: %.2f\n", nextTok, entropy)
//
// Panic:
//
// This function panics if the transition matrix is not initialized or
// if the selection process encounters an unexpected error while choosing
// the next rune.
func (g *generator) PickNext(seed string) (string, float64) {
// the next token.
func (g *generator) GenNextToken(seed string) (string, float64) {
L := min(len(seed), 2)
tok := strings.ToLower(seed[len(seed)-L:])
retry:
if tr, ok := (*g.JumpTable)[tok]; ok {
N := g.Rng.IntN(tr.Total)
for i, v := range tr.Counts {
if tr, ok := (*g.jump_table)[tok]; ok {
N := g.Rng.IntN(tr.total)
for i, v := range tr.counts {
if N < v {
return string(tr.Runes[i]), tr.Entropy
return tr.tokens[i], tr.entropy
}
}
panic("unexpected")
Expand All @@ -439,7 +234,7 @@ retry:
goto retry
}

// PickLength selects a word length based on a pre-computed probability distribution
// GenWordLength selects a word length based on a pre-computed probability distribution
// from the transition matrix. It uses a cryptographically secure random number generator
// to ensure unpredictable outcomes.
//
Expand All @@ -451,20 +246,20 @@ retry:
// Example:
//
// gen := cryptipass.NewInstance()
// length, entropy := gen.PickLength()
// length, entropy := gen.GenWordLength()
// fmt.Printf("Generated word length: %d, Entropy: %.2f\n", length, entropy)
//
// Returns:
//
// int - Word length selected from the transition matrix.
// float64 - Entropy of the selected length based on its likelihood.
func (g *generator) PickLength() (int, float64) {
tr, ok := (*g.JumpTable)["LENGTHS"]
func (g *generator) GenWordLength() (int, float64) {
tr, ok := (*g.jump_table)["LENGTHS"]
if ok {
N := g.Rng.IntN(tr.Total)
for i, v := range tr.Counts {
N := g.Rng.IntN(tr.total)
for i, v := range tr.counts {
if N < v {
return int(tr.Runes[i]), tr.Entropy
return int(tr.tokens[i][0]), tr.entropy
}
}
}
Expand Down
Loading

0 comments on commit f78b6f9

Please sign in to comment.