diff --git a/entropy/entropyCalculator.go b/entropy/entropyCalculator.go new file mode 100644 index 0000000..3379843 --- /dev/null +++ b/entropy/entropyCalculator.go @@ -0,0 +1,166 @@ +package entropy + +import ( + "github.com/nbutton23/zxcvbn-go/adjacency" + "github.com/nbutton23/zxcvbn-go/match" + "github.com/nbutton23/zxcvbn-go/utils/math" + "math" + "regexp" + "unicode" +) + +const ( + START_UPPER string = `^[A-Z][^A-Z]+$` + END_UPPER string = `^[^A-Z]+[A-Z]$'` + ALL_UPPER string = `^[A-Z]+$` +) + +var ( + KEYPAD_STARTING_POSITIONS = len(adjacency.AdjacencyGph["keypad"].Graph) + KEYPAD_AVG_DEGREE = adjacency.AdjacencyGph["keypad"].CalculateAvgDegree() +) + +func DictionaryEntropy(match match.Match, rank float64) float64 { + baseEntropy := math.Log2(rank) + upperCaseEntropy := extraUpperCaseEntropy(match) + //TODO: L33t + return baseEntropy + upperCaseEntropy +} + +func extraUpperCaseEntropy(match match.Match) float64 { + word := match.Token + + allLower := true + + for _, char := range word { + if unicode.IsUpper(char) { + allLower = false + break + } + } + if allLower { + return float64(0) + } + + //a capitalized word is the most common capitalization scheme, + //so it only doubles the search space (uncapitalized + capitalized): 1 extra bit of entropy. + //allcaps and end-capitalized are common enough too, underestimate as 1 extra bit to be safe. + + for _, regex := range []string{START_UPPER, END_UPPER, ALL_UPPER} { + matcher := regexp.MustCompile(regex) + + if matcher.MatchString(word) { + return float64(1) + } + } + //Otherwise calculate the number of ways to capitalize U+L uppercase+lowercase letters with U uppercase letters or + //less. Or, if there's more uppercase than lower (for e.g. PASSwORD), the number of ways to lowercase U+L letters + //with L lowercase letters or less. + + countUpper, countLower := float64(0), float64(0) + for _, char := range word { + if unicode.IsUpper(char) { + countUpper++ + } else if unicode.IsLower(char) { + countLower++ + } + } + totalLenght := countLower + countUpper + var possibililities float64 + + for i := float64(0); i <= math.Min(countUpper, countLower); i++ { + possibililities += float64(zxcvbn_math.NChoseK(totalLenght, i)) + } + + if possibililities < 1 { + return float64(1) + } + + return float64(math.Log2(possibililities)) +} + +func SpatialEntropy(match match.Match, turns int, shiftCount int) float64 { + var s, d float64 + if match.DictionaryName == "qwerty" || match.DictionaryName == "dvorak" { + s = float64(len(adjacency.BuildQwerty().Graph)) + d = adjacency.BuildKeypad().CalculateAvgDegree() // Should that be avg degree of the graph? + } else { + s = float64(KEYPAD_STARTING_POSITIONS) // Can this be declared and calc here? + d = KEYPAD_AVG_DEGREE // can this be delcared and calc here? + } + + possibilities := float64(0) + + length := float64(len(match.Token)) + + //TODO: Should this be <= or just < ? + //Estimate the number of possible patterns w/ length L or less with t turns or less + for i := float64(2); i <= length+1; i++ { + possibleTurns := math.Min(float64(turns), i-1) + for j := float64(1); j <= possibleTurns+1; j++ { + x := zxcvbn_math.NChoseK(i-1, j-1) * s * math.Pow(d, j) + possibilities += x + } + } + + entropy := math.Log2(possibilities) + //add extra entropu for shifted keys. ( % instead of 5 A instead of a) + //Math is similar to extra entropy for uppercase letters in dictionary matches. + + if S := float64(shiftCount); S > float64(0) { + possibilities = float64(0) + U := length - S + + for i := float64(0); i < math.Min(S, U)+1; i++ { + possibilities += zxcvbn_math.NChoseK(S+U, i) + } + + entropy += math.Log2(possibilities) + } + + return entropy +} + +func RepeatEntropy(match match.Match) float64 { + cardinality := CalcBruteForceCardinality(match.Token) + entropy := math.Log2(cardinality * float64(len(match.Token))) + + return entropy +} + +func CalcBruteForceCardinality(password string) float64 { + lower, upper, digits, symbols := float64(0), float64(0), float64(0), float64(0) + + for _, char := range password { + if unicode.IsLower(char) { + lower = float64(26) + } else if unicode.IsDigit(char) { + digits = float64(10) + } else if unicode.IsUpper(char) { + upper = float64(26) + } else { + symbols = float64(33) + } + } + + cardinality := lower + upper + digits + symbols + return cardinality +} + +func SequenceEntropy(match match.Match, dictionaryLength int, ascending bool) float64 { + firstChar := match.Token[0] + baseEntropy := float64(0) + if string(firstChar) == "a" || string(firstChar) == "1" { + baseEntropy = float64(0) + } else { + baseEntropy = math.Log2(float64(dictionaryLength)) + if unicode.IsUpper(rune(firstChar)) { + baseEntropy++ + } + } + + if !ascending { + baseEntropy++ + } + return baseEntropy + math.Log2(float64(len(match.Token))) +} diff --git a/entropy/entropyCalculator_test.go b/entropy/entropyCalculator_test.go new file mode 100644 index 0000000..35130fe --- /dev/null +++ b/entropy/entropyCalculator_test.go @@ -0,0 +1,121 @@ +package entropy + +import ( + "github.com/nbutton23/zxcvbn-go/Godeps/_workspace/src/github.com/stretchr/testify/assert" + "github.com/nbutton23/zxcvbn-go/match" + "testing" +) + +/** +These test are more for make sure things don't change than check that the calculation is correct. +I need to hand calc a few of these for that +*/ + +func TestDictionaryEntropyCalculation(t *testing.T) { + match := match.Match{ + Pattern: "dictionary", + I: 0, + J: 4, + Token: "first", + } + + entropy := DictionaryEntropy(match, float64(20)) + //20 4.321928094887363 + + assert.Equal(t, 4.321928094887363, entropy) +} + +func TestSpatialEntropyCalculation(t *testing.T) { + matchPlain := match.Match{ + Pattern: "spatial", + I: 0, + J: 5, + Token: "asdfgh", + } + + entropy := SpatialEntropy(matchPlain, 0, 0) + + assert.Equal(t, 5.832890014164741, entropy) + + matchShift := match.Match{ + Pattern: "spatial", + I: 0, + J: 5, + Token: "asdFgh", + } + + entropyShift := SpatialEntropy(matchShift, 0, 1) + + assert.Equal(t, 8.640244936222345, entropyShift) + + matchTurn := match.Match{ + Pattern: "spatial", + I: 0, + J: 5, + Token: "asdcxz", + } + + entropyTurn := SpatialEntropy(matchTurn, 2, 0) + + assert.Equal(t, 8.307061631635431, entropyTurn) +} + +func TestRepeatMatchEntropyCalculation(t *testing.T) { + matchRepeat := match.Match{ + Pattern: "repeat", + I: 0, + J: 4, + Token: "aaaaa", + } + + entropy := RepeatEntropy(matchRepeat) + //20 4.321928094887363 + + assert.Equal(t, 7.022367813028454, entropy) +} + +func TestSequenceCalculation(t *testing.T) { + matchLower := match.Match{ + Pattern: "sequence", + I: 0, + J: 4, + Token: "jklmn", + } + + entropy := SequenceEntropy(matchLower, len("abcdefghijklmnopqrstuvwxyz"), true) + + assert.Equal(t, 7.022367813028454, entropy) + + matchUpper := match.Match{ + Pattern: "sequence", + I: 0, + J: 4, + Token: "JKLMN", + } + + entropy = SequenceEntropy(matchUpper, len("abcdefghijklmnopqrstuvwxyz"), true) + + assert.Equal(t, 8.022367813028454, entropy) + + matchUpperDec := match.Match{ + Pattern: "sequence", + I: 0, + J: 4, + Token: "JKLMN", + } + + entropy = SequenceEntropy(matchUpperDec, len("abcdefghijklmnopqrstuvwxyz"), false) + + assert.Equal(t, 9.022367813028454, entropy) + + matchDigit := match.Match{ + Pattern: "sequence", + I: 0, + J: 4, + Token: "34567", + } + + entropy = SequenceEntropy(matchDigit, 10, true) + + assert.Equal(t, 5.643856189774724, entropy) +} diff --git a/match/match.go b/match/match.go index 2eed131..b79129f 100644 --- a/match/match.go +++ b/match/match.go @@ -19,18 +19,11 @@ func (s Matches) Less(i, j int) bool { } type Match struct { - Pattern string - I, J int - Token string - MatchedWord string - Rank float64 - DictionaryName string - DictionaryLength int - Ascending bool - Turns int - ShiftedCount int - Entropy float64 - RepeatedChar string + Pattern string + I, J int + Token string + DictionaryName string + Entropy float64 } type DateMatch struct { diff --git a/matching/matching.go b/matching/matching.go index 525b2ee..9e19296 100644 --- a/matching/matching.go +++ b/matching/matching.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" // "github.com/deckarep/golang-set" + "github.com/nbutton23/zxcvbn-go/entropy" ) var ( @@ -105,13 +106,15 @@ func dictionaryMatch(password string, dictionaryName string, rankedDict map[stri for j := i; j < length; j++ { word := pwLower[i : j+1] if val, ok := rankedDict[word]; ok { - results = append(results, match.Match{Pattern: "dictionary", + matchDic := match.Match{Pattern: "dictionary", DictionaryName: dictionaryName, I: i, J: j, Token: password[i : j+1], - MatchedWord: word, - Rank: float64(val)}) + } + matchDic.Entropy = entropy.DictionaryEntropy(matchDic, float64(val)) + + results = append(results, matchDic) } } } @@ -244,8 +247,6 @@ func buildDateMatchCandidateTwo(day, month byte, year string, i, j int) match.Da return match.DateMatch{Day: intDay, Month: intMonth, Year: intYear, I: i, J: j} } -//TODO: This is not working. -//It appears that the Adjacency graph data is incorrect. Need to get a new copy from python-zxcvbn. func SpatialMatch(password string) (matches []match.Match) { for _, graph := range ADJACENCY_GRAPHS { if graph.Graph != nil { @@ -304,7 +305,9 @@ func spatialMatchHelper(password string, graph adjacency.AdjacencyGraph) (matche //otherwise push the pattern discovered so far, if any... //don't consider length 1 or 2 chains. if j-i > 2 { - matches = append(matches, match.Match{Pattern: "spatial", I: i, J: j - 1, Token: password[i:j], DictionaryName: graph.Name, Turns: turns, ShiftedCount: shiftedCount}) + matchSpc := match.Match{Pattern: "spatial", I: i, J: j - 1, Token: password[i:j], DictionaryName: graph.Name} + matchSpc.Entropy = entropy.SpatialEntropy(matchSpc, turns, shiftedCount) + matches = append(matches, matchSpc) } //. . . and then start a new search from the rest of the password i = j @@ -366,12 +369,14 @@ func RepeatMatch(password string) []match.Match { } else if currentStreak > 2 { iPos := i - currentStreak jPos := i - 1 - matches = append(matches, match.Match{ - Pattern: "repeat", - I: iPos, - J: jPos, - Token: password[iPos : jPos+1], - RepeatedChar: prev}) + matchRepeat := match.Match{ + Pattern: "repeat", + I: iPos, + J: jPos, + Token: password[iPos : jPos+1], + DictionaryName: prev} + matchRepeat.Entropy = entropy.RepeatEntropy(matchRepeat) + matches = append(matches, matchRepeat) currentStreak = 1 } else { currentStreak = 1 @@ -383,12 +388,14 @@ func RepeatMatch(password string) []match.Match { if currentStreak > 2 { iPos := i - currentStreak + 1 jPos := i - matches = append(matches, match.Match{ - Pattern: "repeat", - I: iPos, - J: jPos, - Token: password[iPos : jPos+1], - RepeatedChar: prev}) + matchRepeat := match.Match{ + Pattern: "repeat", + I: iPos, + J: jPos, + Token: password[iPos : jPos+1], + DictionaryName: prev} + matchRepeat.Entropy = entropy.RepeatEntropy(matchRepeat) + matches = append(matches, matchRepeat) } return matches } @@ -431,13 +438,16 @@ func SequenceMatch(password string) []match.Match { if j == len(password) || curN-prevN != seqDirection { if j-i > 2 { - matches = append(matches, match.Match{Pattern: "sequence", - I: i, - J: j - 1, - Token: password[i:j], - DictionaryName: seqName, - DictionaryLength: len(seq), - Ascending: (seqDirection == 1)}) + matchSequence := match.Match{ + Pattern: "sequence", + I: i, + J: j - 1, + Token: password[i:j], + DictionaryName: seqName, + } + + matchSequence.Entropy = entropy.SequenceEntropy(matchSequence, len(seq), (seqDirection == 1)) + matches = append(matches, matchSequence) } break } else { diff --git a/matching/matching_test.go b/matching/matching_test.go index b0bbcf4..f303d00 100644 --- a/matching/matching_test.go +++ b/matching/matching_test.go @@ -40,14 +40,17 @@ func TestRepeatMatch(t *testing.T) { assert.Len(t, matches, 2, "Lenght should be 2") for _, match := range matches { - if strings.ToLower(match.RepeatedChar) == "b" { + if strings.ToLower(match.DictionaryName) == "b" { assert.Equal(t, 3, match.I) assert.Equal(t, 6, match.J) assert.Equal(t, "bBbB", match.Token) + assert.NotZero(t, match.Entropy, "Entropy should be set") } else { assert.Equal(t, 0, match.I) assert.Equal(t, 2, match.J) assert.Equal(t, "aaa", match.Token) + assert.NotZero(t, match.Entropy, "Entropy should be set") + } } } @@ -63,14 +66,17 @@ func TestSequenceMatch(t *testing.T) { assert.Equal(t, 0, match.I) assert.Equal(t, 3, match.J) assert.Equal(t, "abcd", match.Token) + assert.NotZero(t, match.Entropy, "Entropy should be set") } else if match.DictionaryName == "upper" { assert.Equal(t, 10, match.I) assert.Equal(t, 14, match.J) assert.Equal(t, "LMNOP", match.Token) + assert.NotZero(t, match.Entropy, "Entropy should be set") } else if match.DictionaryName == "digits" { assert.Equal(t, 21, match.I) assert.Equal(t, 24, match.J) assert.Equal(t, "1234", match.Token) + assert.NotZero(t, match.Entropy, "Entropy should be set") } else { assert.True(t, false, "Unknow dictionary") } @@ -80,15 +86,18 @@ func TestSequenceMatch(t *testing.T) { func TestSpatialMatchQwerty(t *testing.T) { matches := SpatialMatch("qwerty") assert.Len(t, matches, 1, "Lenght should be 1") + assert.NotZero(t, matches[0].Entropy, "Entropy should be set") matches = SpatialMatch("asdf") assert.Len(t, matches, 1, "Lenght should be 1") + assert.NotZero(t, matches[0].Entropy, "Entropy should be set") } func TestSpatialMatchDvorak(t *testing.T) { matches := SpatialMatch("aoeuidhtns") assert.Len(t, matches, 1, "Lenght should be 1") + assert.NotZero(t, matches[0].Entropy, "Entropy should be set") } @@ -100,6 +109,10 @@ func TestDictionaryMatch(t *testing.T) { } assert.Len(t, matches, 4, "Lenght should be 4") + for _, match := range matches { + assert.NotZero(t, match.Entropy, "Entropy should be set") + + } } diff --git a/scoring/scoring.go b/scoring/scoring.go index 0d8d278..83daf60 100644 --- a/scoring/scoring.go +++ b/scoring/scoring.go @@ -2,14 +2,11 @@ package scoring import ( "fmt" - "github.com/nbutton23/zxcvbn-go/adjacency" + "github.com/nbutton23/zxcvbn-go/entropy" "github.com/nbutton23/zxcvbn-go/match" - "github.com/nbutton23/zxcvbn-go/matching" "github.com/nbutton23/zxcvbn-go/utils/math" "math" - "regexp" "sort" - "unicode" ) const ( @@ -43,7 +40,7 @@ Returns minimum entropy minimum entropy. O(nm) dp alg for length-n password with m candidate matches. */ func MinimumEntropyMatchSequence(password string, matches []match.Match) MinEntropyMatch { - bruteforceCardinality := float64(calcBruteforceCardinality(password)) + bruteforceCardinality := float64(entropy.CalcBruteForceCardinality(password)) upToK := make([]float64, len(password)) backPointers := make([]match.Match, len(password)) @@ -58,7 +55,7 @@ func MinimumEntropyMatchSequence(password string, matches []match.Match) MinEntr i, j := match.I, match.J // see if best entropy up to i-1 + entropy of match is less that current min at j upTo := get(upToK, i-1) - calculatedEntropy := calcEntropy(match) + calculatedEntropy := match.Entropy match.Entropy = calculatedEntropy candidateEntropy := upTo + calculatedEntropy @@ -87,7 +84,7 @@ func MinimumEntropyMatchSequence(password string, matches []match.Match) MinEntr } sort.Sort(match.Matches(matchSequence)) - makeBruteForecMatch := func(i, j int) match.Match { + makeBruteForceMatch := func(i, j int) match.Match { return match.Match{Pattern: "bruteforce", I: i, J: j, @@ -101,14 +98,14 @@ func MinimumEntropyMatchSequence(password string, matches []match.Match) MinEntr for _, match := range matchSequence { i, j := match.I, match.J if i-k > 0 { - matchSequenceCopy = append(matchSequenceCopy, makeBruteForecMatch(k, i-1)) + matchSequenceCopy = append(matchSequenceCopy, makeBruteForceMatch(k, i-1)) } k = j + 1 matchSequenceCopy = append(matchSequenceCopy, match) } if k < len(password) { - matchSequenceCopy = append(matchSequenceCopy, makeBruteForecMatch(k, len(password)-1)) + matchSequenceCopy = append(matchSequenceCopy, makeBruteForceMatch(k, len(password)-1)) } var minEntropy float64 if len(password) == 0 { @@ -133,170 +130,6 @@ func get(a []float64, i int) float64 { return a[i] } -func calcBruteforceCardinality(password string) float64 { - lower, upper, digits, symbols := float64(0), float64(0), float64(0), float64(0) - - for _, char := range password { - if unicode.IsLower(char) { - lower = float64(26) - } else if unicode.IsDigit(char) { - digits = float64(10) - } else if unicode.IsUpper(char) { - upper = float64(26) - } else { - symbols = float64(33) - } - } - - cardinality := lower + upper + digits + symbols - return cardinality -} - -func calcEntropy(match match.Match) float64 { - if match.Entropy > float64(0) { - return match.Entropy - } - - var entropy float64 - if match.Pattern == "dictionary" { - entropy = dictionaryEntropy(match) - } else if match.Pattern == "spatial" { - entropy = spatialEntropy(match) - } else if match.Pattern == "repeat" { - entropy = repeatEntropy(match) - } else if match.Pattern == "sequence" { - entropy = sequenceEntropy(match) - } - - match.Entropy = entropy - //TODO finish implement this. . . this looks to be the meat and potatoes of the calculation - return match.Entropy -} - -func dictionaryEntropy(match match.Match) float64 { - baseEntropy := math.Log2(match.Rank) - upperCaseEntropy := extraUpperCaseEntropy(match) - //TODO: L33t - return baseEntropy + upperCaseEntropy -} - -func spatialEntropy(match match.Match) float64 { - var s, d float64 - if match.DictionaryName == "qwerty" || match.DictionaryName == "dvorak" { - s = float64(len(adjacency.BuildQwerty().Graph)) - d = adjacency.BuildKeypad().CalculateAvgDegree() - } else { - s = float64(matching.KEYPAD_STARTING_POSITIONS) - d = matching.KEYPAD_AVG_DEGREE - } - - possibilities := float64(0) - - length := float64(len(match.Token)) - t := match.Turns - - //TODO: Should this be <= or just < ? - //Estimate the number of possible patterns w/ length L or less with t turns or less - for i := float64(2); i <= length+1; i++ { - possibleTurns := math.Min(float64(t), i-1) - for j := float64(1); j <= possibleTurns+1; j++ { - x := zxcvbn_math.NChoseK(i-1, j-1) * s * math.Pow(d, j) - possibilities += x - } - } - - entropy := math.Log2(possibilities) - //add extra entropu for shifted keys. ( % instead of 5 A instead of a) - //Math is similar to extra entropy for uppercase letters in dictionary matches. - - if S := float64(match.ShiftedCount); S > float64(0) { - possibilities = float64(0) - U := length - S - - for i := float64(0); i < math.Min(S, U)+1; i++ { - possibilities += zxcvbn_math.NChoseK(S+U, i) - } - - entropy += math.Log2(possibilities) - } - - return entropy -} -func sequenceEntropy(match match.Match) float64 { - firstChar := match.Token[0] - baseEntropy := float64(0) - if string(firstChar) == "a" || string(firstChar) == "1" { - baseEntropy = float64(0) - } else { - baseEntropy = math.Log2(float64(match.DictionaryLength)) - if unicode.IsUpper(rune(firstChar)) { - baseEntropy++ - } - } - - if !match.Ascending { - baseEntropy++ - } - return baseEntropy + math.Log2(float64(len(match.Token))) -} -func extraUpperCaseEntropy(match match.Match) float64 { - word := match.Token - - allLower := true - - for _, char := range word { - if unicode.IsUpper(char) { - allLower = false - break - } - } - if allLower { - return float64(0) - } - - //a capitalized word is the most common capitalization scheme, - //so it only doubles the search space (uncapitalized + capitalized): 1 extra bit of entropy. - //allcaps and end-capitalized are common enough too, underestimate as 1 extra bit to be safe. - - for _, regex := range []string{START_UPPER, END_UPPER, ALL_UPPER} { - matcher := regexp.MustCompile(regex) - - if matcher.MatchString(word) { - return float64(1) - } - } - //Otherwise calculate the number of ways to capitalize U+L uppercase+lowercase letters with U uppercase letters or - //less. Or, if there's more uppercase than lower (for e.g. PASSwORD), the number of ways to lowercase U+L letters - //with L lowercase letters or less. - - countUpper, countLower := float64(0), float64(0) - for _, char := range word { - if unicode.IsUpper(char) { - countUpper++ - } else if unicode.IsLower(char) { - countLower++ - } - } - totalLenght := countLower + countUpper - var possibililities float64 - - for i := float64(0); i <= math.Min(countUpper, countLower); i++ { - possibililities += float64(zxcvbn_math.NChoseK(totalLenght, i)) - } - - if possibililities < 1 { - return float64(1) - } - - return float64(math.Log2(possibililities)) -} - -func repeatEntropy(match match.Match) float64 { - cardinality := calcBruteforceCardinality(match.Token) - entropy := math.Log2(cardinality * float64(len(match.Token))) - - return entropy -} func entropyToCrackTime(entropy float64) float64 { crackTime := (0.5 * math.Pow(float64(2), entropy)) * SECONDS_PER_GUESS