Pull entropy calculation into its own file. Simplified the Match struct. matching now populates Match.Entropy.

This commit is contained in:
Nathan Button 2016-01-21 15:26:33 -07:00
parent 24b6c156b2
commit 8711c8f158
6 changed files with 347 additions and 211 deletions

View File

@ -0,0 +1,166 @@
package entropy
import (
"github.com/nbutton23/zxcvbn-go/adjacency"
"github.com/nbutton23/zxcvbn-go/match"
"github.com/nbutton23/zxcvbn-go/utils/math"
"math"
"regexp"
"unicode"
)
// Regular expressions describing the common capitalization schemes that
// extraUpperCaseEntropy treats as adding a single bit of entropy.
const (
	// START_UPPER matches a token made of one ASCII uppercase letter followed
	// by one or more characters that are not ASCII uppercase letters.
	START_UPPER string = `^[A-Z][^A-Z]+$`
	// END_UPPER matches a token made of one or more characters that are not
	// ASCII uppercase letters followed by a single ASCII uppercase letter.
	// (A stray quote after the `$` anchor previously made this pattern
	// impossible to match.)
	END_UPPER string = `^[^A-Z]+[A-Z]$`
	// ALL_UPPER matches a token consisting only of ASCII uppercase letters.
	ALL_UPPER string = `^[A-Z]+$`
)
// Properties of the "keypad" adjacency graph, evaluated once when the package is initialized.
// SpatialEntropy uses them for every graph other than "qwerty" and "dvorak".
var (
// KEYPAD_STARTING_POSITIONS is the number of entries in the "keypad" graph.
KEYPAD_STARTING_POSITIONS = len(adjacency.AdjacencyGph["keypad"].Graph)
// KEYPAD_AVG_DEGREE is the value CalculateAvgDegree reports for the "keypad" graph.
KEYPAD_AVG_DEGREE = adjacency.AdjacencyGph["keypad"].CalculateAvgDegree()
)
// DictionaryEntropy returns the entropy, in bits, of a dictionary match:
// log2 of the word's rank plus the extra bits contributed by any uppercase
// letters in match.Token.
func DictionaryEntropy(match match.Match, rank float64) float64 {
	//TODO: L33t
	return math.Log2(rank) + extraUpperCaseEntropy(match)
}
func extraUpperCaseEntropy(match match.Match) float64 {
word := match.Token
allLower := true
for _, char := range word {
if unicode.IsUpper(char) {
allLower = false
break
}
}
if allLower {
return float64(0)
}
//a capitalized word is the most common capitalization scheme,
//so it only doubles the search space (uncapitalized + capitalized): 1 extra bit of entropy.
//allcaps and end-capitalized are common enough too, underestimate as 1 extra bit to be safe.
for _, regex := range []string{START_UPPER, END_UPPER, ALL_UPPER} {
matcher := regexp.MustCompile(regex)
if matcher.MatchString(word) {
return float64(1)
}
}
//Otherwise calculate the number of ways to capitalize U+L uppercase+lowercase letters with U uppercase letters or
//less. Or, if there's more uppercase than lower (for e.g. PASSwORD), the number of ways to lowercase U+L letters
//with L lowercase letters or less.
countUpper, countLower := float64(0), float64(0)
for _, char := range word {
if unicode.IsUpper(char) {
countUpper++
} else if unicode.IsLower(char) {
countLower++
}
}
totalLenght := countLower + countUpper
var possibililities float64
for i := float64(0); i <= math.Min(countUpper, countLower); i++ {
possibililities += float64(zxcvbn_math.NChoseK(totalLenght, i))
}
if possibililities < 1 {
return float64(1)
}
return float64(math.Log2(possibililities))
}
// SpatialEntropy returns the entropy, in bits, of a keyboard-pattern match.
//
// match.DictionaryName selects the graph: "qwerty" and "dvorak" use the
// starting-position count and average degree of the qwerty graph; any other
// name uses the precomputed keypad values. turns is the number of direction
// changes in the pattern and shiftCount the number of shifted characters in
// match.Token.
func SpatialEntropy(match match.Match, turns int, shiftCount int) float64 {
	var s, d float64 // s: starting positions, d: average degree of the graph
	switch match.DictionaryName {
	case "qwerty", "dvorak":
		// Build the graph once and take both values from it. The average
		// degree was previously taken from the keypad graph, which mixed the
		// properties of two different keyboards.
		qwerty := adjacency.BuildQwerty()
		s = float64(len(qwerty.Graph))
		d = qwerty.CalculateAvgDegree()
	default:
		s = float64(KEYPAD_STARTING_POSITIONS)
		d = KEYPAD_AVG_DEGREE
	}

	length := float64(len(match.Token))
	maxTurns := float64(turns)

	//TODO: Should this be <= or just < ?
	//Estimate the number of possible patterns w/ length L or less with t turns or less
	var possibilities float64
	for i := float64(2); i <= length+1; i++ {
		possibleTurns := math.Min(maxTurns, i-1)
		for j := float64(1); j <= possibleTurns+1; j++ {
			possibilities += zxcvbn_math.NChoseK(i-1, j-1) * s * math.Pow(d, j)
		}
	}
	entropy := math.Log2(possibilities)

	//add extra entropy for shifted keys. ( % instead of 5 A instead of a)
	//Math is similar to extra entropy for uppercase letters in dictionary matches.
	shifted := float64(shiftCount)
	if shifted <= 0 {
		return entropy
	}
	unshifted := length - shifted
	var shiftVariants float64
	for i := float64(0); i < math.Min(shifted, unshifted)+1; i++ {
		shiftVariants += zxcvbn_math.NChoseK(shifted+unshifted, i)
	}
	return entropy + math.Log2(shiftVariants)
}
// RepeatEntropy returns the entropy, in bits, of a repeated-character match:
// log2 of the brute-force cardinality of match.Token multiplied by the
// token's length in bytes.
func RepeatEntropy(match match.Match) float64 {
	tokenLen := float64(len(match.Token))
	return math.Log2(CalcBruteForceCardinality(match.Token) * tokenLen)
}
// CalcBruteForceCardinality returns the size of the character space used by
// password: 26 if it contains a lowercase letter, plus 26 for an uppercase
// letter, plus 10 for a digit, plus 33 for any other character. An empty
// password yields 0.
func CalcBruteForceCardinality(password string) float64 {
	var hasLower, hasUpper, hasDigit, hasSymbol bool
	for _, r := range password {
		// Classes are tested in this order; the first one that matches wins.
		switch {
		case unicode.IsLower(r):
			hasLower = true
		case unicode.IsDigit(r):
			hasDigit = true
		case unicode.IsUpper(r):
			hasUpper = true
		default:
			hasSymbol = true
		}
	}

	var cardinality float64
	if hasLower {
		cardinality += 26
	}
	if hasUpper {
		cardinality += 26
	}
	if hasDigit {
		cardinality += 10
	}
	if hasSymbol {
		cardinality += 33
	}
	return cardinality
}
// SequenceEntropy returns the entropy, in bits, of a sequence match such as
// "abcd" or "6543".
//
// A sequence whose first byte is 'a' or '1' gets no base entropy; any other
// start gets log2(dictionaryLength), plus one bit when the first byte is an
// uppercase letter. A descending sequence adds one more bit. log2 of the
// token's byte length is added on top.
//
// match.Token must not be empty: its first byte is read unconditionally.
func SequenceEntropy(match match.Match, dictionaryLength int, ascending bool) float64 {
	first := match.Token[0]

	var bits float64
	if first != 'a' && first != '1' {
		bits = math.Log2(float64(dictionaryLength))
		if unicode.IsUpper(rune(first)) {
			bits++
		}
	}
	if !ascending {
		bits++
	}
	return bits + math.Log2(float64(len(match.Token)))
}

View File

@ -0,0 +1,121 @@
package entropy
import (
"github.com/nbutton23/zxcvbn-go/Godeps/_workspace/src/github.com/stretchr/testify/assert"
"github.com/nbutton23/zxcvbn-go/match"
"testing"
)
/**
These tests mainly guard against unintended changes in the results; they do not verify that the calculations are correct.
A few of the expected values still need to be calculated by hand to confirm correctness.
*/
// TestDictionaryEntropyCalculation pins the entropy of an all-lowercase
// dictionary word with rank 20.
func TestDictionaryEntropyCalculation(t *testing.T) {
	const rank = float64(20)
	word := match.Match{Pattern: "dictionary", I: 0, J: 4, Token: "first"}

	// No uppercase letters, so the expected value is the entropy for rank 20 alone.
	assert.Equal(t, 4.321928094887363, DictionaryEntropy(word, rank))
}
// TestSpatialEntropyCalculation pins the spatial entropy for a plain pattern,
// a pattern with one shifted character, and a pattern with two turns.
func TestSpatialEntropyCalculation(t *testing.T) {
	cases := []struct {
		token      string
		turns      int
		shiftCount int
		want       float64
	}{
		{token: "asdfgh", turns: 0, shiftCount: 0, want: 5.832890014164741},
		{token: "asdFgh", turns: 0, shiftCount: 1, want: 8.640244936222345},
		{token: "asdcxz", turns: 2, shiftCount: 0, want: 8.307061631635431},
	}

	for _, c := range cases {
		m := match.Match{Pattern: "spatial", I: 0, J: 5, Token: c.token}
		assert.Equal(t, c.want, SpatialEntropy(m, c.turns, c.shiftCount), c.token)
	}
}
// TestRepeatMatchEntropyCalculation pins the entropy of a five-character
// repeat of a lowercase letter.
func TestRepeatMatchEntropyCalculation(t *testing.T) {
	repeated := match.Match{Pattern: "repeat", I: 0, J: 4, Token: "aaaaa"}

	assert.Equal(t, 7.022367813028454, RepeatEntropy(repeated))
}
// TestSequenceCalculation pins the sequence entropy for lowercase, uppercase,
// descending-uppercase and digit sequences.
func TestSequenceCalculation(t *testing.T) {
	const alphabet = "abcdefghijklmnopqrstuvwxyz"

	cases := []struct {
		token            string
		dictionaryLength int
		ascending        bool
		want             float64
	}{
		{token: "jklmn", dictionaryLength: len(alphabet), ascending: true, want: 7.022367813028454},
		{token: "JKLMN", dictionaryLength: len(alphabet), ascending: true, want: 8.022367813028454},
		{token: "JKLMN", dictionaryLength: len(alphabet), ascending: false, want: 9.022367813028454},
		{token: "34567", dictionaryLength: 10, ascending: true, want: 5.643856189774724},
	}

	for _, c := range cases {
		m := match.Match{Pattern: "sequence", I: 0, J: 4, Token: c.token}
		assert.Equal(t, c.want, SequenceEntropy(m, c.dictionaryLength, c.ascending), c.token)
	}
}

View File

@ -19,18 +19,11 @@ func (s Matches) Less(i, j int) bool {
}
type Match struct {
Pattern string
I, J int
Token string
MatchedWord string
Rank float64
DictionaryName string
DictionaryLength int
Ascending bool
Turns int
ShiftedCount int
Entropy float64
RepeatedChar string
Pattern string
I, J int
Token string
DictionaryName string
Entropy float64
}
type DateMatch struct {

View File

@ -9,6 +9,7 @@ import (
"strconv"
"strings"
// "github.com/deckarep/golang-set"
"github.com/nbutton23/zxcvbn-go/entropy"
)
var (
@ -105,13 +106,15 @@ func dictionaryMatch(password string, dictionaryName string, rankedDict map[stri
for j := i; j < length; j++ {
word := pwLower[i : j+1]
if val, ok := rankedDict[word]; ok {
results = append(results, match.Match{Pattern: "dictionary",
matchDic := match.Match{Pattern: "dictionary",
DictionaryName: dictionaryName,
I: i,
J: j,
Token: password[i : j+1],
MatchedWord: word,
Rank: float64(val)})
}
matchDic.Entropy = entropy.DictionaryEntropy(matchDic, float64(val))
results = append(results, matchDic)
}
}
}
@ -244,8 +247,6 @@ func buildDateMatchCandidateTwo(day, month byte, year string, i, j int) match.Da
return match.DateMatch{Day: intDay, Month: intMonth, Year: intYear, I: i, J: j}
}
//TODO: This is not working.
//It appears that the Adjacency graph data is incorrect. Need to get a new copy from python-zxcvbn.
func SpatialMatch(password string) (matches []match.Match) {
for _, graph := range ADJACENCY_GRAPHS {
if graph.Graph != nil {
@ -304,7 +305,9 @@ func spatialMatchHelper(password string, graph adjacency.AdjacencyGraph) (matche
//otherwise push the pattern discovered so far, if any...
//don't consider length 1 or 2 chains.
if j-i > 2 {
matches = append(matches, match.Match{Pattern: "spatial", I: i, J: j - 1, Token: password[i:j], DictionaryName: graph.Name, Turns: turns, ShiftedCount: shiftedCount})
matchSpc := match.Match{Pattern: "spatial", I: i, J: j - 1, Token: password[i:j], DictionaryName: graph.Name}
matchSpc.Entropy = entropy.SpatialEntropy(matchSpc, turns, shiftedCount)
matches = append(matches, matchSpc)
}
//. . . and then start a new search from the rest of the password
i = j
@ -366,12 +369,14 @@ func RepeatMatch(password string) []match.Match {
} else if currentStreak > 2 {
iPos := i - currentStreak
jPos := i - 1
matches = append(matches, match.Match{
Pattern: "repeat",
I: iPos,
J: jPos,
Token: password[iPos : jPos+1],
RepeatedChar: prev})
matchRepeat := match.Match{
Pattern: "repeat",
I: iPos,
J: jPos,
Token: password[iPos : jPos+1],
DictionaryName: prev}
matchRepeat.Entropy = entropy.RepeatEntropy(matchRepeat)
matches = append(matches, matchRepeat)
currentStreak = 1
} else {
currentStreak = 1
@ -383,12 +388,14 @@ func RepeatMatch(password string) []match.Match {
if currentStreak > 2 {
iPos := i - currentStreak + 1
jPos := i
matches = append(matches, match.Match{
Pattern: "repeat",
I: iPos,
J: jPos,
Token: password[iPos : jPos+1],
RepeatedChar: prev})
matchRepeat := match.Match{
Pattern: "repeat",
I: iPos,
J: jPos,
Token: password[iPos : jPos+1],
DictionaryName: prev}
matchRepeat.Entropy = entropy.RepeatEntropy(matchRepeat)
matches = append(matches, matchRepeat)
}
return matches
}
@ -431,13 +438,16 @@ func SequenceMatch(password string) []match.Match {
if j == len(password) || curN-prevN != seqDirection {
if j-i > 2 {
matches = append(matches, match.Match{Pattern: "sequence",
I: i,
J: j - 1,
Token: password[i:j],
DictionaryName: seqName,
DictionaryLength: len(seq),
Ascending: (seqDirection == 1)})
matchSequence := match.Match{
Pattern: "sequence",
I: i,
J: j - 1,
Token: password[i:j],
DictionaryName: seqName,
}
matchSequence.Entropy = entropy.SequenceEntropy(matchSequence, len(seq), (seqDirection == 1))
matches = append(matches, matchSequence)
}
break
} else {

View File

@ -40,14 +40,17 @@ func TestRepeatMatch(t *testing.T) {
assert.Len(t, matches, 2, "Lenght should be 2")
for _, match := range matches {
if strings.ToLower(match.RepeatedChar) == "b" {
if strings.ToLower(match.DictionaryName) == "b" {
assert.Equal(t, 3, match.I)
assert.Equal(t, 6, match.J)
assert.Equal(t, "bBbB", match.Token)
assert.NotZero(t, match.Entropy, "Entropy should be set")
} else {
assert.Equal(t, 0, match.I)
assert.Equal(t, 2, match.J)
assert.Equal(t, "aaa", match.Token)
assert.NotZero(t, match.Entropy, "Entropy should be set")
}
}
}
@ -63,14 +66,17 @@ func TestSequenceMatch(t *testing.T) {
assert.Equal(t, 0, match.I)
assert.Equal(t, 3, match.J)
assert.Equal(t, "abcd", match.Token)
assert.NotZero(t, match.Entropy, "Entropy should be set")
} else if match.DictionaryName == "upper" {
assert.Equal(t, 10, match.I)
assert.Equal(t, 14, match.J)
assert.Equal(t, "LMNOP", match.Token)
assert.NotZero(t, match.Entropy, "Entropy should be set")
} else if match.DictionaryName == "digits" {
assert.Equal(t, 21, match.I)
assert.Equal(t, 24, match.J)
assert.Equal(t, "1234", match.Token)
assert.NotZero(t, match.Entropy, "Entropy should be set")
} else {
assert.True(t, false, "Unknow dictionary")
}
@ -80,15 +86,18 @@ func TestSequenceMatch(t *testing.T) {
func TestSpatialMatchQwerty(t *testing.T) {
matches := SpatialMatch("qwerty")
assert.Len(t, matches, 1, "Lenght should be 1")
assert.NotZero(t, matches[0].Entropy, "Entropy should be set")
matches = SpatialMatch("asdf")
assert.Len(t, matches, 1, "Lenght should be 1")
assert.NotZero(t, matches[0].Entropy, "Entropy should be set")
}
func TestSpatialMatchDvorak(t *testing.T) {
matches := SpatialMatch("aoeuidhtns")
assert.Len(t, matches, 1, "Lenght should be 1")
assert.NotZero(t, matches[0].Entropy, "Entropy should be set")
}
@ -100,6 +109,10 @@ func TestDictionaryMatch(t *testing.T) {
}
assert.Len(t, matches, 4, "Lenght should be 4")
for _, match := range matches {
assert.NotZero(t, match.Entropy, "Entropy should be set")
}
}

View File

@ -2,14 +2,11 @@ package scoring
import (
"fmt"
"github.com/nbutton23/zxcvbn-go/adjacency"
"github.com/nbutton23/zxcvbn-go/entropy"
"github.com/nbutton23/zxcvbn-go/match"
"github.com/nbutton23/zxcvbn-go/matching"
"github.com/nbutton23/zxcvbn-go/utils/math"
"math"
"regexp"
"sort"
"unicode"
)
const (
@ -43,7 +40,7 @@ Returns minimum entropy
minimum entropy. O(nm) dp alg for length-n password with m candidate matches.
*/
func MinimumEntropyMatchSequence(password string, matches []match.Match) MinEntropyMatch {
bruteforceCardinality := float64(calcBruteforceCardinality(password))
bruteforceCardinality := float64(entropy.CalcBruteForceCardinality(password))
upToK := make([]float64, len(password))
backPointers := make([]match.Match, len(password))
@ -58,7 +55,7 @@ func MinimumEntropyMatchSequence(password string, matches []match.Match) MinEntr
i, j := match.I, match.J
// see if best entropy up to i-1 + entropy of match is less that current min at j
upTo := get(upToK, i-1)
calculatedEntropy := calcEntropy(match)
calculatedEntropy := match.Entropy
match.Entropy = calculatedEntropy
candidateEntropy := upTo + calculatedEntropy
@ -87,7 +84,7 @@ func MinimumEntropyMatchSequence(password string, matches []match.Match) MinEntr
}
sort.Sort(match.Matches(matchSequence))
makeBruteForecMatch := func(i, j int) match.Match {
makeBruteForceMatch := func(i, j int) match.Match {
return match.Match{Pattern: "bruteforce",
I: i,
J: j,
@ -101,14 +98,14 @@ func MinimumEntropyMatchSequence(password string, matches []match.Match) MinEntr
for _, match := range matchSequence {
i, j := match.I, match.J
if i-k > 0 {
matchSequenceCopy = append(matchSequenceCopy, makeBruteForecMatch(k, i-1))
matchSequenceCopy = append(matchSequenceCopy, makeBruteForceMatch(k, i-1))
}
k = j + 1
matchSequenceCopy = append(matchSequenceCopy, match)
}
if k < len(password) {
matchSequenceCopy = append(matchSequenceCopy, makeBruteForecMatch(k, len(password)-1))
matchSequenceCopy = append(matchSequenceCopy, makeBruteForceMatch(k, len(password)-1))
}
var minEntropy float64
if len(password) == 0 {
@ -133,170 +130,6 @@ func get(a []float64, i int) float64 {
return a[i]
}
// calcBruteforceCardinality returns the size of the character space used by
// password: 26 for lowercase letters, 26 for uppercase letters, 10 for digits
// and 33 for any other character, counting each class at most once.
func calcBruteforceCardinality(password string) float64 {
	var sawLower, sawUpper, sawDigit, sawOther bool
	for _, r := range password {
		// Classes are tested in this order; the first one that matches wins.
		switch {
		case unicode.IsLower(r):
			sawLower = true
		case unicode.IsDigit(r):
			sawDigit = true
		case unicode.IsUpper(r):
			sawUpper = true
		default:
			sawOther = true
		}
	}

	var total float64
	if sawLower {
		total += 26
	}
	if sawUpper {
		total += 26
	}
	if sawDigit {
		total += 10
	}
	if sawOther {
		total += 33
	}
	return total
}
// calcEntropy returns the entropy of match. A positive match.Entropy is
// returned as is; otherwise the value is computed by the estimator for
// match.Pattern. An unrecognized pattern yields 0.
//
// match is received by value, so the caller's Match is never modified.
func calcEntropy(match match.Match) float64 {
	if match.Entropy > float64(0) {
		return match.Entropy
	}

	//TODO finish implement this. . . this looks to be the meat and potatoes of the calculation
	switch match.Pattern {
	case "dictionary":
		return dictionaryEntropy(match)
	case "spatial":
		return spatialEntropy(match)
	case "repeat":
		return repeatEntropy(match)
	case "sequence":
		return sequenceEntropy(match)
	}
	return 0
}
// dictionaryEntropy returns log2 of match.Rank plus the extra bits
// contributed by any uppercase letters in match.Token.
func dictionaryEntropy(match match.Match) float64 {
	//TODO: L33t
	return math.Log2(match.Rank) + extraUpperCaseEntropy(match)
}
// spatialEntropy returns the entropy, in bits, of a keyboard-pattern match.
//
// match.DictionaryName selects the graph: "qwerty" and "dvorak" use the
// starting-position count and average degree of the qwerty graph; any other
// name uses the keypad values exported by the matching package. match.Turns
// and match.ShiftedCount supply the turn count and the number of shifted
// characters.
func spatialEntropy(match match.Match) float64 {
	var s, d float64 // s: starting positions, d: average degree of the graph
	switch match.DictionaryName {
	case "qwerty", "dvorak":
		// Build the graph once and take both values from it. The average
		// degree was previously taken from the keypad graph, which mixed the
		// properties of two different keyboards.
		qwerty := adjacency.BuildQwerty()
		s = float64(len(qwerty.Graph))
		d = qwerty.CalculateAvgDegree()
	default:
		s = float64(matching.KEYPAD_STARTING_POSITIONS)
		d = matching.KEYPAD_AVG_DEGREE
	}

	length := float64(len(match.Token))
	maxTurns := float64(match.Turns)

	//TODO: Should this be <= or just < ?
	//Estimate the number of possible patterns w/ length L or less with t turns or less
	var possibilities float64
	for i := float64(2); i <= length+1; i++ {
		possibleTurns := math.Min(maxTurns, i-1)
		for j := float64(1); j <= possibleTurns+1; j++ {
			possibilities += zxcvbn_math.NChoseK(i-1, j-1) * s * math.Pow(d, j)
		}
	}
	entropy := math.Log2(possibilities)

	//add extra entropy for shifted keys. ( % instead of 5 A instead of a)
	//Math is similar to extra entropy for uppercase letters in dictionary matches.
	shifted := float64(match.ShiftedCount)
	if shifted <= 0 {
		return entropy
	}
	unshifted := length - shifted
	var shiftVariants float64
	for i := float64(0); i < math.Min(shifted, unshifted)+1; i++ {
		shiftVariants += zxcvbn_math.NChoseK(shifted+unshifted, i)
	}
	return entropy + math.Log2(shiftVariants)
}
// sequenceEntropy returns the entropy, in bits, of a sequence match.
//
// A sequence whose first byte is 'a' or '1' gets no base entropy; any other
// start gets log2(match.DictionaryLength), plus one bit when the first byte
// is an uppercase letter. A descending sequence adds one more bit. log2 of
// the token's byte length is added on top.
//
// match.Token must not be empty: its first byte is read unconditionally.
func sequenceEntropy(match match.Match) float64 {
	first := match.Token[0]

	var bits float64
	if first != 'a' && first != '1' {
		bits = math.Log2(float64(match.DictionaryLength))
		if unicode.IsUpper(rune(first)) {
			bits++
		}
	}
	if !match.Ascending {
		bits++
	}
	return bits + math.Log2(float64(len(match.Token)))
}
func extraUpperCaseEntropy(match match.Match) float64 {
word := match.Token
allLower := true
for _, char := range word {
if unicode.IsUpper(char) {
allLower = false
break
}
}
if allLower {
return float64(0)
}
//a capitalized word is the most common capitalization scheme,
//so it only doubles the search space (uncapitalized + capitalized): 1 extra bit of entropy.
//allcaps and end-capitalized are common enough too, underestimate as 1 extra bit to be safe.
for _, regex := range []string{START_UPPER, END_UPPER, ALL_UPPER} {
matcher := regexp.MustCompile(regex)
if matcher.MatchString(word) {
return float64(1)
}
}
//Otherwise calculate the number of ways to capitalize U+L uppercase+lowercase letters with U uppercase letters or
//less. Or, if there's more uppercase than lower (for e.g. PASSwORD), the number of ways to lowercase U+L letters
//with L lowercase letters or less.
countUpper, countLower := float64(0), float64(0)
for _, char := range word {
if unicode.IsUpper(char) {
countUpper++
} else if unicode.IsLower(char) {
countLower++
}
}
totalLenght := countLower + countUpper
var possibililities float64
for i := float64(0); i <= math.Min(countUpper, countLower); i++ {
possibililities += float64(zxcvbn_math.NChoseK(totalLenght, i))
}
if possibililities < 1 {
return float64(1)
}
return float64(math.Log2(possibililities))
}
// repeatEntropy returns log2 of the brute-force cardinality of match.Token
// multiplied by the token's length in bytes.
func repeatEntropy(match match.Match) float64 {
	tokenLen := float64(len(match.Token))
	return math.Log2(calcBruteforceCardinality(match.Token) * tokenLen)
}
func entropyToCrackTime(entropy float64) float64 {
crackTime := (0.5 * math.Pow(float64(2), entropy)) * SECONDS_PER_GUESS