Skip to content

Commit

Permalink
Merge pull request #49 from boyter/LazyLoading
Browse files Browse the repository at this point in the history
Lazy loading
  • Loading branch information
boyter authored Jan 9, 2019
2 parents 72e56aa + d6e14d7 commit 35560c3
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 59 deletions.
3 changes: 2 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@ func main() {
Use: "scc",
Short: "scc DIRECTORY",
Long: "Sloc, Cloc and Code. Count lines of code in a directory with complexity estimation.\nBen Boyter <[email protected]> + Contributors",
Version: "2.0.0",
Version: "2.1.0",
Run: func(cmd *cobra.Command, args []string) {
processor.DirFilePaths = args
processor.ConfigureGc()
processor.ConfigureLazy(true)
processor.Process()
},
}
Expand Down
7 changes: 5 additions & 2 deletions processor/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ func walkDirectoryParallel(root string, output chan *FileJob) {
go func(toWalk string) {
filejobs := walkDirectory(toWalk, PathBlacklist, extensionLookup)
for i := 0; i < len(filejobs); i++ {
LoadLanguageFeature(filejobs[i].Language)
output <- &filejobs[i]
}

Expand All @@ -128,7 +129,7 @@ func walkDirectoryParallel(root string, output chan *FileJob) {
wg.Done()
}(filepath.Join(root, f.Name()))
}
} else {
} else { // File processing starts here
if gitignoreerror != nil || !gitignore.Match(filepath.Join(root, f.Name()), false) {

shouldSkip := false
Expand Down Expand Up @@ -158,10 +159,12 @@ func walkDirectoryParallel(root string, output chan *FileJob) {
}

if ok {
output <- &FileJob{Location: filepath.Join(root, f.Name()), Filename: f.Name(), Extension: extension, Language: language}
mutex.Lock()
totalCount++
mutex.Unlock()

LoadLanguageFeature(language)
output <- &FileJob{Location: filepath.Join(root, f.Name()), Filename: f.Name(), Extension: extension, Language: language}
} else if Verbose {
printWarn(fmt.Sprintf("skipping file unknown extension: %s", f.Name()))
}
Expand Down
165 changes: 109 additions & 56 deletions processor/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"runtime/debug"
"sort"
"strings"
"sync"
)

// Flags set via the CLI which control how the output is displayed
Expand Down Expand Up @@ -38,27 +39,36 @@ var WhiteListExtensions = []string{}
var AverageWage int64 = 56286
var GcFileCount = 10000
var gcPercent = -1
var isLazy = false

// Not set via flags but by arguments following the flags
var DirFilePaths = []string{}

// Raw languageDatabase loaded
var languageDatabase = map[string]Language{}

// Loaded from the JSON that is in constants.go
var ExtensionToLanguage = map[string]string{}
var LanguageFeatures = map[string]LanguageFeature{}
var LanguageFeaturesMutex = sync.Mutex{}

// ConfigureGc hands the package-level gcPercent value to debug.SetGCPercent and
// stores the value that call returns (the previously active setting) back into
// gcPercent.
//
// This is kept outside of ProcessConstants because it should only be enabled in
// command line mode https://github.com/boyter/scc/issues/32
func ConfigureGc() {
	previous := debug.SetGCPercent(gcPercent)
	gcPercent = previous
}

// ConfigureLazy sets the package-level isLazy flag. When the flag is true,
// ProcessConstants does not build every language's features up front and
// LoadLanguageFeature builds them on request; when it is false,
// LoadLanguageFeature returns immediately.
func ConfigureLazy(lazy bool) {
isLazy = lazy
}

// ProcessConstants is responsible for setting up the language features based on the JSON file that is stored in constants
// Needs to be called at least once in order for anything to actually happen
func ProcessConstants() {
var database = loadDatabase()
languageDatabase = loadDatabase()

startTime := makeTimestampNano()
for name, value := range database {
for name, value := range languageDatabase {
for _, ext := range value.Extensions {
ExtensionToLanguage[ext] = name
}
Expand All @@ -68,70 +78,113 @@ func ProcessConstants() {
printTrace(fmt.Sprintf("nanoseconds build extension to language: %d", makeTimestampNano()-startTime))
}

startTime = makeTimestampMilli()
for name, value := range database {
complexityTrie := &Trie{}
slCommentTrie := &Trie{}
mlCommentTrie := &Trie{}
stringTrie := &Trie{}
tokenTrie := &Trie{}

complexityMask := byte(0)
singleLineCommentMask := byte(0)
multiLineCommentMask := byte(0)
stringMask := byte(0)
processMask := byte(0)

for _, v := range value.ComplexityChecks {
complexityMask |= v[0]
complexityTrie.Insert(T_COMPLEXITY, []byte(v))
if !Complexity {
tokenTrie.Insert(T_COMPLEXITY, []byte(v))
}
}
if !Complexity {
processMask |= complexityMask
// If lazy is set then we want to load in the features as we find them, not in one go.
// Otherwise we are being used as a library, so just load them all in up front.
if !isLazy {
startTime = makeTimestampMilli()
for name, value := range languageDatabase {
processLanguageFeature(name, value)
}

for _, v := range value.LineComment {
singleLineCommentMask |= v[0]
slCommentTrie.Insert(T_SLCOMMENT, []byte(v))
tokenTrie.Insert(T_SLCOMMENT, []byte(v))
if Trace {
printTrace(fmt.Sprintf("milliseconds build language features: %d", makeTimestampMilli()-startTime))
}
processMask |= singleLineCommentMask
} else {
printTrace("configured to lazy load language features")
}
}

for _, v := range value.MultiLine {
multiLineCommentMask |= v[0][0]
mlCommentTrie.InsertClose(T_MLCOMMENT, []byte(v[0]), []byte(v[1]))
tokenTrie.InsertClose(T_MLCOMMENT, []byte(v[0]), []byte(v[1]))
}
processMask |= multiLineCommentMask
// Will load a single feature as requested given the name
// this is used with lazy loading
func LoadLanguageFeature(loadName string) {
if !isLazy {
return
}

for _, v := range value.Quotes {
stringMask |= v[0][0]
stringTrie.InsertClose(T_STRING, []byte(v[0]), []byte(v[1]))
tokenTrie.InsertClose(T_STRING, []byte(v[0]), []byte(v[1]))
}
processMask |= stringMask

LanguageFeatures[name] = LanguageFeature{
Complexity: complexityTrie,
MultiLineComments: mlCommentTrie,
SingleLineComments: slCommentTrie,
Strings: stringTrie,
Tokens: tokenTrie,
Nested: value.NestedMultiLine,
ComplexityCheckMask: complexityMask,
MultiLineCommentMask: multiLineCommentMask,
SingleLineCommentMask: singleLineCommentMask,
StringCheckMask: stringMask,
ProcessMask: processMask,
// Check if already loaded and if so return because we don't need to do it again
LanguageFeaturesMutex.Lock()
_, ok := LanguageFeatures[loadName]
LanguageFeaturesMutex.Unlock()
if ok {
return
}

var name string
var value Language

for name, value = range languageDatabase {
if name == loadName {
break
}
}

startTime := makeTimestampNano()
processLanguageFeature(loadName, value)
if Trace {
printTrace(fmt.Sprintf("milliseconds build language features: %d", makeTimestampMilli()-startTime))
printTrace(fmt.Sprintf("nanoseconds to build language %s features: %d", loadName, makeTimestampNano()-startTime))
}
}

// processLanguageFeature builds the tries and first-byte masks for a single
// language definition and stores the resulting LanguageFeature in the
// LanguageFeatures map under name.
//
// Each token category (complexity checks, line comments, multi-line comments,
// quotes) gets its own trie plus a mask made by OR-ing together the first byte
// of every token in that category. Tokens are also inserted into a combined
// Tokens trie, and ProcessMask is the OR of the masks of every category that
// went into it. When the Complexity flag is set, complexity tokens are left out
// of both the combined trie and ProcessMask.
//
// The write to LanguageFeatures is guarded by LanguageFeaturesMutex.
func processLanguageFeature(name string, value Language) {
	feature := LanguageFeature{
		Complexity:         &Trie{},
		MultiLineComments:  &Trie{},
		SingleLineComments: &Trie{},
		Strings:            &Trie{},
		Tokens:             &Trie{},
		Nested:             value.NestedMultiLine,
	}

	// Complexity tokens only take part in the combined trie/mask when the
	// Complexity flag is not set.
	includeComplexity := !Complexity

	for i := range value.ComplexityChecks {
		check := value.ComplexityChecks[i]
		feature.ComplexityCheckMask |= check[0]
		feature.Complexity.Insert(T_COMPLEXITY, []byte(check))
		if includeComplexity {
			feature.Tokens.Insert(T_COMPLEXITY, []byte(check))
		}
	}

	for i := range value.LineComment {
		marker := value.LineComment[i]
		feature.SingleLineCommentMask |= marker[0]
		feature.SingleLineComments.Insert(T_SLCOMMENT, []byte(marker))
		feature.Tokens.Insert(T_SLCOMMENT, []byte(marker))
	}

	// Multi-line comments and quotes are open/close pairs: index 0 opens,
	// index 1 closes.
	for i := range value.MultiLine {
		pair := value.MultiLine[i]
		feature.MultiLineCommentMask |= pair[0][0]
		feature.MultiLineComments.InsertClose(T_MLCOMMENT, []byte(pair[0]), []byte(pair[1]))
		feature.Tokens.InsertClose(T_MLCOMMENT, []byte(pair[0]), []byte(pair[1]))
	}

	for i := range value.Quotes {
		pair := value.Quotes[i]
		feature.StringCheckMask |= pair[0][0]
		feature.Strings.InsertClose(T_STRING, []byte(pair[0]), []byte(pair[1]))
		feature.Tokens.InsertClose(T_STRING, []byte(pair[0]), []byte(pair[1]))
	}

	// Combine the per-category masks for everything that went into Tokens.
	feature.ProcessMask = feature.SingleLineCommentMask | feature.MultiLineCommentMask | feature.StringCheckMask
	if includeComplexity {
		feature.ProcessMask |= feature.ComplexityCheckMask
	}

	LanguageFeaturesMutex.Lock()
	LanguageFeatures[name] = feature
	LanguageFeaturesMutex.Unlock()
}

func processFlags() {
Expand Down
4 changes: 4 additions & 0 deletions processor/workers.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,9 @@ func CountStats(fileJob *FileJob) {
return
}

LanguageFeaturesMutex.Lock()
langFeatures := LanguageFeatures[fileJob.Language]
LanguageFeaturesMutex.Unlock()

if langFeatures.Complexity == nil {
langFeatures.Complexity = &Trie{}
Expand Down Expand Up @@ -337,6 +339,8 @@ func CountStats(fileJob *FileJob) {
}
}

// Only check the first 10000 characters for null bytes indicating a binary file.
// If we find one in that range then we return; otherwise carry on and ignore binary markers.
if index < 10000 && fileJob.Binary {
return
}
Expand Down

0 comments on commit 35560c3

Please sign in to comment.