From 5bb20a3e7a4c4b28d32c7109f1148240679d7d55 Mon Sep 17 00:00:00 2001 From: Chady Kamar Date: Tue, 13 Nov 2018 16:51:28 -0500 Subject: [PATCH 1/6] 87ms without concurrency --- main.go | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/main.go b/main.go index f62aa00..7619131 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,12 @@ package main import ( + "bufio" "fmt" + "os" + "path/filepath" + "regexp" + "strings" "time" ) @@ -32,6 +37,67 @@ func main() { // number of line deleted // list of function calls seen in the diffs and their number of calls func compute() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]+\\(") + if err != nil { + fmt.Println(err) + } + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + + file, err := os.Open(path) + if err != nil { + return err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + + if strings.HasPrefix(line, "@@") { + regions++ + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + files = append(files, line[6:]) + } else if strings.HasPrefix(line, "+") { + linesAdded++ + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeleted++ + } else { + matches := re.FindAllString(line, -1) + if matches == nil { + continue + } + for _, match := range matches { + // We'll keep only the function name i.e. remove the parentheses and params + functionCall := match[:len(match)-1] + + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + } + } + return nil + }) + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} - return nil } From d820bfa15411f278b1aa734d154d884ac32615ef Mon Sep 17 00:00:00 2001 From: Chady Kamar Date: Wed, 14 Nov 2018 12:45:29 -0500 Subject: [PATCH 2/6] Parallelized with goroutines 62ms --- main.go | 159 +++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 124 insertions(+), 35 deletions(-) diff --git a/main.go b/main.go index 7619131..a85f76a 100644 --- a/main.go +++ b/main.go @@ -7,6 +7,7 @@ import ( "path/filepath" "regexp" "strings" + "sync" "time" ) @@ -43,12 +44,20 @@ func compute() *result { if err != nil { fmt.Println(err) } + var regions int var linesAdded int var linesDeleted int var files []string functionCalls := make(map[string]int) + // Reader wg + var rwg sync.WaitGroup + + // lines receives the lines of the diff files from their respective goroutines + lines := make(chan string, 50) + + // Line reader, one goroutine spawned per file filepath.Walk(root, func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -57,46 +66,126 @@ func compute() *result { if info.IsDir() { return nil } + rwg.Add(1) + go func() { + defer rwg.Done() + file, err := os.Open(path) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines <- scanner.Text() + } + }() - file, err := os.Open(path) - if err != nil { - return err - } - defer file.Close() - - scanner := bufio.NewScanner(file) - for scanner.Scan() { - line := scanner.Text() - - if strings.HasPrefix(line, "@@") { - regions++ - } else if strings.HasPrefix(line, "+++") { - // If the file has been renamed or copied we keep the newer name and get rid - // of the prefix "+++ b/" - files = append(files, line[6:]) - } else if strings.HasPrefix(line, "+") { - linesAdded++ - } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { - linesDeleted++ - } else { - matches := re.FindAllString(line, -1) - if matches == nil { - continue - } - for _, match := range matches { - // We'll keep only the function name i.e. remove the parentheses and params - functionCall := match[:len(match)-1] - - if _, ok := functionCalls[functionCall]; ok { - functionCalls[functionCall]++ - } else { - functionCalls[functionCall] = 1 + return nil + }) + + // Clean up readers + go func() { + rwg.Wait() + close(lines) + }() + + // "Processor" wg + var pwg sync.WaitGroup + + regionsChan := make(chan int) + linesAddedChan := make(chan int) + linesDeletedChan := make(chan int) + filesChan := make(chan string) + functionCallsChan := make(chan string) + + // Receive lines and process them, then send the result to the appropriate channel defined above. + pwg.Add(1) + go func() { + defer pwg.Done() + for line := range lines { + + pwg.Add(1) + go func(line string) { + defer pwg.Done() + if strings.HasPrefix(line, "@@") { + regionsChan <- 1 + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + filesChan <- line[6:] + } else if strings.HasPrefix(line, "+") { + linesAddedChan <- 1 + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeletedChan <- 1 + } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && + !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { + + matches := re.FindAllString(line, -1) + for _, match := range matches { + // We'll keep only the function name i.e. remove the parentheses and params + functionCall := match[:len(match)-1] + functionCallsChan <- functionCall } } + }(line) + } + }() + + // CLose the processing channels + go func() { + pwg.Wait() + close(regionsChan) + close(linesAddedChan) + close(linesDeletedChan) + close(filesChan) + close(functionCallsChan) + }() + + // Workers for each type + var wwg sync.WaitGroup + + wwg.Add(5) + + go func() { + defer wwg.Done() + for range regionsChan { + regions++ + } + }() + + go func() { + defer wwg.Done() + for range linesAddedChan { + linesAdded++ + } + }() + + go func() { + defer wwg.Done() + for range linesDeletedChan { + linesDeleted++ + } + }() + + go func() { + defer wwg.Done() + for file := range filesChan { + files = append(files, file) + } + }() + + go func() { + defer wwg.Done() + for functionCall := range functionCallsChan { + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 } } - return nil - }) + }() + + wwg.Wait() return &result{files, regions, linesAdded, linesDeleted, functionCalls} From 20f022d5d7a4a8341ec834f7103e5577d518533a Mon Sep 17 00:00:00 2001 From: Chady Kamar Date: Fri, 16 Nov 2018 15:13:51 -0500 Subject: [PATCH 3/6] Use strings.TrimSuffix to remove bracket and extract function names --- main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index a85f76a..a0b17f2 100644 --- a/main.go +++ b/main.go @@ -122,8 +122,8 @@ func compute() *result { matches := re.FindAllString(line, -1) for _, match := range matches { - // We'll keep only the function name i.e. remove the parentheses and params - functionCall := match[:len(match)-1] + // We'll keep only the function name i.e. remove the bracket '(' + functionCall := strings.TrimSuffix(match, "(") functionCallsChan <- functionCall } } From aa69b14d68b5eaa403e5fc95c5b9b34355816bb5 Mon Sep 17 00:00:00 2001 From: Chady Kamar Date: Fri, 16 Nov 2018 15:16:03 -0500 Subject: [PATCH 4/6] Correct regex to allow for one character function names --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index a0b17f2..a348c40 100644 --- a/main.go +++ b/main.go @@ -40,7 +40,7 @@ func main() { func compute() *result { root := "./diffs" - re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]+\\(") + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") if err != nil { fmt.Println(err) } From 2120ef5be7f946c5c9b139621e1d3c537108f56e Mon Sep 17 00:00:00 2001 From: Chady Kamar Date: Fri, 16 Nov 2018 17:22:24 -0500 Subject: [PATCH 5/6] Benchmarks for different concurrency schemes --- main.go | 558 ++++++++++++++++++++++++++++++++++++++++++++++++++- main_test.go | 41 ++++ 2 files changed, 593 insertions(+), 6 deletions(-) create mode 100644 main_test.go diff --git a/main.go b/main.go index a348c40..1f88d9f 100644 --- a/main.go +++ b/main.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "regexp" + "runtime" "strings" "sync" "time" @@ -26,7 +27,7 @@ func timeTrack(start time.Time, name string) { //to stdout. func main() { defer timeTrack(time.Now(), "compute diff") - fmt.Println(compute()) + fmt.Println(computeConcurrencyChannelsWithWorkers()) } //compute parses the git diffs in ./diffs and returns @@ -92,11 +93,11 @@ func compute() *result { // "Processor" wg var pwg sync.WaitGroup - regionsChan := make(chan int) - linesAddedChan := make(chan int) - linesDeletedChan := make(chan int) - filesChan := make(chan string) - functionCallsChan := make(chan string) + regionsChan := make(chan int, 20) + linesAddedChan := make(chan int, 20) + linesDeletedChan := make(chan int, 20) + filesChan := make(chan string, 20) + functionCallsChan := make(chan string, 20) // Receive lines and process them, then send the result to the appropriate channel defined above. pwg.Add(1) @@ -190,3 +191,548 @@ func compute() *result { return &result{files, regions, linesAdded, linesDeleted, functionCalls} } + +// computeConcurrencyChannelsWithWorkers is the same as compute but it uses a fixed number of workers +// equal to the number of logical CPUs on the machine +func computeConcurrencyChannelsWithWorkers() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") + if err != nil { + fmt.Println(err) + } + + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + // Reader wg + var rwg sync.WaitGroup + + // lines receives the lines of the diff files from their respective goroutines + lines := make(chan string, 50) + + // Line reader, one goroutine spawned per file + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + rwg.Add(1) + go func() { + defer rwg.Done() + file, err := os.Open(path) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines <- scanner.Text() + } + }() + + return nil + }) + + // Clean up readers + go func() { + rwg.Wait() + close(lines) + }() + + // "Processor" wg + var pwg sync.WaitGroup + + regionsChan := make(chan int, 20) + linesAddedChan := make(chan int, 20) + linesDeletedChan := make(chan int, 20) + filesChan := make(chan string, 20) + functionCallsChan := make(chan string, 20) + + // Receive lines and process them, then send the result to the appropriate channel defined above. + pwg.Add(1) + go func() { + defer pwg.Done() + + for w := 1; w <= runtime.NumCPU(); w++ { + pwg.Add(1) + go func() { + defer pwg.Done() + for line := range lines { + + if strings.HasPrefix(line, "@@") { + regionsChan <- 1 + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + filesChan <- line[6:] + } else if strings.HasPrefix(line, "+") { + linesAddedChan <- 1 + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeletedChan <- 1 + } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && + !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { + + matches := re.FindAllString(line, -1) + for _, match := range matches { + // We'll keep only the function name i.e. remove the bracket '(' + functionCall := strings.TrimSuffix(match, "(") + functionCallsChan <- functionCall + } + } + } + }() + } + }() + + // CLose the processing channels + go func() { + pwg.Wait() + close(regionsChan) + close(linesAddedChan) + close(linesDeletedChan) + close(filesChan) + close(functionCallsChan) + }() + + // Workers for each type + var wwg sync.WaitGroup + + wwg.Add(5) + + go func() { + defer wwg.Done() + for range regionsChan { + regions++ + } + }() + + go func() { + defer wwg.Done() + for range linesAddedChan { + linesAdded++ + } + }() + + go func() { + defer wwg.Done() + for range linesDeletedChan { + linesDeleted++ + } + }() + + go func() { + defer wwg.Done() + for file := range filesChan { + files = append(files, file) + } + }() + + go func() { + defer wwg.Done() + for functionCall := range functionCallsChan { + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + }() + + wwg.Wait() + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} + +} + +// computeConcurrencyReadingOnly only uses goroutines to read the different files +func computeConcurrencyReadingOnly() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") + if err != nil { + fmt.Println(err) + } + + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + // Reader wg + var rwg sync.WaitGroup + + // lines receives the lines of the diff files from their respective goroutines + lines := make(chan string, 50) + + // Line reader, one goroutine spawned per file + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + rwg.Add(1) + go func() { + defer rwg.Done() + file, err := os.Open(path) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines <- scanner.Text() + } + }() + + return nil + }) + + // Clean up readers + go func() { + rwg.Wait() + close(lines) + }() + + // Receive lines and process them, then send the result to the appropriate channel defined above. + + for line := range lines { + + if strings.HasPrefix(line, "@@") { + regions++ + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + files = append(files, line[6:]) + } else if strings.HasPrefix(line, "+") { + linesAdded++ + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeleted++ + } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && + !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { + + matches := re.FindAllString(line, -1) + for _, match := range matches { + // We'll keep only the function name i.e. remove the bracket '(' + functionCall := strings.TrimSuffix(match, "(") + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + } + } + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} + +} + +// computeConcurrencyMutexes is the same as compute, but it uses mutexes instead of channels +func computeConcurrencyMutexes() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") + if err != nil { + fmt.Println(err) + } + + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + // Reader wg + var rwg sync.WaitGroup + + // lines receives the lines of the diff files from their respective goroutines + lines := make(chan string, 50) + + // Line reader, one goroutine spawned per file + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + rwg.Add(1) + go func() { + defer rwg.Done() + file, err := os.Open(path) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines <- scanner.Text() + } + }() + + return nil + }) + + // Clean up readers + go func() { + rwg.Wait() + close(lines) + }() + + // Receive lines and process them, then send the result to the appropriate channel defined above. + + // Mutexes + var regionsMutex sync.Mutex + var linesAddedMutex sync.Mutex + var linesDeletedMutex sync.Mutex + var filesMutex sync.Mutex + var functionCallsMutex sync.Mutex + + var pwg sync.WaitGroup + for line := range lines { + pwg.Add(1) + go func(line string) { + defer pwg.Done() + + if strings.HasPrefix(line, "@@") { + regionsMutex.Lock() + regions++ + regionsMutex.Unlock() + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + filesMutex.Lock() + files = append(files, line[6:]) + filesMutex.Unlock() + } else if strings.HasPrefix(line, "+") { + linesAddedMutex.Lock() + linesAdded++ + linesAddedMutex.Unlock() + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeletedMutex.Lock() + linesDeleted++ + linesDeletedMutex.Unlock() + } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && + !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { + + matches := re.FindAllString(line, -1) + + functionCallsMutex.Lock() + for _, match := range matches { + // We'll keep only the function name i.e. remove the bracket '(' + functionCall := strings.TrimSuffix(match, "(") + + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + functionCallsMutex.Unlock() + } + }(line) + } + + pwg.Wait() + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} + +} + +// computeConcurrencyMutexesWithWorkers is the same as computeConcurrencyMutexes, +// but it doesn't spawn a goroutine for every line, instead it uses a fixed number of workers +// equal to the number of logical CPUs on the machine +func computeConcurrencyMutexesWithWorkers() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") + if err != nil { + fmt.Println(err) + } + + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + // Reader wg + var rwg sync.WaitGroup + + // lines receives the lines of the diff files from their respective goroutines + lines := make(chan string, 50) + + // Line reader, one goroutine spawned per file + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + rwg.Add(1) + go func() { + defer rwg.Done() + file, err := os.Open(path) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines <- scanner.Text() + } + }() + + return nil + }) + + // Clean up readers + go func() { + rwg.Wait() + close(lines) + }() + + // Receive lines and process them, then send the result to the appropriate channel defined above. + + // Mutexes + var regionsMutex sync.Mutex + var linesAddedMutex sync.Mutex + var linesDeletedMutex sync.Mutex + var filesMutex sync.Mutex + var functionCallsMutex sync.Mutex + + var pwg sync.WaitGroup + + numCpus := runtime.NumCPU() + + for w := 1; w <= numCpus; w++ { + pwg.Add(1) + go func() { + defer pwg.Done() + for line := range lines { + if strings.HasPrefix(line, "@@") { + regionsMutex.Lock() + regions++ + regionsMutex.Unlock() + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + filesMutex.Lock() + files = append(files, line[6:]) + filesMutex.Unlock() + } else if strings.HasPrefix(line, "+") { + linesAddedMutex.Lock() + linesAdded++ + linesAddedMutex.Unlock() + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeletedMutex.Lock() + linesDeleted++ + linesDeletedMutex.Unlock() + } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && + !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { + + matches := re.FindAllString(line, -1) + + functionCallsMutex.Lock() + for _, match := range matches { + // We'll keep only the function name i.e. remove the bracket '(' + functionCall := strings.TrimSuffix(match, "(") + + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + functionCallsMutex.Unlock() + } + } + }() + } + + pwg.Wait() + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} + +} + +// computeNoConcurrency reads the files one at a time, and processes them line by line +func computeNoConcurrency() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]+\\(") + if err != nil { + fmt.Println(err) + } + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + + file, err := os.Open(path) + if err != nil { + return err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + + if strings.HasPrefix(line, "@@") { + regions++ + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + files = append(files, line[6:]) + } else if strings.HasPrefix(line, "+") { + linesAdded++ + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeleted++ + } else { + matches := re.FindAllString(line, -1) + if matches == nil { + continue + } + for _, match := range matches { + // We'll keep only the function name i.e. remove the parentheses and params + functionCall := match[:len(match)-1] + + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + } + } + return nil + }) + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} + +} diff --git a/main_test.go b/main_test.go new file mode 100644 index 0000000..f22492a --- /dev/null +++ b/main_test.go @@ -0,0 +1,41 @@ +package main + +import ( + "testing" +) + +func BenchmarkNoConcurrency(b *testing.B) { + for n := 0; n < b.N; n++ { + computeNoConcurrency() + } +} + +func BenchmarkConcurrencyReadingOnly(b *testing.B) { + for n := 0; n < b.N; n++ { + computeConcurrencyReadingOnly() + } +} + +func BenchmarkConcurrency(b *testing.B) { + for n := 0; n < b.N; n++ { + compute() + } +} + +func BenchmarkConcurrencyMutexes(b *testing.B) { + for n := 0; n < b.N; n++ { + computeConcurrencyMutexes() + } +} + +func BenchmarkConcurrencyMutexesWithWorkers(b *testing.B) { + for n := 0; n < b.N; n++ { + computeConcurrencyMutexesWithWorkers() + } +} + +func BenchmarkConcurrencyChannelsWithWorkers(b *testing.B) { + for n := 0; n < b.N; n++ { + computeConcurrencyChannelsWithWorkers() + } +} From 5041cf936d1f4349d9f438a2e716575fae7bda13 Mon Sep 17 00:00:00 2001 From: Chady Kamar Date: Fri, 16 Nov 2018 17:49:59 -0500 Subject: [PATCH 6/6] Settled on the fastest scheme --- compute_alternatives.go | 713 +++++++++++++++++++++++++++++++++++++++ main.go | 716 +--------------------------------------- main_test.go | 16 +- 3 files changed, 723 insertions(+), 722 deletions(-) create mode 100644 compute_alternatives.go diff --git a/compute_alternatives.go b/compute_alternatives.go new file mode 100644 index 0000000..d6afa07 --- /dev/null +++ b/compute_alternatives.go @@ -0,0 +1,713 @@ +package main + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "regexp" + "runtime" + "strings" + "sync" +) + +// computeConcurrencyChannelsOneGoroutinePerLine spawns one goroutine to process every line +func computeConcurrencyChannelsOneGoroutinePerLine() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") + if err != nil { + fmt.Println(err) + } + + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + // Reader wg + var rwg sync.WaitGroup + + // lines receives the lines of the diff files from their respective goroutines + lines := make(chan string, 50) + + // Line reader, one goroutine spawned per file + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + rwg.Add(1) + go func() { + defer rwg.Done() + file, err := os.Open(path) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines <- scanner.Text() + } + }() + + return nil + }) + + // Clean up readers + go func() { + rwg.Wait() + close(lines) + }() + + // "Processor" wg + var pwg sync.WaitGroup + + regionsChan := make(chan int, 20) + linesAddedChan := make(chan int, 20) + linesDeletedChan := make(chan int, 20) + filesChan := make(chan string, 20) + functionCallsChan := make(chan string, 20) + + // Receive lines and process them, then send the result to the appropriate channel defined above. + pwg.Add(1) + go func() { + defer pwg.Done() + for line := range lines { + + pwg.Add(1) + go func(line string) { + defer pwg.Done() + if strings.HasPrefix(line, "@@") { + regionsChan <- 1 + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + filesChan <- line[6:] + } else if strings.HasPrefix(line, "+") { + linesAddedChan <- 1 + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeletedChan <- 1 + } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && + !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { + + matches := re.FindAllString(line, -1) + for _, match := range matches { + // We'll keep only the function name i.e. remove the bracket '(' + functionCall := strings.TrimSuffix(match, "(") + functionCallsChan <- functionCall + } + } + }(line) + } + }() + + // CLose the processing channels + go func() { + pwg.Wait() + close(regionsChan) + close(linesAddedChan) + close(linesDeletedChan) + close(filesChan) + close(functionCallsChan) + }() + + // Workers for each type + var wwg sync.WaitGroup + + wwg.Add(5) + + go func() { + defer wwg.Done() + for range regionsChan { + regions++ + } + }() + + go func() { + defer wwg.Done() + for range linesAddedChan { + linesAdded++ + } + }() + + go func() { + defer wwg.Done() + for range linesDeletedChan { + linesDeleted++ + } + }() + + go func() { + defer wwg.Done() + for file := range filesChan { + files = append(files, file) + } + }() + + go func() { + defer wwg.Done() + for functionCall := range functionCallsChan { + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + }() + + wwg.Wait() + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} + +} + +// computeConcurrencyChannelsOneGoroutinePerCPU is the same as compute but it uses a fixed number of workers +// equal to the number of logical CPUs on the machine +func computeConcurrencyChannelsOneGoroutinePerCPU() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") + if err != nil { + fmt.Println(err) + } + + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + // Reader wg + var rwg sync.WaitGroup + + // lines receives the lines of the diff files from their respective goroutines + lines := make(chan string, 50) + + // Line reader, one goroutine spawned per file + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + rwg.Add(1) + go func() { + defer rwg.Done() + file, err := os.Open(path) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines <- scanner.Text() + } + }() + + return nil + }) + + // Clean up readers + go func() { + rwg.Wait() + close(lines) + }() + + // "Processor" wg + var pwg sync.WaitGroup + + regionsChan := make(chan int, 20) + linesAddedChan := make(chan int, 20) + linesDeletedChan := make(chan int, 20) + filesChan := make(chan string, 20) + functionCallsChan := make(chan string, 20) + + // Receive lines and process them, then send the result to the appropriate channel defined above. + pwg.Add(1) + go func() { + defer pwg.Done() + + for w := 1; w <= runtime.NumCPU(); w++ { + pwg.Add(1) + go func() { + defer pwg.Done() + for line := range lines { + + if strings.HasPrefix(line, "@@") { + regionsChan <- 1 + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + filesChan <- line[6:] + } else if strings.HasPrefix(line, "+") { + linesAddedChan <- 1 + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeletedChan <- 1 + } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && + !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { + + matches := re.FindAllString(line, -1) + for _, match := range matches { + // We'll keep only the function name i.e. remove the bracket '(' + functionCall := strings.TrimSuffix(match, "(") + functionCallsChan <- functionCall + } + } + } + }() + } + }() + + // CLose the processing channels + go func() { + pwg.Wait() + close(regionsChan) + close(linesAddedChan) + close(linesDeletedChan) + close(filesChan) + close(functionCallsChan) + }() + + // Workers for each type + var wwg sync.WaitGroup + + wwg.Add(5) + + go func() { + defer wwg.Done() + for range regionsChan { + regions++ + } + }() + + go func() { + defer wwg.Done() + for range linesAddedChan { + linesAdded++ + } + }() + + go func() { + defer wwg.Done() + for range linesDeletedChan { + linesDeleted++ + } + }() + + go func() { + defer wwg.Done() + for file := range filesChan { + files = append(files, file) + } + }() + + go func() { + defer wwg.Done() + for functionCall := range functionCallsChan { + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + }() + + wwg.Wait() + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} + +} + +// computeConcurrencyReadingOnly only uses goroutines to read the different files +func computeConcurrencyReadingOnly() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") + if err != nil { + fmt.Println(err) + } + + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + // Reader wg + var rwg sync.WaitGroup + + // lines receives the lines of the diff files from their respective goroutines + lines := make(chan string, 50) + + // Line reader, one goroutine spawned per file + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + rwg.Add(1) + go func() { + defer rwg.Done() + file, err := os.Open(path) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines <- scanner.Text() + } + }() + + return nil + }) + + // Clean up readers + go func() { + rwg.Wait() + close(lines) + }() + + // Receive lines and process them, then send the result to the appropriate channel defined above. + + for line := range lines { + + if strings.HasPrefix(line, "@@") { + regions++ + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + files = append(files, line[6:]) + } else if strings.HasPrefix(line, "+") { + linesAdded++ + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeleted++ + } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && + !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { + + matches := re.FindAllString(line, -1) + for _, match := range matches { + // We'll keep only the function name i.e. remove the bracket '(' + functionCall := strings.TrimSuffix(match, "(") + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + } + } + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} + +} + +// computeConcurrencyMutexesOneGoroutinePerLine is the same as computeConcurrencyChannelsOneGoroutinePerLine, +// but it uses mutexes instead of channels +func computeConcurrencyMutexesOneGoroutinePerLine() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") + if err != nil { + fmt.Println(err) + } + + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + // Reader wg + var rwg sync.WaitGroup + + // lines receives the lines of the diff files from their respective goroutines + lines := make(chan string, 50) + + // Line reader, one goroutine spawned per file + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + rwg.Add(1) + go func() { + defer rwg.Done() + file, err := os.Open(path) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines <- scanner.Text() + } + }() + + return nil + }) + + // Clean up readers + go func() { + rwg.Wait() + close(lines) + }() + + // Receive lines and process them, then send the result to the appropriate channel defined above. + + // Mutexes + var regionsMutex sync.Mutex + var linesAddedMutex sync.Mutex + var linesDeletedMutex sync.Mutex + var filesMutex sync.Mutex + var functionCallsMutex sync.Mutex + + var pwg sync.WaitGroup + for line := range lines { + pwg.Add(1) + go func(line string) { + defer pwg.Done() + + if strings.HasPrefix(line, "@@") { + regionsMutex.Lock() + regions++ + regionsMutex.Unlock() + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + filesMutex.Lock() + files = append(files, line[6:]) + filesMutex.Unlock() + } else if strings.HasPrefix(line, "+") { + linesAddedMutex.Lock() + linesAdded++ + linesAddedMutex.Unlock() + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeletedMutex.Lock() + linesDeleted++ + linesDeletedMutex.Unlock() + } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && + !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { + + matches := re.FindAllString(line, -1) + + functionCallsMutex.Lock() + for _, match := range matches { + // We'll keep only the function name i.e. remove the bracket '(' + functionCall := strings.TrimSuffix(match, "(") + + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + functionCallsMutex.Unlock() + } + }(line) + } + + pwg.Wait() + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} + +} + +// computeConcurrencyMutexesOneGoroutinePerCPU is the same as computeConcurrencyMutexes, +// but it doesn't spawn a goroutine for every line, instead it uses a fixed number of workers +// equal to the number of logical CPUs on the machine +func computeConcurrencyMutexesOneGoroutinePerCPU() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") + if err != nil { + fmt.Println(err) + } + + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + // Reader wg + var rwg sync.WaitGroup + + // lines receives the lines of the diff files from their respective goroutines + lines := make(chan string, 50) + + // Line reader, one goroutine spawned per file + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + rwg.Add(1) + go func() { + defer rwg.Done() + file, err := os.Open(path) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines <- scanner.Text() + } + }() + + return nil + }) + + // Clean up readers + go func() { + rwg.Wait() + close(lines) + }() + + // Receive lines and process them, then send the result to the appropriate channel defined above. + + // Mutexes + var regionsMutex sync.Mutex + var linesAddedMutex sync.Mutex + var linesDeletedMutex sync.Mutex + var filesMutex sync.Mutex + var functionCallsMutex sync.Mutex + + var pwg sync.WaitGroup + + numCpus := runtime.NumCPU() + + for w := 1; w <= numCpus; w++ { + pwg.Add(1) + go func() { + defer pwg.Done() + for line := range lines { + if strings.HasPrefix(line, "@@") { + regionsMutex.Lock() + regions++ + regionsMutex.Unlock() + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + filesMutex.Lock() + files = append(files, line[6:]) + filesMutex.Unlock() + } else if strings.HasPrefix(line, "+") { + linesAddedMutex.Lock() + linesAdded++ + linesAddedMutex.Unlock() + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeletedMutex.Lock() + linesDeleted++ + linesDeletedMutex.Unlock() + } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && + !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { + + matches := re.FindAllString(line, -1) + + functionCallsMutex.Lock() + for _, match := range matches { + // We'll keep only the function name i.e. remove the bracket '(' + functionCall := strings.TrimSuffix(match, "(") + + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + functionCallsMutex.Unlock() + } + } + }() + } + + pwg.Wait() + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} + +} + +// computeNoConcurrency reads the files one at a time, and processes them line by line +func computeNoConcurrency() *result { + root := "./diffs" + + re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]+\\(") + if err != nil { + fmt.Println(err) + } + var regions int + var linesAdded int + var linesDeleted int + var files []string + functionCalls := make(map[string]int) + + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + + file, err := os.Open(path) + if err != nil { + return err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + + if strings.HasPrefix(line, "@@") { + regions++ + } else if strings.HasPrefix(line, "+++") { + // If the file has been renamed or copied we keep the newer name and get rid + // of the prefix "+++ b/" + files = append(files, line[6:]) + } else if strings.HasPrefix(line, "+") { + linesAdded++ + } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { + linesDeleted++ + } else { + matches := re.FindAllString(line, -1) + if matches == nil { + continue + } + for _, match := range matches { + // We'll keep only the function name i.e. remove the parentheses and params + functionCall := match[:len(match)-1] + + if _, ok := functionCalls[functionCall]; ok { + functionCalls[functionCall]++ + } else { + functionCalls[functionCall] = 1 + } + } + } + } + return nil + }) + + return &result{files, regions, linesAdded, linesDeleted, functionCalls} + +} diff --git a/main.go b/main.go index 1f88d9f..39b14b7 100644 --- a/main.go +++ b/main.go @@ -1,14 +1,7 @@ package main import ( - "bufio" "fmt" - "os" - "path/filepath" - "regexp" - "runtime" - "strings" - "sync" "time" ) @@ -27,712 +20,7 @@ func timeTrack(start time.Time, name string) { //to stdout. func main() { defer timeTrack(time.Now(), "compute diff") - fmt.Println(computeConcurrencyChannelsWithWorkers()) + fmt.Println(compute()) } -//compute parses the git diffs in ./diffs and returns -//a result struct that contains all the relevant informations -//about these diffs -// list of files in the diffs -// number of regions -// number of line added -// number of line deleted -// list of function calls seen in the diffs and their number of calls -func compute() *result { - root := "./diffs" - - re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") - if err != nil { - fmt.Println(err) - } - - var regions int - var linesAdded int - var linesDeleted int - var files []string - functionCalls := make(map[string]int) - - // Reader wg - var rwg sync.WaitGroup - - // lines receives the lines of the diff files from their respective goroutines - lines := make(chan string, 50) - - // Line reader, one goroutine spawned per file - filepath.Walk(root, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - if info.IsDir() { - return nil - } - rwg.Add(1) - go func() { - defer rwg.Done() - file, err := os.Open(path) - if err != nil { - return - } - defer file.Close() - scanner := bufio.NewScanner(file) - for scanner.Scan() { - lines <- scanner.Text() - } - }() - - return nil - }) - - // Clean up readers - go func() { - rwg.Wait() - close(lines) - }() - - // "Processor" wg - var pwg sync.WaitGroup - - regionsChan := make(chan int, 20) - linesAddedChan := make(chan int, 20) - linesDeletedChan := make(chan int, 20) - filesChan := make(chan string, 20) - functionCallsChan := make(chan string, 20) - - // Receive lines and process them, then send the result to the appropriate channel defined above. - pwg.Add(1) - go func() { - defer pwg.Done() - for line := range lines { - - pwg.Add(1) - go func(line string) { - defer pwg.Done() - if strings.HasPrefix(line, "@@") { - regionsChan <- 1 - } else if strings.HasPrefix(line, "+++") { - // If the file has been renamed or copied we keep the newer name and get rid - // of the prefix "+++ b/" - filesChan <- line[6:] - } else if strings.HasPrefix(line, "+") { - linesAddedChan <- 1 - } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { - linesDeletedChan <- 1 - } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && - !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { - - matches := re.FindAllString(line, -1) - for _, match := range matches { - // We'll keep only the function name i.e. remove the bracket '(' - functionCall := strings.TrimSuffix(match, "(") - functionCallsChan <- functionCall - } - } - }(line) - } - }() - - // CLose the processing channels - go func() { - pwg.Wait() - close(regionsChan) - close(linesAddedChan) - close(linesDeletedChan) - close(filesChan) - close(functionCallsChan) - }() - - // Workers for each type - var wwg sync.WaitGroup - - wwg.Add(5) - - go func() { - defer wwg.Done() - for range regionsChan { - regions++ - } - }() - - go func() { - defer wwg.Done() - for range linesAddedChan { - linesAdded++ - } - }() - - go func() { - defer wwg.Done() - for range linesDeletedChan { - linesDeleted++ - } - }() - - go func() { - defer wwg.Done() - for file := range filesChan { - files = append(files, file) - } - }() - - go func() { - defer wwg.Done() - for functionCall := range functionCallsChan { - if _, ok := functionCalls[functionCall]; ok { - functionCalls[functionCall]++ - } else { - functionCalls[functionCall] = 1 - } - } - }() - - wwg.Wait() - - return &result{files, regions, linesAdded, linesDeleted, functionCalls} - -} - -// computeConcurrencyChannelsWithWorkers is the same as compute but it uses a fixed number of workers -// equal to the number of logical CPUs on the machine -func computeConcurrencyChannelsWithWorkers() *result { - root := "./diffs" - - re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") - if err != nil { - fmt.Println(err) - } - - var regions int - var linesAdded int - var linesDeleted int - var files []string - functionCalls := make(map[string]int) - - // Reader wg - var rwg sync.WaitGroup - - // lines receives the lines of the diff files from their respective goroutines - lines := make(chan string, 50) - - // Line reader, one goroutine spawned per file - filepath.Walk(root, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - if info.IsDir() { - return nil - } - rwg.Add(1) - go func() { - defer rwg.Done() - file, err := os.Open(path) - if err != nil { - return - } - defer file.Close() - scanner := bufio.NewScanner(file) - for scanner.Scan() { - lines <- scanner.Text() - } - }() - - return nil - }) - - // Clean up readers - go func() { - rwg.Wait() - close(lines) - }() - - // "Processor" wg - var pwg sync.WaitGroup - - regionsChan := make(chan int, 20) - linesAddedChan := make(chan int, 20) - linesDeletedChan := make(chan int, 20) - filesChan := make(chan string, 20) - functionCallsChan := make(chan string, 20) - - // Receive lines and process them, then send the result to the appropriate channel defined above. - pwg.Add(1) - go func() { - defer pwg.Done() - - for w := 1; w <= runtime.NumCPU(); w++ { - pwg.Add(1) - go func() { - defer pwg.Done() - for line := range lines { - - if strings.HasPrefix(line, "@@") { - regionsChan <- 1 - } else if strings.HasPrefix(line, "+++") { - // If the file has been renamed or copied we keep the newer name and get rid - // of the prefix "+++ b/" - filesChan <- line[6:] - } else if strings.HasPrefix(line, "+") { - linesAddedChan <- 1 - } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { - linesDeletedChan <- 1 - } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && - !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { - - matches := re.FindAllString(line, -1) - for _, match := range matches { - // We'll keep only the function name i.e. remove the bracket '(' - functionCall := strings.TrimSuffix(match, "(") - functionCallsChan <- functionCall - } - } - } - }() - } - }() - - // CLose the processing channels - go func() { - pwg.Wait() - close(regionsChan) - close(linesAddedChan) - close(linesDeletedChan) - close(filesChan) - close(functionCallsChan) - }() - - // Workers for each type - var wwg sync.WaitGroup - - wwg.Add(5) - - go func() { - defer wwg.Done() - for range regionsChan { - regions++ - } - }() - - go func() { - defer wwg.Done() - for range linesAddedChan { - linesAdded++ - } - }() - - go func() { - defer wwg.Done() - for range linesDeletedChan { - linesDeleted++ - } - }() - - go func() { - defer wwg.Done() - for file := range filesChan { - files = append(files, file) - } - }() - - go func() { - defer wwg.Done() - for functionCall := range functionCallsChan { - if _, ok := functionCalls[functionCall]; ok { - functionCalls[functionCall]++ - } else { - functionCalls[functionCall] = 1 - } - } - }() - - wwg.Wait() - - return &result{files, regions, linesAdded, linesDeleted, functionCalls} - -} - -// computeConcurrencyReadingOnly only uses goroutines to read the different files -func computeConcurrencyReadingOnly() *result { - root := "./diffs" - - re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") - if err != nil { - fmt.Println(err) - } - - var regions int - var linesAdded int - var linesDeleted int - var files []string - functionCalls := make(map[string]int) - - // Reader wg - var rwg sync.WaitGroup - - // lines receives the lines of the diff files from their respective goroutines - lines := make(chan string, 50) - - // Line reader, one goroutine spawned per file - filepath.Walk(root, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - if info.IsDir() { - return nil - } - rwg.Add(1) - go func() { - defer rwg.Done() - file, err := os.Open(path) - if err != nil { - return - } - defer file.Close() - scanner := bufio.NewScanner(file) - for scanner.Scan() { - lines <- scanner.Text() - } - }() - - return nil - }) - - // Clean up readers - go func() { - rwg.Wait() - close(lines) - }() - - // Receive lines and process them, then send the result to the appropriate channel defined above. - - for line := range lines { - - if strings.HasPrefix(line, "@@") { - regions++ - } else if strings.HasPrefix(line, "+++") { - // If the file has been renamed or copied we keep the newer name and get rid - // of the prefix "+++ b/" - files = append(files, line[6:]) - } else if strings.HasPrefix(line, "+") { - linesAdded++ - } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { - linesDeleted++ - } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && - !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { - - matches := re.FindAllString(line, -1) - for _, match := range matches { - // We'll keep only the function name i.e. remove the bracket '(' - functionCall := strings.TrimSuffix(match, "(") - if _, ok := functionCalls[functionCall]; ok { - functionCalls[functionCall]++ - } else { - functionCalls[functionCall] = 1 - } - } - } - } - - return &result{files, regions, linesAdded, linesDeleted, functionCalls} - -} - -// computeConcurrencyMutexes is the same as compute, but it uses mutexes instead of channels -func computeConcurrencyMutexes() *result { - root := "./diffs" - - re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") - if err != nil { - fmt.Println(err) - } - - var regions int - var linesAdded int - var linesDeleted int - var files []string - functionCalls := make(map[string]int) - - // Reader wg - var rwg sync.WaitGroup - - // lines receives the lines of the diff files from their respective goroutines - lines := make(chan string, 50) - - // Line reader, one goroutine spawned per file - filepath.Walk(root, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - if info.IsDir() { - return nil - } - rwg.Add(1) - go func() { - defer rwg.Done() - file, err := os.Open(path) - if err != nil { - return - } - defer file.Close() - scanner := bufio.NewScanner(file) - for scanner.Scan() { - lines <- scanner.Text() - } - }() - - return nil - }) - - // Clean up readers - go func() { - rwg.Wait() - close(lines) - }() - - // Receive lines and process them, then send the result to the appropriate channel defined above. - - // Mutexes - var regionsMutex sync.Mutex - var linesAddedMutex sync.Mutex - var linesDeletedMutex sync.Mutex - var filesMutex sync.Mutex - var functionCallsMutex sync.Mutex - - var pwg sync.WaitGroup - for line := range lines { - pwg.Add(1) - go func(line string) { - defer pwg.Done() - - if strings.HasPrefix(line, "@@") { - regionsMutex.Lock() - regions++ - regionsMutex.Unlock() - } else if strings.HasPrefix(line, "+++") { - // If the file has been renamed or copied we keep the newer name and get rid - // of the prefix "+++ b/" - filesMutex.Lock() - files = append(files, line[6:]) - filesMutex.Unlock() - } else if strings.HasPrefix(line, "+") { - linesAddedMutex.Lock() - linesAdded++ - linesAddedMutex.Unlock() - } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { - linesDeletedMutex.Lock() - linesDeleted++ - linesDeletedMutex.Unlock() - } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && - !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { - - matches := re.FindAllString(line, -1) - - functionCallsMutex.Lock() - for _, match := range matches { - // We'll keep only the function name i.e. remove the bracket '(' - functionCall := strings.TrimSuffix(match, "(") - - if _, ok := functionCalls[functionCall]; ok { - functionCalls[functionCall]++ - } else { - functionCalls[functionCall] = 1 - } - } - functionCallsMutex.Unlock() - } - }(line) - } - - pwg.Wait() - - return &result{files, regions, linesAdded, linesDeleted, functionCalls} - -} - -// computeConcurrencyMutexesWithWorkers is the same as computeConcurrencyMutexes, -// but it doesn't spawn a goroutine for every line, instead it uses a fixed number of workers -// equal to the number of logical CPUs on the machine -func computeConcurrencyMutexesWithWorkers() *result { - root := "./diffs" - - re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]*\\(") - if err != nil { - fmt.Println(err) - } - - var regions int - var linesAdded int - var linesDeleted int - var files []string - functionCalls := make(map[string]int) - - // Reader wg - var rwg sync.WaitGroup - - // lines receives the lines of the diff files from their respective goroutines - lines := make(chan string, 50) - - // Line reader, one goroutine spawned per file - filepath.Walk(root, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - if info.IsDir() { - return nil - } - rwg.Add(1) - go func() { - defer rwg.Done() - file, err := os.Open(path) - if err != nil { - return - } - defer file.Close() - scanner := bufio.NewScanner(file) - for scanner.Scan() { - lines <- scanner.Text() - } - }() - - return nil - }) - - // Clean up readers - go func() { - rwg.Wait() - close(lines) - }() - - // Receive lines and process them, then send the result to the appropriate channel defined above. - - // Mutexes - var regionsMutex sync.Mutex - var linesAddedMutex sync.Mutex - var linesDeletedMutex sync.Mutex - var filesMutex sync.Mutex - var functionCallsMutex sync.Mutex - - var pwg sync.WaitGroup - - numCpus := runtime.NumCPU() - - for w := 1; w <= numCpus; w++ { - pwg.Add(1) - go func() { - defer pwg.Done() - for line := range lines { - if strings.HasPrefix(line, "@@") { - regionsMutex.Lock() - regions++ - regionsMutex.Unlock() - } else if strings.HasPrefix(line, "+++") { - // If the file has been renamed or copied we keep the newer name and get rid - // of the prefix "+++ b/" - filesMutex.Lock() - files = append(files, line[6:]) - filesMutex.Unlock() - } else if strings.HasPrefix(line, "+") { - linesAddedMutex.Lock() - linesAdded++ - linesAddedMutex.Unlock() - } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { - linesDeletedMutex.Lock() - linesDeleted++ - linesDeletedMutex.Unlock() - } else if !strings.HasPrefix(line, "-") && !strings.HasPrefix(line[1:], "#") && - !strings.HasPrefix(line[1:], "//") && !strings.HasPrefix(line[1:], "/*") { - - matches := re.FindAllString(line, -1) - - functionCallsMutex.Lock() - for _, match := range matches { - // We'll keep only the function name i.e. remove the bracket '(' - functionCall := strings.TrimSuffix(match, "(") - - if _, ok := functionCalls[functionCall]; ok { - functionCalls[functionCall]++ - } else { - functionCalls[functionCall] = 1 - } - } - functionCallsMutex.Unlock() - } - } - }() - } - - pwg.Wait() - - return &result{files, regions, linesAdded, linesDeleted, functionCalls} - -} - -// computeNoConcurrency reads the files one at a time, and processes them line by line -func computeNoConcurrency() *result { - root := "./diffs" - - re, err := regexp.Compile("[A-Za-z_][A-Za-z0-9_]+\\(") - if err != nil { - fmt.Println(err) - } - var regions int - var linesAdded int - var linesDeleted int - var files []string - functionCalls := make(map[string]int) - - filepath.Walk(root, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - if info.IsDir() { - return nil - } - - file, err := os.Open(path) - if err != nil { - return err - } - defer file.Close() - - scanner := bufio.NewScanner(file) - for scanner.Scan() { - line := scanner.Text() - - if strings.HasPrefix(line, "@@") { - regions++ - } else if strings.HasPrefix(line, "+++") { - // If the file has been renamed or copied we keep the newer name and get rid - // of the prefix "+++ b/" - files = append(files, line[6:]) - } else if strings.HasPrefix(line, "+") { - linesAdded++ - } else if strings.HasPrefix(line, "-") && !strings.HasPrefix(line, "---") { - linesDeleted++ - } else { - matches := re.FindAllString(line, -1) - if matches == nil { - continue - } - for _, match := range matches { - // We'll keep only the function name i.e. remove the parentheses and params - functionCall := match[:len(match)-1] - - if _, ok := functionCalls[functionCall]; ok { - functionCalls[functionCall]++ - } else { - functionCalls[functionCall] = 1 - } - } - } - } - return nil - }) - - return &result{files, regions, linesAdded, linesDeleted, functionCalls} - -} +var compute = computeConcurrencyChannelsOneGoroutinePerCPU diff --git a/main_test.go b/main_test.go index f22492a..b5b5427 100644 --- a/main_test.go +++ b/main_test.go @@ -16,26 +16,26 @@ func BenchmarkConcurrencyReadingOnly(b *testing.B) { } } -func BenchmarkConcurrency(b *testing.B) { +func BenchmarkConcurrencyChannelsOneGoroutinePerLine(b *testing.B) { for n := 0; n < b.N; n++ { - compute() + computeConcurrencyChannelsOneGoroutinePerLine() } } -func BenchmarkConcurrencyMutexes(b *testing.B) { +func BenchmarkConcurrencyMutexesOneGoroutinePerLine(b *testing.B) { for n := 0; n < b.N; n++ { - computeConcurrencyMutexes() + computeConcurrencyMutexesOneGoroutinePerLine() } } -func BenchmarkConcurrencyMutexesWithWorkers(b *testing.B) { +func BenchmarkConcurrencyMutexesOneGoroutinePerCPU(b *testing.B) { for n := 0; n < b.N; n++ { - computeConcurrencyMutexesWithWorkers() + computeConcurrencyMutexesOneGoroutinePerCPU() } } -func BenchmarkConcurrencyChannelsWithWorkers(b *testing.B) { +func BenchmarkConcurrencyChannelsOneGoroutinePerCPU(b *testing.B) { for n := 0; n < b.N; n++ { - computeConcurrencyChannelsWithWorkers() + computeConcurrencyChannelsOneGoroutinePerCPU() } }