From fa005dd1028b4d5f64444ff0e83ce33eb6bd5c03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1ximo=20Cuadros?= Date: Tue, 5 May 2020 09:33:36 +0200 Subject: [PATCH 1/5] *: a bit of house keeping --- regex.go | 154 ++++++++++++++++++++++++------------------------------- 1 file changed, 66 insertions(+), 88 deletions(-) diff --git a/regex.go b/regex.go index 00f0c0e..e5e5b03 100644 --- a/regex.go +++ b/regex.go @@ -21,8 +21,6 @@ import ( "unsafe" ) -type strRange []int - const numMatchStartSize = 4 const numReadBufferStartSize = 256 @@ -70,8 +68,6 @@ func NewRegexpASCII(pattern string, option int) (*Regexp, error) { } func initRegexp(re *Regexp, option int) (*Regexp, error) { - var err error - patternCharPtr := C.CString(re.pattern) defer C.free(unsafe.Pointer(patternCharPtr)) @@ -80,20 +76,20 @@ func initRegexp(re *Regexp, option int) (*Regexp, error) { errorCode := C.NewOnigRegex(patternCharPtr, C.int(len(re.pattern)), C.int(option), &re.regex, &re.region, &re.encoding, &re.errorInfo, &re.errorBuf) if errorCode != C.ONIG_NORMAL { - err = errors.New(C.GoString(re.errorBuf)) - } else { - err = nil - numCapturesInPattern := int(C.onig_number_of_captures(re.regex)) + 1 - re.matchData = &MatchData{} - re.matchData.indexes = make([][]int32, numMatchStartSize) - for i := 0; i < numMatchStartSize; i++ { - re.matchData.indexes[i] = make([]int32, numCapturesInPattern*2) - } - re.namedGroupInfo = re.getNamedGroupInfo() - runtime.SetFinalizer(re, (*Regexp).Free) + return re, errors.New(C.GoString(re.errorBuf)) } - return re, err + numCapturesInPattern := int(C.onig_number_of_captures(re.regex)) + 1 + re.matchData = &MatchData{} + re.matchData.indexes = make([][]int32, numMatchStartSize) + for i := 0; i < numMatchStartSize; i++ { + re.matchData.indexes[i] = make([]int32, numCapturesInPattern*2) + } + + re.namedGroupInfo = re.getNamedGroupInfo() + runtime.SetFinalizer(re, (*Regexp).Free) + + return re, nil } func Compile(str string) (*Regexp, error) { @@ -105,6 +101,7 @@ func 
MustCompile(str string) *Regexp { if error != nil { panic("regexp: compiling " + str + ": " + error.Error()) } + return regexp } @@ -117,6 +114,7 @@ func MustCompileWithOption(str string, option int) *Regexp { if error != nil { panic("regexp: compiling " + str + ": " + error.Error()) } + return regexp } @@ -126,6 +124,7 @@ func MustCompileASCII(str string) *Regexp { if error != nil { panic("regexp: compiling " + str + ": " + error.Error()) } + return regexp } @@ -167,7 +166,8 @@ func (re *Regexp) Free() { func (re *Regexp) getNamedGroupInfo() NamedGroupInfo { numNamedGroups := int(C.onig_number_of_names(re.regex)) - //when any named capture exisits, there is no numbered capture even if there are unnamed captures + // when any named capture exists, there is no numbered capture even if + // there are unnamed captures. if numNamedGroups == 0 { return nil } @@ -202,23 +202,6 @@ func (re *Regexp) getNamedGroupInfo() NamedGroupInfo { return namedGroupInfo } -func (re *Regexp) groupNameToId(name string) int { - if re.namedGroupInfo == nil { - return ONIGERR_UNDEFINED_NAME_REFERENCE - } - - return re.namedGroupInfo[name] -} - -func (re *Regexp) processMatch(numCaptures int) []int32 { - if numCaptures <= 0 { - panic("cannot have 0 captures when processing a match") - } - - matchData := re.matchData - return matchData.indexes[matchData.count][:numCaptures*2] -} - func (re *Regexp) ClearMatchData() { matchData := re.matchData matchData.count = 0 @@ -303,7 +286,7 @@ func (re *Regexp) findAll(b []byte, n int) [][]int { } matchData := re.matchData - offset := 0 + var offset int for offset <= n { if matchData.count >= len(matchData.indexes) { length := len(matchData.indexes[0]) @@ -316,9 +299,11 @@ func (re *Regexp) findAll(b []byte, n int) [][]int { } matchData.count++ - //move offset to the ending index of the current match and prepare to find the next non-overlapping match + // move offset to the ending index of the current match and prepare to + // find the next 
non-overlapping match. offset = match[1] - //if match[0] == match[1], it means the current match does not advance the search. we need to exit the loop to avoid getting stuck here. + // if match[0] == match[1], it means the current match does not advance + // the search. we need to exit the loop to avoid getting stuck here. if match[0] == match[1] { if offset < n && offset >= 0 { //there are more bytes, so move offset by a word @@ -389,10 +374,12 @@ func (re *Regexp) FindAll(b []byte, n int) [][]byte { if matches == nil { return nil } + matchBytes := make([][]byte, 0, len(matches)) for _, match := range matches { matchBytes = append(matchBytes, getCapture(b, match[0], match[1])) } + return matchBytes } @@ -412,6 +399,7 @@ func (re *Regexp) FindAllString(s string, n int) []string { matchStrings = append(matchStrings, string(m)) } } + return matchStrings } @@ -553,36 +541,18 @@ func (re *Regexp) NumSubexp() int { return (int)(C.onig_number_of_captures(re.regex)) } -func (re *Regexp) getNamedCapture(name []byte, capturedBytes [][]byte) []byte { - nameStr := string(name) - capNum := re.groupNameToId(nameStr) - if capNum < 0 || capNum >= len(capturedBytes) { - panic(fmt.Sprintf("capture group name (%q) has error\n", nameStr)) - } - return capturedBytes[capNum] -} - -func (re *Regexp) getNumberedCapture(num int, capturedBytes [][]byte) []byte { - //when named capture groups exist, numbered capture groups returns "" - if re.namedGroupInfo == nil && num <= (len(capturedBytes)-1) && num >= 0 { - return capturedBytes[num] - } - return ([]byte)("") -} - func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte) []byte { replLen := len(repl) newRepl := make([]byte, 0, replLen*3) - inEscapeMode := false - inGroupNameMode := false groupName := make([]byte, 0, replLen) - for index := 0; index < replLen; index += 1 { + + var inGroupNameMode, inEscapeMode bool + for index := 0; index < replLen; index++ { ch := repl[index] if inGroupNameMode && ch == byte('<') { 
} else if inGroupNameMode && ch == byte('>') { inGroupNameMode = false - groupNameStr := string(groupName) - capBytes := capturedBytes[groupNameStr] + capBytes := capturedBytes[string(groupName)] newRepl = append(newRepl, capBytes...) groupName = groupName[:0] //reset the name } else if inGroupNameMode { @@ -594,7 +564,7 @@ func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte) } else if inEscapeMode && ch == byte('k') && (index+1) < replLen && repl[index+1] == byte('<') { inGroupNameMode = true inEscapeMode = false - index += 1 //bypass the next char '<' + index++ //bypass the next char '<' } else if inEscapeMode { newRepl = append(newRepl, '\\') newRepl = append(newRepl, ch) @@ -605,6 +575,7 @@ func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte) inEscapeMode = !inEscapeMode } } + return newRepl } @@ -619,6 +590,7 @@ func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map for i, match := range matches { length := len(match) / 2 capturedBytes := make(map[string][]byte) + if re.namedGroupInfo == nil { for j := 0; j < length; j++ { capturedBytes[strconv.Itoa(j)] = getCapture(src, match[2*j], match[2*j+1]) @@ -628,6 +600,7 @@ func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map capturedBytes[name] = getCapture(src, match[2*j], match[2*j+1]) } } + matchBytes := getCapture(src, match[0], match[1]) newRepl := replFunc(repl, matchBytes, capturedBytes) prevEnd := 0 @@ -635,15 +608,19 @@ func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map prevMatch := matches[i-1][:2] prevEnd = prevMatch[1] } + if match[0] > prevEnd && prevEnd >= 0 && match[0] <= srcLen { dest = append(dest, src[prevEnd:match[0]]...) } + dest = append(dest, newRepl...) } + lastEnd := matches[len(matches)-1][1] if lastEnd < srcLen && lastEnd >= 0 { dest = append(dest, src[lastEnd:]...) 
} + return dest } @@ -652,7 +629,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte { } func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { - return re.replaceAll(src, []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { + return re.replaceAll(src, nil, func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { return repl(matchBytes) }) } @@ -662,7 +639,7 @@ func (re *Regexp) ReplaceAllString(src, repl string) string { } func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { - return string(re.replaceAll([]byte(src), []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { + return string(re.replaceAll([]byte(src), nil, func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { return []byte(repl(string(matchBytes))) })) } @@ -674,32 +651,35 @@ func (re *Regexp) String() string { return re.pattern } -func grow_buffer(b []byte, offset int, n int) []byte { +func growBuffer(b []byte, offset int, n int) []byte { if offset+n > cap(b) { buf := make([]byte, 2*cap(b)+n) copy(buf, b[:offset]) return buf } + return b } func fromReader(r io.RuneReader) []byte { b := make([]byte, numReadBufferStartSize) - offset := 0 - var err error = nil - for err == nil { + + var offset int + for { rune, runeWidth, err := r.ReadRune() - if err == nil { - b = grow_buffer(b, offset, runeWidth) - writeWidth := utf8.EncodeRune(b[offset:], rune) - if runeWidth != writeWidth { - panic("reading rune width not equal to the written rune width") - } - offset += writeWidth - } else { + if err != nil { break } + + b = growBuffer(b, offset, runeWidth) + writeWidth := utf8.EncodeRune(b[offset:], rune) + if runeWidth != writeWidth { + panic("reading rune width not equal to the written rune width") + } + + offset += writeWidth } + return b[:offset] } @@ -728,27 +708,25 @@ func MatchString(pattern string, s string) (matched bool, error error) { if err != nil { return false, err } + return 
re.MatchString(s), nil } func (re *Regexp) Gsub(src, repl string) string { - srcBytes := ([]byte)(src) - replBytes := ([]byte)(repl) - replaced := re.replaceAll(srcBytes, replBytes, fillCapturedValues) - - return string(replaced) + return string(re.replaceAll([]byte(src), []byte(repl), fillCapturedValues)) } func (re *Regexp) GsubFunc(src string, replFunc func(string, map[string]string) string) string { - srcBytes := ([]byte)(src) - replaced := re.replaceAll(srcBytes, nil, func(_ []byte, matchBytes []byte, capturedBytes map[string][]byte) []byte { - capturedStrings := make(map[string]string) - for name, capBytes := range capturedBytes { - capturedStrings[name] = string(capBytes) - } - matchString := string(matchBytes) - return ([]byte)(replFunc(matchString, capturedStrings)) - }) + replaced := re.replaceAll([]byte(src), nil, + func(_ []byte, matchBytes []byte, capturedBytes map[string][]byte) []byte { + capturedStrings := make(map[string]string) + for name, capBytes := range capturedBytes { + capturedStrings[name] = string(capBytes) + } + matchString := string(matchBytes) + return ([]byte)(replFunc(matchString, capturedStrings)) + }, + ) return string(replaced) } From dd3559755cb8b4f759514ec9c7918b3c516b220f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1ximo=20Cuadros?= Date: Tue, 5 May 2020 11:48:04 +0200 Subject: [PATCH 2/5] regexp: remove matchData, to avoid collisions --- regex.go | 129 +++++++++++++++++++------------------------------- regex_test.go | 2 +- 2 files changed, 50 insertions(+), 81 deletions(-) diff --git a/regex.go b/regex.go index e5e5b03..714883a 100644 --- a/regex.go +++ b/regex.go @@ -26,21 +26,17 @@ const numReadBufferStartSize = 256 var mutex sync.Mutex -type MatchData struct { - count int - indexes [][]int32 -} - type NamedGroupInfo map[string]int type Regexp struct { - pattern string - regex C.OnigRegex - region *C.OnigRegion - encoding C.OnigEncoding - errorInfo *C.OnigErrorInfo - errorBuf *C.char - matchData *MatchData + pattern string
+ regex C.OnigRegex + region *C.OnigRegion + encoding C.OnigEncoding + errorInfo *C.OnigErrorInfo + errorBuf *C.char + + numCaptures int32 namedGroupInfo NamedGroupInfo mutex *sync.Mutex } @@ -79,14 +75,9 @@ func initRegexp(re *Regexp, option int) (*Regexp, error) { return re, errors.New(C.GoString(re.errorBuf)) } - numCapturesInPattern := int(C.onig_number_of_captures(re.regex)) + 1 - re.matchData = &MatchData{} - re.matchData.indexes = make([][]int32, numMatchStartSize) - for i := 0; i < numMatchStartSize; i++ { - re.matchData.indexes[i] = make([]int32, numCapturesInPattern*2) - } - + re.numCaptures = int32(C.onig_number_of_captures(re.regex)) + 1 re.namedGroupInfo = re.getNamedGroupInfo() + runtime.SetFinalizer(re, (*Regexp).Free) return re, nil @@ -202,106 +193,95 @@ func (re *Regexp) getNamedGroupInfo() NamedGroupInfo { return namedGroupInfo } -func (re *Regexp) ClearMatchData() { - matchData := re.matchData - matchData.count = 0 -} - func (re *Regexp) find(b []byte, n int, offset int) []int { re.lock() defer re.unlock() - var match []int + match := make([]int, re.numCaptures*2) if n == 0 { b = []byte{0} } - ptr := unsafe.Pointer(&b[0]) - matchData := re.matchData - capturesPtr := unsafe.Pointer(&(matchData.indexes[matchData.count][0])) - numCaptures := int32(0) + bytesPtr := unsafe.Pointer(&b[0]) + + // captures contains two pairs of ints, start and end, so we need list + // twice the size of the capture groups. 
+ captures := make([]C.int, re.numCaptures*2) + capturesPtr := unsafe.Pointer(&captures[0]) + + var numCaptures int32 numCapturesPtr := unsafe.Pointer(&numCaptures) - pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.region, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr))) - if pos >= 0 { - if numCaptures <= 0 { - panic("cannot have 0 captures when processing a match") - } - match2 := matchData.indexes[matchData.count][:numCaptures*2] - match = make([]int, len(match2)) - for i := range match2 { - match[i] = int(match2[i]) - } + pos := int(C.SearchOnigRegex( + bytesPtr, C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), + re.regex, re.region, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr), + )) - numCapturesInPattern := int32(C.onig_number_of_captures(re.regex)) + 1 - if numCapturesInPattern != numCaptures { - panic(fmt.Errorf("expected %d captures but got %d", numCapturesInPattern, numCaptures)) - } + if pos < 0 { + return nil } - return re.copySlice(match) -} + if numCaptures <= 0 { + panic("cannot have 0 captures when processing a match") + } -func (re *Regexp) copySlice(indices []int) (result []int) { - if re.mutex == nil { - return indices + if re.numCaptures != numCaptures { + panic(fmt.Errorf("expected %d captures but got %d", re.numCaptures, numCaptures)) } - if indices != nil { - result = make([]int, len(indices)) - copy(result, indices) + for i := range captures { + match[i] = int(captures[i]) } - return result + return match } func getCapture(b []byte, beg int, end int) []byte { if beg < 0 || end < 0 { return nil } + return b[beg:end] } func (re *Regexp) match(b []byte, n int, offset int) bool { - re.lock() - defer re.unlock() + if n == 0 { + return true + } - re.ClearMatchData() if n == 0 { b = []byte{0} } - ptr := unsafe.Pointer(&b[0]) - pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, 
re.region, re.errorInfo, (*C.char)(nil), (*C.int)(nil), (*C.int)(nil))) + bytesPtr := unsafe.Pointer(&b[0]) + pos := int(C.SearchOnigRegex( + bytesPtr, C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), + re.regex, re.region, re.errorInfo, nil, nil, nil, + )) + return pos >= 0 } func (re *Regexp) findAll(b []byte, n int) [][]int { - var matches [][]int - re.ClearMatchData() - if n < 0 { n = len(b) } - matchData := re.matchData + capture := make([][]int, 0, numMatchStartSize) var offset int for offset <= n { - if matchData.count >= len(matchData.indexes) { - length := len(matchData.indexes[0]) - matchData.indexes = append(matchData.indexes, make([]int32, length)) - } - match := re.find(b, n, offset) - if len(match) == 0 { + if match == nil { break } - matchData.count++ + capture = append(capture, match) + // move offset to the ending index of the current match and prepare to // find the next non-overlapping match. offset = match[1] + // if match[0] == match[1], it means the current match does not advance // the search. we need to exit the loop to avoid getting stuck here. 
if match[0] == match[1] { @@ -316,20 +296,10 @@ func (re *Regexp) findAll(b []byte, n int) [][]int { } } - matches2 := matchData.indexes[:matchData.count] - matches = make([][]int, len(matches2)) - for i, v := range matches2 { - matches[i] = make([]int, len(v)) - for j, v2 := range v { - matches[i][j] = int(v2) - } - } - - return matches + return capture } func (re *Regexp) FindIndex(b []byte) []int { - re.ClearMatchData() match := re.find(b, len(b), 0) if len(match) == 0 { return nil @@ -409,7 +379,6 @@ func (re *Regexp) FindAllStringIndex(s string, n int) [][]int { } func (re *Regexp) FindSubmatchIndex(b []byte) []int { - re.ClearMatchData() match := re.find(b, len(b), 0) if len(match) == 0 { return nil diff --git a/regex_test.go b/regex_test.go index 1af8adf..9c53195 100644 --- a/regex_test.go +++ b/regex_test.go @@ -529,7 +529,7 @@ type FindTest struct { } func (t FindTest) String() string { - return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text) + return fmt.Sprintf("pattern: %#q text: %#q", t.pat, t.text) } var findTests = []FindTest{ From 3302cabf6cff6a102e174f6f9c24a6030018820a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1ximo=20Cuadros?= Date: Tue, 5 May 2020 11:50:13 +0200 Subject: [PATCH 3/5] regexp: remove mutex --- regex.go | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/regex.go b/regex.go index 714883a..8fb6546 100644 --- a/regex.go +++ b/regex.go @@ -38,23 +38,10 @@ type Regexp struct { numCaptures int32 namedGroupInfo NamedGroupInfo - mutex *sync.Mutex } // NewRegexp creates and initializes a new Regexp with the given pattern and option. func NewRegexp(pattern string, option int) (*Regexp, error) { - re, err := initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_UTF8}, option) - if err != nil { - return nil, err - } - - re.mutex = new(sync.Mutex) - return re, nil -} - -// NewRegexpNonThreadsafe creates and initializes a new Regexp with the given -// pattern and option. 
The resulting regexp is not thread-safe. -func NewRegexpNonThreadsafe(pattern string, option int) (*Regexp, error) { return initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_UTF8}, option) } @@ -119,22 +106,7 @@ func MustCompileASCII(str string) *Regexp { return regexp } -func (re *Regexp) lock() { - if re.mutex != nil { - re.mutex.Lock() - } -} - -func (re *Regexp) unlock() { - if re.mutex != nil { - re.mutex.Unlock() - } -} - func (re *Regexp) Free() { - re.lock() - defer re.unlock() - mutex.Lock() if re.regex != nil { C.onig_free(re.regex) @@ -194,9 +166,6 @@ func (re *Regexp) getNamedGroupInfo() NamedGroupInfo { } func (re *Regexp) find(b []byte, n int, offset int) []int { - re.lock() - defer re.unlock() - match := make([]int, re.numCaptures*2) if n == 0 { @@ -504,9 +473,6 @@ func (re *Regexp) MatchString(s string) bool { } func (re *Regexp) NumSubexp() int { - re.lock() - defer re.unlock() - return (int)(C.onig_number_of_captures(re.regex)) } @@ -614,9 +580,6 @@ func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) str } func (re *Regexp) String() string { - re.lock() - defer re.unlock() - return re.pattern } From b40dc80d2ffa8706878ee8d2be16cabd6c08a000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1ximo=20Cuadros?= Date: Tue, 5 May 2020 12:53:58 +0200 Subject: [PATCH 4/5] regexp: Match, remove test code --- regex.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/regex.go b/regex.go index 8fb6546..486412c 100644 --- a/regex.go +++ b/regex.go @@ -215,10 +215,6 @@ func getCapture(b []byte, beg int, end int) []byte { } func (re *Regexp) match(b []byte, n int, offset int) bool { - if n == 0 { - return true - } - if n == 0 { b = []byte{0} } From 9c8424ed7c79e3e0d89f7e22fedb1eb411c7ba83 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Wed, 6 May 2020 16:52:01 +0200 Subject: [PATCH 5/5] Create new region for each regexp usage --- chelper.c | 22 +++++++++++++++------- chelper.h | 8 ++++---- regex.go | 11 +++-------- 3 files 
changed, 22 insertions(+), 19 deletions(-) diff --git a/chelper.c b/chelper.c index d768a77..035f3b4 100644 --- a/chelper.c +++ b/chelper.c @@ -7,7 +7,7 @@ #include "chelper.h" int NewOnigRegex( char *pattern, int pattern_length, int option, - OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) { + OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) { int ret = ONIG_NORMAL; int error_msg_len = 0; @@ -23,8 +23,6 @@ int NewOnigRegex( char *pattern, int pattern_length, int option, memset(*error_buffer, 0, ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char)); - *region = onig_region_new(); - ret = onig_new(regex, pattern_start, pattern_end, (OnigOptionType)(option), *encoding, OnigDefaultSyntax, *error_info); if (ret != ONIG_NORMAL) { @@ -38,9 +36,10 @@ int NewOnigRegex( char *pattern, int pattern_length, int option, } int SearchOnigRegex( void *str, int str_length, int offset, int option, - OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) { + OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) { int ret = ONIG_MISMATCH; int error_msg_len = 0; + OnigRegion *region; #ifdef BENCHMARK_CHELP struct timeval tim1, tim2; long t; @@ -55,6 +54,8 @@ int SearchOnigRegex( void *str, int str_length, int offset, int option, gettimeofday(&tim1, NULL); #endif + region = onig_region_new(); + ret = onig_search(regex, str_start, str_end, search_start, search_end, region, option); if (ret < 0 && error_buffer != NULL) { error_msg_len = onig_error_code_to_str((unsigned char*)(error_buffer), ret, error_info); @@ -74,6 +75,8 @@ int SearchOnigRegex( void *str, int str_length, int offset, int option, *numCaptures = count; } + onig_region_free(region, 1); + #ifdef BENCHMARK_CHELP gettimeofday(&tim2, NULL); t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; @@ -83,9 +86,10 @@ 
int SearchOnigRegex( void *str, int str_length, int offset, int option, } int MatchOnigRegex(void *str, int str_length, int offset, int option, - OnigRegex regex, OnigRegion *region) { + OnigRegex regex) { int ret = ONIG_MISMATCH; int error_msg_len = 0; + OnigRegion *region; #ifdef BENCHMARK_CHELP struct timeval tim1, tim2; long t; @@ -98,7 +102,9 @@ int MatchOnigRegex(void *str, int str_length, int offset, int option, #ifdef BENCHMARK_CHELP gettimeofday(&tim1, NULL); #endif + region = onig_region_new(); ret = onig_match(regex, str_start, str_end, search_start, region, option); + onig_region_free(region, 1); #ifdef BENCHMARK_CHELP gettimeofday(&tim2, NULL); t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; @@ -108,8 +114,9 @@ int MatchOnigRegex(void *str, int str_length, int offset, int option, } int LookupOnigCaptureByName(char *name, int name_length, - OnigRegex regex, OnigRegion *region) { + OnigRegex regex) { int ret = ONIGERR_UNDEFINED_NAME_REFERENCE; + OnigRegion *region; #ifdef BENCHMARK_CHELP struct timeval tim1, tim2; long t; @@ -119,7 +126,9 @@ int LookupOnigCaptureByName(char *name, int name_length, #ifdef BENCHMARK_CHELP gettimeofday(&tim1, NULL); #endif + region = onig_region_new(); ret = onig_name_to_backref_number(regex, name_start, name_end, region); + onig_region_free(region, 1); #ifdef BENCHMARK_CHELP gettimeofday(&tim2, NULL); t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; @@ -181,4 +190,3 @@ int GetCaptureNames(OnigRegex reg, void *buffer, int bufferSize, int* groupNumbe onig_foreach_name(reg, name_callback, (void* )&groupInfo); return groupInfo.bufferOffset; } - diff --git a/chelper.h b/chelper.h index 7926fc2..4d00e7f 100644 --- a/chelper.h +++ b/chelper.h @@ -1,14 +1,14 @@ #include extern int NewOnigRegex( char *pattern, int pattern_length, int option, - OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer); + OnigRegex *regex, 
OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer); extern int SearchOnigRegex( void *str, int str_length, int offset, int option, - OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures); + OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures); extern int MatchOnigRegex( void *str, int str_length, int offset, int option, - OnigRegex regex, OnigRegion *region); + OnigRegex regex); -extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex, OnigRegion *region); +extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex); extern int GetCaptureNames(OnigRegex regex, void *buffer, int bufferSize, int* groupNumbers); diff --git a/regex.go b/regex.go index 486412c..fbe661a 100644 --- a/regex.go +++ b/regex.go @@ -31,7 +31,6 @@ type NamedGroupInfo map[string]int type Regexp struct { pattern string regex C.OnigRegex - region *C.OnigRegion encoding C.OnigEncoding errorInfo *C.OnigErrorInfo errorBuf *C.char @@ -57,7 +56,7 @@ func initRegexp(re *Regexp, option int) (*Regexp, error) { mutex.Lock() defer mutex.Unlock() - errorCode := C.NewOnigRegex(patternCharPtr, C.int(len(re.pattern)), C.int(option), &re.regex, &re.region, &re.encoding, &re.errorInfo, &re.errorBuf) + errorCode := C.NewOnigRegex(patternCharPtr, C.int(len(re.pattern)), C.int(option), &re.regex, &re.encoding, &re.errorInfo, &re.errorBuf) if errorCode != C.ONIG_NORMAL { return re, errors.New(C.GoString(re.errorBuf)) } @@ -112,10 +111,6 @@ func (re *Regexp) Free() { C.onig_free(re.regex) re.regex = nil } - if re.region != nil { - C.onig_region_free(re.region, 1) - re.region = nil - } mutex.Unlock() if re.errorInfo != nil { C.free(unsafe.Pointer(re.errorInfo)) @@ -184,7 +179,7 @@ func (re *Regexp) find(b []byte, n int, offset int) []int { pos := int(C.SearchOnigRegex( bytesPtr, C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), - re.regex, 
re.region, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr), + re.regex, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr), )) if pos < 0 { @@ -222,7 +217,7 @@ func (re *Regexp) match(b []byte, n int, offset int) bool { bytesPtr := unsafe.Pointer(&b[0]) pos := int(C.SearchOnigRegex( bytesPtr, C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), - re.regex, re.region, re.errorInfo, nil, nil, nil, + re.regex, re.errorInfo, nil, nil, nil, )) return pos >= 0