@@ -78,15 +78,15 @@ public int match(String s, int index, MatchContext context) {
7878 }
7979 return -1 ;
8080 }
81- var i = StringUtils . indexOfIgnoreCase (s , text , index );
81+ var i = findTextWithBoundary (s , text . strip () , index );
8282 while (i != -1 ) {
8383 var toParse = s .substring (index , i ).strip ();
8484 var expression = parse (toParse , typeArray , context .getParserState (), logger );
8585 if (expression .isPresent ()) {
8686 context .addExpression (expression .get ());
8787 return index + toParse .length ();
8888 }
89- i = StringUtils . indexOfIgnoreCase (s , text , i + 1 );
89+ i = findTextWithBoundary (s , text . strip () , i + 1 );
9090 }
9191 } else if (possibleInput instanceof RegexGroup ) {
9292 var m = ((RegexGroup ) possibleInput ).getPattern ().matcher (s ).region (index , s .length ());
@@ -106,23 +106,93 @@ public int match(String s, int index, MatchContext context) {
106106 }
107107 } else {
108108 assert possibleInput instanceof ExpressionElement ;
109- var nextPossibleInputs = PatternElement .getPossibleInputs (flattened .subList (context .getPatternIndex () + 1 , flattened .size ()));
109+ // Find the index of the next expression element in the flattened list
110+ // We need to find the first ExpressionElement after the current position
111+ var expressionIndex = -1 ;
112+ for (var i = possibilityIndex + 1 ; i < flattened .size (); i ++) {
113+ var elem = flattened .get (i );
114+ // Skip optional groups and look inside them
115+ if (elem instanceof OptionalGroup ) {
116+ var inner = PatternElement .flatten (((OptionalGroup ) elem ).getElement ());
117+ if (inner .stream ().anyMatch (e -> e instanceof ExpressionElement )) {
118+ continue ; // Skip optional groups containing expressions
119+ }
120+ } else if (elem instanceof ExpressionElement ) {
121+ expressionIndex = i ;
122+ break ;
123+ }
124+ }
125+ if (expressionIndex == -1 ) {
126+ continue ;
127+ }
128+ // When the expression is the last element, nextPossibleInputs will contain "\0" (end of line)
129+ // which we handle below, so we should NOT skip this case!
130+ var nextPossibleInputs = PatternElement .getPossibleInputs (flattened .subList (expressionIndex + 1 , flattened .size ()));
110131 if (nextPossibleInputs .stream ().anyMatch (pe -> !(pe instanceof TextElement ))) {
111132 continue ;
112133 }
113134 for (var nextPossibleInput : nextPossibleInputs ) {
114135 var text = ((TextElement ) nextPossibleInput ).getText ();
115- if (text .equals ("" )) {
136+ if (text .equals ("\0 " )) {
137+ // End of line marker - parse the rest and we're done
116138 var rest = s .substring (index );
117139 var splits = splitAtSpaces (rest );
118- for (var split : splits ) {
119- var i = StringUtils .indexOfIgnoreCase (s , split , index );
120- if (i != -1 ) {
121- var toParse = s .substring (index , i );
122- var expression = parse (toParse , typeArray , context .getParserState (), logger );
123- if (expression .isPresent ()) {
124- context .addExpression (expression .get ());
125- return index + toParse .length ();
140+ if (splits .isEmpty ()) {
141+ return -1 ;
142+ }
143+ // Try parsing progressively larger prefixes
144+ for (var splitCount = 1 ; splitCount < splits .size (); splitCount ++) {
145+ var endIndex = index ;
146+ for (var j = 0 ; j < splitCount ; j ++) {
147+ var splitIndex = s .indexOf (splits .get (j ), endIndex );
148+ if (splitIndex == -1 ) {
149+ break ;
150+ }
151+ endIndex = splitIndex + splits .get (j ).length ();
152+ }
153+ while (endIndex < s .length () && Character .isWhitespace (s .charAt (endIndex ))) {
154+ endIndex ++;
155+ }
156+ if (endIndex > index ) {
157+ var toParse = s .substring (index , endIndex ).strip ();
158+ if (!toParse .isEmpty ()) {
159+ var expression = parse (toParse , typeArray , context .getParserState (), logger );
160+ if (expression .isPresent ()) {
161+ context .addExpression (expression .get ());
162+ return endIndex ;
163+ }
164+ }
165+ }
166+ }
167+ return -1 ;
168+ } else if (text .isEmpty () || text .isBlank ()) {
169+ var rest = s .substring (index );
170+ var splits = splitAtSpaces (rest );
171+ if (splits .isEmpty ()) {
172+ return -1 ;
173+ }
174+ // Try parsing progressively larger prefixes (first 1 token, then first 2 tokens, etc.)
175+ for (var splitCount = 1 ; splitCount < splits .size (); splitCount ++) {
176+ var endIndex = index ;
177+ for (var j = 0 ; j < splitCount ; j ++) {
178+ var splitIndex = s .indexOf (splits .get (j ), endIndex );
179+ if (splitIndex == -1 ) {
180+ break ;
181+ }
182+ endIndex = splitIndex + splits .get (j ).length ();
183+ }
184+ // Find the start of the next token (skip whitespace)
185+ while (endIndex < s .length () && Character .isWhitespace (s .charAt (endIndex ))) {
186+ endIndex ++;
187+ }
188+ if (endIndex > index ) {
189+ var toParse = s .substring (index , endIndex ).strip ();
190+ if (!toParse .isEmpty ()) {
191+ var expression = parse (toParse , typeArray , context .getParserState (), logger );
192+ if (expression .isPresent ()) {
193+ context .addExpression (expression .get ());
194+ return endIndex ;
195+ }
126196 }
127197 }
128198 }
@@ -137,11 +207,14 @@ public int match(String s, int index, MatchContext context) {
137207 for (var split : splits ) {
138208 var i = StringUtils .indexOfIgnoreCase (s , split , index );
139209 if (i != -1 ) {
140- var toParse = s .substring (index , i );
210+ var toParse = s .substring (index , i ).strip ();
211+ if (toParse .isEmpty ()) {
212+ continue ;
213+ }
141214 var expression = parse (toParse , typeArray , context .getParserState (), logger );
142215 if (expression .isPresent ()) {
143216 context .addExpression (expression .get ());
144- return index + toParse . length () ;
217+ return i ;
145218 }
146219 }
147220 }
@@ -152,6 +225,36 @@ public int match(String s, int index, MatchContext context) {
152225 return -1 ;
153226 }
154227
228+ /**
229+ * Finds the index of text in a string, respecting word boundaries for keywords like "or", "and", "nor".
230+ * @param s the string to search in
231+ * @param text the text to find
232+ * @param start the starting index
233+ * @return the index where text was found, or -1 if not found
234+ */
235+ private int findTextWithBoundary (String s , String text , int start ) {
236+ if (text .isEmpty ()) {
237+ return -1 ;
238+ }
239+ // Check if this is a keyword that needs word boundary checking
240+ var lowerText = text .toLowerCase ();
241+ var needsBoundaryCheck = lowerText .equals ("or" ) || lowerText .equals ("and" ) || lowerText .equals ("nor" );
242+
243+ var i = StringUtils .indexOfIgnoreCase (s , text , start );
244+ while (i != -1 && needsBoundaryCheck ) {
245+ // Check word boundaries
246+ var beforeIsWordChar = i > 0 && Character .isLetterOrDigit (s .charAt (i - 1 ));
247+ var afterIsWordChar = (i + text .length () < s .length ()) && Character .isLetterOrDigit (s .charAt (i + text .length ()));
248+
249+ if (!beforeIsWordChar && !afterIsWordChar ) {
250+ return i ; // Valid word boundary match
251+ }
252+ // Try next occurrence
253+ i = StringUtils .indexOfIgnoreCase (s , text , i + 1 );
254+ }
255+ return i ;
256+ }
257+
155258 private List <String > splitAtSpaces (String s ) {
156259 List <String > split = new ArrayList <>();
157260 var sb = new StringBuilder ();
0 commit comments