Let Unicode characters through the pure-Ruby lexer

Ben Miller · Ben Miller · commit 15adb9b12465 · 2022-10-20T17:24:50.000-04:00
Ruby isn't going to mishandle these characters, so we might as well allow them.
diff --git a/lib/ios_parser/lexer.rb b/lib/ios_parser/lexer.rb
@@ -287,7 +287,8 @@ def word?
         ('A'..'Z').cover?(char) ||
         ['-', '+', '$', ':', '/', ',', '(', ')', '|', '*', '#', '=', '<', '>',
          '!', '"', '&', '@', ';', '%', '~', '{', '}', "'", '?', '[', ']', '_',
-         '^', '\\', '`'].include?(char)
+         '^', '\\', '`'].include?(char) ||
+        /[[:graph:]]/.match(char)
     end
 
     def space
diff --git a/spec/lib/ios_parser/pure_spec.rb b/spec/lib/ios_parser/pure_spec.rb
@@ -0,0 +1,16 @@
+require_relative '../../spec_helper'
+require 'ios_parser'
+require 'ios_parser/lexer'
+
+module IOSParser
+  describe PureLexer do
+    describe '#call' do
+      it 'accepts non-whitespace printable characters as words' do
+        input = "before emdash – after emdash"
+        tokens = PureLexer.new.call(input)
+        expect(tokens.map(&:value)).to eq %w[before emdash – after emdash]
+        expect(tokens.map(&:col)).to eq [1, 8, 15, 17, 23]
+      end
+    end
+  end
+end