diff --git a/lib/monetize.rb b/lib/monetize.rb
index dfedaba7d..24f4831d5 100644
--- a/lib/monetize.rb
+++ b/lib/monetize.rb
@@ -5,11 +5,14 @@
 require 'monetize/errors'
 require 'monetize/version'
 require 'monetize/parser'
+require 'monetize/strict_parser'
 require 'monetize/collection'
 
 module Monetize
   # Class methods
   class << self
+    attr_writer :parser_class
+
     # @attr_accessor [true, false] assume_from_symbol Use this to enable the
     # ability to assume the currency from a passed symbol
     attr_accessor :assume_from_symbol
@@ -20,12 +23,15 @@ class << self
     # to true to enforce the delimiters set in the currency all the time.
     attr_accessor :enforce_currency_delimiters
 
-
     # Where this set to true, the behavior for parsing thousands separators is changed to
     # expect that eg. €10.000 is EUR 10 000 and not EUR 10.000 - it's incredibly rare when parsing
     # human text that we're dealing with fractions of cents.
     attr_accessor :expect_whole_subunits
 
+    def parser_class
+      @parser_class || Monetize::Parser
+    end
+
     def parse(input, currency = Money.default_currency, options = {})
       parse! input, currency, options
     rescue Error
@@ -36,7 +42,7 @@ def parse!(input, currency = Money.default_currency, options = {})
       return input if input.is_a?(Money)
       return from_numeric(input, currency) if input.is_a?(Numeric)
 
-      parser = Monetize::Parser.new(input, currency, options)
+      parser = parser_class.new(input, currency, options)
       amount, currency = parser.parse
 
       Money.from_amount(amount, currency)
@@ -79,3 +85,5 @@ def extract_cents(input, currency = Money.default_currency)
     end
   end
 end
+
+Monetize.parser_class = Monetize::StrictParser
diff --git a/lib/monetize/strict_parser.rb b/lib/monetize/strict_parser.rb
new file mode 100644
index 000000000..81bbe81db
--- /dev/null
+++ b/lib/monetize/strict_parser.rb
@@ -0,0 +1,142 @@
+require 'bigdecimal'
+require 'monetize/parser'
+require 'monetize/tokenizer'
+
+module Monetize
+  # Parses a monetary string by tokenizing it and accepting only the token
+  # sequences listed in ALLOWED_FORMATS; any other sequence raises ParseError.
+  class StrictParser
+    # TODO: perform exhaustive match
+    # TODO: error subclasses with detailed explanation
+    # TODO: check if decimal mark is a thousands separator (1,000 USD)
+    # TODO: switch to using allowed format as strings for added flexibility
+
+    # Token type sequences, in input order, that are accepted as valid.
+    ALLOWED_FORMATS = [
+      [:amount], # 9.99
+      [:sign, :amount], # -9.99
+      [:symbol, :amount], # £9.99
+      [:sign, :symbol, :amount], # -£9.99
+      [:symbol, :sign, :amount], # £-9.99
+      [:symbol, :amount, :sign], # £9.99-
+      [:amount, :symbol], # 9.99£
+      [:sign, :amount, :symbol], # -9.99£
+      [:currency_iso, :amount], # GBP 9.99
+      [:currency_iso, :sign, :amount], # GBP -9.99
+      [:amount, :currency_iso], # 9.99 GBP
+      [:sign, :amount, :currency_iso], # -9.99 GBP
+      [:symbol, :amount, :currency_iso], # £9.99 GBP
+      [:sign, :symbol, :amount, :currency_iso], # -£9.99 GBP
+    ].freeze
+
+    # @param input [#to_s] text to parse
+    # @param fallback_currency currency used when the input has no ISO code
+    #   and no symbol that may be assumed
+    # @param options [Hash] passed to the Tokenizer; :assume_from_symbol
+    #   overrides Monetize.assume_from_symbol
+    def initialize(input, fallback_currency = Money.default_currency, options = {})
+      @input = input.to_s
+      @options = options
+      @fallback_currency = fallback_currency
+    end
+
+    # Tokenizes the input, validates the token order and extracts the result.
+    #
+    # @return [Array] the signed BigDecimal amount followed by the currency
+    # @raise [ParseError] when the token sequence is not in ALLOWED_FORMATS
+    def parse
+      tokens = Tokenizer.new(input, options).process
+      token_types = tokens.map(&:type)
+
+      raise ParseError, "invalid input - #{token_types}" unless ALLOWED_FORMATS.include?(token_types)
+
+      amount = tokens.find { |token| token.type == :amount }
+      sign = tokens.find { |token| token.type == :sign }
+      symbol = tokens.find { |token| token.type == :symbol }
+      currency_iso = tokens.find { |token| token.type == :currency_iso }
+
+      currency =
+        if currency_iso
+          parse_currency_iso(currency_iso.match.to_s)
+        elsif symbol && assume_from_symbol?
+          parse_symbol(symbol.match.to_s)
+        else
+          fallback_currency
+        end
+
+      [parse_amount(currency, amount.match, sign&.match), currency]
+    end
+
+    private
+
+    attr_reader :input, :fallback_currency, :options
+
+    # Converts the matched amount text into a signed BigDecimal.
+    #
+    # @param _currency resolved currency (not used yet)
+    # @param amount [MatchData] match exposing the :amount and :multiplier groups
+    # @param sign [MatchData, nil] match of the sign token, if there was one
+    def parse_amount(_currency, amount, sign)
+      digits = amount[:amount].gsub(' ', '')
+      delimiters = digits.scan(/[^\d]/).uniq
+
+      # BigDecimal keeps the amount exact; Float would round large or
+      # multiplied values before they reach Money.
+      num = BigDecimal(normalize_delimiters(digits, delimiters))
+      num = apply_multiplier(num, amount[:multiplier])
+      apply_sign(num, sign.to_s)
+    end
+
+    # Strips thousands separators and turns the decimal mark into ".".
+    #
+    # @raise [ParseError] when the delimiters cannot be interpreted
+    def normalize_delimiters(digits, delimiters)
+      case delimiters.length
+      when 0
+        digits
+      when 1
+        normalize_single_delimiter(digits, delimiters.first)
+      when 2
+        thousands_separator, decimal_mark = delimiters
+        digits.gsub(thousands_separator, '').gsub(decimal_mark, '.')
+      else
+        raise ParseError, 'invalid amount of delimiters used'
+      end
+    end
+
+    # One occurrence is read as the decimal mark. A decimal mark cannot
+    # repeat, so several occurrences (1,000,000) are thousands separators and
+    # must be followed by exactly three digits each.
+    def normalize_single_delimiter(digits, delimiter)
+      groups = digits.split(delimiter)
+      return groups.join('.') if groups.length == 2
+      return groups.join if groups.drop(1).all? { |group| group.length == 3 }
+
+      raise ParseError, 'invalid use of a repeated delimiter'
+    end
+
+    def parse_symbol(symbol)
+      Money::Currency.wrap(Monetize::Parser::CURRENCY_SYMBOLS[symbol])
+    end
+
+    def parse_currency_iso(currency_iso)
+      Money::Currency.wrap(currency_iso)
+    end
+
+    def assume_from_symbol?
+      options.fetch(:assume_from_symbol) { Monetize.assume_from_symbol }
+    end
+
+    # Scales num by the power of ten registered for the multiplier suffix.
+    def apply_multiplier(num, multiplier)
+      return num unless multiplier
+
+      exponent = Monetize::Parser::MULTIPLIER_SUFFIXES[multiplier.to_s.upcase]
+      num * 10**exponent
+    end
+
+    def apply_sign(num, sign)
+      sign == '-' ? num * -1 : num
+    end
+  end
+end
diff --git a/lib/monetize/tokenizer.rb b/lib/monetize/tokenizer.rb
new file mode 100644
index 000000000..807ab9baa
--- /dev/null
+++ b/lib/monetize/tokenizer.rb
@@ -0,0 +1,80 @@
+require 'monetize/parser'
+
+module Monetize
+  # Splits a monetary string into :currency_iso, :symbol, :sign and :amount
+  # tokens, each holding the MatchData that produced it.
+  class Tokenizer
+    SYMBOLS = Monetize::Parser::CURRENCY_SYMBOLS.keys.map { |symbol| Regexp.escape(symbol) }.freeze
+    THOUSAND_SEPARATORS = /[\.\ ,]/.freeze
+    DECIMAL_MARKS = /[\.,]/.freeze
+    MULTIPLIERS = Monetize::Parser::MULTIPLIER_SUFFIXES.keys.join('|').freeze
+
+    SYMBOL_REGEXP = Regexp.new(SYMBOLS.join('|')).freeze
+    # NOTE(review): CURRENCY_ISO_REGEXP and SIGN_REGEXP were restored from a
+    # damaged copy of this patch - confirm them against the original commit.
+    CURRENCY_ISO_REGEXP = /(?<![A-Z])[A-Z]{3}(?![A-Z])/i.freeze
+    SIGN_REGEXP = /[\-\+]/.freeze
+    AMOUNT_REGEXP = %r{
+      (?<amount>                          # amount group
+        \d+                               # starts with at least one digit
+        (?:#{THOUSAND_SEPARATORS}\d{3})*  # separated into groups of 3 digits by a thousands separator
+        (?!\d)                            # not followed by a digit
+        (?:#{DECIMAL_MARKS}\d+)?          # has decimal mark followed by decimal part
+      )
+      (?<multiplier>#{MULTIPLIERS})?      # optional multiplier
+    }ix.freeze
+
+    Token = Struct.new(:type, :match)
+
+    # @param input [String] text to tokenize; it is duplicated before use
+    # @param options [Hash] stored but not consulted by the tokenizer yet
+    def initialize(input, options = {})
+      @original_input = input
+      @options = options
+    end
+
+    # @return [Array<Token>] every token found, ordered by position in the input
+    def process
+      # matches are removed from the input string to avoid overlapping matches
+      input = original_input.dup
+      result = []
+
+      result += match(input, :currency_iso, CURRENCY_ISO_REGEXP)
+      result += match(input, :symbol, SYMBOL_REGEXP)
+      result += match(input, :sign, SIGN_REGEXP)
+      result += match(input, :amount, AMOUNT_REGEXP)
+
+      result.sort_by { |token| token.match.begin(0) }
+    end
+
+    private
+
+    attr_reader :original_input, :options
+
+    # Collects every match of regexp as a token and masks it in input.
+    def match(input, type, regexp)
+      tokens = []
+      input.scan(regexp) { tokens << Token.new(type, Regexp.last_match) }
+
+      # Replace the matches from the input with § to avoid over-matching
+      tokens.each do |token|
+        first, last = token.match.offset(0)
+        input[first...last] = '§' * (last - first)
+      end
+
+      tokens
+    end
+
+    # Debugging aid: prints the input with each match replaced by <type>.
+    def preview(result)
+      preview_input = original_input.dup
+      result.reverse_each do |token|
+        offset = token.match.offset(0)
+        preview_input.slice!(offset.first, token.match.to_s.length)
+        preview_input.insert(offset.first, "<#{token.type}>")
+      end
+
+      puts preview_input
+    end
+  end
+end