Skip to content

Commit

Permalink
chore: better QuoteReader NaiveText
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolasleger committed Nov 25, 2024
1 parent 0d615b6 commit 3f53f22
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 23 deletions.
2 changes: 2 additions & 0 deletions lib/quote_reader/global.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ module QuoteReader
class Global
attr_reader :filepath

VERSION = "0.0.1"

# Builds a reader for the given file path.
# The value is stored untouched and exposed through the `filepath` reader declared above.
def initialize(filepath)
@filepath = filepath
end
Expand Down
57 changes: 37 additions & 20 deletions lib/quote_reader/naive_text.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,56 +33,73 @@ def read_attributes
# rubocop:enable Metrics/MethodLength
# rubocop:enable Metrics/AbcSize

def self.find_adresse_pro(text)
text[/Adresse Pro\s*:\s*(\w+)/i, 1]
# Optional "number" marker that may follow a label: "n.", "n°", or a bare "." / "°".
NUMBER_REFERENCE_REGEX = /n?[.°]/i
# Separator between a label and its value. Two shapes are accepted:
#   * a colon with optional whitespace on either side ("SIRET: 1", "SIRET : 1");
#   * whitespace, then an optional number marker and an optional colon
#     ("RGE n° 1", "RGE n. : 1", "capital de 1").
# The first alternative exists so that a colon glued to the label ("TVA: FR…") still
# matches; a label directly followed by the value ("RGE123") is still rejected.
BETWEEN_LABEL_VALUE_REGEX = /(?:\s*:\s*|\s+(?:#{NUMBER_REFERENCE_REGEX})?\s*(?::\s*)?)/i
# One "word" character: ASCII \w plus the accented Latin-1 letters
# (the range gaps leave out the × and ÷ signs).
FRENCH_CHARACTER_REGEX = /[\wÀ-ÖØ-öø-ÿ]/i
# Phone number: optional "33" country code (optionally wrapped in "(" / "+" / ")"),
# then five pairs of digits that may be separated by whitespace or dots.
PHONE_REGEX = /(?:\(?\+?33\)?)? ?(?:[\s.]*\d\d){5}/i # TODO: find better

# Returns the first word found after an "Adresse :" label (case-insensitive),
# or nil when the label is absent.
#
# @param text [String] raw text to search
# @return [String, nil] the run of word/accented characters following the colon
def self.find_adresse(text)
  pattern = /Adresse\s*:\s*(#{FRENCH_CHARACTER_REGEX}+)/i
  text[pattern, 1]
end

def self.find_raison_sociale(text)
text[/Raison sociale\s*:\s*(\w+)/i, 1]
# Returns the first word found after an "Adresse chantier :" label
# (case-insensitive), or nil when the label is absent.
#
# @param text [String] raw text to search
# @return [String, nil] the run of word/accented characters following the colon
def self.find_adresse_chantier(text)
  text.slice(/Adresse chantier\s*:\s*(#{FRENCH_CHARACTER_REGEX}+)/i, 1)
end

def self.find_forme_juridique(text)
text[/Forme juridique\s*:\s*(\w+)/i, 1]
# Returns the first word found after an "Adresse Pro :" label (case-insensitive),
# or nil when the label is absent.
#
# @param text [String] raw text to search
# @return [String, nil] the run of word/accented characters following the colon
def self.find_adresse_pro(text)
  found = text.match(/Adresse Pro\s*:\s*(#{FRENCH_CHARACTER_REGEX}+)/i)
  found && found[1]
end

def self.find_numero_tva(text)
text[/TVA\s*:\s*(\w+)/i, 1]
def self.find_assurance(text)
text[/Assurance(?:\s+décennale)?#{BETWEEN_LABEL_VALUE_REGEX}((?:[#{FRENCH_CHARACTER_REGEX}:]+\s+)+(?:#{NUMBER_REFERENCE_REGEX}\s*)?(?:contrat\s+#{FRENCH_CHARACTER_REGEX}*\s*\d+)?)/i, 1] # rubocop:disable Layout/LineLength
end

# Extracts the share-capital amount: digits (optionally grouped in space-separated
# thousands) that follow a "Capital"/"Capitale" label, optionally "de", and are
# followed by a euro sign. "capilâide" is accepted as a label variant as well.
#
# The previous "Capital : <word>" lookup that preceded this expression was evaluated
# and thrown away, so it has been removed.
#
# @param text [String] raw text to search
# @return [String, nil] the amount as written (inner spaces kept), or nil
def self.find_capital(text)
  text[/(?:Capitale?|capilâide)(?:\s+de)?#{BETWEEN_LABEL_VALUE_REGEX}(\d+(?: \d{3})*)\s*€/i, 1]
end

# Returns the word that follows a "Forme juridique :" label (case-insensitive).
#
# The whole word is captured. Listing SAS/SARL/EURL ahead of the generic word branch
# made the alternation stop at the first listed prefix, so "SASU" came back as "SAS".
#
# @param text [String] raw text to search
# @return [String, nil] the legal-form word, or nil when the label is absent
def self.find_forme_juridique(text)
  text[/Forme juridique\s*:\s*(#{FRENCH_CHARACTER_REGEX}+)/i, 1]
end

# Returns the account identifier that follows an "IBAN" or "RIB" label.
# The value must start with "FR" and two digits, continue with two to five groups
# of four digits (each optionally followed by one whitespace), then an optional
# single word character and two final digits. Whitespace inside the value is kept.
#
# @param text [String] raw text to search
# @return [String, nil] the identifier as written, or nil when nothing matches
def self.find_iban(text)
  iban_pattern = /(?:IBAN|RIB)#{BETWEEN_LABEL_VALUE_REGEX}(FR\d{2}\s?(?:\d{4}\s?){2,5}#{FRENCH_CHARACTER_REGEX}?\d{2})/i
  text[iban_pattern, 1]
end

# Returns the first occurrence of the word "devis" (any casing, as written in the
# text), or nil when the text is nil/false or does not contain it.
#
# @param text [String, nil] raw text to search
# @return [String, nil] the matched substring
def self.find_mention_devis(text)
  return unless text

  text[/devis/i]
end

def self.find_adresse(text)
text[/Adresse\s*:\s*(\w+)/i, 1]
# Returns the word that follows a "Nom :" label (case-insensitive).
#
# The label must start the text or be preceded by a non-letter. Without that guard
# the case-insensitive "Nom" also matched the tail of "Prénom", so the first name
# was returned as the surname whenever "Prénom :" came first.
#
# @param text [String] raw text to search
# @return [String, nil] the surname word, or nil when no "Nom :" label is present
def self.find_nom(text)
  text[/(?:\A|[^\p{L}])Nom\s*:\s*(#{FRENCH_CHARACTER_REGEX}+)/i, 1]
end

def self.find_adresse_chantier(text)
text[/Adresse chantier\s*:\s*(\w+)/i, 1]
# Returns the quote number that follows "DEVIS N" (case-insensitive). One arbitrary
# character is tolerated after the "N" (for example "°", "." or a misread quote
# mark). The number is an optional run of word characters ending in at least four digits.
#
# @param text [String] raw text to search
# @return [String, nil] the quote reference, or nil when nothing matches
def self.find_numero_devis(text)
  quote_number = /DEVIS\s+N.?\s*(#{FRENCH_CHARACTER_REGEX}*\d{4,})/i
  text.slice(quote_number, 1)
end

def self.find_nom(text)
text[/Nom\s*:\s*(\w+)/i, 1]
# Returns the VAT number that follows a "TVA" (optionally "TVA intracommunautaire")
# label: "FR", two digits, an optional single whitespace, then nine digits.
#
# @param text [String] raw text to search
# @return [String, nil] the VAT number as written, or nil when nothing matches
def self.find_numero_tva(text)
  found = text.match(/TVA(?:\s+intracommunautaire)?#{BETWEEN_LABEL_VALUE_REGEX}(FR\d{2}\s?\d{9})/i)
  found && found[1]
end

# Returns the word that follows a "Prénom :" label (case-insensitive); accented
# letters are part of the word.
#
# The earlier ASCII-only (\w+) lookup that preceded this expression was evaluated
# and thrown away, so it has been removed.
#
# @param text [String] raw text to search
# @return [String, nil] the first-name word, or nil when the label is absent
def self.find_prenom(text)
  text[/Prénom\s*:\s*(#{FRENCH_CHARACTER_REGEX}+)/i, 1]
end

def self.find_numero_devis(text)
text[/DEVIS\s+N°\s*(\d+)/i, 1]
# Returns the first word found after a "Raison sociale :" label (case-insensitive),
# or nil when the label is absent.
#
# @param text [String] raw text to search
# @return [String, nil] the run of word/accented characters following the colon
def self.find_raison_sociale(text)
  company_pattern = /Raison sociale\s*:\s*(#{FRENCH_CHARACTER_REGEX}+)/i
  text[company_pattern, 1]
end

# Returns the RGE certificate number that follows an "RGE" label: an optional "E-"
# prefix, an optional "E", then digits.
#
# The label must start the text or be preceded by a non-letter, so that the "rge"
# inside words such as "charge" is not taken for the label ("charge 2024" used to
# yield "2024"). The older "RGE N° <digits>" lookup that preceded this expression
# was evaluated and thrown away, so it has been removed.
#
# @param text [String] raw text to search
# @return [String, nil] the certificate number as written, or nil
def self.find_rge_number(text)
  text[/(?:\A|[^\p{L}])RGE#{BETWEEN_LABEL_VALUE_REGEX}((?:E-)?E?\d+)/i, 1]
end

# Returns the 14-digit SIRET that follows a "SIRET" label, written as groups of
# 3, 3, 3 and 5 digits with optional whitespace between groups. The value is
# returned exactly as written (inner whitespace is kept).
#
# The older whitespace-stripping lookup that preceded this expression was evaluated
# and thrown away, so it has been removed.
#
# @param text [String] raw text to search
# @return [String, nil] the SIRET as written, or nil when fewer than 14 digits follow
def self.find_siret(text)
  text[/SIRET#{BETWEEN_LABEL_VALUE_REGEX}(\d{3}\s*\d{3}\s*\d{3}\s*\d{5})/i, 1]
end

# Returns the first phone number found in the text, optionally introduced by a
# "Tel" / "Tél." / "Téléphone" label.
#
# The "éphone" suffix is optional: when it was mandatory the abbreviated labels
# never matched, and the number was then picked up together with the blank in
# front of it. The result is stripped for the same reason, since PHONE_REGEX
# itself tolerates a leading space.
#
# @param text [String] raw text to search
# @return [String, nil] the phone number without surrounding whitespace, or nil
def self.find_telephone(text)
  text[/(?:T[eé]l\.?(?:[eé]phone)?#{BETWEEN_LABEL_VALUE_REGEX})?(#{PHONE_REGEX})/i, 1]&.strip
end
end
end
2 changes: 2 additions & 0 deletions lib/quote_validator/global.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
module QuoteValidator
# Validator for the Quote
class Global < Base
VERSION = "0.0.1"

def validate!
@errors = []
@warnings = []
Expand Down
115 changes: 112 additions & 3 deletions spec/lib/quote_reader/naive_text_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
Adresse Pro : 42 rue
Raison Sociale : ACME
Forme Juridique : SAS
TVA : 123456
Capital : 1000
TVA : FR12345678911
Capital : 1000
Siret : 123456789
RGE Number : 123456
TEXT
Expand All @@ -55,7 +55,7 @@
adresse: "42",
raison_sociale: "ACME",
forme_juridique: "SAS",
numero_tva: "123456",
numero_tva: "FR12345678911",
capital: "1000",
rge_number: nil,
siret: nil
Expand All @@ -65,4 +65,113 @@
# rubocop:enable RSpec/ExampleLength
end
end

# rubocop:disable RSpec/MultipleExpectations

# Placeholder: no expectation is written yet for find_adresse; the example is skipped.
describe ".find_adresse" do
it "returns the adresse" do
skip "Add test for find_adresse"
end
end

# Placeholder: no expectation is written yet for find_adresse_chantier; the example is skipped.
describe ".find_adresse_chantier" do
it "returns the adresse_chantier" do
skip "Add test for find_adresse_chantier"
end
end

# Placeholder: no expectation is written yet for find_adresse_pro; the example is skipped.
describe ".find_adresse_pro" do
it "returns the adresse_pro" do
skip "Add test for find_adresse_pro"
end
end

# Checks that the whole insurer description is returned: insurer words, the "n°"
# marker and the "contrat <number>" tail, without the "Assurance décennale :" label.
describe ".find_assurance" do
it "returns the assurance" do
expect(
described_class.find_assurance("Assurance décennale : ToutRix couverture France n° contrat 0000010001234567")
).to eq("ToutRix couverture France n° contrat 0000010001234567")
end
end

# Checks the amount extraction for a "capital de ... €" phrase and for the
# "capilâide" label variant with a space-grouped amount (inner space is kept).
describe ".find_capital" do
it "returns the capital" do
expect(described_class.find_capital("capital de 12345 €")).to eq("12345")
expect(described_class.find_capital("capilâide 12 345€")).to eq("12 345")
end
end

# Checks that a space-grouped IBAN ending with a letter followed by two digits is
# returned as written, without the "IBAN :" label.
describe ".find_iban" do
it "returns the iban" do
expect(
described_class.find_iban("IBAN : FR74 3000 1234 9000 0000 1234 P77")
).to eq("FR74 3000 1234 9000 0000 1234 P77")
end
end

# Placeholder: no expectation is written yet for find_forme_juridique; the example is skipped.
describe ".find_forme_juridique" do
it "returns the forme_juridique" do
skip "Add test for find_forme_juridique"
end
end

# Placeholder: no expectation is written yet for find_mention_devis; the example is skipped.
describe ".find_mention_devis" do
it "returns the mention_devis" do
skip "Add test for find_mention_devis"
end
end

# Placeholder: no expectation is written yet for find_nom; the example is skipped.
describe ".find_nom" do
it "returns the nom" do
skip "Add test for find_nom"
end
end

# Checks quote-number extraction with two spellings of the number marker:
# a double quote after "N" (presumably a misread "°" — confirm) and a regular "n°".
describe ".find_numero_devis" do
it "returns the numero_devis" do
expect(described_class.find_numero_devis("Devis N\" ORG201234")).to eq("ORG201234")
expect(described_class.find_numero_devis("Devis n° DC001234")).to eq("DC001234")
end
end

# Checks VAT-number extraction for the short "TVA" label (no space after the colon)
# and for the long "TVA intracommunautaire" label.
describe ".find_numero_tva" do
it "returns the numero_tva" do
expect(described_class.find_numero_tva("TVA :FR10831861234")).to eq("FR10831861234")
expect(described_class.find_numero_tva("TVA intracommunautaire : FR86504321234")).to eq("FR86504321234")
end
end

# Placeholder: no expectation is written yet for find_prenom; the example is skipped.
describe ".find_prenom" do
it "returns the prenom" do
skip "Add test for find_prenom"
end
end

# Placeholder: no expectation is written yet for find_raison_sociale; the example is skipped.
describe ".find_raison_sociale" do
it "returns the raison_sociale" do
skip "Add test for find_raison_sociale"
end
end

# Checks RGE-number extraction with an "n." marker followed by a colon, and with an
# "n°" marker glued to an "E-E"-prefixed number (the prefix is part of the result).
describe ".find_rge_number" do
it "returns the rge_number" do
expect(described_class.find_rge_number("RGE n. :E123456")).to eq("E123456")
expect(described_class.find_rge_number("RGE n°E-E123456")).to eq("E-E123456")
end
end

# Checks SIRET extraction for a compact 14-digit value and for a space-grouped one;
# the grouped value is expected back with its spaces intact.
describe ".find_siret" do
it "returns the siret" do
expect(described_class.find_siret("Siret : 12345678900000")).to eq("12345678900000")
expect(described_class.find_siret("Siret : 123 456 789 00000")).to eq("123 456 789 00000")
end
end

# Checks that an unlabelled number with a parenthesised "33" country code is
# returned without the leading blank of the input.
describe ".find_telephone" do
it "returns the telephone" do
expect(described_class.find_telephone(" (33) 01 43 39 37 81")).to eq("(33) 01 43 39 37 81")
end
end

# rubocop:enable RSpec/MultipleExpectations
end

0 comments on commit 3f53f22

Please sign in to comment.