// Post-patch contents of the hunk from
// src/app/lib/parse-resume-from-pdf/extract-resume-from-sections/extract-profile.ts
// ("fix: improve phone regex to support international formats", Fixes #146),
// with the international matching in matchPhone corrected.

// True when the item's text contains an "@" character.
const hasAt = (item: TextItem) => item.text.includes("@");

// Phone
/**
 * Finds a phone number inside a text item.
 *
 * Two formats are tried, in order:
 * 1. International: "+" and a 1-3 digit country code followed by one or more
 *    digit groups. Each group may be preceded by a single whitespace or "-"
 *    and may be wrapped in parentheses, e.g. "+44 20 7946 0958",
 *    "+1 (555) 123-4567", "+919876543210".
 * 2. US: (xxx)-xxx-xxxx where "()" and "-" are optional and "-" can also be
 *    a space.
 *
 * @param item Text item whose `text` is searched.
 * @returns The match (full number in element 0) or null when no phone number
 *          is found.
 */
export const matchPhone = (item: TextItem): RegExpMatchArray | null => {
  // E.164 caps a full international number at 15 digits. The lower bound is a
  // heuristic that rejects short "+digits" fragments such as "+14 5" in
  // "C++14 5 years".
  const minInternationalDigits = 7;
  const maxInternationalDigits = 15;

  // The group repeats so numbers written in any number of chunks are captured
  // whole instead of being cut off after the second chunk.
  const international = item.text.match(
    /\+\d{1,3}(?:[\s-]?(?:\(\d+\)|\d+))+/
  );
  if (international) {
    const digitCount = international[0].replace(/\D/g, "").length;
    if (
      digitCount >= minInternationalDigits &&
      digitCount <= maxInternationalDigits
    ) {
      return international;
    }
  }

  // Fall back to the original US format.
  return item.text.match(/\(?\d{3}\)?[\s-]?\d{3}[\s-]?\d{4}/);
};

// True when the item's text contains one or more digits wrapped in parentheses.
const hasParenthesis = (item: TextItem) => /\([0-9]+\)/.test(item.text);

// Location