From 184b337175c29df2802cc29ca0176f8990816c6f Mon Sep 17 00:00:00 2001
From: Rabi Panda
Date: Tue, 8 Nov 2022 01:49:06 +0000
Subject: [PATCH] [Backport] Fix error handling while reading analyzer mapping rules

Add new parseWordList method that takes a parser as a parameter. It
reads custom rules from settings or a file, parses and handles errors.
Make error messages less verbose for rules files outside config
directory.

Signed-off-by: Rabi Panda
---
 ...enationCompoundWordTokenFilterFactory.java |   8 +-
 .../common/MappingCharFilterFactory.java      |  24 ++--
 .../StemmerOverrideTokenFilterFactory.java    |  46 +++----
 .../common/SynonymTokenFilterFactory.java     |   6 +-
 .../WordDelimiterGraphTokenFilterFactory.java |   8 +-
 .../WordDelimiterTokenFilterFactory.java      |  30 +++--
 ...rdDelimiterTokenFilterFactoryTestCase.java |  20 ++++
 .../common/MappingCharFilterFactoryTests.java |  70 +++++++++++
 ...temmerOverrideTokenFilterFactoryTests.java |  13 +-
 .../common/SynonymsAnalysisTests.java         |   4 +-
 .../IcuCollationTokenFilterFactory.java       |   8 +-
 .../KuromojiPartOfSpeechFilterFactory.java    |   2 +-
 .../analysis/KuromojiTokenizerFactory.java    |  38 +++---
 .../index/analysis/KuromojiAnalysisTests.java |   4 +-
 .../index/analysis/NoriAnalyzerProvider.java  |   2 +-
 .../NoriPartOfSpeechStopFilterFactory.java    |   2 +-
 .../index/analysis/NoriTokenizerFactory.java  |   8 +-
 .../opensearch.release-notes-2.4.0.md         |   2 +-
 .../opensearch/index/analysis/Analysis.java   | 113 ++++++++++++------
 .../analysis/CustomMappingRuleParser.java     |  21 ++++
 .../index/analysis/MappingRule.java           |  30 +++++
 .../indices/analysis/HunspellService.java     |   3 +-
 .../index/analysis/AnalysisTests.java         |  63 ++++++----
 .../indices/analysis/AnalysisModuleTests.java |  11 +-
 .../indices/analyze/HunspellServiceTests.java |  12 +-
 25 files changed, 393 insertions(+), 155 deletions(-)
 create mode 100644 modules/analysis-common/src/test/java/org/opensearch/analysis/common/MappingCharFilterFactoryTests.java
 create mode 100644 server/src/main/java/org/opensearch/index/analysis/CustomMappingRuleParser.java
 create mode 100644 server/src/main/java/org/opensearch/index/analysis/MappingRule.java

diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java
index 875c5261f8387..25bf58409928e 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java
@@ -32,12 +32,14 @@

 package org.opensearch.analysis.common;

+import org.apache.logging.log4j.LogManager;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
 import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
+import org.opensearch.index.analysis.Analysis;
 import org.xml.sax.InputSource;

 import java.io.InputStream;
@@ -61,13 +63,15 @@ public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundW
             throw new IllegalArgumentException("hyphenation_patterns_path is a required setting.");
         }

-        Path hyphenationPatternsFile = env.configFile().resolve(hyphenationPatternsPath);
+        Path hyphenationPatternsFile = Analysis.resolveAnalyzerPath(env, hyphenationPatternsPath);

         try {
             InputStream in = Files.newInputStream(hyphenationPatternsFile);
             hyphenationTree = HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(in));
         } catch (Exception e) {
-            throw new IllegalArgumentException("Exception while reading hyphenation_patterns_path.", e);
+            LogManager.getLogger(HyphenationCompoundWordTokenFilterFactory.class)
+                .error("Exception while reading hyphenation_patterns_path ", e);
+            throw new IllegalArgumentException("Exception while reading hyphenation_patterns_path.");
         }
     }

diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/MappingCharFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/MappingCharFilterFactory.java
index 7200b69135a30..d6d9f8975f2fc 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/MappingCharFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/MappingCharFilterFactory.java
@@ -39,6 +39,7 @@
 import org.opensearch.index.IndexSettings;
 import org.opensearch.index.analysis.AbstractCharFilterFactory;
 import org.opensearch.index.analysis.Analysis;
+import org.opensearch.index.analysis.MappingRule;
 import org.opensearch.index.analysis.NormalizingCharFilterFactory;

 import java.io.Reader;
@@ -53,13 +54,13 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory implemen
     MappingCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name);

-        List<String> rules = Analysis.getWordList(env, settings, "mappings");
+        List<MappingRule<String, String>> rules = Analysis.parseWordList(env, settings, "mappings", this::parse);
         if (rules == null) {
             throw new IllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured");
         }

         NormalizeCharMap.Builder normMapBuilder = new NormalizeCharMap.Builder();
-        parseRules(rules, normMapBuilder);
+        rules.forEach(rule -> normMapBuilder.add(rule.getLeft(), rule.getRight()));
         normMap = normMapBuilder.build();
     }

@@ -71,18 +72,13 @@ public Reader create(Reader tokenStream) {
     // source => target
     private static Pattern rulePattern = Pattern.compile("(.*)\\s*=>\\s*(.*)\\s*$");

-    /**
-     * parses a list of MappingCharFilter style rules into a normalize char map
-     */
-    private void parseRules(List<String> rules, NormalizeCharMap.Builder map) {
-        for (String rule : rules) {
-            Matcher m = rulePattern.matcher(rule);
-            if (!m.find()) throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]");
-            String lhs = parseString(m.group(1).trim());
-            String rhs = parseString(m.group(2).trim());
-            if (lhs == null || rhs == null) throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]. Illegal mapping.");
-            map.add(lhs, rhs);
-        }
+    private MappingRule<String, String> parse(String rule) {
+        Matcher m = rulePattern.matcher(rule);
+        if (!m.find()) throw new RuntimeException("Invalid mapping rule : [" + rule + "]");
+        String lhs = parseString(m.group(1).trim());
+        String rhs = parseString(m.group(2).trim());
+        if (lhs == null || rhs == null) throw new RuntimeException("Invalid mapping rule: [" + rule + "]. Illegal mapping.");
+        return new MappingRule<>(lhs, rhs);
     }

     char[] out = new char[256];

diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/StemmerOverrideTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/StemmerOverrideTokenFilterFactory.java
index 89f0766542296..bdd6e01261443 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/StemmerOverrideTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/StemmerOverrideTokenFilterFactory.java
@@ -40,24 +40,31 @@
 import org.opensearch.index.IndexSettings;
 import org.opensearch.index.analysis.AbstractTokenFilterFactory;
 import org.opensearch.index.analysis.Analysis;
+import org.opensearch.index.analysis.MappingRule;

 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;

 public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactory {
+    private static final String MAPPING_SEPARATOR = "=>";
     private final StemmerOverrideMap overrideMap;

     StemmerOverrideTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) throws IOException {
         super(indexSettings, name, settings);

-        List<String> rules = Analysis.getWordList(env, settings, "rules");
+        List<MappingRule<List<String>, String>> rules = Analysis.parseWordList(env, settings, "rules", this::parse);
         if (rules == null) {
             throw new IllegalArgumentException("stemmer override filter requires either `rules` or `rules_path` to be configured");
         }

         StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
-        parseRules(rules, builder, "=>");
+        for (MappingRule<List<String>, String> rule : rules) {
+            for (String key : rule.getLeft()) {
+                builder.add(key, rule.getRight());
+            }
+        }
         overrideMap = builder.build();
     }

@@ -67,27 +74,26 @@ public TokenStream create(TokenStream tokenStream) {
         return new StemmerOverrideFilter(tokenStream, overrideMap);
     }

-    static void parseRules(List<String> rules, StemmerOverrideFilter.Builder builder, String mappingSep) {
-        for (String rule : rules) {
-            String[] sides = rule.split(mappingSep, -1);
-            if (sides.length != 2) {
-                throw new RuntimeException("Invalid Keyword override Rule:" + rule);
-            }
+    private MappingRule<List<String>, String> parse(String rule) {
+        String[] sides = rule.split(MAPPING_SEPARATOR, -1);
+        if (sides.length != 2) {
+            throw new RuntimeException("Invalid keyword override rule: " + rule);
+        }

-            String[] keys = sides[0].split(",", -1);
-            String override = sides[1].trim();
-            if (override.isEmpty() || override.indexOf(',') != -1) {
-                throw new RuntimeException("Invalid Keyword override Rule:" + rule);
-            }
+        String[] keys = sides[0].split(",", -1);
+        String override = sides[1].trim();
+        if (override.isEmpty() || override.indexOf(',') != -1) {
+            throw new RuntimeException("Invalid keyword override rule: " + rule);
+        }

-            for (String key : keys) {
-                String trimmedKey = key.trim();
-                if (trimmedKey.isEmpty()) {
-                    throw new RuntimeException("Invalid Keyword override Rule:" + rule);
-                }
-                builder.add(trimmedKey, override);
+        List<String> trimmedKeys = new ArrayList<>();
+        for (String key : keys) {
+            String trimmedKey = key.trim();
+            if (trimmedKey.isEmpty()) {
+                throw new RuntimeException("Invalid keyword override rule: " + rule);
             }
+            trimmedKeys.add(trimmedKey);
         }
+        return new MappingRule<>(trimmedKeys, override);
     }
-
 }

diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
index dc6b5b2dd8b7b..01a65e87d7466 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
@@ -32,6 +32,7 @@

 package org.opensearch.analysis.common;

+import org.apache.logging.log4j.LogManager;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.synonym.SynonymFilter;
@@ -155,14 +156,15 @@ SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) {
             }
             return parser.build();
         } catch (Exception e) {
-            throw new IllegalArgumentException("failed to build synonyms", e);
+            LogManager.getLogger(SynonymTokenFilterFactory.class).error("Failed to build synonyms: ", e);
+            throw new IllegalArgumentException("Failed to build synonyms");
         }
     }

     Reader getRulesFromSettings(Environment env) {
         Reader rulesReader;
         if (settings.getAsList("synonyms", null) != null) {
-            List<String> rulesList = Analysis.getWordList(env, settings, "synonyms");
+            List<String> rulesList = Analysis.parseWordList(env, settings, "synonyms", s -> s);
             StringBuilder sb = new StringBuilder();
             for (String line : rulesList) {
                 sb.append(line).append(System.lineSeparator());

diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/WordDelimiterGraphTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/WordDelimiterGraphTokenFilterFactory.java
index 31d52d030cb71..51ac3141fd465 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/WordDelimiterGraphTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/WordDelimiterGraphTokenFilterFactory.java
@@ -41,6 +41,7 @@
 import org.opensearch.index.IndexSettings;
 import org.opensearch.index.analysis.AbstractTokenFilterFactory;
 import org.opensearch.index.analysis.Analysis;
+import org.opensearch.index.analysis.MappingRule;
 import org.opensearch.index.analysis.TokenFilterFactory;

 import java.util.List;
@@ -73,7 +74,12 @@ public WordDelimiterGraphTokenFilterFactory(IndexSettings indexSettings, Environ
         // . => DIGIT
         // \u002C => DIGIT
         // \u200D => ALPHANUM
-        List<String> charTypeTableValues = Analysis.getWordList(env, settings, "type_table");
+        List<MappingRule<Character, Byte>> charTypeTableValues = Analysis.parseWordList(
+            env,
+            settings,
+            "type_table",
+            WordDelimiterTokenFilterFactory::parse
+        );
         if (charTypeTableValues == null) {
             this.charTypeTable = WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
         } else {

diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/WordDelimiterTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/WordDelimiterTokenFilterFactory.java
index d40acfa05dd21..96e50206fb53d 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/WordDelimiterTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/WordDelimiterTokenFilterFactory.java
@@ -41,6 +41,7 @@
 import org.opensearch.index.IndexSettings;
 import org.opensearch.index.analysis.AbstractTokenFilterFactory;
 import org.opensearch.index.analysis.Analysis;
+import org.opensearch.index.analysis.MappingRule;
 import org.opensearch.index.analysis.TokenFilterFactory;

 import java.util.Collection;
@@ -76,7 +77,12 @@ public WordDelimiterTokenFilterFactory(IndexSettings indexSettings, Environment
         // . => DIGIT
         // \u002C => DIGIT
         // \u200D => ALPHANUM
-        List<String> charTypeTableValues = Analysis.getWordList(env, settings, "type_table");
+        List<MappingRule<Character, Byte>> charTypeTableValues = Analysis.parseWordList(
+            env,
+            settings,
+            "type_table",
+            WordDelimiterTokenFilterFactory::parse
+        );
         if (charTypeTableValues == null) {
             this.charTypeTable = WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
         } else {
@@ -127,19 +133,23 @@ public int getFlag(int flag, Settings settings, String key, boolean defaultValue
     // source => type
     private static Pattern typePattern = Pattern.compile("(.*)\\s*=>\\s*(.*)\\s*$");

+    static MappingRule<Character, Byte> parse(String rule) {
+        Matcher m = typePattern.matcher(rule);
+        if (!m.find()) throw new RuntimeException("Invalid mapping rule: [" + rule + "]");
+        String lhs = parseString(m.group(1).trim());
+        Byte rhs = parseType(m.group(2).trim());
+        if (lhs.length() != 1) throw new RuntimeException("Invalid mapping rule: [" + rule + "]. Only a single character is allowed.");
+        if (rhs == null) throw new RuntimeException("Invalid mapping rule: [" + rule + "]. Illegal type.");
+        return new MappingRule<>(lhs.charAt(0), rhs);
+    }
+
     /**
      * parses a list of MappingCharFilter style rules into a custom byte[] type table
      */
-    static byte[] parseTypes(Collection<String> rules) {
+    static byte[] parseTypes(Collection<MappingRule<Character, Byte>> rules) {
         SortedMap<Character, Byte> typeMap = new TreeMap<>();
-        for (String rule : rules) {
-            Matcher m = typePattern.matcher(rule);
-            if (!m.find()) throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]");
-            String lhs = parseString(m.group(1).trim());
-            Byte rhs = parseType(m.group(2).trim());
-            if (lhs.length() != 1) throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]. Only a single character is allowed.");
-            if (rhs == null) throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]. Illegal type.");
Illegal type."); - typeMap.put(lhs.charAt(0), rhs); + for (MappingRule rule : rules) { + typeMap.put(rule.getLeft(), rule.getRight()); } // ensure the table is always at least as big as DEFAULT_WORD_DELIM_TABLE for performance diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java index 9d54776755766..2c3864a36fd22 100644 --- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java +++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java @@ -195,4 +195,24 @@ public void testStemEnglishPossessive() throws IOException { tokenizer.setReader(new StringReader(source)); assertTokenStreamContents(tokenFilter.create(tokenizer), expected); } + + private void createTokenFilterFactoryWithTypeTable(String[] rules) throws IOException { + OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings( + Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put("index.analysis.filter.my_word_delimiter.type", type) + .putList("index.analysis.filter.my_word_delimiter.type_table", rules) + .put("index.analysis.filter.my_word_delimiter.catenate_words", "true") + .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true") + .build(), + new CommonAnalysisPlugin() + ); + analysis.tokenFilter.get("my_word_delimiter"); + } + + public void testTypeTableParsingError() { + String[] rules = { "# This is a comment", "$ => DIGIT", "\\u200D => ALPHANUM", "abc => ALPHA" }; + RuntimeException ex = expectThrows(RuntimeException.class, () -> createTokenFilterFactoryWithTypeTable(rules)); + assertEquals("Line [4]: Invalid mapping rule: [abc => ALPHA]. Only a single character is allowed.", ex.getMessage()); + } } diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/MappingCharFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/MappingCharFilterFactoryTests.java new file mode 100644 index 0000000000000..bdc452f8863d4 --- /dev/null +++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/MappingCharFilterFactoryTests.java @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analysis.common; + +import org.apache.lucene.analysis.CharFilter; +import org.opensearch.common.settings.Settings; +import org.opensearch.env.Environment; +import org.opensearch.index.analysis.AnalysisTestsHelper; +import org.opensearch.index.analysis.CharFilterFactory; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.io.StringReader; +import java.util.Arrays; + +public class MappingCharFilterFactoryTests extends OpenSearchTestCase { + public static CharFilterFactory create(String... 
rules) throws IOException { + OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings( + Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard") + .put("index.analysis.analyzer.my_analyzer.char_filter", "my_mappings_char_filter") + .put("index.analysis.char_filter.my_mappings_char_filter.type", "mapping") + .putList("index.analysis.char_filter.my_mappings_char_filter.mappings", rules) + .build(), + new CommonAnalysisPlugin() + ); + + return analysis.charFilter.get("my_mappings_char_filter"); + } + + public void testRulesOk() throws IOException { + MappingCharFilterFactory mappingCharFilterFactory = (MappingCharFilterFactory) create( + "# This is a comment", + ":) => _happy_", + ":( => _sad_" + ); + CharFilter inputReader = (CharFilter) mappingCharFilterFactory.create(new StringReader("I'm so :)")); + char[] tempBuff = new char[14]; + StringBuilder output = new StringBuilder(); + while (true) { + int length = inputReader.read(tempBuff); + if (length == -1) break; + output.append(tempBuff, 0, length); + } + assertEquals("I'm so _happy_", output.toString()); + } + + public void testRuleError() { + for (String rule : Arrays.asList( + "", // empty + "a", // no arrow + "a:>b" // invalid delimiter + )) { + RuntimeException ex = expectThrows(RuntimeException.class, () -> create(rule)); + assertEquals("Line [1]: Invalid mapping rule : [" + rule + "]", ex.getMessage()); + } + } + + public void testRulePartError() { + RuntimeException ex = expectThrows(RuntimeException.class, () -> create("# This is a comment", ":) => _happy_", "a:b")); + assertEquals("Line [3]: Invalid mapping rule : [a:b]", ex.getMessage()); + } +} diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java index 96e05efa97768..9e3345aa30dca 100644 --- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java @@ -46,7 +46,6 @@ import java.io.IOException; import java.io.StringReader; import java.util.Arrays; -import java.util.Locale; public class StemmerOverrideTokenFilterFactoryTests extends OpenSearchTokenStreamTestCase { @Rule @@ -76,11 +75,8 @@ public void testRuleError() { "=>a", // no keys "a,=>b" // empty key )) { - expectThrows( - RuntimeException.class, - String.format(Locale.ROOT, "Should fail for invalid rule: '%s'", rule), - () -> create(rule) - ); + RuntimeException ex = expectThrows(RuntimeException.class, () -> create(rule)); + assertEquals("Line [1]: Invalid keyword override rule: " + rule, ex.getMessage()); } } @@ -90,4 +86,9 @@ public void testRulesOk() throws IOException { tokenizer.setReader(new StringReader("a b c")); assertTokenStreamContents(tokenFilterFactory.create(tokenizer), new String[] { "1", "2", "2" }); } + + public void testRulePartError() { + RuntimeException ex = expectThrows(RuntimeException.class, () -> create("a => 1", "b,c => 2", "# This is a comment", "=>a=>b")); + assertEquals("Line [4]: Invalid keyword override rule: =>a=>b", ex.getMessage()); + } } diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java 
index 8094e24b9adc8..03404a284020b 100644
--- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java
+++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java
@@ -117,7 +117,7 @@ public void testSynonymWordDeleteByAnalyzer() throws IOException {
             fail("fail! due to synonym word deleted by analyzer");
         } catch (Exception e) {
             assertThat(e, instanceOf(IllegalArgumentException.class));
-            assertThat(e.getMessage(), startsWith("failed to build synonyms"));
+            assertThat(e.getMessage(), startsWith("Failed to build synonyms"));
         }
     }

@@ -138,7 +138,7 @@ public void testExpandSynonymWordDeleteByAnalyzer() throws IOException {
             fail("fail! due to synonym word deleted by analyzer");
         } catch (Exception e) {
             assertThat(e, instanceOf(IllegalArgumentException.class));
-            assertThat(e.getMessage(), startsWith("failed to build synonyms"));
+            assertThat(e.getMessage(), startsWith("Failed to build synonyms"));
         }
     }

diff --git a/plugins/analysis-icu/src/main/java/org/opensearch/index/analysis/IcuCollationTokenFilterFactory.java b/plugins/analysis-icu/src/main/java/org/opensearch/index/analysis/IcuCollationTokenFilterFactory.java
index 757a55487a162..af564bcc9d535 100644
--- a/plugins/analysis-icu/src/main/java/org/opensearch/index/analysis/IcuCollationTokenFilterFactory.java
+++ b/plugins/analysis-icu/src/main/java/org/opensearch/index/analysis/IcuCollationTokenFilterFactory.java
@@ -37,6 +37,7 @@
 import java.nio.file.Files;
 import java.nio.file.InvalidPathException;

+import org.apache.logging.log4j.LogManager;
 import org.apache.lucene.analysis.TokenStream;
 import org.opensearch.common.io.Streams;
 import org.opensearch.common.settings.Settings;
@@ -80,9 +81,12 @@ public IcuCollationTokenFilterFactory(IndexSettings indexSettings, Environment e
                     collator = new RuleBasedCollator(rules);
                 } catch (Exception e) {
                     if (failureToResolve != null) {
-                        throw new IllegalArgumentException("Failed to resolve collation rules location", failureToResolve);
+                        LogManager.getLogger(IcuCollationTokenFilterFactory.class)
+                            .error("Failed to resolve collation rules location", failureToResolve);
+                        throw new IllegalArgumentException("Failed to resolve collation rules location");
                     } else {
-                        throw new IllegalArgumentException("Failed to parse collation rules", e);
+                        LogManager.getLogger(IcuCollationTokenFilterFactory.class).error("Failed to parse collation rules", e);
+                        throw new IllegalArgumentException("Failed to parse collation rules");
                     }
                 }
             } else {

diff --git a/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java b/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java
index fef8d06c466b9..8e9c209ae421d 100644
--- a/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java
+++ b/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java
@@ -49,7 +49,7 @@ public class KuromojiPartOfSpeechFilterFactory extends AbstractTokenFilterFactor
     public KuromojiPartOfSpeechFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
-        List<String> wordList = Analysis.getWordList(env, settings, "stoptags");
+        List<String> wordList = Analysis.parseWordList(env, settings, "stoptags", s -> s);
         if (wordList != null) {
             stopTags.addAll(wordList);
         } else {

diff --git a/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiTokenizerFactory.java b/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiTokenizerFactory.java
index b5e718eaa6fa0..ac4def8ac9a11 100644
--- a/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiTokenizerFactory.java
+++ b/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiTokenizerFactory.java
@@ -32,6 +32,8 @@

 package org.opensearch.index.analysis;

+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ja.JapaneseTokenizer;
 import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
@@ -50,6 +52,7 @@

 public class KuromojiTokenizerFactory extends AbstractTokenizerFactory {
+    private static final Logger LOGGER = LogManager.getLogger(KuromojiTokenizerFactory.class);
     private static final String USER_DICT_PATH_OPTION = "user_dictionary";
     private static final String USER_DICT_RULES_OPTION = "user_dictionary_rules";
     private static final String NBEST_COST = "nbest_cost";
@@ -74,6 +77,14 @@ public KuromojiTokenizerFactory(IndexSettings indexSettings, Environment env, St
         discardCompoundToken = settings.getAsBoolean(DISCARD_COMPOUND_TOKEN, false);
     }

+    private static String parse(String rule, Set<String> dup) {
+        String[] values = CSVUtil.parse(rule);
+        if (dup.add(values[0]) == false) {
+            throw new IllegalArgumentException("Found duplicate term [" + values[0] + "] in user dictionary.");
+        }
+        return rule;
+    }
+
     public static UserDictionary getUserDictionary(Environment env, Settings settings) {
         if (settings.get(USER_DICT_PATH_OPTION) != null && settings.get(USER_DICT_RULES_OPTION) != null) {
             throw new IllegalArgumentException(
@@ -81,31 +92,26 @@ public static UserDictionary getUserDictionary(Environment env, Settings setting
             );
         }
         try {
-            List<String> ruleList = Analysis.getWordList(env, settings, USER_DICT_PATH_OPTION, USER_DICT_RULES_OPTION, false);
+            Set<String> dup = new HashSet<>();
+            List<String> ruleList = Analysis.parseWordList(
+                env,
+                settings,
+                USER_DICT_PATH_OPTION,
+                USER_DICT_RULES_OPTION,
+                s -> parse(s, dup)
+            );
             if (ruleList == null || ruleList.isEmpty()) {
                 return null;
             }
-            Set<String> dup = new HashSet<>();
-            int lineNum = 0;
-            for (String line : ruleList) {
-                // ignore comments
-                if (line.startsWith("#") == false) {
-                    String[] values = CSVUtil.parse(line);
-                    if (dup.add(values[0]) == false) {
-                        throw new IllegalArgumentException(
-                            "Found duplicate term [" + values[0] + "] in user dictionary " + "at line [" + lineNum + "]"
-                        );
-                    }
-                }
-                ++lineNum;
-            }
+
             StringBuilder sb = new StringBuilder();
             for (String line : ruleList) {
                 sb.append(line).append(System.lineSeparator());
             }
             return UserDictionary.open(new StringReader(sb.toString()));
         } catch (IOException e) {
-            throw new OpenSearchException("failed to load kuromoji user dictionary", e);
+            LOGGER.error("Failed to load kuromoji user dictionary", e);
+            throw new OpenSearchException("Failed to load kuromoji user dictionary");
         }
     }

diff --git a/plugins/analysis-kuromoji/src/test/java/org/opensearch/index/analysis/KuromojiAnalysisTests.java b/plugins/analysis-kuromoji/src/test/java/org/opensearch/index/analysis/KuromojiAnalysisTests.java
index e17658d83a085..23e6ef9fea059 100644
--- a/plugins/analysis-kuromoji/src/test/java/org/opensearch/index/analysis/KuromojiAnalysisTests.java
+++ b/plugins/analysis-kuromoji/src/test/java/org/opensearch/index/analysis/KuromojiAnalysisTests.java
@@ -390,8 +390,8 @@ public void testKuromojiAnalyzerDuplicateUserDictRule() throws Exception {
                 "制限スピード,制限スピード,セイゲンスピード,テスト名詞"
             )
             .build();
-        IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings));
-        assertThat(exc.getMessage(), containsString("[制限スピード] in user dictionary at line [3]"));
+        RuntimeException exc = expectThrows(RuntimeException.class, () -> createTestAnalysis(settings));
+        assertThat(exc.getMessage(), equalTo("Line [4]: Found duplicate term [制限スピード] in user dictionary."));
     }

     public void testDiscardCompoundToken() throws Exception {

diff --git a/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriAnalyzerProvider.java b/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriAnalyzerProvider.java
index 3dee606185429..e3b1cef6aee8a 100644
--- a/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriAnalyzerProvider.java
+++ b/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriAnalyzerProvider.java
@@ -52,7 +52,7 @@ public NoriAnalyzerProvider(IndexSettings indexSettings, Environment env, String
         super(indexSettings, name, settings);
         final KoreanTokenizer.DecompoundMode mode = NoriTokenizerFactory.getMode(settings);
         final UserDictionary userDictionary = NoriTokenizerFactory.getUserDictionary(env, settings);
-        final List<String> tagList = Analysis.getWordList(env, settings, "stoptags");
+        final List<String> tagList = Analysis.parseWordList(env, settings, "stoptags", s -> s);
         final Set<POS.Tag> stopTags = tagList != null ? resolvePOSList(tagList) : KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS;
         analyzer = new KoreanAnalyzer(userDictionary, mode, stopTags, false);
     }

diff --git a/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriPartOfSpeechStopFilterFactory.java b/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriPartOfSpeechStopFilterFactory.java
index 18cbc3c7c153d..5023db50422fc 100644
--- a/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriPartOfSpeechStopFilterFactory.java
+++ b/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriPartOfSpeechStopFilterFactory.java
@@ -48,7 +48,7 @@ public class NoriPartOfSpeechStopFilterFactory extends AbstractTokenFilterFactor
     public NoriPartOfSpeechStopFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
-        List<String> tagList = Analysis.getWordList(env, settings, "stoptags");
+        List<String> tagList = Analysis.parseWordList(env, settings, "stoptags", s -> s);
         this.stopTags = tagList != null ? resolvePOSList(tagList) : KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS;
     }

diff --git a/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriTokenizerFactory.java b/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriTokenizerFactory.java
index 5136277611e3a..9f3183194cdae 100644
--- a/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriTokenizerFactory.java
+++ b/plugins/analysis-nori/src/main/java/org/opensearch/index/analysis/NoriTokenizerFactory.java
@@ -32,6 +32,8 @@

 package org.opensearch.index.analysis;

+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ko.KoreanTokenizer;
 import org.apache.lucene.analysis.ko.dict.UserDictionary;
@@ -47,6 +49,7 @@
 import java.util.Locale;

 public class NoriTokenizerFactory extends AbstractTokenizerFactory {
+    private static final Logger LOGGER = LogManager.getLogger(NoriTokenizerFactory.class);
     private static final String USER_DICT_PATH_OPTION = "user_dictionary";
     private static final String USER_DICT_RULES_OPTION = "user_dictionary_rules";

@@ -67,7 +70,7 @@ public static UserDictionary getUserDictionary(Environment env, Settings setting
                 "It is not allowed to use [" + USER_DICT_PATH_OPTION + "] in conjunction" + " with [" + USER_DICT_RULES_OPTION + "]"
             );
         }
-        List<String> ruleList = Analysis.getWordList(env, settings, USER_DICT_PATH_OPTION, USER_DICT_RULES_OPTION, true);
+        List<String> ruleList = Analysis.parseWordList(env, settings, USER_DICT_PATH_OPTION, USER_DICT_RULES_OPTION, s -> s);
         StringBuilder sb = new StringBuilder();
         if (ruleList == null || ruleList.isEmpty()) {
             return null;
@@ -78,7 +81,8 @@ public static UserDictionary getUserDictionary(Environment env, Settings setting
         try (Reader rulesReader = new StringReader(sb.toString())) {
             return UserDictionary.open(rulesReader);
         } catch (IOException e) {
-            throw new OpenSearchException("failed to load nori user dictionary", e);
+            LOGGER.error("Failed to load nori user dictionary", e);
+            throw new OpenSearchException("Failed to load nori user dictionary");
         }
     }

diff --git a/release-notes/opensearch.release-notes-2.4.0.md b/release-notes/opensearch.release-notes-2.4.0.md
index a2c9311f4159b..a2919798a358a 100644
--- a/release-notes/opensearch.release-notes-2.4.0.md
+++ b/release-notes/opensearch.release-notes-2.4.0.md
@@ -95,6 +95,6 @@
 - Fix message "No OpenSearchException found" when detailed_error disabled by return meaningful messages ([#4708](https://github.com/opensearch-project/OpenSearch/pull/4708))
 - Add fix for auto expand replica validation ([#4994](https://github.com/opensearch-project/OpenSearch/pull/4994))
 - Fix build failures on the Windows platform ([#4924](https://github.com/opensearch-project/OpenSearch/issues/4924))
-
+- Fix error handling while reading analyzer mapping rules ([6d20423](https://github.com/opensearch-project/OpenSearch/commit/6d20423f5920745463b1abc5f1daf6a786c41aa0))

 ### Security
 - CVE-2022-25857 org.yaml:snakeyaml DOS vulnerability ([#4341](https://github.com/opensearch-project/OpenSearch/pull/4341))

diff --git a/server/src/main/java/org/opensearch/index/analysis/Analysis.java b/server/src/main/java/org/opensearch/index/analysis/Analysis.java
index 9fe5abcb3ea9d..f2a9caebb9c03 100644
--- a/server/src/main/java/org/opensearch/index/analysis/Analysis.java
+++ b/server/src/main/java/org/opensearch/index/analysis/Analysis.java
@@ -32,6 +32,8 @@

 package org.opensearch.index.analysis;

+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.ar.ArabicAnalyzer;
 import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
@@ -94,6 +96,7 @@
  * @opensearch.internal
  */
 public class Analysis {
+    private static final Logger LOGGER = LogManager.getLogger(Analysis.class);

     public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion) {
         String value = settings.get("stem_exclusion");
@@ -166,7 +169,7 @@ public static CharArraySet parseWords(
                 return resolveNamedWords(settings.getAsList(name), namedWords, ignoreCase);
             }
         }
-        List<String> pathLoadedWords = getWordList(env, settings, name);
+        List<String> pathLoadedWords = parseWordList(env, settings, name, s -> s);
         if (pathLoadedWords != null) {
             return resolveNamedWords(pathLoadedWords, namedWords, ignoreCase);
         }
@@ -207,7 +210,7 @@ private static CharArraySet resolveNamedWords(Collection<String> words, Map<St
     }

     public static CharArraySet getWordSet(Environment env, Settings settings, String settingsPrefix) {
-        List<String> wordList = getWordList(env, settings, settingsPrefix);
+        List<String> wordList = parseWordList(env, settings, settingsPrefix, s -> s);
         if (wordList == null) {
             return null;
         }
@@ -215,15 +218,48 @@ public static CharArraySet getWordSet(Environment env, Settings settings, String
         boolean ignoreCase = settings.getAsBoolean(settingsPrefix + "_case", false);
         return new CharArraySet(wordList, ignoreCase);
     }

+    public static <T> List<T> parseWordList(Environment env, Settings settings, String settingPrefix, CustomMappingRuleParser<T> parser) {
+        return parseWordList(env, settings, settingPrefix + "_path", settingPrefix, parser);
+    }
+
     /**
-     * Fetches a list of words from the specified settings file. The list should either be available at the key
-     * specified by settingsPrefix or in a file specified by settingsPrefix + _path.
+     * Parses a list of words from the specified settings or from a file, with the given parser.
      *
      * @throws IllegalArgumentException
      *             If the word list cannot be found at either key.
+     * @throws RuntimeException
+     *             If there is an error parsing the words
      */
-    public static List<String> getWordList(Environment env, Settings settings, String settingPrefix) {
-        return getWordList(env, settings, settingPrefix + "_path", settingPrefix, true);
+    public static <T> List<T> parseWordList(
+        Environment env,
+        Settings settings,
+        String settingPath,
+        String settingList,
+        CustomMappingRuleParser<T> parser
+    ) {
+        List<String> words = getWordList(env, settings, settingPath, settingList);
+        if (words == null) {
+            return null;
+        }
+        List<T> rules = new ArrayList<>();
+        int lineNum = 0;
+        for (String word : words) {
+            lineNum++;
+            if (word.startsWith("#") == false) {
+                try {
+                    rules.add(parser.apply(word));
+                } catch (RuntimeException ex) {
+                    String wordListPath = settings.get(settingPath, null);
+                    if (wordListPath == null || isUnderConfig(env, wordListPath)) {
+                        throw new RuntimeException("Line [" + lineNum + "]: " + ex.getMessage());
+                    } else {
+                        LOGGER.error("Line [{}]: {}", lineNum, ex);
+                        throw new RuntimeException("Line [" + lineNum + "]: " + "Invalid rule");
+                    }
+                }
+            }
+        }
+        return rules;
     }

     /**
@@ -233,43 +269,33 @@ public static List<String> getWordList(Environment env, Settings settings, Strin
      * @throws IllegalArgumentException
      *             If the word list cannot be found at either key.
      */
-    public static List<String> getWordList(
-        Environment env,
-        Settings settings,
-        String settingPath,
-        String settingList,
-        boolean removeComments
-    ) {
+    private static List<String> getWordList(Environment env, Settings settings, String settingPath, String settingList) {
         String wordListPath = settings.get(settingPath, null);

         if (wordListPath == null) {
-            List<String> explicitWordList = settings.getAsList(settingList, null);
-            if (explicitWordList == null) {
-                return null;
-            } else {
-                return explicitWordList;
-            }
+            return settings.getAsList(settingList, null);
         }

-        final Path path = env.configFile().resolve(wordListPath);
+        final Path path = resolveAnalyzerPath(env, wordListPath);

         try {
-            return loadWordList(path, removeComments);
+            return loadWordList(path);
         } catch (CharacterCodingException ex) {
             String message = String.format(
                 Locale.ROOT,
-                "Unsupported character encoding detected while reading %s: %s - files must be UTF-8 encoded",
-                settingPath,
-                path.toString()
+                "Unsupported character encoding detected while reading %s: files must be UTF-8 encoded",
+                settingPath
             );
-            throw new IllegalArgumentException(message, ex);
+            LOGGER.error("{}: from file: {}, exception is: {}", message, path.toString(), ex);
+            throw new IllegalArgumentException(message);
         } catch (IOException ioe) {
-            String message = String.format(Locale.ROOT, "IOException while reading %s: %s", settingPath, path.toString());
-            throw new IllegalArgumentException(message, ioe);
+            String message = String.format(Locale.ROOT, "IOException while reading %s: file not readable", settingPath);
+            LOGGER.error("{}, from file: {}, exception is: {}", message, path.toString(), ioe);
+            throw new IllegalArgumentException(message);
        }
     }

-    private static List<String> loadWordList(Path path, boolean removeComments) throws IOException {
+    private static List<String> loadWordList(Path path) throws IOException {
         final List<String> result = new ArrayList<>();
         try (BufferedReader br = Files.newBufferedReader(path, StandardCharsets.UTF_8)) {
             String word;
@@ -277,9 +303,7 @@ private static List<String> loadWordList(Path path, boolean removeComments) thro
                 if (Strings.hasText(word) == false) {
                     continue;
                 }
-                if (removeComments == false || word.startsWith("#") == false) {
-                    result.add(word.trim());
-                }
+                result.add(word.trim());
             }
         }
         return result;
@@ -296,21 +320,34 @@ public static Reader getReaderFromFile(Environment env, Settings settings, Strin
         if (filePath == null) {
             return null;
         }
-        final Path path = env.configFile().resolve(filePath);
+        final Path path = resolveAnalyzerPath(env, filePath);
         try {
             return Files.newBufferedReader(path, StandardCharsets.UTF_8);
         } catch (CharacterCodingException ex) {
             String message = String.format(
                 Locale.ROOT,
-                "Unsupported character encoding detected while reading %s_path: %s files must be UTF-8 encoded",
-                settingPrefix,
-                path.toString()
+                "Unsupported character encoding detected while reading %s_path: files must be UTF-8 encoded",
+                settingPrefix
             );
-            throw new IllegalArgumentException(message, ex);
+            LOGGER.error("{}: from file: {}, exception is: {}", message, path.toString(), ex);
+            throw new IllegalArgumentException(message);
         } catch (IOException ioe) {
-            String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, path.toString());
-            throw new IllegalArgumentException(message, ioe);
+            String message = String.format(Locale.ROOT, "IOException while reading %s_path: file not readable", settingPrefix);
+            LOGGER.error("{}, from file: {}, exception is: {}", message, path.toString(), ioe);
+            throw new IllegalArgumentException(message);
         }
     }

+    public static Path resolveAnalyzerPath(Environment env, String wordListPath) {
+        return env.configFile().resolve(wordListPath).normalize();
+    }
+
+    private static boolean isUnderConfig(Environment env, String wordListPath) {
+        try {
+            final Path path = env.configFile().resolve(wordListPath).normalize();
+            return path.startsWith(env.configFile().toAbsolutePath());
+        } catch (Exception ex) {
+            return false;
+        }
+    }
 }

diff --git a/server/src/main/java/org/opensearch/index/analysis/CustomMappingRuleParser.java b/server/src/main/java/org/opensearch/index/analysis/CustomMappingRuleParser.java
new file mode 100644
index 0000000000000..075c6ca1b585d
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/analysis/CustomMappingRuleParser.java
@@ -0,0 +1,21 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.analysis;
+
+import java.util.function.Function;
+
+/**
+ * A parser that takes a raw string and returns the parsed data of type T.
+ *
+ * @param <T> type of parsed data
+ */
+@FunctionalInterface
+public interface CustomMappingRuleParser<T> extends Function<String, T> {
+
+}

diff --git a/server/src/main/java/org/opensearch/index/analysis/MappingRule.java b/server/src/main/java/org/opensearch/index/analysis/MappingRule.java
new file mode 100644
index 0000000000000..92c9d2a17dc1e
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/analysis/MappingRule.java
@@ -0,0 +1,30 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.analysis;
+
+/**
+ * Represents a mapping between two objects.
+ */
+public class MappingRule<L, R> {
+    private final L left;
+    private final R right;
+
+    public MappingRule(L left, R right) {
+        this.left = left;
+        this.right = right;
+    }
+
+    public L getLeft() {
+        return left;
+    }
+
+    public R getRight() {
+        return right;
+    }
+}

diff --git a/server/src/main/java/org/opensearch/indices/analysis/HunspellService.java b/server/src/main/java/org/opensearch/indices/analysis/HunspellService.java
index 61191a1e49f63..20b7ec23a778e 100644
--- a/server/src/main/java/org/opensearch/indices/analysis/HunspellService.java
+++ b/server/src/main/java/org/opensearch/indices/analysis/HunspellService.java
@@ -123,7 +123,8 @@ public HunspellService(final Settings settings, final Environment env, final Map
             try {
                 return loadDictionary(locale, settings, env);
             } catch (Exception e) {
-                throw new IllegalStateException("failed to load hunspell dictionary for locale: " + locale, e);
+                logger.error("Failed to load hunspell dictionary for locale: " + locale, e);
+                throw new IllegalStateException("Failed to load hunspell dictionary for locale: " + locale);
             }
         };
         if (!HUNSPELL_LAZY_LOAD.get(settings)) {

diff --git a/server/src/test/java/org/opensearch/index/analysis/AnalysisTests.java b/server/src/test/java/org/opensearch/index/analysis/AnalysisTests.java
index d6f0cb194f222..01281ea323e60 100644
--- a/server/src/test/java/org/opensearch/index/analysis/AnalysisTests.java
+++ b/server/src/test/java/org/opensearch/index/analysis/AnalysisTests.java
@@ -39,14 +39,10 @@
 import org.opensearch.test.OpenSearchTestCase;

 import java.io.BufferedWriter;
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.OutputStream;
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.MalformedInputException;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
-import java.nio.file.NoSuchFileException;
 import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.List;
@@ -79,13 +75,10 @@ public void testParseNonExistingFile() {
         Environment env = TestEnvironment.newEnvironment(nodeSettings);
         IllegalArgumentException ex = expectThrows(
             IllegalArgumentException.class,
-            () -> Analysis.getWordList(env, nodeSettings, "foo.bar")
-        );
-        assertEquals("IOException while reading foo.bar_path: " + tempDir.resolve("foo.dict").toString(), ex.getMessage());
-        assertTrue(
-            ex.getCause().toString(),
-            ex.getCause() instanceof FileNotFoundException || ex.getCause() instanceof NoSuchFileException
+            () -> Analysis.parseWordList(env, nodeSettings, "foo.bar", s -> s)
         );
+        assertEquals("IOException while reading foo.bar_path: file not readable", ex.getMessage());
+        assertNull(ex.getCause());
     }

     public void testParseFalseEncodedFile() throws IOException {
@@ -99,18 +92,10 @@ public void testParseFalseEncodedFile() throws IOException {
         Environment env = TestEnvironment.newEnvironment(nodeSettings);
         IllegalArgumentException ex = expectThrows(
             IllegalArgumentException.class,
-            () -> Analysis.getWordList(env, nodeSettings, "foo.bar")
-        );
-        assertEquals(
-            "Unsupported character encoding detected while reading foo.bar_path: "
-                + tempDir.resolve("foo.dict").toString()
-                + " - files must be UTF-8 encoded",
-            ex.getMessage()
-        );
-        assertTrue(
-            ex.getCause().toString(),
-            ex.getCause() instanceof MalformedInputException || ex.getCause() instanceof CharacterCodingException
+            () -> Analysis.parseWordList(env, nodeSettings, "foo.bar", s -> s)
         );
+        assertEquals("Unsupported character encoding detected while reading foo.bar_path: files must be UTF-8 encoded", ex.getMessage());
+        assertNull(ex.getCause());
     }

     public void testParseWordList() throws IOException {
@@ -124,8 +109,42 @@ public void testParseWordList() throws IOException {
             writer.write('\n');
         }
         Environment env = TestEnvironment.newEnvironment(nodeSettings);
-        List<String> wordList = Analysis.getWordList(env, nodeSettings, "foo.bar");
+        List<String> wordList = Analysis.parseWordList(env, nodeSettings, "foo.bar", s -> s);
         assertEquals(Arrays.asList("hello", "world"), wordList);
+    }
+
+    public void testParseWordListError() throws IOException {
+        Path home = createTempDir();
+        Path config = home.resolve("config");
+        Files.createDirectory(config);
+        Path dict = config.resolve("foo.dict");
+        Settings nodeSettings = Settings.builder().put("foo.bar_path", dict).put(Environment.PATH_HOME_SETTING.getKey(), home).build();
+        try (BufferedWriter writer = Files.newBufferedWriter(dict, StandardCharsets.UTF_8)) {
+            writer.write("abcd");
+            writer.write('\n');
+        }
+        Environment env = TestEnvironment.newEnvironment(nodeSettings);
+        RuntimeException ex = expectThrows(
+            RuntimeException.class,
+            () -> Analysis.parseWordList(
+                env,
+                nodeSettings,
+                "foo.bar",
+                s -> { throw new RuntimeException("Error while parsing rule = " + s); }
+            )
+        );
+        assertEquals("Line [1]: Error while parsing rule = abcd", ex.getMessage());
+    }
+
+    public void testParseWordListOutsideConfigDirError() {
+        Path home = createTempDir();
+        Path dict = home.resolve("/etc/os-release");
+        Settings nodeSettings = Settings.builder().put("foo.bar_path", dict).put(Environment.PATH_HOME_SETTING.getKey(), home).build();
+        Environment env = TestEnvironment.newEnvironment(nodeSettings);
+        RuntimeException ex = expectThrows(
+            RuntimeException.class,
+            () -> Analysis.parseWordList(env, nodeSettings, "foo.bar", s -> { throw new RuntimeException("Error while parsing"); })
+        );
+        assertEquals("Line [1]: Invalid rule", ex.getMessage());
     }
 }

diff --git a/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java b/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
index efec81e803f1c..e2faa078134af 100644
--- a/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
+++ b/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
@@ -168,11 +168,12 @@ private void testSimpleConfiguration(Settings settings) throws IOException {
     }

     public void testWordListPath() throws Exception {
-        Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
+        Path home = createTempDir();
+        Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), home.toString()).build();
         Environment env = TestEnvironment.newEnvironment(settings);
         String[] words = new String[] { "donau", "dampf", "schiff", "spargel", "creme", "suppe" };

-        Path wordListFile = generateWordList(words);
+        Path wordListFile = generateWordList(home, words);
         settings = Settings.builder()
             .loadFromSource("index: \n  word_list_path: " + wordListFile.toAbsolutePath(), XContentType.YAML)
             .build();
@@ -183,8 +184,10 @@ public void testWordListPath() throws Exception {
         Files.delete(wordListFile);
     }

-    private Path generateWordList(String[] words) throws Exception {
-        Path wordListFile = createTempDir().resolve("wordlist.txt");
+    private Path generateWordList(Path home, String[] words) throws Exception {
+        Path config = home.resolve("config");
+        Files.createDirectory(config);
+        Path wordListFile = config.resolve("wordlist.txt");
         try (BufferedWriter writer = Files.newBufferedWriter(wordListFile, StandardCharsets.UTF_8)) {
             for (String word : words) {
                 writer.write(word);

diff --git a/server/src/test/java/org/opensearch/indices/analyze/HunspellServiceTests.java b/server/src/test/java/org/opensearch/indices/analyze/HunspellServiceTests.java
index 28732b099ea30..f66045898f4a3 100644
--- a/server/src/test/java/org/opensearch/indices/analyze/HunspellServiceTests.java
+++ b/server/src/test/java/org/opensearch/indices/analyze/HunspellServiceTests.java
@@ -42,8 +42,6 @@
 import static java.util.Collections.emptyMap;
 import static org.opensearch.indices.analysis.HunspellService.HUNSPELL_IGNORE_CASE;
 import static org.opensearch.indices.analysis.HunspellService.HUNSPELL_LAZY_LOAD;
-import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.hasToString;
 import static org.hamcrest.Matchers.notNullValue;

 public class HunspellServiceTests extends OpenSearchTestCase {
@@ -91,11 +89,11 @@ public void testDicWithNoAff() throws Exception {
             final Environment environment = new Environment(settings, getDataPath("/indices/analyze/no_aff_conf_dir"));
             new HunspellService(settings, environment, emptyMap()).getDictionary("en_US");
         });
-        assertEquals("failed to load hunspell dictionary for locale: en_US", e.getMessage());
-        assertThat(e.getCause(), hasToString(containsString("Missing affix file")));
+        assertEquals("Failed to load hunspell dictionary for locale: en_US", e.getMessage());
+        assertNull(e.getCause());
     }

-    public void testDicWithTwoAffs() throws Exception {
+    public void testDicWithTwoAffs() {
         Settings settings = Settings.builder()
             .put(HUNSPELL_LAZY_LOAD.getKey(), randomBoolean())
             .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
@@ -105,7 +103,7 @@ public void testDicWithTwoAffs() throws Exception {
             final Environment environment = new Environment(settings, getDataPath("/indices/analyze/two_aff_conf_dir"));
             new HunspellService(settings, environment, emptyMap()).getDictionary("en_US");
         });
-        assertEquals("failed to load hunspell dictionary for locale: en_US", e.getMessage());
-        assertThat(e.getCause(), hasToString(containsString("Too many affix files")));
+        assertEquals("Failed to load hunspell dictionary for locale: en_US", e.getMessage());
+        assertNull(e.getCause());
     }
 }
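
For reviewers, a minimal sketch of how an analysis factory consumes the new API after this change. HypotheticalRuleFilterFactory and its "rules"/"rules_path" settings are illustrative names, not part of this patch; only the Analysis.parseWordList, CustomMappingRuleParser, and MappingRule signatures introduced above are assumed.

    import org.opensearch.common.settings.Settings;
    import org.opensearch.env.Environment;
    import org.opensearch.index.analysis.Analysis;
    import org.opensearch.index.analysis.MappingRule;

    import java.util.List;

    public class HypotheticalRuleFilterFactory {
        private final List<MappingRule<String, String>> rules;

        HypotheticalRuleFilterFactory(Environment env, Settings settings) {
            // parseWordList reads "rules" (an inline settings list) or "rules_path"
            // (a file resolved against the config directory), skips '#' comment
            // lines, and applies the per-line parser to everything else.
            rules = Analysis.parseWordList(env, settings, "rules", this::parse);
            if (rules == null) {
                throw new IllegalArgumentException("requires either `rules` or `rules_path` to be configured");
            }
        }

        // Any RuntimeException thrown here is rethrown by parseWordList as
        // "Line [n]: <message>", or as the terse "Line [n]: Invalid rule" when
        // the rules file lives outside the config directory.
        private MappingRule<String, String> parse(String rule) {
            String[] sides = rule.split("=>", -1);
            if (sides.length != 2) {
                throw new RuntimeException("Invalid rule: " + rule);
            }
            return new MappingRule<>(sides[0].trim(), sides[1].trim());
        }
    }

This mirrors the pattern the patch applies to MappingCharFilterFactory and StemmerOverrideTokenFilterFactory: the factory supplies only the per-line parse logic, while line counting, comment skipping, and error wrapping live once in Analysis.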
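The terse-versus-verbose error decision rests on the path containment check in the private isUnderConfig helper added to Analysis. A self-contained approximation using only java.nio.file is shown below; the paths in main are illustrative, and the real helper additionally catches resolution exceptions and returns false.

    import java.nio.file.Path;

    public class ConfigDirCheck {
        // True when the resolved rules file stays under the config directory,
        // in which case its contents may safely be echoed back in error messages.
        static boolean isUnderConfig(Path configDir, String wordListPath) {
            Path resolved = configDir.resolve(wordListPath).normalize();
            return resolved.startsWith(configDir.toAbsolutePath());
        }

        public static void main(String[] args) {
            Path config = Path.of("/etc/opensearch").toAbsolutePath();
            System.out.println(isUnderConfig(config, "analysis/rules.txt"));   // true
            System.out.println(isUnderConfig(config, "../../etc/os-release")); // false
        }
    }

Normalizing before the startsWith check is what keeps "../" traversals from slipping outside the config directory while still allowing nested relative paths, which is why resolveAnalyzerPath also normalizes.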