From 7122da29e556f9cd6d6c9ea34e93610c9f459f19 Mon Sep 17 00:00:00 2001
From: Jose Angel Hernao <joseangelhernao@gmail.com>
Date: Fri, 17 Sep 2021 13:59:15 -0500
Subject: [PATCH 1/2] Add string clustering tests

---
 tests/creators/creator_stringclustering.py |   67 +
 tests/test_created__stringclustering.py    | 1395 ++++++++++++++++++++
 2 files changed, 1462 insertions(+)
 create mode 100644 tests/creators/creator_stringclustering.py
 create mode 100644 tests/test_created__stringclustering.py

diff --git a/tests/creators/creator_stringclustering.py b/tests/creators/creator_stringclustering.py
new file mode 100644
index 000000000..8a63d1f66
--- /dev/null
+++ b/tests/creators/creator_stringclustering.py
@@ -0,0 +1,67 @@
+import datetime
+import sys
+sys.path.append("../..")  # NOTE: a relative entry is resolved against the current working directory, not this file
+
+
+def create():  # Build the fixture frame, declare one case per (column set, algorithm), then call t.run()
+    from optimus import Optimus
+    from optimus.tests.creator import TestCreator, default_configs
+
+    op = Optimus("pandas")
+    df = op.create.dataframe({  # keys are plain names or (name, hint) tuples; hint looks like a dtype — TODO confirm
+        'NullType': [None, None, None, None, None, None],
+        'date arrival': ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'],
+        'height(ft)': [-28, 17, 26, 13, None, 300],
+        ('last date seen', 'date'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'],
+        'last position seen': ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None],
+        'rank': [10, 7, 7, 8, 10, 8],
+        ('Cybertronian', 'bool'): [True, True, True, True, True, False],
+        ('Date Type'): [datetime.datetime(2016, 9, 10), datetime.datetime(2015, 8, 10), datetime.datetime(2014, 6, 24), datetime.datetime(2013, 6, 24), datetime.datetime(2012, 5, 10), datetime.datetime(2011, 4, 10)],  # NOTE: ('Date Type') is a parenthesized str, not a tuple
+        ('age', 'int'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000],
+        ('function', 'string'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'],
+        ('names', 'str'): ['Optimus', 'bumbl#ebéé  ', 'ironhide&', 'Jazz', 'Megatron', 'Metroplex_)^$'],
+        ('timestamp', 'time'): [datetime.datetime(2014, 6, 24, 0, 0), datetime.datetime(2014, 6, 24, 0, 0), datetime.datetime(2014, 6, 24, 0, 0), datetime.datetime(2014, 6, 24, 0, 0), datetime.datetime(2014, 6, 24, 0, 0), datetime.datetime(2014, 6, 24, 0, 0)],
+        ('weight(t)', 'float'): [4.3, 2.0, 4.0, 1.8, 5.7, None]
+    })
+
+    t = TestCreator(op, df, name="stringclustering", configs=default_configs)  # name presumably selects the generated test module/class — confirm
+    # Every column ("*") against each clustering algorithm.
+    t.create(method="string_clustering", variant="all_fingerprint", cols="*", algorithm="fingerprint")
+    t.create(method="string_clustering", variant="all_ngram_fingerprint", cols="*", algorithm="ngram_fingerprint")
+    t.create(method="string_clustering", variant="all_metaphone", cols="*", algorithm="metaphone")
+    t.create(method="string_clustering", variant="all_nysiis", cols="*", algorithm="nysiis")
+    t.create(method="string_clustering", variant="all_match_rating_codex", cols="*", algorithm="match_rating_codex")
+    t.create(method="string_clustering", variant="all_double_metaphone", cols="*", algorithm="double_metaphone")
+    t.create(method="string_clustering", variant="all_soundex", cols="*", algorithm="soundex")
+    t.create(method="string_clustering", variant="all_levenshtein", cols="*", algorithm="levenshtein")
+    # A single integer column ("rank").
+    t.create(method="string_clustering", variant="numeric_fingerprint", cols=["rank"], algorithm="fingerprint")
+    t.create(method="string_clustering", variant="numeric_ngram_fingerprint", cols=["rank"], algorithm="ngram_fingerprint")
+    t.create(method="string_clustering", variant="numeric_metaphone", cols=["rank"], algorithm="metaphone")
+    t.create(method="string_clustering", variant="numeric_nysiis", cols=["rank"], algorithm="nysiis")
+    t.create(method="string_clustering", variant="numeric_match_rating_codex", cols=["rank"], algorithm="match_rating_codex")
+    t.create(method="string_clustering", variant="numeric_double_metaphone", cols=["rank"], algorithm="double_metaphone")
+    t.create(method="string_clustering", variant="numeric_soundex", cols=["rank"], algorithm="soundex")
+    t.create(method="string_clustering", variant="numeric_levenshtein", cols=["rank"], algorithm="levenshtein")
+    # A single string column ("names"); its values include punctuation, accented letters and trailing spaces.
+    t.create(method="string_clustering", variant="string_fingerprint", cols=["names"], algorithm="fingerprint")
+    t.create(method="string_clustering", variant="string_ngram_fingerprint", cols=["names"], algorithm="ngram_fingerprint")
+    t.create(method="string_clustering", variant="string_metaphone", cols=["names"], algorithm="metaphone")
+    t.create(method="string_clustering", variant="string_nysiis", cols=["names"], algorithm="nysiis")
+    t.create(method="string_clustering", variant="string_match_rating_codex", cols=["names"], algorithm="match_rating_codex")
+    t.create(method="string_clustering", variant="string_double_metaphone", cols=["names"], algorithm="double_metaphone")
+    t.create(method="string_clustering", variant="string_soundex", cols=["names"], algorithm="soundex")
+    t.create(method="string_clustering", variant="string_levenshtein", cols=["names"], algorithm="levenshtein")
+    # Several non-string columns at once: all-None, boolean and datetime.
+    t.create(method="string_clustering", variant="multiple_fingerprint", cols=["NullType","Cybertronian","timestamp"], algorithm="fingerprint")
+    t.create(method="string_clustering", variant="multiple_ngram_fingerprint", cols=["NullType","Cybertronian","timestamp"], algorithm="ngram_fingerprint")
+    t.create(method="string_clustering", variant="multiple_metaphone", cols=["NullType","Cybertronian","timestamp"], algorithm="metaphone")
+    t.create(method="string_clustering", variant="multiple_nysiis", cols=["NullType","Cybertronian","timestamp"], algorithm="nysiis")
+    t.create(method="string_clustering", variant="multiple_match_rating_codex", cols=["NullType","Cybertronian","timestamp"], algorithm="match_rating_codex")
+    t.create(method="string_clustering", variant="multiple_double_metaphone", cols=["NullType","Cybertronian","timestamp"], algorithm="double_metaphone")
+    t.create(method="string_clustering", variant="multiple_soundex", cols=["NullType","Cybertronian","timestamp"], algorithm="soundex")
+    t.create(method="string_clustering", variant="multiple_levenshtein", cols=["NullType","Cybertronian","timestamp"], algorithm="levenshtein")
+
+    t.run()  # presumably executes the declared cases and writes the test module — confirm in TestCreator
+
+create()  # NOTE: no __main__ guard, so this also runs whenever the module is imported
\ No newline at end of file
diff --git a/tests/test_created__stringclustering.py b/tests/test_created__stringclustering.py
new file mode 100644
index 000000000..4542e89a6
--- /dev/null
+++ b/tests/test_created__stringclustering.py
@@ -0,0 +1,1395 @@
+import datetime
+from optimus.tests.base import TestBase
+from optimus.helpers.json import json_encoding
+from optimus.helpers.functions import deep_sort, df_dicts_equal, results_equal
+
+
+def Timestamp(t):  # Parse a "YYYY-MM-DD HH:MM:SS" string into a naive datetime.datetime
+    return datetime.datetime.strptime(t, "%Y-%m-%d %H:%M:%S")
+
+
+nan = float("nan")  # bare-name alias so `nan` inside the fixture/expected literals below evaluates
+inf = float("inf")  # same idea for `inf`; not referenced in the part of the file visible in this chunk
+
+
+class TestStringclusteringPandas(TestBase):
+    config = {'engine': 'pandas'}  # presumably read by TestBase to choose the engine — confirm
+    dict = {('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'ironhide&', 'Jazz', 'Megatron', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}
+    maxDiff = None  # unittest-style "never truncate assertion diffs"; assumes TestBase derives from unittest.TestCase
+
+    def test_string_clustering_all_double_metaphone(self):  # every column ('*') with algorithm='double_metaphone'
+        df = self.df  # not defined in this class; presumably supplied by TestBase — confirm
+        result = df.string_clustering(cols='*', algorithm='double_metaphone')
+        expected = { 'Cybertronian': { ('FLS', ''): { 'suggestion': False,  # layout: column -> 2-tuple of codes -> cluster summary
+                                   'suggestions': [False],
+                                   'suggestions_size': 1,
+                                   'total_count': 1},
+                    ('TR', ''): { 'suggestion': True,
+                                  'suggestions': [True],
+                                  'suggestions_size': 1,
+                                  'total_count': 5}},
+  'Date Type': { ('', ''): { 'suggestion': Timestamp('2016-09-10 00:00:00'),  # all six dates share the ('', '') key
+                             'suggestions': [ Timestamp('2016-09-10 00:00:00'),
+                                              Timestamp('2015-08-10 00:00:00'),
+                                              Timestamp('2014-06-24 00:00:00'),
+                                              Timestamp('2013-06-24 00:00:00'),
+                                              Timestamp('2012-05-10 00:00:00'),
+                                              Timestamp('2011-04-10 00:00:00')],
+                             'suggestions_size': 6,
+                             'total_count': 6}},
+  'NullType': { ('NN', ''): { 'suggestion': None,  # None (and nan further down) are keyed as ('NN', '')
+                              'suggestions': [None],
+                              'suggestions_size': 1,
+                              'total_count': 6}},
+  'age': { ('', ''): { 'suggestion': 5000000,
+                       'suggestions': [5000000],
+                       'suggestions_size': 1,
+                       'total_count': 6}},
+  'date arrival': { ('', ''): { 'suggestion': '1980/04/10',
+                                'suggestions': ['1980/04/10'],
+                                'suggestions_size': 1,
+                                'total_count': 6}},
+  'function': { ('ASPNJ', 'ASPNK'): { 'suggestion': 'Espionage',
+                                      'suggestions': ['Espionage'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1},
+                ('FRSTLTNNT', ''): { 'suggestion': 'First Lieutenant',
+                                     'suggestions': ['First Lieutenant'],
+                                     'suggestions_size': 1,
+                                     'total_count': 1},
+                ('LTR', ''): { 'suggestion': 'Leader',
+                               'suggestions': ['Leader'],
+                               'suggestions_size': 1,
+                               'total_count': 1},
+                ('NN', ''): { 'suggestion': None,
+                              'suggestions': [None],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+                ('PTLSTXN', ''): { 'suggestion': 'Battle Station',
+                                   'suggestions': ['Battle Station'],
+                                   'suggestions_size': 1,
+                                   'total_count': 1},
+                ('SKRT', ''): { 'suggestion': 'Security',
+                                'suggestions': ['Security'],
+                                'suggestions_size': 1,
+                                'total_count': 1}},
+  'height(ft)': { ('', ''): { 'suggestion': -28.0,
+                              'suggestions': [-28.0, 17.0, 26.0, 13.0, 300.0],
+                              'suggestions_size': 5,
+                              'total_count': 5},
+                  ('NN', ''): { 'suggestion': nan,
+                                'suggestions': [nan],
+                                'suggestions_size': 1,
+                                'total_count': 1}},
+  'last date seen': { ('', ''): { 'suggestion': '2016/09/10',
+                                  'suggestions': [ '2016/09/10',
+                                                   '2015/08/10',
+                                                   '2014/07/10',
+                                                   '2013/06/10',
+                                                   '2012/05/10',
+                                                   '2011/04/10'],
+                                  'suggestions_size': 6,
+                                  'total_count': 6}},
+  'last position seen': { ('', ''): { 'suggestion': '19.442735,-99.201111',
+                                      'suggestions': [ '19.442735,-99.201111',
+                                                       '10.642707,-71.612534',
+                                                       '37.789563,-122.400356',
+                                                       '33.670666,-117.841553'],
+                                      'suggestions_size': 4,
+                                      'total_count': 4},
+                          ('NN', ''): { 'suggestion': None,
+                                        'suggestions': [None],
+                                        'suggestions_size': 1,
+                                        'total_count': 2}},
+  'names': { ('APTMS', ''): { 'suggestion': 'Optimus',
+                              'suggestions': ['Optimus'],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+             ('ARNT', ''): { 'suggestion': 'ironhide&',
+                             'suggestions': ['ironhide&'],
+                             'suggestions_size': 1,
+                             'total_count': 1},
+             ('JS', 'AS'): { 'suggestion': 'Jazz',
+                             'suggestions': ['Jazz'],
+                             'suggestions_size': 1,
+                             'total_count': 1},
+             ('MKTRN', ''): { 'suggestion': 'Megatron',
+                              'suggestions': ['Megatron'],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+             ('MTRPLKSKSKSKSKS', ''): { 'suggestion': 'Metroplex_)^$',
+                                        'suggestions': ['Metroplex_)^$'],
+                                        'suggestions_size': 1,
+                                        'total_count': 1},
+             ('PMPLLP', ''): { 'suggestion': 'bumbl#ebéé  ',
+                               'suggestions': ['bumbl#ebéé  '],
+                               'suggestions_size': 1,
+                               'total_count': 1}},
+  'rank': { ('', ''): { 'suggestion': 10,
+                        'suggestions': [10, 7, 8],
+                        'suggestions_size': 3,
+                        'total_count': 6}},
+  'timestamp': { ('', ''): { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                             'suggestions': [Timestamp('2014-06-24 00:00:00')],
+                             'suggestions_size': 1,
+                             'total_count': 6}},
+  'weight(t)': { ('', ''): { 'suggestion': 4.3,
+                             'suggestions': [4.3, 2.0, 4.0, 1.8, 5.7],
+                             'suggestions_size': 5,
+                             'total_count': 5},
+                 ('NN', ''): { 'suggestion': nan,
+                               'suggestions': [nan],
+                               'suggestions_size': 1,
+                               'total_count': 1}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_all_fingerprint(self):  # every column ('*') with algorithm='fingerprint'
+        df = self.df  # not defined in this class; presumably supplied by TestBase — confirm
+        result = df.string_clustering(cols='*', algorithm='fingerprint')
+        expected = { 'Cybertronian': { 'false': { 'suggestion': False,  # layout: column -> fingerprint string -> cluster summary
+                               'suggestions': [False],
+                               'suggestions_size': 1,
+                               'total_count': 1},
+                    'true': { 'suggestion': True,
+                              'suggestions': [True],
+                              'suggestions_size': 1,
+                              'total_count': 5}},
+  'Date Type': { '20110410': { 'suggestion': Timestamp('2011-04-10 00:00:00'),
+                               'suggestions': [ Timestamp('2011-04-10 00:00:00')],
+                               'suggestions_size': 1,
+                               'total_count': 1},
+                 '20120510': { 'suggestion': Timestamp('2012-05-10 00:00:00'),
+                               'suggestions': [ Timestamp('2012-05-10 00:00:00')],
+                               'suggestions_size': 1,
+                               'total_count': 1},
+                 '20130624': { 'suggestion': Timestamp('2013-06-24 00:00:00'),
+                               'suggestions': [ Timestamp('2013-06-24 00:00:00')],
+                               'suggestions_size': 1,
+                               'total_count': 1},
+                 '20140624': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                               'suggestions': [ Timestamp('2014-06-24 00:00:00')],
+                               'suggestions_size': 1,
+                               'total_count': 1},
+                 '20150810': { 'suggestion': Timestamp('2015-08-10 00:00:00'),
+                               'suggestions': [ Timestamp('2015-08-10 00:00:00')],
+                               'suggestions_size': 1,
+                               'total_count': 1},
+                 '20160910': { 'suggestion': Timestamp('2016-09-10 00:00:00'),
+                               'suggestions': [ Timestamp('2016-09-10 00:00:00')],
+                               'suggestions_size': 1,
+                               'total_count': 1}},
+  'NullType': { 'none': { 'suggestion': None,
+                          'suggestions': [None],
+                          'suggestions_size': 1,
+                          'total_count': 6}},
+  'age': { '5000000': { 'suggestion': 5000000,
+                        'suggestions': [5000000],
+                        'suggestions_size': 1,
+                        'total_count': 6}},
+  'date arrival': { '19800410': { 'suggestion': '1980/04/10',
+                                  'suggestions': ['1980/04/10'],
+                                  'suggestions_size': 1,
+                                  'total_count': 6}},
+  'function': { 'battle station': { 'suggestion': 'Battle Station',
+                                    'suggestions': ['Battle Station'],
+                                    'suggestions_size': 1,
+                                    'total_count': 1},
+                'espionage': { 'suggestion': 'Espionage',
+                               'suggestions': ['Espionage'],
+                               'suggestions_size': 1,
+                               'total_count': 1},
+                'first lieutenant': { 'suggestion': 'First Lieutenant',
+                                      'suggestions': ['First Lieutenant'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1},
+                'leader': { 'suggestion': 'Leader',
+                            'suggestions': ['Leader'],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                'none': { 'suggestion': None,
+                          'suggestions': [None],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                'security': { 'suggestion': 'Security',
+                              'suggestions': ['Security'],
+                              'suggestions_size': 1,
+                              'total_count': 1}},
+  'height(ft)': { '130': { 'suggestion': 13.0,
+                           'suggestions': [13.0],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+                  '170': { 'suggestion': 17.0,
+                           'suggestions': [17.0],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+                  '260': { 'suggestion': 26.0,
+                           'suggestions': [26.0],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+                  '280': { 'suggestion': -28.0,  # key carries neither the sign nor the decimal point of -28.0
+                           'suggestions': [-28.0],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+                  '3000': { 'suggestion': 300.0,
+                            'suggestions': [300.0],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                  'nan': { 'suggestion': nan,
+                           'suggestions': [nan],
+                           'suggestions_size': 1,
+                           'total_count': 1}},
+  'last date seen': { '20110410': { 'suggestion': '2011/04/10',
+                                    'suggestions': ['2011/04/10'],
+                                    'suggestions_size': 1,
+                                    'total_count': 1},
+                      '20120510': { 'suggestion': '2012/05/10',
+                                    'suggestions': ['2012/05/10'],
+                                    'suggestions_size': 1,
+                                    'total_count': 1},
+                      '20130610': { 'suggestion': '2013/06/10',
+                                    'suggestions': ['2013/06/10'],
+                                    'suggestions_size': 1,
+                                    'total_count': 1},
+                      '20140710': { 'suggestion': '2014/07/10',
+                                    'suggestions': ['2014/07/10'],
+                                    'suggestions_size': 1,
+                                    'total_count': 1},
+                      '20150810': { 'suggestion': '2015/08/10',
+                                    'suggestions': ['2015/08/10'],
+                                    'suggestions_size': 1,
+                                    'total_count': 1},
+                      '20160910': { 'suggestion': '2016/09/10',
+                                    'suggestions': ['2016/09/10'],
+                                    'suggestions_size': 1,
+                                    'total_count': 1}},
+  'last position seen': { '1064270771612534': { 'suggestion': '10.642707,-71.612534',
+                                                'suggestions': [ '10.642707,-71.612534'],
+                                                'suggestions_size': 1,
+                                                'total_count': 1},
+                          '1944273599201111': { 'suggestion': '19.442735,-99.201111',
+                                                'suggestions': [ '19.442735,-99.201111'],
+                                                'suggestions_size': 1,
+                                                'total_count': 1},
+                          '33670666117841553': { 'suggestion': '33.670666,-117.841553',
+                                                 'suggestions': [ '33.670666,-117.841553'],
+                                                 'suggestions_size': 1,
+                                                 'total_count': 1},
+                          '37789563122400356': { 'suggestion': '37.789563,-122.400356',
+                                                 'suggestions': [ '37.789563,-122.400356'],
+                                                 'suggestions_size': 1,
+                                                 'total_count': 1},
+                          'none': { 'suggestion': None,
+                                    'suggestions': [None],
+                                    'suggestions_size': 1,
+                                    'total_count': 2}},
+  'names': { 'bumblebee': { 'suggestion': 'bumbl#ebéé  ',  # key is lower-case ASCII with '#', accents and spaces gone
+                            'suggestions': ['bumbl#ebéé  '],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+             'ironhide': { 'suggestion': 'ironhide&',
+                           'suggestions': ['ironhide&'],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+             'jazz': { 'suggestion': 'Jazz',
+                       'suggestions': ['Jazz'],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'megatron': { 'suggestion': 'Megatron',
+                           'suggestions': ['Megatron'],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+             'metroplex': { 'suggestion': 'Metroplex_)^$',
+                            'suggestions': ['Metroplex_)^$'],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+             'optimus': { 'suggestion': 'Optimus',
+                          'suggestions': ['Optimus'],
+                          'suggestions_size': 1,
+                          'total_count': 1}},
+  'rank': { '10': { 'suggestion': 10,
+                    'suggestions': [10],
+                    'suggestions_size': 1,
+                    'total_count': 2},
+            '7': { 'suggestion': 7,
+                   'suggestions': [7],
+                   'suggestions_size': 1,
+                   'total_count': 2},
+            '8': { 'suggestion': 8,
+                   'suggestions': [8],
+                   'suggestions_size': 1,
+                   'total_count': 2}},
+  'timestamp': { '20140624': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                               'suggestions': [ Timestamp('2014-06-24 00:00:00')],
+                               'suggestions_size': 1,
+                               'total_count': 6}},
+  'weight(t)': { '18': { 'suggestion': 1.8,
+                         'suggestions': [1.8],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                 '20': { 'suggestion': 2.0,
+                         'suggestions': [2.0],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                 '40': { 'suggestion': 4.0,
+                         'suggestions': [4.0],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                 '43': { 'suggestion': 4.3,
+                         'suggestions': [4.3],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                 '57': { 'suggestion': 5.7,
+                         'suggestions': [5.7],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                 'nan': { 'suggestion': nan,
+                          'suggestions': [nan],
+                          'suggestions_size': 1,
+                          'total_count': 1}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_all_levenshtein(self):  # every column ('*') with algorithm='levenshtein'
+        df = self.df  # not defined in this class; presumably supplied by TestBase — confirm
+        result = df.string_clustering(cols='*', algorithm='levenshtein')
+        expected = { 'weight(t)': { '18': { 'suggestion': '18',  # NOTE(review): only 'weight(t)' is listed although cols='*' — confirm
+                         'suggestions': ['18', '43'],  # suggestions are digit strings here, not the original float values
+                         'suggestions_size': 2,
+                         'total_count': 6},  # every cluster in this expectation reports total_count 6
+                 '20': { 'suggestion': '20',
+                         'suggestions': ['20', '40'],
+                         'suggestions_size': 2,
+                         'total_count': 6},
+                 '40': { 'suggestion': '40',
+                         'suggestions': ['40', '43'],
+                         'suggestions_size': 2,
+                         'total_count': 6},
+                 '43': { 'suggestion': '43',
+                         'suggestions': ['43', '40'],
+                         'suggestions_size': 2,
+                         'total_count': 6},
+                 '57': { 'suggestion': '57',
+                         'suggestions': ['57', '43'],
+                         'suggestions_size': 2,
+                         'total_count': 6},
+                 'nan': { 'suggestion': 'nan',
+                          'suggestions': ['nan', '43'],
+                          'suggestions_size': 2,
+                          'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_all_match_rating_codex(self):  # every column ('*') with algorithm='match_rating_codex'
+        df = self.df  # not defined in this class; presumably supplied by TestBase — confirm
+        result = df.string_clustering(cols='*', algorithm='match_rating_codex')
+        # FIXME: placeholder, not a real expectation: `expected` is only the input fixture (self.dict), not the operation's output
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # NOTE(review): sibling tests call results_equal(result, expected, decimal=5, ...) — confirm
+
+    def test_string_clustering_all_metaphone(self):
+        df = self.df
+        result = df.string_clustering(cols='*', algorithm='metaphone')
+        expected = { 'Cybertronian': { 'FLS': { 'suggestion': False,
+                             'suggestions': [False],
+                             'suggestions_size': 1,
+                             'total_count': 1},
+                    'TR': { 'suggestion': True,
+                            'suggestions': [True],
+                            'suggestions_size': 1,
+                            'total_count': 5}},
+  'Date Type': { '': { 'suggestion': Timestamp('2016-09-10 00:00:00'),
+                       'suggestions': [ Timestamp('2016-09-10 00:00:00'),
+                                        Timestamp('2015-08-10 00:00:00'),
+                                        Timestamp('2014-06-24 00:00:00'),
+                                        Timestamp('2013-06-24 00:00:00'),
+                                        Timestamp('2012-05-10 00:00:00'),
+                                        Timestamp('2011-04-10 00:00:00')],
+                       'suggestions_size': 6,
+                       'total_count': 6}},
+  'NullType': { 'NN': { 'suggestion': None,
+                        'suggestions': [None],
+                        'suggestions_size': 1,
+                        'total_count': 6}},
+  'age': { '': { 'suggestion': 5000000,
+                 'suggestions': [5000000],
+                 'suggestions_size': 1,
+                 'total_count': 6}},
+  'date arrival': { '': { 'suggestion': '1980/04/10',
+                          'suggestions': ['1980/04/10'],
+                          'suggestions_size': 1,
+                          'total_count': 6}},
+  'function': { 'BTL STXN': { 'suggestion': 'Battle Station',
+                              'suggestions': ['Battle Station'],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+                'ESPNJ': { 'suggestion': 'Espionage',
+                           'suggestions': ['Espionage'],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+                'FRST LTNNT': { 'suggestion': 'First Lieutenant',
+                                'suggestions': ['First Lieutenant'],
+                                'suggestions_size': 1,
+                                'total_count': 1},
+                'LTR': { 'suggestion': 'Leader',
+                         'suggestions': ['Leader'],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                'NN': { 'suggestion': None,
+                        'suggestions': [None],
+                        'suggestions_size': 1,
+                        'total_count': 1},
+                'SKRT': { 'suggestion': 'Security',
+                          'suggestions': ['Security'],
+                          'suggestions_size': 1,
+                          'total_count': 1}},
+  'height(ft)': { '': { 'suggestion': -28.0,
+                        'suggestions': [-28.0, 17.0, 26.0, 13.0, 300.0],
+                        'suggestions_size': 5,
+                        'total_count': 5},
+                  'NN': { 'suggestion': nan,
+                          'suggestions': [nan],
+                          'suggestions_size': 1,
+                          'total_count': 1}},
+  'last date seen': { '': { 'suggestion': '2016/09/10',
+                            'suggestions': [ '2016/09/10',
+                                             '2015/08/10',
+                                             '2014/07/10',
+                                             '2013/06/10',
+                                             '2012/05/10',
+                                             '2011/04/10'],
+                            'suggestions_size': 6,
+                            'total_count': 6}},
+  'last position seen': { '': { 'suggestion': '19.442735,-99.201111',
+                                'suggestions': [ '19.442735,-99.201111',
+                                                 '10.642707,-71.612534',
+                                                 '37.789563,-122.400356',
+                                                 '33.670666,-117.841553'],
+                                'suggestions_size': 4,
+                                'total_count': 4},
+                          'NN': { 'suggestion': None,
+                                  'suggestions': [None],
+                                  'suggestions_size': 1,
+                                  'total_count': 2}},
+  'names': { 'BMBLB ': { 'suggestion': 'bumbl#ebéé  ',
+                         'suggestions': ['bumbl#ebéé  '],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+             'IRNHT': { 'suggestion': 'ironhide&',
+                        'suggestions': ['ironhide&'],
+                        'suggestions_size': 1,
+                        'total_count': 1},
+             'JS': { 'suggestion': 'Jazz',
+                     'suggestions': ['Jazz'],
+                     'suggestions_size': 1,
+                     'total_count': 1},
+             'MKTRN': { 'suggestion': 'Megatron',
+                        'suggestions': ['Megatron'],
+                        'suggestions_size': 1,
+                        'total_count': 1},
+             'MTRPLKS': { 'suggestion': 'Metroplex_)^$',
+                          'suggestions': ['Metroplex_)^$'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+             'OPTMS': { 'suggestion': 'Optimus',
+                        'suggestions': ['Optimus'],
+                        'suggestions_size': 1,
+                        'total_count': 1}},
+  'rank': { '': { 'suggestion': 10,
+                  'suggestions': [10, 7, 8],
+                  'suggestions_size': 3,
+                  'total_count': 6}},
+  'timestamp': { '': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                       'suggestions': [Timestamp('2014-06-24 00:00:00')],
+                       'suggestions_size': 1,
+                       'total_count': 6}},
+  'weight(t)': { '': { 'suggestion': 4.3,
+                       'suggestions': [4.3, 2.0, 4.0, 1.8, 5.7],
+                       'suggestions_size': 5,
+                       'total_count': 5},
+                 'NN': { 'suggestion': nan,
+                         'suggestions': [nan],
+                         'suggestions_size': 1,
+                         'total_count': 1}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_all_ngram_fingerprint(self):  # string_clustering over cols='*' with the 'ngram_fingerprint' algorithm
+        df = self.df  # fixture dataframe; it is set up outside this method
+        result = df.string_clustering(cols='*', algorithm='ngram_fingerprint')  # expected below maps column -> cluster key -> {suggestion, suggestions, suggestions_size, total_count}
+        expected = { 'Cybertronian': { 'alfalsse': { 'suggestion': False,
+                                  'suggestions': [False],
+                                  'suggestions_size': 1,
+                                  'total_count': 1},
+                    'rutrue': { 'suggestion': True,
+                                'suggestions': [True],
+                                'suggestions_size': 1,
+                                'total_count': 5}},
+  'Date Type': { '010410112041': { 'suggestion': Timestamp('2011-04-10 00:00:00'),
+                                   'suggestions': [ Timestamp('2011-04-10 00:00:00')],
+                                   'suggestions_size': 1,
+                                   'total_count': 1},
+                 '010510122051': { 'suggestion': Timestamp('2012-05-10 00:00:00'),
+                                   'suggestions': [ Timestamp('2012-05-10 00:00:00')],
+                                   'suggestions_size': 1,
+                                   'total_count': 1},
+                 '01061320243062': { 'suggestion': Timestamp('2013-06-24 00:00:00'),
+                                     'suggestions': [ Timestamp('2013-06-24 00:00:00')],
+                                     'suggestions_size': 1,
+                                     'total_count': 1},
+                 '01061420244062': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                                     'suggestions': [ Timestamp('2014-06-24 00:00:00')],
+                                     'suggestions_size': 1,
+                                     'total_count': 1},
+                 '01081015205081': { 'suggestion': Timestamp('2015-08-10 00:00:00'),
+                                     'suggestions': [ Timestamp('2015-08-10 00:00:00')],
+                                     'suggestions_size': 1,
+                                     'total_count': 1},
+                 '01091016206091': { 'suggestion': Timestamp('2016-09-10 00:00:00'),
+                                     'suggestions': [ Timestamp('2016-09-10 00:00:00')],
+                                     'suggestions_size': 1,
+                                     'total_count': 1}},
+  'NullType': { 'nenoon': { 'suggestion': None,
+                            'suggestions': [None],
+                            'suggestions_size': 1,
+                            'total_count': 6}},
+  'age': { '0050': { 'suggestion': 5000000,
+                     'suggestions': [5000000],
+                     'suggestions_size': 1,
+                     'total_count': 6}},
+  'date arrival': { '00041019418098': { 'suggestion': '1980/04/10',
+                                        'suggestions': ['1980/04/10'],
+                                        'suggestions_size': 1,
+                                        'total_count': 6}},
+  'function': { 'addeeaerle': { 'suggestion': 'Leader',
+                                'suggestions': ['Leader'],
+                                'suggestions_size': 1,
+                                'total_count': 1},
+                'agesgeionaonpisp': { 'suggestion': 'Espionage',
+                                      'suggestions': ['Espionage'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1},
+                'aneneufiieirlinantrssttetlut': { 'suggestion': 'First '
+                                                                'Lieutenant',
+                                                  'suggestions': [ 'First '
+                                                                   'Lieutenant'],
+                                                  'suggestions_size': 1,
+                                                  'total_count': 1},
+                'atbaesioleonsttatitltt': { 'suggestion': 'Battle Station',
+                                            'suggestions': ['Battle Station'],
+                                            'suggestions_size': 1,
+                                            'total_count': 1},
+                'cuecitrisetyur': { 'suggestion': 'Security',
+                                    'suggestions': ['Security'],
+                                    'suggestions_size': 1,
+                                    'total_count': 1},
+                'nenoon': { 'suggestion': None,
+                            'suggestions': [None],
+                            'suggestions_size': 1,
+                            'total_count': 1}},
+  'height(ft)': { '0030': { 'suggestion': 300.0,
+                            'suggestions': [300.0],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                  '1330': { 'suggestion': 13.0,
+                            'suggestions': [13.0],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                  '1770': { 'suggestion': 17.0,
+                            'suggestions': [17.0],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                  '2660': { 'suggestion': 26.0,
+                            'suggestions': [26.0],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                  '2880': { 'suggestion': -28.0,
+                            'suggestions': [-28.0],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                  'anna': { 'suggestion': nan,
+                            'suggestions': [nan],
+                            'suggestions_size': 1,
+                            'total_count': 1}},
+  'last date seen': { '010410112041': { 'suggestion': '2011/04/10',
+                                        'suggestions': ['2011/04/10'],
+                                        'suggestions_size': 1,
+                                        'total_count': 1},
+                      '010510122051': { 'suggestion': '2012/05/10',
+                                        'suggestions': ['2012/05/10'],
+                                        'suggestions_size': 1,
+                                        'total_count': 1},
+                      '01061013203061': { 'suggestion': '2013/06/10',
+                                          'suggestions': ['2013/06/10'],
+                                          'suggestions_size': 1,
+                                          'total_count': 1},
+                      '01071014204071': { 'suggestion': '2014/07/10',
+                                          'suggestions': ['2014/07/10'],
+                                          'suggestions_size': 1,
+                                          'total_count': 1},
+                      '01081015205081': { 'suggestion': '2015/08/10',
+                                          'suggestions': ['2015/08/10'],
+                                          'suggestions_size': 1,
+                                          'total_count': 1},
+                      '01091016206091': { 'suggestion': '2016/09/10',
+                                          'suggestions': ['2016/09/10'],
+                                          'suggestions_size': 1,
+                                          'total_count': 1}},
+  'last position seen': { '000312222431353740566377788995': { 'suggestion': '37.789563,-122.400356',
+                                                              'suggestions': [ '37.789563,-122.400356'],
+                                                              'suggestions_size': 1,
+                                                              'total_count': 1},
+                          '01111920273542445973929499': { 'suggestion': '19.442735,-99.201111',
+                                                          'suggestions': [ '19.442735,-99.201111'],
+                                                          'suggestions_size': 1,
+                                                          'total_count': 1},
+                          '060710121625273442536164707177': { 'suggestion': '10.642707,-71.612534',
+                                                              'suggestions': [ '10.642707,-71.612534'],
+                                                              'suggestions_size': 1,
+                                                              'total_count': 1},
+                          '061115173336415355616667707884': { 'suggestion': '33.670666,-117.841553',
+                                                              'suggestions': [ '33.670666,-117.841553'],
+                                                              'suggestions_size': 1,
+                                                              'total_count': 1},
+                          'nenoon': { 'suggestion': None,
+                                      'suggestions': [None],
+                                      'suggestions_size': 1,
+                                      'total_count': 2}},
+  'names': { 'ateggameonrotr': { 'suggestion': 'Megatron',
+                                 'suggestions': ['Megatron'],
+                                 'suggestions_size': 1,
+                                 'total_count': 1},
+             'azjazz': { 'suggestion': 'Jazz',
+                         'suggestions': ['Jazz'],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+             'beblbuebeelembum': { 'suggestion': 'bumbl#ebéé  ',
+                                   'suggestions': ['bumbl#ebéé  '],
+                                   'suggestions_size': 1,
+                                   'total_count': 1},
+             'dehiidirnhonro': { 'suggestion': 'ironhide&',
+                                 'suggestions': ['ironhide&'],
+                                 'suggestions_size': 1,
+                                 'total_count': 1},
+             'etexlemeopplrotr': { 'suggestion': 'Metroplex_)^$',
+                                   'suggestions': ['Metroplex_)^$'],
+                                   'suggestions_size': 1,
+                                   'total_count': 1},
+             'immuoppttius': { 'suggestion': 'Optimus',
+                               'suggestions': ['Optimus'],
+                               'suggestions_size': 1,
+                               'total_count': 1}},
+  'rank': { '': { 'suggestion': 7,  # 7 and 8 are both expected under the empty cluster key
+                  'suggestions': [7, 8],
+                  'suggestions_size': 2,
+                  'total_count': 4},
+            '10': { 'suggestion': 10,
+                    'suggestions': [10],
+                    'suggestions_size': 1,
+                    'total_count': 2}},
+  'timestamp': { '01061420244062': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                                     'suggestions': [ Timestamp('2014-06-24 00:00:00')],
+                                     'suggestions_size': 1,
+                                     'total_count': 6}},
+  'weight(t)': { '18': { 'suggestion': 1.8,
+                         'suggestions': [1.8],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                 '20': { 'suggestion': 2.0,
+                         'suggestions': [2.0],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                 '40': { 'suggestion': 4.0,
+                         'suggestions': [4.0],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                 '43': { 'suggestion': 4.3,
+                         'suggestions': [4.3],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                 '57': { 'suggestion': 5.7,
+                         'suggestions': [5.7],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                 'anna': { 'suggestion': nan,
+                           'suggestions': [nan],
+                           'suggestions_size': 1,
+                           'total_count': 1}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))  # results_equal comes from outside this method; decimal=5 presumably sets the float tolerance - TODO confirm
+
+    def test_string_clustering_all_nysiis(self):  # string_clustering over cols='*' with the 'nysiis' algorithm
+        df = self.df  # fixture dataframe; it is set up outside this method
+        result = df.string_clustering(cols='*', algorithm='nysiis')
+        # The value below is a placeholder: it does not represent a correct output of the operation.
+        expected = self.dict  # NOTE(review): self.dict is not defined in this method, and sibling tests use literal dicts - replace with the real expected clusters
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # NOTE(review): sibling tests call results_equal(result, expected, decimal=5, assertion=True) rather than result.equals - confirm this check is intended
+
+    def test_string_clustering_all_soundex(self):  # string_clustering over cols='*' with the 'soundex' algorithm
+        df = self.df  # fixture dataframe; it is set up outside this method
+        result = df.string_clustering(cols='*', algorithm='soundex')  # expected below maps column -> cluster key -> {suggestion, suggestions, suggestions_size, total_count}
+        expected = { 'Cybertronian': { 'F420': { 'suggestion': False,
+                              'suggestions': [False],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+                    'T600': { 'suggestion': True,
+                              'suggestions': [True],
+                              'suggestions_size': 1,
+                              'total_count': 5}},
+  'Date Type': { '2000': { 'suggestion': Timestamp('2016-09-10 00:00:00'),
+                           'suggestions': [ Timestamp('2016-09-10 00:00:00'),
+                                            Timestamp('2015-08-10 00:00:00'),
+                                            Timestamp('2014-06-24 00:00:00'),
+                                            Timestamp('2013-06-24 00:00:00'),
+                                            Timestamp('2012-05-10 00:00:00'),
+                                            Timestamp('2011-04-10 00:00:00')],
+                           'suggestions_size': 6,
+                           'total_count': 6}},
+  'NullType': { 'N500': { 'suggestion': None,
+                          'suggestions': [None],
+                          'suggestions_size': 1,
+                          'total_count': 6}},
+  'age': { '5000': { 'suggestion': 5000000,
+                     'suggestions': [5000000],
+                     'suggestions_size': 1,
+                     'total_count': 6}},
+  'date arrival': { '1000': { 'suggestion': '1980/04/10',
+                              'suggestions': ['1980/04/10'],
+                              'suggestions_size': 1,
+                              'total_count': 6}},
+  'function': { 'B342': { 'suggestion': 'Battle Station',
+                          'suggestions': ['Battle Station'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                'E215': { 'suggestion': 'Espionage',
+                          'suggestions': ['Espionage'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                'F623': { 'suggestion': 'First Lieutenant',
+                          'suggestions': ['First Lieutenant'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                'L360': { 'suggestion': 'Leader',
+                          'suggestions': ['Leader'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                'N500': { 'suggestion': None,
+                          'suggestions': [None],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                'S263': { 'suggestion': 'Security',
+                          'suggestions': ['Security'],
+                          'suggestions_size': 1,
+                          'total_count': 1}},
+  'height(ft)': { '-000': { 'suggestion': -28.0,
+                            'suggestions': [-28.0],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                  '1000': { 'suggestion': 17.0,  # 17.0 and 13.0 are expected in the same cluster
+                            'suggestions': [17.0, 13.0],
+                            'suggestions_size': 2,
+                            'total_count': 2},
+                  '2000': { 'suggestion': 26.0,
+                            'suggestions': [26.0],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                  '3000': { 'suggestion': 300.0,
+                            'suggestions': [300.0],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                  'N500': { 'suggestion': nan,
+                            'suggestions': [nan],
+                            'suggestions_size': 1,
+                            'total_count': 1}},
+  'last date seen': { '2000': { 'suggestion': '2016/09/10',
+                                'suggestions': [ '2016/09/10',
+                                                 '2015/08/10',
+                                                 '2014/07/10',
+                                                 '2013/06/10',
+                                                 '2012/05/10',
+                                                 '2011/04/10'],
+                                'suggestions_size': 6,
+                                'total_count': 6}},
+  'last position seen': { '1000': { 'suggestion': '19.442735,-99.201111',
+                                    'suggestions': [ '19.442735,-99.201111',
+                                                     '10.642707,-71.612534'],
+                                    'suggestions_size': 2,
+                                    'total_count': 2},
+                          '3000': { 'suggestion': '37.789563,-122.400356',
+                                    'suggestions': [ '37.789563,-122.400356',
+                                                     '33.670666,-117.841553'],
+                                    'suggestions_size': 2,
+                                    'total_count': 2},
+                          'N500': { 'suggestion': None,
+                                    'suggestions': [None],
+                                    'suggestions_size': 1,
+                                    'total_count': 2}},
+  'names': { 'B514': { 'suggestion': 'bumbl#ebéé  ',
+                       'suggestions': ['bumbl#ebéé  '],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'I653': { 'suggestion': 'ironhide&',
+                       'suggestions': ['ironhide&'],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'J200': { 'suggestion': 'Jazz',
+                       'suggestions': ['Jazz'],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'M236': { 'suggestion': 'Megatron',
+                       'suggestions': ['Megatron'],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'M361': { 'suggestion': 'Metroplex_)^$',
+                       'suggestions': ['Metroplex_)^$'],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'O135': { 'suggestion': 'Optimus',
+                       'suggestions': ['Optimus'],
+                       'suggestions_size': 1,
+                       'total_count': 1}},
+  'rank': { '1000': { 'suggestion': 10,
+                      'suggestions': [10],
+                      'suggestions_size': 1,
+                      'total_count': 2},
+            '7000': { 'suggestion': 7,
+                      'suggestions': [7],
+                      'suggestions_size': 1,
+                      'total_count': 2},
+            '8000': { 'suggestion': 8,
+                      'suggestions': [8],
+                      'suggestions_size': 1,
+                      'total_count': 2}},
+  'timestamp': { '2000': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                           'suggestions': [Timestamp('2014-06-24 00:00:00')],
+                           'suggestions_size': 1,
+                           'total_count': 6}},
+  'weight(t)': { '1000': { 'suggestion': 1.8,
+                           'suggestions': [1.8],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+                 '2000': { 'suggestion': 2.0,
+                           'suggestions': [2.0],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+                 '4000': { 'suggestion': 4.3,  # 4.3 and 4.0 are expected in the same cluster
+                           'suggestions': [4.3, 4.0],
+                           'suggestions_size': 2,
+                           'total_count': 2},
+                 '5000': { 'suggestion': 5.7,
+                           'suggestions': [5.7],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+                 'N500': { 'suggestion': nan,
+                           'suggestions': [nan],
+                           'suggestions_size': 1,
+                           'total_count': 1}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))  # results_equal comes from outside this method; decimal=5 presumably sets the float tolerance - TODO confirm
+
+    def test_string_clustering_multiple_double_metaphone(self):  # string_clustering over three named columns with the 'double_metaphone' algorithm
+        df = self.df  # fixture dataframe; it is set up outside this method
+        result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='double_metaphone')
+        expected = { 'Cybertronian': { ('FLS', ''): { 'suggestion': False,  # cluster keys are 2-tuples in this test, unlike the string keys in the sibling tests
+                                   'suggestions': [False],
+                                   'suggestions_size': 1,
+                                   'total_count': 1},
+                    ('TR', ''): { 'suggestion': True,
+                                  'suggestions': [True],
+                                  'suggestions_size': 1,
+                                  'total_count': 5}},
+  'NullType': { ('NN', ''): { 'suggestion': None,
+                              'suggestions': [None],
+                              'suggestions_size': 1,
+                              'total_count': 6}},
+  'timestamp': { ('', ''): { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                             'suggestions': [Timestamp('2014-06-24 00:00:00')],
+                             'suggestions_size': 1,
+                             'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))  # results_equal comes from outside this method; decimal=5 presumably sets the float tolerance - TODO confirm
+
+    def test_string_clustering_multiple_fingerprint(self):  # string_clustering over three named columns with the 'fingerprint' algorithm
+        df = self.df  # fixture dataframe; it is set up outside this method
+        result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='fingerprint')  # expected below maps column -> cluster key -> cluster summary
+        expected = { 'Cybertronian': { 'false': { 'suggestion': False,
+                               'suggestions': [False],
+                               'suggestions_size': 1,
+                               'total_count': 1},
+                    'true': { 'suggestion': True,
+                              'suggestions': [True],
+                              'suggestions_size': 1,
+                              'total_count': 5}},
+  'NullType': { 'none': { 'suggestion': None,
+                          'suggestions': [None],
+                          'suggestions_size': 1,
+                          'total_count': 6}},
+  'timestamp': { '20140624': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                               'suggestions': [ Timestamp('2014-06-24 00:00:00')],
+                               'suggestions_size': 1,
+                               'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))  # results_equal comes from outside this method; decimal=5 presumably sets the float tolerance - TODO confirm
+
+    def test_string_clustering_multiple_levenshtein(self):  # string_clustering over three named columns with the 'levenshtein' algorithm
+        df = self.df  # fixture dataframe; it is set up outside this method
+        result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='levenshtein')
+        expected = { 'timestamp': { '20140624': { 'suggestion': '20140624',  # NOTE(review): only 'timestamp' is expected although three columns are requested, and the suggestion is a str where sibling tests expect a Timestamp - confirm
+                               'suggestions': ['20140624'],
+                               'suggestions_size': 1,
+                               'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))  # results_equal comes from outside this method; decimal=5 presumably sets the float tolerance - TODO confirm
+
+    def test_string_clustering_multiple_match_rating_codex(self):  # string_clustering over three named columns with the 'match_rating_codex' algorithm
+        df = self.df  # fixture dataframe; it is set up outside this method
+        result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='match_rating_codex')  # expected below maps column -> cluster key -> cluster summary
+        expected = { 'Cybertronian': { 'FLS': { 'suggestion': False,
+                             'suggestions': [False],
+                             'suggestions_size': 1,
+                             'total_count': 1},
+                    'TR': { 'suggestion': True,
+                            'suggestions': [True],
+                            'suggestions_size': 1,
+                            'total_count': 5}},
+  'NullType': { 'N': { 'suggestion': None,
+                       'suggestions': [None],
+                       'suggestions_size': 1,
+                       'total_count': 6}},
+  'timestamp': { '201-24': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                             'suggestions': [Timestamp('2014-06-24 00:00:00')],
+                             'suggestions_size': 1,
+                             'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))  # results_equal comes from outside this method; decimal=5 presumably sets the float tolerance - TODO confirm
+
+    def test_string_clustering_multiple_metaphone(self):  # string_clustering over three named columns with the 'metaphone' algorithm
+        df = self.df  # fixture dataframe; it is set up outside this method
+        result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='metaphone')  # expected below maps column -> cluster key -> cluster summary
+        expected = { 'Cybertronian': { 'FLS': { 'suggestion': False,
+                             'suggestions': [False],
+                             'suggestions_size': 1,
+                             'total_count': 1},
+                    'TR': { 'suggestion': True,
+                            'suggestions': [True],
+                            'suggestions_size': 1,
+                            'total_count': 5}},
+  'NullType': { 'NN': { 'suggestion': None,
+                        'suggestions': [None],
+                        'suggestions_size': 1,
+                        'total_count': 6}},
+  'timestamp': { '': { 'suggestion': Timestamp('2014-06-24 00:00:00'),  # the timestamp values are expected under an empty cluster key
+                       'suggestions': [Timestamp('2014-06-24 00:00:00')],
+                       'suggestions_size': 1,
+                       'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))  # results_equal comes from outside this method; decimal=5 presumably sets the float tolerance - TODO confirm
+
+    def test_string_clustering_multiple_ngram_fingerprint(self):  # string_clustering over three named columns with the 'ngram_fingerprint' algorithm
+        df = self.df  # fixture dataframe; it is set up outside this method
+        result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='ngram_fingerprint')  # expected below maps column -> cluster key -> cluster summary
+        expected = { 'Cybertronian': { 'alfalsse': { 'suggestion': False,
+                                  'suggestions': [False],
+                                  'suggestions_size': 1,
+                                  'total_count': 1},
+                    'rutrue': { 'suggestion': True,
+                                'suggestions': [True],
+                                'suggestions_size': 1,
+                                'total_count': 5}},
+  'NullType': { 'nenoon': { 'suggestion': None,
+                            'suggestions': [None],
+                            'suggestions_size': 1,
+                            'total_count': 6}},
+  'timestamp': { '01061420244062': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                                     'suggestions': [ Timestamp('2014-06-24 00:00:00')],
+                                     'suggestions_size': 1,
+                                     'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))  # results_equal comes from outside this method; decimal=5 presumably sets the float tolerance - TODO confirm
+
+    def test_string_clustering_multiple_nysiis(self):
+        df = self.df
+        result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='nysiis')
+        expected = { 'Cybertronian': { 'FALS': { 'suggestion': False,
+                              'suggestions': [False],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+                    'TR': { 'suggestion': True,
+                            'suggestions': [True],
+                            'suggestions_size': 1,
+                            'total_count': 5}},
+  'NullType': { 'NAN': { 'suggestion': None,
+                         'suggestions': [None],
+                         'suggestions_size': 1,
+                         'total_count': 6}},
+  'timestamp': { '2014-06-24': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                                 'suggestions': [ Timestamp('2014-06-24 00:00:00')],
+                                 'suggestions_size': 1,
+                                 'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_multiple_soundex(self):
+        df = self.df
+        result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='soundex')
+        expected = { 'Cybertronian': { 'F420': { 'suggestion': False,
+                              'suggestions': [False],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+                    'T600': { 'suggestion': True,
+                              'suggestions': [True],
+                              'suggestions_size': 1,
+                              'total_count': 5}},
+  'NullType': { 'N500': { 'suggestion': None,
+                          'suggestions': [None],
+                          'suggestions_size': 1,
+                          'total_count': 6}},
+  'timestamp': { '2000': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
+                           'suggestions': [Timestamp('2014-06-24 00:00:00')],
+                           'suggestions_size': 1,
+                           'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_numeric_double_metaphone(self):
+        df = self.df
+        result = df.string_clustering(cols=['rank'], algorithm='double_metaphone')
+        expected = { 'rank': { ('', ''): { 'suggestion': 10,
+                        'suggestions': [10, 7, 8],
+                        'suggestions_size': 3,
+                        'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_numeric_fingerprint(self):
+        df = self.df
+        result = df.string_clustering(cols=['rank'], algorithm='fingerprint')
+        expected = { 'rank': { '10': { 'suggestion': 10,
+                    'suggestions': [10],
+                    'suggestions_size': 1,
+                    'total_count': 2},
+            '7': { 'suggestion': 7,
+                   'suggestions': [7],
+                   'suggestions_size': 1,
+                   'total_count': 2},
+            '8': { 'suggestion': 8,
+                   'suggestions': [8],
+                   'suggestions_size': 1,
+                   'total_count': 2}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_numeric_levenshtein(self):
+        df = self.df
+        result = df.string_clustering(cols=['rank'], algorithm='levenshtein')
+        expected = { 'rank': { '10': { 'suggestion': '10',
+                    'suggestions': ['10'],
+                    'suggestions_size': 1,
+                    'total_count': 6},
+            '7': { 'suggestion': '7',
+                   'suggestions': ['7'],
+                   'suggestions_size': 1,
+                   'total_count': 6},
+            '8': { 'suggestion': '8',
+                   'suggestions': ['8'],
+                   'suggestions_size': 1,
+                   'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_numeric_match_rating_codex(self):
+        df = self.df
+        result = df.string_clustering(cols=['rank'], algorithm='match_rating_codex')
+        expected = { 'rank': { '10': { 'suggestion': 10,
+                    'suggestions': [10],
+                    'suggestions_size': 1,
+                    'total_count': 2},
+            '7': { 'suggestion': 7,
+                   'suggestions': [7],
+                   'suggestions_size': 1,
+                   'total_count': 2},
+            '8': { 'suggestion': 8,
+                   'suggestions': [8],
+                   'suggestions_size': 1,
+                   'total_count': 2}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_numeric_metaphone(self):
+        df = self.df
+        result = df.string_clustering(cols=['rank'], algorithm='metaphone')
+        expected = { 'rank': { '': { 'suggestion': 10,
+                  'suggestions': [10, 7, 8],
+                  'suggestions_size': 3,
+                  'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_numeric_ngram_fingerprint(self):
+        df = self.df
+        result = df.string_clustering(cols=['rank'], algorithm='ngram_fingerprint')
+        expected = { 'rank': { '': { 'suggestion': 7,
+                  'suggestions': [7, 8],
+                  'suggestions_size': 2,
+                  'total_count': 4},
+            '10': { 'suggestion': 10,
+                    'suggestions': [10],
+                    'suggestions_size': 1,
+                    'total_count': 2}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_numeric_nysiis(self):
+        df = self.df
+        result = df.string_clustering(cols=['rank'], algorithm='nysiis')
+        expected = { 'rank': { '10': { 'suggestion': 10,
+                    'suggestions': [10],
+                    'suggestions_size': 1,
+                    'total_count': 2},
+            '7': { 'suggestion': 7,
+                   'suggestions': [7],
+                   'suggestions_size': 1,
+                   'total_count': 2},
+            '8': { 'suggestion': 8,
+                   'suggestions': [8],
+                   'suggestions_size': 1,
+                   'total_count': 2}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_numeric_soundex(self):
+        df = self.df
+        result = df.string_clustering(cols=['rank'], algorithm='soundex')
+        expected = { 'rank': { '1000': { 'suggestion': 10,
+                      'suggestions': [10],
+                      'suggestions_size': 1,
+                      'total_count': 2},
+            '7000': { 'suggestion': 7,
+                      'suggestions': [7],
+                      'suggestions_size': 1,
+                      'total_count': 2},
+            '8000': { 'suggestion': 8,
+                      'suggestions': [8],
+                      'suggestions_size': 1,
+                      'total_count': 2}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_string_double_metaphone(self):
+        df = self.df
+        result = df.string_clustering(cols=['names'], algorithm='double_metaphone')
+        expected = { 'names': { ('APTMS', ''): { 'suggestion': 'Optimus',
+                              'suggestions': ['Optimus'],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+             ('ARNT', ''): { 'suggestion': 'ironhide&',
+                             'suggestions': ['ironhide&'],
+                             'suggestions_size': 1,
+                             'total_count': 1},
+             ('JS', 'AS'): { 'suggestion': 'Jazz',
+                             'suggestions': ['Jazz'],
+                             'suggestions_size': 1,
+                             'total_count': 1},
+             ('MKTRN', ''): { 'suggestion': 'Megatron',
+                              'suggestions': ['Megatron'],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+             ('MTRPLKSKSKSKSKS', ''): { 'suggestion': 'Metroplex_)^$',
+                                        'suggestions': ['Metroplex_)^$'],
+                                        'suggestions_size': 1,
+                                        'total_count': 1},
+             ('PMPLLP', ''): { 'suggestion': 'bumbl#ebéé  ',
+                               'suggestions': ['bumbl#ebéé  '],
+                               'suggestions_size': 1,
+                               'total_count': 1}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_string_fingerprint(self):
+        df = self.df
+        result = df.string_clustering(cols=['names'], algorithm='fingerprint')
+        expected = { 'names': { 'bumblebee': { 'suggestion': 'bumbl#ebéé  ',
+                            'suggestions': ['bumbl#ebéé  '],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+             'ironhide': { 'suggestion': 'ironhide&',
+                           'suggestions': ['ironhide&'],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+             'jazz': { 'suggestion': 'Jazz',
+                       'suggestions': ['Jazz'],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'megatron': { 'suggestion': 'Megatron',
+                           'suggestions': ['Megatron'],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+             'metroplex': { 'suggestion': 'Metroplex_)^$',
+                            'suggestions': ['Metroplex_)^$'],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+             'optimus': { 'suggestion': 'Optimus',
+                          'suggestions': ['Optimus'],
+                          'suggestions_size': 1,
+                          'total_count': 1}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_string_levenshtein(self):
+        df = self.df
+        result = df.string_clustering(cols=['names'], algorithm='levenshtein')
+        expected = { 'names': { 'bumblebee': { 'suggestion': 'bumblebee',
+                            'suggestions': ['bumblebee', 'ironhide'],
+                            'suggestions_size': 2,
+                            'total_count': 6},
+             'ironhide': { 'suggestion': 'ironhide',
+                           'suggestions': ['ironhide', 'optimus'],
+                           'suggestions_size': 2,
+                           'total_count': 6},
+             'jazz': { 'suggestion': 'jazz',
+                       'suggestions': ['jazz', 'optimus'],
+                       'suggestions_size': 2,
+                       'total_count': 6},
+             'megatron': { 'suggestion': 'megatron',
+                           'suggestions': ['megatron', 'metroplex'],
+                           'suggestions_size': 2,
+                           'total_count': 6},
+             'metroplex': { 'suggestion': 'metroplex',
+                            'suggestions': ['metroplex', 'megatron'],
+                            'suggestions_size': 2,
+                            'total_count': 6},
+             'optimus': { 'suggestion': 'optimus',
+                          'suggestions': ['optimus', 'ironhide'],
+                          'suggestions_size': 2,
+                          'total_count': 6}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_string_match_rating_codex(self):
+        df = self.df
+        result = df.string_clustering(cols=['names'], algorithm='match_rating_codex')
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_string_clustering_string_metaphone(self):
+        df = self.df
+        result = df.string_clustering(cols=['names'], algorithm='metaphone')
+        expected = { 'names': { 'BMBLB ': { 'suggestion': 'bumbl#ebéé  ',
+                         'suggestions': ['bumbl#ebéé  '],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+             'IRNHT': { 'suggestion': 'ironhide&',
+                        'suggestions': ['ironhide&'],
+                        'suggestions_size': 1,
+                        'total_count': 1},
+             'JS': { 'suggestion': 'Jazz',
+                     'suggestions': ['Jazz'],
+                     'suggestions_size': 1,
+                     'total_count': 1},
+             'MKTRN': { 'suggestion': 'Megatron',
+                        'suggestions': ['Megatron'],
+                        'suggestions_size': 1,
+                        'total_count': 1},
+             'MTRPLKS': { 'suggestion': 'Metroplex_)^$',
+                          'suggestions': ['Metroplex_)^$'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+             'OPTMS': { 'suggestion': 'Optimus',
+                        'suggestions': ['Optimus'],
+                        'suggestions_size': 1,
+                        'total_count': 1}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_string_ngram_fingerprint(self):
+        df = self.df
+        result = df.string_clustering(cols=['names'], algorithm='ngram_fingerprint')
+        expected = { 'names': { 'ateggameonrotr': { 'suggestion': 'Megatron',
+                                 'suggestions': ['Megatron'],
+                                 'suggestions_size': 1,
+                                 'total_count': 1},
+             'azjazz': { 'suggestion': 'Jazz',
+                         'suggestions': ['Jazz'],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+             'beblbuebeelembum': { 'suggestion': 'bumbl#ebéé  ',
+                                   'suggestions': ['bumbl#ebéé  '],
+                                   'suggestions_size': 1,
+                                   'total_count': 1},
+             'dehiidirnhonro': { 'suggestion': 'ironhide&',
+                                 'suggestions': ['ironhide&'],
+                                 'suggestions_size': 1,
+                                 'total_count': 1},
+             'etexlemeopplrotr': { 'suggestion': 'Metroplex_)^$',
+                                   'suggestions': ['Metroplex_)^$'],
+                                   'suggestions_size': 1,
+                                   'total_count': 1},
+             'immuoppttius': { 'suggestion': 'Optimus',
+                               'suggestions': ['Optimus'],
+                               'suggestions_size': 1,
+                               'total_count': 1}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+    def test_string_clustering_string_nysiis(self):
+        df = self.df
+        result = df.string_clustering(cols=['names'], algorithm='nysiis')
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_string_clustering_string_soundex(self):
+        df = self.df
+        result = df.string_clustering(cols=['names'], algorithm='soundex')
+        expected = { 'names': { 'B514': { 'suggestion': 'bumbl#ebéé  ',
+                       'suggestions': ['bumbl#ebéé  '],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'I653': { 'suggestion': 'ironhide&',
+                       'suggestions': ['ironhide&'],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'J200': { 'suggestion': 'Jazz',
+                       'suggestions': ['Jazz'],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'M236': { 'suggestion': 'Megatron',
+                       'suggestions': ['Megatron'],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'M361': { 'suggestion': 'Metroplex_)^$',
+                       'suggestions': ['Metroplex_)^$'],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+             'O135': { 'suggestion': 'Optimus',
+                       'suggestions': ['Optimus'],
+                       'suggestions_size': 1,
+                       'total_count': 1}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
+
+class TestStringclusteringDask(TestStringclusteringPandas):
+    config = {'engine': 'dask', 'n_partitions': 1}
+
+
+class TestStringclusteringPartitionDask(TestStringclusteringPandas):
+    config = {'engine': 'dask', 'n_partitions': 2}
+
+
+try:
+    import cudf # pyright: reportMissingImports=false
+except Exception:  # best-effort: any import failure skips this engine, but Ctrl-C / SystemExit still propagate
+    pass
+else:  # cudf imported cleanly, so define the cudf-configured subclass of the pandas suite
+    class TestStringclusteringCUDF(TestStringclusteringPandas):
+        config = {'engine': 'cudf'}
+
+
+try:
+    import dask_cudf # pyright: reportMissingImports=false
+except Exception:  # best-effort: any import failure skips this engine, but Ctrl-C / SystemExit still propagate
+    pass
+else:  # dask_cudf imported cleanly, so define the single-partition dask_cudf subclass of the pandas suite
+    class TestStringclusteringDC(TestStringclusteringPandas):
+        config = {'engine': 'dask_cudf', 'n_partitions': 1}
+
+
+try:
+    import dask_cudf # pyright: reportMissingImports=false
+except Exception:  # best-effort: any import failure skips this engine, but Ctrl-C / SystemExit still propagate
+    pass
+else:  # dask_cudf imported cleanly, so define the two-partition dask_cudf subclass of the pandas suite
+    class TestStringclusteringPartitionDC(TestStringclusteringPandas):
+        config = {'engine': 'dask_cudf', 'n_partitions': 2}
+
+
+try:
+    import pyspark # pyright: reportMissingImports=false
+except Exception:  # best-effort: any import failure skips this engine, but Ctrl-C / SystemExit still propagate
+    pass
+else:  # pyspark imported cleanly, so define the spark-configured subclass of the pandas suite
+    class TestStringclusteringSpark(TestStringclusteringPandas):
+        config = {'engine': 'spark'}
+
+
+try:
+    import vaex # pyright: reportMissingImports=false
+except Exception:  # best-effort: any import failure skips this engine, but Ctrl-C / SystemExit still propagate
+    pass
+else:  # vaex imported cleanly, so define the vaex-configured subclass of the pandas suite
+    class TestStringclusteringVaex(TestStringclusteringPandas):
+        config = {'engine': 'vaex'}

From 61fc3797ba6e5e39ce8d77b6144d3743b4fd01e4 Mon Sep 17 00:00:00 2001
From: Jose Angel Hernao <joseangelhernao@gmail.com>
Date: Mon, 6 Dec 2021 16:48:59 -0600
Subject: [PATCH 2/2] Add string clustering tests

---
 tests/creators/creator_stringclustering.py |   52 +-
 tests/test_created__stringclustering.py    | 1843 ++++++++++----------
 2 files changed, 939 insertions(+), 956 deletions(-)

diff --git a/tests/creators/creator_stringclustering.py b/tests/creators/creator_stringclustering.py
index 8a63d1f66..45430904b 100644
--- a/tests/creators/creator_stringclustering.py
+++ b/tests/creators/creator_stringclustering.py
@@ -19,10 +19,10 @@ def create():
         ('Date Type'): [datetime.datetime(2016, 9, 10), datetime.datetime(2015, 8, 10), datetime.datetime(2014, 6, 24), datetime.datetime(2013, 6, 24), datetime.datetime(2012, 5, 10), datetime.datetime(2011, 4, 10)],
         ('age', 'int'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000],
         ('function', 'string'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'],
-        ('names', 'str'): ['Optimus', 'bumbl#ebéé  ', 'ironhide&', 'Jazz', 'Megatron', 'Metroplex_)^$'],
+        ('names', 'str'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'],
         ('timestamp', 'time'): [datetime.datetime(2014, 6, 24, 0, 0), datetime.datetime(2014, 6, 24, 0, 0), datetime.datetime(2014, 6, 24, 0, 0), datetime.datetime(2014, 6, 24, 0, 0), datetime.datetime(2014, 6, 24, 0, 0), datetime.datetime(2014, 6, 24, 0, 0)],
         ('weight(t)', 'float'): [4.3, 2.0, 4.0, 1.8, 5.7, None]
-    })
+        })
 
     t = TestCreator(op, df, name="stringclustering", configs=default_configs)
 
@@ -62,6 +62,54 @@ def create():
     t.create(method="string_clustering", variant="multiple_soundex", cols=["NullType","Cybertronian","timestamp"], algorithm="soundex")
     t.create(method="string_clustering", variant="multiple_levenshtein", cols=["NullType","Cybertronian","timestamp"], algorithm="levenshtein")
 
+    t.create(method="cols.fingerprint", variant="all", cols="*")
+    t.create(method="cols.fingerprint", variant="string", cols=["names"])
+    t.create(method="cols.fingerprint", variant="numeric", cols=["rank"], output_cols=["rk"])
+    t.create(method="cols.fingerprint", variant="multiple", cols=["NullType","Cybertronian","timestamp"], output_cols=["nt","ct","ts"])
+
+    t.create(method="cols.pos", variant="single", cols=["names"])
+    t.create(method="cols.pos", variant="multiple", cols=["date arrival","japanese name","last date seen"], output_cols=["da","jn","lds"])
+
+    t.create(method="cols.ngrams", variant="single", cols=["names"])
+    t.create(method="cols.ngrams", variant="multiple", cols=["date arrival","japanese name","last date seen"], n_size=1, output_cols=["da","jn","lds"])
+
+    t.create(method="cols.ngram_fingerprint", variant="all", cols="*")
+    t.create(method="cols.ngram_fingerprint", variant="string", cols=["function(binary)"], n_size=25)
+    t.create(method="cols.ngram_fingerprint", variant="numeric", cols=["rank"], output_cols=["rk"])
+    t.create(method="cols.ngram_fingerprint", variant="multiple", cols=["NullType","Cybertronian","timestamp"], n_size=4, output_cols=["nt","ct","ts"])
+
+    t.create(method="cols.metaphone", variant="all", cols="*")
+    t.create(method="cols.metaphone", variant="string", cols=["names"])
+    t.create(method="cols.metaphone", variant="numeric", cols=["rank"], output_cols=["rk"])
+    t.create(method="cols.metaphone", variant="multiple", cols=["NullType","Cybertronian","timestamp"], output_cols=["nt","ct","ts"])
+
+    t.create(method="cols.nysiis", variant="all", cols="*")
+    t.create(method="cols.nysiis", variant="string", cols=["names"])
+    t.create(method="cols.nysiis", variant="numeric", cols=["rank"], output_cols=["rk"])
+    t.create(method="cols.nysiis", variant="multiple", cols=["NullType","Cybertronian","timestamp"], output_cols=["nt","ct","ts"])
+
+    t.create(method="cols.match_rating_codex", variant="all", cols="*")
+    t.create(method="cols.match_rating_codex", variant="string", cols=["names"])
+    t.create(method="cols.match_rating_codex", variant="numeric", cols=["rank"], output_cols=["rk"])
+    t.create(method="cols.match_rating_codex", variant="multiple", cols=["NullType","Cybertronian","timestamp"], output_cols=["nt","ct","ts"])
+
+    t.create(method="cols.double_metaphone", variant="all", cols="*")
+    t.create(method="cols.double_metaphone", variant="string", cols=["names"])
+    t.create(method="cols.double_metaphone", variant="numeric", cols=["rank"], output_cols=["rk"])
+    t.create(method="cols.double_metaphone", variant="multiple", cols=["NullType","Cybertronian","timestamp"], output_cols=["nt","ct","ts"])
+
+    t.create(method="cols.soundex", variant="all", cols="*")
+    t.create(method="cols.soundex", variant="string", cols=["names"])
+    t.create(method="cols.soundex", variant="numeric", cols=["rank"], output_cols=["rk"])
+    t.create(method="cols.soundex", variant="multiple", cols=["NullType","Cybertronian","timestamp"], output_cols=["nt","ct","ts"])
+
+    t.create(method="cols.levenshtein", variant="all_value", cols="*", value=["1a#-s","ERR","d2e","0","[]","''","","1","lu","2016","5000000","aeiou","abc#&^","2014-06-23","nan."])
+    t.create(method="cols.levenshtein", variant="all_col", cols="*", other_cols=['date arrival','weight(t)','age','height(ft)','japanese name','rank','last date seen','names','last position seen','Cybertronian','NullType','Date Type','function(binary)','function','timestamp'])
+    t.create(method="cols.levenshtein", variant="single_value", cols=["names"], value="prime", output_cols="nms")
+    t.create(method="cols.levenshtein", variant="single_col", cols=["rank"], other_cols=["weight(t)"])
+    t.create(method="cols.levenshtein", variant="multiple_value", cols=["last position seen","age","japanese name"], value=["10005","000","['Bumble']"])
+    t.create(method="cols.levenshtein", variant="multiple_col", cols=["NullType","Cybertronian","timestamp"], other_cols=["height(ft)","function","Date Type"], output_cols=["nt-ht","ct-ft","ts-dt"])
+
     t.run()
 
 create()
\ No newline at end of file
diff --git a/tests/test_created__stringclustering.py b/tests/test_created__stringclustering.py
index 4542e89a6..f48a5093f 100644
--- a/tests/test_created__stringclustering.py
+++ b/tests/test_created__stringclustering.py
@@ -1,4 +1,5 @@
 import datetime
+import numpy as np
 from optimus.tests.base import TestBase
 from optimus.helpers.json import json_encoding
 from optimus.helpers.functions import deep_sort, df_dicts_equal, results_equal
@@ -8,408 +9,480 @@ def Timestamp(t):
     return datetime.datetime.strptime(t, "%Y-%m-%d %H:%M:%S")
 
 
+NaT = np.datetime64('NaT')
 nan = float("nan")
 inf = float("inf")
 
 
 class TestStringclusteringPandas(TestBase):
     config = {'engine': 'pandas'}
-    dict = {('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'ironhide&', 'Jazz', 'Megatron', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}
+    dict = {('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}
     maxDiff = None
 
+    def test_cols_double_metaphone_all(self):
+        df = self.df.copy()
+        result = df.cols.double_metaphone(cols='*')
+        expected = self.create_dataframe(data={('NullType', 'object'): [('NN', ''), ('NN', ''), ('NN', ''), ('NN', ''), ('NN', ''), ('NN', '')], ('date arrival', 'object'): [('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', '')], ('height(ft)', 'object'): [('', ''), ('', ''), ('', ''), ('', ''), ('NN', ''), ('', '')], ('last date seen', 'object'): [('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', '')], ('last position seen', 'object'): [('', ''), ('', ''), ('', ''), ('', ''), ('NN', ''), ('NN', '')], ('rank', 'object'): [('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', '')], ('Cybertronian', 'object'): [('TR', ''), ('TR', ''), ('TR', ''), ('TR', ''), ('TR', ''), ('FLS', '')], ('Date Type', 'object'): [('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', '')], ('age', 'object'): [('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', '')], ('function', 'object'): [('LTR', ''), ('ASPNJ', 'ASPNK'), ('SKRT', ''), ('FRSTLTNNT', ''), ('NN', ''), ('PTLSTXN', '')], ('names', 'object'): [('APTMS', ''), ('PMPLLP', ''), ('MTRPLKS', ''), ('PMPLP', ''), ('MTRPPLKS', ''), ('MTRPLKSKSKSKSKS', '')], ('timestamp', 'object'): [('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', '')], ('weight(t)', 'object'): [('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('NN', '')]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_double_metaphone_multiple(self):
+        df = self.df.copy()
+        result = df.cols.double_metaphone(cols=['NullType', 'Cybertronian', 'timestamp'], output_cols=['nt', 'ct', 'ts'])
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('nt', 'object'): [('NN', ''), ('NN', ''), ('NN', ''), ('NN', ''), ('NN', ''), ('NN', '')], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('ct', 'object'): [('TR', ''), ('TR', ''), ('TR', ''), ('TR', ''), ('TR', ''), ('FLS', '')], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('ts', 'object'): [('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', '')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_double_metaphone_numeric(self):
+        df = self.df.copy()
+        result = df.cols.double_metaphone(cols=['rank'], output_cols=['rk'])
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('rk', 'object'): [('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', '')], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_double_metaphone_string(self):
+        df = self.df.copy()
+        result = df.cols.double_metaphone(cols=['names'])
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): [('APTMS', ''), ('PMPLLP', ''), ('MTRPLKS', ''), ('PMPLP', ''), ('MTRPPLKS', ''), ('MTRPLKSKSKSKSKS', '')], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_fingerprint_all(self):
+        df = self.df.copy()
+        result = df.cols.fingerprint(cols='*')
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_fingerprint_multiple(self):
+        df = self.df.copy()
+        result = df.cols.fingerprint(cols=['NullType', 'Cybertronian', 'timestamp'], output_cols=['nt', 'ct', 'ts'])
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_fingerprint_numeric(self):
+        df = self.df.copy()
+        result = df.cols.fingerprint(cols=['rank'], output_cols=['rk'])
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_fingerprint_string(self):
+        df = self.df.copy()
+        result = df.cols.fingerprint(cols=['names'])
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_levenshtein_all_col(self):
+        df = self.df.copy()
+        result = df.cols.levenshtein(cols='*', other_cols=['date arrival', 'weight(t)', 'age', 'height(ft)', 'japanese name', 'rank', 'last date seen', 'names', 'last position seen', 'Cybertronian', 'NullType', 'Date Type', 'function(binary)', 'function', 'timestamp'])
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_levenshtein_all_value(self):
+        df = self.df.copy()
+        result = df.cols.levenshtein(cols='*', value=['1a#-s', 'ERR', 'd2e', '0', '[]', "''", '', '1', 'lu', '2016', '5000000', 'aeiou', 'abc#&^', '2014-06-23', 'nan.'])
+        expected = self.create_dataframe(data={('NullType', 'int64'): [5, 5, 5, 5, 5, 5], ('date arrival', 'int64'): [10, 10, 10, 10, 10, 10], ('height(ft)', 'int64'): [4, 4, 4, 4, 3, 5], ('last date seen', 'int64'): [9, 9, 9, 9, 9, 9], ('last position seen', 'int64'): [20, 20, 21, 21, 4, 4], ('rank', 'int64'): [2, 2, 2, 2, 2, 2], ('Cybertronian', 'int64'): [4, 4, 4, 4, 4, 5], ('Date Type', 'int64'): [9, 9, 9, 9, 9, 9], ('age', 'int64'): [7, 7, 7, 7, 7, 7], ('function', 'int64'): [6, 9, 8, 16, 4, 14], ('names', 'int64'): [7, 12, 9, 9, 11, 13], ('timestamp', 'int64'): [10, 10, 10, 10, 10, 10], ('weight(t)', 'int64'): [6, 6, 6, 6, 6, 6]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_levenshtein_multiple_col(self):
+        df = self.df.copy()
+        result = df.cols.levenshtein(cols=['NullType', 'Cybertronian', 'timestamp'], other_cols=['height(ft)', 'function', 'Date Type'], output_cols=['nt-ht', 'ct-ft', 'ts-dt'])
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ("NullType_['nt-ht', 'ct-ft', 'ts-dt']", 'int64'): [5, 4, 4, 4, 3, 5], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ("Cybertronian_['nt-ht', 'ct-ft', 'ts-dt']", 'int64'): [5, 8, 7, 13, 3, 12], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ("timestamp_['nt-ht', 'ct-ft', 'ts-dt']", 'int64'): [4, 4, 0, 1, 4, 4], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_levenshtein_multiple_value(self):
+        df = self.df.copy()
+        result = df.cols.levenshtein(cols=['last position seen', 'age', 'japanese name'], value=['10005', '000', "['Bumble']"])
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_levenshtein_single_col(self):
+        df = self.df.copy()
+        result = df.cols.levenshtein(cols=['rank'], other_cols=['weight(t)'])
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [3, 3, 3, 2, 3, 3], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_levenshtein_single_value(self):
+        df = self.df.copy()
+        result = df.cols.levenshtein(cols=['names'], value='prime', output_cols='nms')
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('nms', 'int64'): [4, 11, 7, 8, 9, 11], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_match_rating_codex_all(self):
+        df = self.df.copy()
+        result = df.cols.match_rating_codex(cols='*')
+        expected = self.create_dataframe(data={('NullType', 'object'): ['N', 'N', 'N', 'N', 'N', 'N'], ('date arrival', 'object'): ['198/10', '198/10', '198/10', '198/10', '198/10', '198/10'], ('height(ft)', 'object'): ['-28.0', '17.0', '26.0', '13.0', 'N', '30.0'], ('last date seen', 'object'): ['201/10', '201/10', '201/10', '201/10', '201/10', '201/10'], ('last position seen', 'object'): ['19.201', '10.534', '37.356', '3.6153', 'N', 'N'], ('rank', 'object'): ['10', '7', '7', '8', '10', '8'], ('Cybertronian', 'object'): ['TR', 'TR', 'TR', 'TR', 'TR', 'FLS'], ('Date Type', 'object'): ['201-10', '201-10', '201-24', '201-24', '201-10', '201-10'], ('age', 'object'): ['50', '50', '50', '50', '50', '50'], ('function', 'object'): ['LDR', 'ESPNG', 'SCRTY', 'FRSTNT', 'N', 'BTLSTN'], ('names', 'object'): ['OPTMS', 'BMB#BÉ', 'MTRPLX', 'BMBLB', 'MÉTL-X', 'MTR)^$'], ('timestamp', 'object'): ['201-24', '201-24', '201-24', '201-24', '201-24', '201-24'], ('weight(t)', 'object'): ['4.3', '2.0', '4.0', '1.8', '5.7', 'N']}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_match_rating_codex_multiple(self):
+        df = self.df.copy()
+        result = df.cols.match_rating_codex(cols=['NullType', 'Cybertronian', 'timestamp'], output_cols=['nt', 'ct', 'ts'])
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('nt', 'object'): ['N', 'N', 'N', 'N', 'N', 'N'], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('ct', 'object'): ['TR', 'TR', 'TR', 'TR', 'TR', 'FLS'], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('ts', 'object'): ['201-24', '201-24', '201-24', '201-24', '201-24', '201-24'], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_match_rating_codex_numeric(self):
+        df = self.df.copy()
+        result = df.cols.match_rating_codex(cols=['rank'], output_cols=['rk'])
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('rk', 'object'): ['10', '7', '7', '8', '10', '8'], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_match_rating_codex_string(self):
+        df = self.df.copy()
+        result = df.cols.match_rating_codex(cols=['names'])
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['OPTMS', 'BMB#BÉ', 'MTRPLX', 'BMBLB', 'MÉTL-X', 'MTR)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_metaphone_all(self):
+        df = self.df.copy()
+        result = df.cols.metaphone(cols='*')
+        expected = self.create_dataframe(data={('NullType', 'object'): ['NN', 'NN', 'NN', 'NN', 'NN', 'NN'], ('date arrival', 'object'): ['', '', '', '', '', ''], ('height(ft)', 'object'): ['', '', '', '', 'NN', ''], ('last date seen', 'object'): ['', '', '', '', '', ''], ('last position seen', 'object'): ['', '', '', '', 'NN', 'NN'], ('rank', 'object'): ['', '', '', '', '', ''], ('Cybertronian', 'object'): ['TR', 'TR', 'TR', 'TR', 'TR', 'FLS'], ('Date Type', 'object'): ['', '', '', '', '', ''], ('age', 'object'): ['', '', '', '', '', ''], ('function', 'object'): ['LTR', 'ESPNJ', 'SKRT', 'FRST LTNNT', 'NN', 'BTL STXN'], ('names', 'object'): ['OPTMS', 'BMBLB ', 'MTRPLKS', 'BMBLB', 'MTRP LKS', 'MTRPLKS'], ('timestamp', 'object'): ['', '', '', '', '', ''], ('weight(t)', 'object'): ['', '', '', '', '', 'NN']}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_metaphone_multiple(self):
+        df = self.df.copy()
+        result = df.cols.metaphone(cols=['NullType', 'Cybertronian', 'timestamp'], output_cols=['nt', 'ct', 'ts'])
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('nt', 'object'): ['NN', 'NN', 'NN', 'NN', 'NN', 'NN'], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('ct', 'object'): ['TR', 'TR', 'TR', 'TR', 'TR', 'FLS'], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('ts', 'object'): ['', '', '', '', '', ''], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_metaphone_numeric(self):
+        df = self.df.copy()
+        result = df.cols.metaphone(cols=['rank'], output_cols=['rk'])
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('rk', 'object'): ['', '', '', '', '', ''], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_cols_metaphone_string(self):  # cols.metaphone applied to the single string column 'names'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.metaphone(cols=['names'])  # no output_cols given; the expected frame below holds the codes in 'names' itself
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['OPTMS', 'BMBLB ', 'MTRPLKS', 'BMBLB', 'MTRP LKS', 'MTRPLKS'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # compare the full result with expected; NOTE(review): decimal/assertion semantics come from the project's equals() and are not visible here
+
+    def test_cols_ngram_fingerprint_all(self):  # cols.ngram_fingerprint called with cols='*'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.ngram_fingerprint(cols='*')  # '*' presumably selects every column — TODO confirm
+        # NOTE: placeholder — self.dict does not represent a correct output of this operation. TODO: replace with the real expected value.
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # NOTE(review): compares against the placeholder above, so ngram_fingerprint output is not really checked yet
+
+    def test_cols_ngram_fingerprint_multiple(self):  # cols.ngram_fingerprint on three columns with n_size=4 and explicit output columns
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.ngram_fingerprint(cols=['NullType', 'Cybertronian', 'timestamp'], n_size=4, output_cols=['nt', 'ct', 'ts'])  # one output name per input column
+        # NOTE: placeholder — self.dict does not represent a correct output of this operation. TODO: replace with the real expected value.
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # NOTE(review): compares against the placeholder above, so ngram_fingerprint output is not really checked yet
+
+    def test_cols_ngram_fingerprint_numeric(self):  # cols.ngram_fingerprint on the numeric column 'rank', written to 'rk'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.ngram_fingerprint(cols=['rank'], output_cols=['rk'])  # n_size is left at the method's default
+        # NOTE: placeholder — self.dict does not represent a correct output of this operation. TODO: replace with the real expected value.
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # NOTE(review): compares against the placeholder above, so ngram_fingerprint output is not really checked yet
+
+    def test_cols_ngram_fingerprint_string(self):  # cols.ngram_fingerprint on one column with n_size=25
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.ngram_fingerprint(cols=['function(binary)'], n_size=25)  # NOTE(review): 'function(binary)' is not among the columns of the expected frames in sibling tests (they list 'function') — confirm it exists in self.df
+        # NOTE: placeholder — self.dict does not represent a correct output of this operation. TODO: replace with the real expected value.
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # NOTE(review): compares against the placeholder above, so ngram_fingerprint output is not really checked yet
+
+    def test_cols_ngrams_multiple(self):  # cols.ngrams on three columns with n_size=1 and explicit output columns
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.ngrams(cols=['date arrival', 'japanese name', 'last date seen'], n_size=1, output_cols=['da', 'jn', 'lds'])  # NOTE(review): 'japanese name' is not among the columns of the expected frames in sibling tests — confirm it exists in self.df
+        # NOTE: placeholder — self.dict does not represent a correct output of this operation. TODO: replace with the real expected value.
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # NOTE(review): compares against the placeholder above, so ngrams output is not really checked yet
+
+    def test_cols_ngrams_single(self):  # cols.ngrams applied to the single column 'names'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.ngrams(cols=['names'])  # n_size not passed; the expected frame below holds lists of 2-character grams in 'names'
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): [['Op', 'pt', 'ti', 'im', 'mu', 'us'], ['bu', 'um', 'mb', 'bl', 'l#', '#e', 'eb', 'bé', 'éé', 'é ', '  '], ['Me', 'et', 'tr', 'ro', 'op', 'pl', 'le', 'ex'], ['bu', 'um', 'mb', 'bl', 'le', 'eb', 'be', 'ee'], ['mé', 'ét', 'tr', 'ro', 'op', 'p´', '´l', 'le', 'e-', '-x'], ['Me', 'et', 'tr', 'ro', 'op', 'pl', 'le', 'ex', 'x_', '_)', ')^', '^$']], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # compare the full result with expected; NOTE(review): decimal/assertion semantics come from the project's equals() and are not visible here
+
+    def test_cols_nysiis_all(self):  # cols.nysiis called with cols='*'; the expected frame has every column as 'object'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.nysiis(cols='*')  # NOTE(review): expected values such as '30.0' (height 300.0 in sibling tests) and '201/04/10' look like repeated characters being collapsed — confirm this is intended
+        expected = self.create_dataframe(data={('NullType', 'object'): ['NAN', 'NAN', 'NAN', 'NAN', 'NAN', 'NAN'], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'object'): ['-28.0', '17.0', '26.0', '13.0', 'NAN', '30.0'], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '201/04/10'], ('last position seen', 'object'): ['19.42735,-9.201', '10.642707,-71.612534', '37.789563,-12.40356', '3.6706,-17.84153', 'NAN', 'NAN'], ('rank', 'object'): ['10', '7', '7', '8', '10', '8'], ('Cybertronian', 'object'): ['TR', 'TR', 'TR', 'TR', 'TR', 'FALS'], ('Date Type', 'object'): ['2016-09-10', '2015-08-10', '2014-06-24', '2013-06-24', '2012-05-10', '201-04-10'], ('age', 'object'): ['50', '50', '50', '50', '50', '50'], ('function', 'object'): ['LADAR', 'ESPANAG', 'SACARATY', 'FARST', 'NAN', 'BATL'], ('names', 'object'): ['OPTAN', 'BANBL#ABÉ', 'MATRAPLAX', 'BANBLABY', 'MÉTRAP´LA-X', 'MATRAPLAX_)^$'], ('timestamp', 'object'): ['2014-06-24', '2014-06-24', '2014-06-24', '2014-06-24', '2014-06-24', '2014-06-24'], ('weight(t)', 'object'): ['4.3', '2.0', '4.0', '1.8', '5.7', 'NAN']}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # compare the full result with expected; NOTE(review): decimal/assertion semantics come from the project's equals() and are not visible here
+
+    def test_cols_nysiis_multiple(self):  # cols.nysiis on three columns of different dtypes with explicit output columns
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.nysiis(cols=['NullType', 'Cybertronian', 'timestamp'], output_cols=['nt', 'ct', 'ts'])  # expected frame keeps the sources unchanged and lists 'nt'/'ct'/'ts' right after their source columns
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('nt', 'object'): ['NAN', 'NAN', 'NAN', 'NAN', 'NAN', 'NAN'], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('ct', 'object'): ['TR', 'TR', 'TR', 'TR', 'TR', 'FALS'], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('ts', 'object'): ['2014-06-24', '2014-06-24', '2014-06-24', '2014-06-24', '2014-06-24', '2014-06-24'], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # compare the full result with expected; NOTE(review): decimal/assertion semantics come from the project's equals() and are not visible here
+
+    def test_cols_nysiis_numeric(self):  # cols.nysiis on the numeric column 'rank', written to 'rk'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.nysiis(cols=['rank'], output_cols=['rk'])  # expected frame keeps 'rank' as int64 and holds the values as strings in 'rk'
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('rk', 'object'): ['10', '7', '7', '8', '10', '8'], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # compare the full result with expected; NOTE(review): decimal/assertion semantics come from the project's equals() and are not visible here
+
+    def test_cols_nysiis_string(self):  # cols.nysiis applied to the single string column 'names'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.nysiis(cols=['names'])  # no output_cols given; the expected frame below holds the codes in 'names' itself
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['OPTAN', 'BANBL#ABÉ', 'MATRAPLAX', 'BANBLABY', 'MÉTRAP´LA-X', 'MATRAPLAX_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # compare the full result with expected; NOTE(review): decimal/assertion semantics come from the project's equals() and are not visible here
+
+    def test_cols_pos_multiple(self):  # cols.pos on three columns with explicit output columns
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.pos(cols=['date arrival', 'japanese name', 'last date seen'], output_cols=['da', 'jn', 'lds'])  # NOTE(review): 'japanese name' is not among the columns of the expected frames in sibling tests — confirm it exists in self.df
+        # NOTE: placeholder — self.dict does not represent a correct output of this operation. TODO: replace with the real expected value.
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # NOTE(review): compares against the placeholder above, so pos output is not really checked yet
+
+    def test_cols_pos_single(self):  # cols.pos applied to the single column 'names'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.pos(cols=['names'])  # the expected frame below holds a list of (token, tag) tuples per row in 'names'
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): [[('Optimus', 'NN')], [('bumbl#ebéé', 'NN')], [('Metroplex', 'NNP')], [('bumblebee', 'NN')], [('métrop´le-x', 'NN')], [('Metroplex_)^$', 'NN')]], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # compare the full result with expected; NOTE(review): decimal/assertion semantics come from the project's equals() and are not visible here
+
+    def test_cols_soundex_all(self):  # cols.soundex called with cols='*'; the expected frame has every column as 'object'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.soundex(cols='*')  # every expected value below is a 4-character code string
+        expected = self.create_dataframe(data={('NullType', 'object'): ['N500', 'N500', 'N500', 'N500', 'N500', 'N500'], ('date arrival', 'object'): ['1000', '1000', '1000', '1000', '1000', '1000'], ('height(ft)', 'object'): ['-000', '1000', '2000', '1000', 'N500', '3000'], ('last date seen', 'object'): ['2000', '2000', '2000', '2000', '2000', '2000'], ('last position seen', 'object'): ['1000', '1000', '3000', '3000', 'N500', 'N500'], ('rank', 'object'): ['1000', '7000', '7000', '8000', '1000', '8000'], ('Cybertronian', 'object'): ['T600', 'T600', 'T600', 'T600', 'T600', 'F420'], ('Date Type', 'object'): ['2000', '2000', '2000', '2000', '2000', '2000'], ('age', 'object'): ['5000', '5000', '5000', '5000', '5000', '5000'], ('function', 'object'): ['L360', 'E215', 'S263', 'F623', 'N500', 'B342'], ('names', 'object'): ['O135', 'B514', 'M361', 'B514', 'M361', 'M361'], ('timestamp', 'object'): ['2000', '2000', '2000', '2000', '2000', '2000'], ('weight(t)', 'object'): ['4000', '2000', '4000', '1000', '5000', 'N500']}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # compare the full result with expected; NOTE(review): decimal/assertion semantics come from the project's equals() and are not visible here
+
+    def test_cols_soundex_multiple(self):  # cols.soundex on three columns of different dtypes with explicit output columns
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.soundex(cols=['NullType', 'Cybertronian', 'timestamp'], output_cols=['nt', 'ct', 'ts'])  # expected frame keeps the sources unchanged and lists 'nt'/'ct'/'ts' right after their source columns
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('nt', 'object'): ['N500', 'N500', 'N500', 'N500', 'N500', 'N500'], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('ct', 'object'): ['T600', 'T600', 'T600', 'T600', 'T600', 'F420'], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('ts', 'object'): ['2000', '2000', '2000', '2000', '2000', '2000'], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # compare the full result with expected; NOTE(review): decimal/assertion semantics come from the project's equals() and are not visible here
+
+    def test_cols_soundex_numeric(self):  # cols.soundex on the numeric column 'rank', written to 'rk'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.soundex(cols=['rank'], output_cols=['rk'])  # expected frame keeps 'rank' as int64 and holds codes such as '1000'/'7000' in 'rk'
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('rk', 'object'): ['1000', '7000', '7000', '8000', '1000', '8000'], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['Optimus', 'bumbl#ebéé  ', 'Metroplex', 'bumblebee', 'métrop´le-x', 'Metroplex_)^$'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # compare the full result with expected; NOTE(review): decimal/assertion semantics come from the project's equals() and are not visible here
+
+    def test_cols_soundex_string(self):  # cols.soundex applied to the single string column 'names'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.cols.soundex(cols=['names'])  # no output_cols given; the expected frame below holds the codes in 'names' itself
+        expected = self.create_dataframe(data={('NullType', 'object'): [None, None, None, None, None, None], ('date arrival', 'object'): ['1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10', '1980/04/10'], ('height(ft)', 'float64'): [-28.0, 17.0, 26.0, 13.0, nan, 300.0], ('last date seen', 'object'): ['2016/09/10', '2015/08/10', '2014/07/10', '2013/06/10', '2012/05/10', '2011/04/10'], ('last position seen', 'object'): ['19.442735,-99.201111', '10.642707,-71.612534', '37.789563,-122.400356', '33.670666,-117.841553', None, None], ('rank', 'int64'): [10, 7, 7, 8, 10, 8], ('Cybertronian', 'bool'): [True, True, True, True, True, False], ('Date Type', 'datetime64[ns]'): [Timestamp('2016-09-10 00:00:00'), Timestamp('2015-08-10 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2013-06-24 00:00:00'), Timestamp('2012-05-10 00:00:00'), Timestamp('2011-04-10 00:00:00')], ('age', 'int64'): [5000000, 5000000, 5000000, 5000000, 5000000, 5000000], ('function', 'object'): ['Leader', 'Espionage', 'Security', 'First Lieutenant', None, 'Battle Station'], ('names', 'object'): ['O135', 'B514', 'M361', 'B514', 'M361', 'M361'], ('timestamp', 'datetime64[ns]'): [Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00'), Timestamp('2014-06-24 00:00:00')], ('weight(t)', 'float64'): [4.3, 2.0, 4.0, 1.8, 5.7, nan]}, force_data_types=True)
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # compare the full result with expected; NOTE(review): decimal/assertion semantics come from the project's equals() and are not visible here
+
     def test_string_clustering_all_double_metaphone(self):
-        df = self.df
+        df = self.df.copy()  # operate on a copy of self.df (the removed line used self.df directly)
         result = df.string_clustering(cols='*', algorithm='double_metaphone')
-        expected = { 'Cybertronian': { ('FLS', ''): { 'suggestion': False,
-                                   'suggestions': [False],
-                                   'suggestions_size': 1,
-                                   'total_count': 1},
-                    ('TR', ''): { 'suggestion': True,
-                                  'suggestions': [True],
-                                  'suggestions_size': 1,
-                                  'total_count': 5}},
-  'Date Type': { ('', ''): { 'suggestion': Timestamp('2016-09-10 00:00:00'),
-                             'suggestions': [ Timestamp('2016-09-10 00:00:00'),
-                                              Timestamp('2015-08-10 00:00:00'),
-                                              Timestamp('2014-06-24 00:00:00'),
-                                              Timestamp('2013-06-24 00:00:00'),
-                                              Timestamp('2012-05-10 00:00:00'),
-                                              Timestamp('2011-04-10 00:00:00')],
-                             'suggestions_size': 6,
-                             'total_count': 6}},
-  'NullType': { ('NN', ''): { 'suggestion': None,
-                              'suggestions': [None],
-                              'suggestions_size': 1,
-                              'total_count': 6}},
-  'age': { ('', ''): { 'suggestion': 5000000,
-                       'suggestions': [5000000],
+        # NOTE: placeholder — self.dict does not represent a correct output of this operation. TODO: replace with the real expected value.
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # NOTE(review): compares against the placeholder above; the removed version compared a literal dict via results_equal(...) — confirm result exposes equals()
+
+    def test_string_clustering_all_fingerprint(self):  # string_clustering with cols='*' and algorithm='fingerprint'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.string_clustering(cols='*', algorithm='fingerprint')  # '*' presumably selects every column — TODO confirm
+        # NOTE: placeholder — self.dict does not represent a correct output of this operation. TODO: replace with the real expected value.
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # NOTE(review): compares against the placeholder above, so the clustering output is not really checked yet
+
+    def test_string_clustering_all_levenshtein(self):  # string_clustering with cols='*' and algorithm='levenshtein'
+        df = self.df.copy()  # operate on a copy of self.df
+        result = df.string_clustering(cols='*', algorithm='levenshtein')  # '*' presumably selects every column — TODO confirm
+        # NOTE: placeholder — self.dict does not represent a correct output of this operation. TODO: replace with the real expected value.
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))  # NOTE(review): compares against the placeholder above, so the clustering output is not really checked yet
+
+    def test_string_clustering_all_match_rating_codex(self):
+        df = self.df.copy()
+        result = df.string_clustering(cols='*', algorithm='match_rating_codex')
+        expected = { 'Cybertronian': { 'FLS': { 'suggestion': 'False',
+                             'suggestions': ['False'],
+                             'suggestions_size': 1,
+                             'total_count': 1},
+                    'TR': { 'suggestion': 'True',
+                            'suggestions': ['True'],
+                            'suggestions_size': 1,
+                            'total_count': 5}},
+  'Date Type': { '201-10': { 'suggestion': '2016-09-10',
+                             'suggestions': [ '2016-09-10',
+                                              '2015-08-10',
+                                              '2012-05-10',
+                                              '2011-04-10'],
+                             'suggestions_size': 4,
+                             'total_count': 4},
+                 '201-24': { 'suggestion': '2014-06-24',
+                             'suggestions': ['2014-06-24', '2013-06-24'],
+                             'suggestions_size': 2,
+                             'total_count': 2}},
+  'NullType': { 'N': { 'suggestion': 'None',
+                       'suggestions': ['None'],
                        'suggestions_size': 1,
                        'total_count': 6}},
-  'date arrival': { ('', ''): { 'suggestion': '1980/04/10',
+  'age': { '50': { 'suggestion': '5000000',
+                   'suggestions': ['5000000'],
+                   'suggestions_size': 1,
+                   'total_count': 6}},
+  'date arrival': { '198/10': { 'suggestion': '1980/04/10',
                                 'suggestions': ['1980/04/10'],
                                 'suggestions_size': 1,
                                 'total_count': 6}},
-  'function': { ('ASPNJ', 'ASPNK'): { 'suggestion': 'Espionage',
-                                      'suggestions': ['Espionage'],
-                                      'suggestions_size': 1,
-                                      'total_count': 1},
-                ('FRSTLTNNT', ''): { 'suggestion': 'First Lieutenant',
-                                     'suggestions': ['First Lieutenant'],
-                                     'suggestions_size': 1,
-                                     'total_count': 1},
-                ('LTR', ''): { 'suggestion': 'Leader',
-                               'suggestions': ['Leader'],
-                               'suggestions_size': 1,
-                               'total_count': 1},
-                ('NN', ''): { 'suggestion': None,
-                              'suggestions': [None],
-                              'suggestions_size': 1,
-                              'total_count': 1},
-                ('PTLSTXN', ''): { 'suggestion': 'Battle Station',
-                                   'suggestions': ['Battle Station'],
-                                   'suggestions_size': 1,
-                                   'total_count': 1},
-                ('SKRT', ''): { 'suggestion': 'Security',
-                                'suggestions': ['Security'],
-                                'suggestions_size': 1,
-                                'total_count': 1}},
-  'height(ft)': { ('', ''): { 'suggestion': -28.0,
-                              'suggestions': [-28.0, 17.0, 26.0, 13.0, 300.0],
-                              'suggestions_size': 5,
-                              'total_count': 5},
-                  ('NN', ''): { 'suggestion': nan,
-                                'suggestions': [nan],
-                                'suggestions_size': 1,
-                                'total_count': 1}},
-  'last date seen': { ('', ''): { 'suggestion': '2016/09/10',
-                                  'suggestions': [ '2016/09/10',
-                                                   '2015/08/10',
-                                                   '2014/07/10',
-                                                   '2013/06/10',
-                                                   '2012/05/10',
-                                                   '2011/04/10'],
-                                  'suggestions_size': 6,
-                                  'total_count': 6}},
-  'last position seen': { ('', ''): { 'suggestion': '19.442735,-99.201111',
-                                      'suggestions': [ '19.442735,-99.201111',
-                                                       '10.642707,-71.612534',
-                                                       '37.789563,-122.400356',
-                                                       '33.670666,-117.841553'],
-                                      'suggestions_size': 4,
-                                      'total_count': 4},
-                          ('NN', ''): { 'suggestion': None,
-                                        'suggestions': [None],
-                                        'suggestions_size': 1,
-                                        'total_count': 2}},
-  'names': { ('APTMS', ''): { 'suggestion': 'Optimus',
-                              'suggestions': ['Optimus'],
-                              'suggestions_size': 1,
-                              'total_count': 1},
-             ('ARNT', ''): { 'suggestion': 'ironhide&',
-                             'suggestions': ['ironhide&'],
-                             'suggestions_size': 1,
-                             'total_count': 1},
-             ('JS', 'AS'): { 'suggestion': 'Jazz',
-                             'suggestions': ['Jazz'],
-                             'suggestions_size': 1,
-                             'total_count': 1},
-             ('MKTRN', ''): { 'suggestion': 'Megatron',
-                              'suggestions': ['Megatron'],
-                              'suggestions_size': 1,
-                              'total_count': 1},
-             ('MTRPLKSKSKSKSKS', ''): { 'suggestion': 'Metroplex_)^$',
-                                        'suggestions': ['Metroplex_)^$'],
-                                        'suggestions_size': 1,
-                                        'total_count': 1},
-             ('PMPLLP', ''): { 'suggestion': 'bumbl#ebéé  ',
-                               'suggestions': ['bumbl#ebéé  '],
-                               'suggestions_size': 1,
-                               'total_count': 1}},
-  'rank': { ('', ''): { 'suggestion': 10,
-                        'suggestions': [10, 7, 8],
-                        'suggestions_size': 3,
-                        'total_count': 6}},
-  'timestamp': { ('', ''): { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                             'suggestions': [Timestamp('2014-06-24 00:00:00')],
-                             'suggestions_size': 1,
-                             'total_count': 6}},
-  'weight(t)': { ('', ''): { 'suggestion': 4.3,
-                             'suggestions': [4.3, 2.0, 4.0, 1.8, 5.7],
-                             'suggestions_size': 5,
-                             'total_count': 5},
-                 ('NN', ''): { 'suggestion': nan,
-                               'suggestions': [nan],
-                               'suggestions_size': 1,
-                               'total_count': 1}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
-
-    def test_string_clustering_all_fingerprint(self):
-        df = self.df
-        result = df.string_clustering(cols='*', algorithm='fingerprint')
-        expected = { 'Cybertronian': { 'false': { 'suggestion': False,
-                               'suggestions': [False],
-                               'suggestions_size': 1,
-                               'total_count': 1},
-                    'true': { 'suggestion': True,
-                              'suggestions': [True],
-                              'suggestions_size': 1,
-                              'total_count': 5}},
-  'Date Type': { '20110410': { 'suggestion': Timestamp('2011-04-10 00:00:00'),
-                               'suggestions': [ Timestamp('2011-04-10 00:00:00')],
-                               'suggestions_size': 1,
-                               'total_count': 1},
-                 '20120510': { 'suggestion': Timestamp('2012-05-10 00:00:00'),
-                               'suggestions': [ Timestamp('2012-05-10 00:00:00')],
-                               'suggestions_size': 1,
-                               'total_count': 1},
-                 '20130624': { 'suggestion': Timestamp('2013-06-24 00:00:00'),
-                               'suggestions': [ Timestamp('2013-06-24 00:00:00')],
-                               'suggestions_size': 1,
-                               'total_count': 1},
-                 '20140624': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                               'suggestions': [ Timestamp('2014-06-24 00:00:00')],
-                               'suggestions_size': 1,
-                               'total_count': 1},
-                 '20150810': { 'suggestion': Timestamp('2015-08-10 00:00:00'),
-                               'suggestions': [ Timestamp('2015-08-10 00:00:00')],
-                               'suggestions_size': 1,
-                               'total_count': 1},
-                 '20160910': { 'suggestion': Timestamp('2016-09-10 00:00:00'),
-                               'suggestions': [ Timestamp('2016-09-10 00:00:00')],
-                               'suggestions_size': 1,
-                               'total_count': 1}},
-  'NullType': { 'none': { 'suggestion': None,
-                          'suggestions': [None],
-                          'suggestions_size': 1,
-                          'total_count': 6}},
-  'age': { '5000000': { 'suggestion': 5000000,
-                        'suggestions': [5000000],
-                        'suggestions_size': 1,
-                        'total_count': 6}},
-  'date arrival': { '19800410': { 'suggestion': '1980/04/10',
-                                  'suggestions': ['1980/04/10'],
-                                  'suggestions_size': 1,
-                                  'total_count': 6}},
-  'function': { 'battle station': { 'suggestion': 'Battle Station',
-                                    'suggestions': ['Battle Station'],
-                                    'suggestions_size': 1,
-                                    'total_count': 1},
-                'espionage': { 'suggestion': 'Espionage',
-                               'suggestions': ['Espionage'],
-                               'suggestions_size': 1,
-                               'total_count': 1},
-                'first lieutenant': { 'suggestion': 'First Lieutenant',
-                                      'suggestions': ['First Lieutenant'],
-                                      'suggestions_size': 1,
-                                      'total_count': 1},
-                'leader': { 'suggestion': 'Leader',
-                            'suggestions': ['Leader'],
+  'function': { 'BTLSTN': { 'suggestion': 'Battle Station',
+                            'suggestions': ['Battle Station'],
                             'suggestions_size': 1,
                             'total_count': 1},
-                'none': { 'suggestion': None,
-                          'suggestions': [None],
-                          'suggestions_size': 1,
-                          'total_count': 1},
-                'security': { 'suggestion': 'Security',
-                              'suggestions': ['Security'],
-                              'suggestions_size': 1,
-                              'total_count': 1}},
-  'height(ft)': { '130': { 'suggestion': 13.0,
-                           'suggestions': [13.0],
-                           'suggestions_size': 1,
-                           'total_count': 1},
-                  '170': { 'suggestion': 17.0,
-                           'suggestions': [17.0],
-                           'suggestions_size': 1,
-                           'total_count': 1},
-                  '260': { 'suggestion': 26.0,
-                           'suggestions': [26.0],
-                           'suggestions_size': 1,
-                           'total_count': 1},
-                  '280': { 'suggestion': -28.0,
-                           'suggestions': [-28.0],
+                'ESPNG': { 'suggestion': 'Espionage',
+                           'suggestions': ['Espionage'],
                            'suggestions_size': 1,
                            'total_count': 1},
-                  '3000': { 'suggestion': 300.0,
-                            'suggestions': [300.0],
+                'FRSTNT': { 'suggestion': 'First Lieutenant',
+                            'suggestions': ['First Lieutenant'],
                             'suggestions_size': 1,
                             'total_count': 1},
-                  'nan': { 'suggestion': nan,
-                           'suggestions': [nan],
+                'LDR': { 'suggestion': 'Leader',
+                         'suggestions': ['Leader'],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                'N': { 'suggestion': 'None',
+                       'suggestions': ['None'],
+                       'suggestions_size': 1,
+                       'total_count': 1},
+                'SCRTY': { 'suggestion': 'Security',
+                           'suggestions': ['Security'],
                            'suggestions_size': 1,
                            'total_count': 1}},
-  'last date seen': { '20110410': { 'suggestion': '2011/04/10',
-                                    'suggestions': ['2011/04/10'],
-                                    'suggestions_size': 1,
-                                    'total_count': 1},
-                      '20120510': { 'suggestion': '2012/05/10',
-                                    'suggestions': ['2012/05/10'],
-                                    'suggestions_size': 1,
-                                    'total_count': 1},
-                      '20130610': { 'suggestion': '2013/06/10',
-                                    'suggestions': ['2013/06/10'],
-                                    'suggestions_size': 1,
-                                    'total_count': 1},
-                      '20140710': { 'suggestion': '2014/07/10',
-                                    'suggestions': ['2014/07/10'],
-                                    'suggestions_size': 1,
-                                    'total_count': 1},
-                      '20150810': { 'suggestion': '2015/08/10',
-                                    'suggestions': ['2015/08/10'],
-                                    'suggestions_size': 1,
-                                    'total_count': 1},
-                      '20160910': { 'suggestion': '2016/09/10',
-                                    'suggestions': ['2016/09/10'],
-                                    'suggestions_size': 1,
-                                    'total_count': 1}},
-  'last position seen': { '1064270771612534': { 'suggestion': '10.642707,-71.612534',
-                                                'suggestions': [ '10.642707,-71.612534'],
-                                                'suggestions_size': 1,
-                                                'total_count': 1},
-                          '1944273599201111': { 'suggestion': '19.442735,-99.201111',
-                                                'suggestions': [ '19.442735,-99.201111'],
-                                                'suggestions_size': 1,
-                                                'total_count': 1},
-                          '33670666117841553': { 'suggestion': '33.670666,-117.841553',
-                                                 'suggestions': [ '33.670666,-117.841553'],
-                                                 'suggestions_size': 1,
-                                                 'total_count': 1},
-                          '37789563122400356': { 'suggestion': '37.789563,-122.400356',
-                                                 'suggestions': [ '37.789563,-122.400356'],
-                                                 'suggestions_size': 1,
-                                                 'total_count': 1},
-                          'none': { 'suggestion': None,
-                                    'suggestions': [None],
-                                    'suggestions_size': 1,
-                                    'total_count': 2}},
-  'names': { 'bumblebee': { 'suggestion': 'bumbl#ebéé  ',
-                            'suggestions': ['bumbl#ebéé  '],
+  'height(ft)': { '-28.0': { 'suggestion': '-28.0',
+                             'suggestions': ['-28.0'],
+                             'suggestions_size': 1,
+                             'total_count': 1},
+                  '13.0': { 'suggestion': '13.0',
+                            'suggestions': ['13.0'],
                             'suggestions_size': 1,
                             'total_count': 1},
-             'ironhide': { 'suggestion': 'ironhide&',
-                           'suggestions': ['ironhide&'],
-                           'suggestions_size': 1,
-                           'total_count': 1},
-             'jazz': { 'suggestion': 'Jazz',
-                       'suggestions': ['Jazz'],
-                       'suggestions_size': 1,
-                       'total_count': 1},
-             'megatron': { 'suggestion': 'Megatron',
-                           'suggestions': ['Megatron'],
-                           'suggestions_size': 1,
-                           'total_count': 1},
-             'metroplex': { 'suggestion': 'Metroplex_)^$',
-                            'suggestions': ['Metroplex_)^$'],
+                  '17.0': { 'suggestion': '17.0',
+                            'suggestions': ['17.0'],
                             'suggestions_size': 1,
                             'total_count': 1},
-             'optimus': { 'suggestion': 'Optimus',
-                          'suggestions': ['Optimus'],
-                          'suggestions_size': 1,
-                          'total_count': 1}},
-  'rank': { '10': { 'suggestion': 10,
-                    'suggestions': [10],
-                    'suggestions_size': 1,
-                    'total_count': 2},
-            '7': { 'suggestion': 7,
-                   'suggestions': [7],
-                   'suggestions_size': 1,
-                   'total_count': 2},
-            '8': { 'suggestion': 8,
-                   'suggestions': [8],
-                   'suggestions_size': 1,
-                   'total_count': 2}},
-  'timestamp': { '20140624': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                               'suggestions': [ Timestamp('2014-06-24 00:00:00')],
-                               'suggestions_size': 1,
-                               'total_count': 6}},
-  'weight(t)': { '18': { 'suggestion': 1.8,
-                         'suggestions': [1.8],
+                  '26.0': { 'suggestion': '26.0',
+                            'suggestions': ['26.0'],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                  '30.0': { 'suggestion': '300.0',
+                            'suggestions': ['300.0'],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+                  'N': { 'suggestion': 'nan',
+                         'suggestions': ['nan'],
                          'suggestions_size': 1,
-                         'total_count': 1},
-                 '20': { 'suggestion': 2.0,
-                         'suggestions': [2.0],
+                         'total_count': 1}},
+  'last date seen': { '201/10': { 'suggestion': '2016/09/10',
+                                  'suggestions': [ '2016/09/10',
+                                                   '2015/08/10',
+                                                   '2014/07/10',
+                                                   '2013/06/10',
+                                                   '2012/05/10',
+                                                   '2011/04/10'],
+                                  'suggestions_size': 6,
+                                  'total_count': 6}},
+  'last position seen': { '10.534': { 'suggestion': '10.642707,-71.612534',
+                                      'suggestions': ['10.642707,-71.612534'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1},
+                          '19.201': { 'suggestion': '19.442735,-99.201111',
+                                      'suggestions': ['19.442735,-99.201111'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1},
+                          '3.6153': { 'suggestion': '33.670666,-117.841553',
+                                      'suggestions': ['33.670666,-117.841553'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1},
+                          '37.356': { 'suggestion': '37.789563,-122.400356',
+                                      'suggestions': ['37.789563,-122.400356'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1},
+                          'N': { 'suggestion': 'None',
+                                 'suggestions': ['None'],
+                                 'suggestions_size': 1,
+                                 'total_count': 2}},
+  'names': { 'BMB#BÉ': { 'suggestion': 'bumbl#ebéé  ',
+                         'suggestions': ['bumbl#ebéé  '],
                          'suggestions_size': 1,
                          'total_count': 1},
-                 '40': { 'suggestion': 4.0,
-                         'suggestions': [4.0],
+             'BMBLB': { 'suggestion': 'bumblebee',
+                        'suggestions': ['bumblebee'],
+                        'suggestions_size': 1,
+                        'total_count': 1},
+             'MTR)^$': { 'suggestion': 'Metroplex_)^$',
+                         'suggestions': ['Metroplex_)^$'],
                          'suggestions_size': 1,
                          'total_count': 1},
-                 '43': { 'suggestion': 4.3,
-                         'suggestions': [4.3],
+             'MTRPLX': { 'suggestion': 'Metroplex',
+                         'suggestions': ['Metroplex'],
                          'suggestions_size': 1,
                          'total_count': 1},
-                 '57': { 'suggestion': 5.7,
-                         'suggestions': [5.7],
+             'MÉTL-X': { 'suggestion': 'métrop´le-x',
+                         'suggestions': ['métrop´le-x'],
                          'suggestions_size': 1,
                          'total_count': 1},
-                 'nan': { 'suggestion': nan,
-                          'suggestions': [nan],
+             'OPTMS': { 'suggestion': 'Optimus',
+                        'suggestions': ['Optimus'],
+                        'suggestions_size': 1,
+                        'total_count': 1}},
+  'rank': { '10': { 'suggestion': '10',
+                    'suggestions': ['10'],
+                    'suggestions_size': 1,
+                    'total_count': 2},
+            '7': { 'suggestion': '7',
+                   'suggestions': ['7'],
+                   'suggestions_size': 1,
+                   'total_count': 2},
+            '8': { 'suggestion': '8',
+                   'suggestions': ['8'],
+                   'suggestions_size': 1,
+                   'total_count': 2}},
+  'timestamp': { '201-24': { 'suggestion': '2014-06-24',
+                             'suggestions': ['2014-06-24'],
+                             'suggestions_size': 1,
+                             'total_count': 6}},
+  'weight(t)': { '1.8': { 'suggestion': '1.8',
+                          'suggestions': ['1.8'],
                           'suggestions_size': 1,
-                          'total_count': 1}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
-
-    def test_string_clustering_all_levenshtein(self):
-        df = self.df
-        result = df.string_clustering(cols='*', algorithm='levenshtein')
-        expected = { 'weight(t)': { '18': { 'suggestion': '18',
-                         'suggestions': ['18', '43'],
-                         'suggestions_size': 2,
-                         'total_count': 6},
-                 '20': { 'suggestion': '20',
-                         'suggestions': ['20', '40'],
-                         'suggestions_size': 2,
-                         'total_count': 6},
-                 '40': { 'suggestion': '40',
-                         'suggestions': ['40', '43'],
-                         'suggestions_size': 2,
-                         'total_count': 6},
-                 '43': { 'suggestion': '43',
-                         'suggestions': ['43', '40'],
-                         'suggestions_size': 2,
-                         'total_count': 6},
-                 '57': { 'suggestion': '57',
-                         'suggestions': ['57', '43'],
-                         'suggestions_size': 2,
-                         'total_count': 6},
-                 'nan': { 'suggestion': 'nan',
-                          'suggestions': ['nan', '43'],
-                          'suggestions_size': 2,
-                          'total_count': 6}}}
+                          'total_count': 1},
+                 '2.0': { 'suggestion': '2.0',
+                          'suggestions': ['2.0'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                 '4.0': { 'suggestion': '4.0',
+                          'suggestions': ['4.0'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                 '4.3': { 'suggestion': '4.3',
+                          'suggestions': ['4.3'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                 '5.7': { 'suggestion': '5.7',
+                          'suggestions': ['5.7'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                 'N': { 'suggestion': 'nan',
+                        'suggestions': ['nan'],
+                        'suggestions_size': 1,
+                        'total_count': 1}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
-    def test_string_clustering_all_match_rating_codex(self):
-        df = self.df
-        result = df.string_clustering(cols='*', algorithm='match_rating_codex')
-        # The following value does not represent a correct output of the operation
-        expected = self.dict
-        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
-
     def test_string_clustering_all_metaphone(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols='*', algorithm='metaphone')
-        expected = { 'Cybertronian': { 'FLS': { 'suggestion': False,
-                             'suggestions': [False],
+        expected = { 'Cybertronian': { 'FLS': { 'suggestion': 'False',
+                             'suggestions': ['False'],
                              'suggestions_size': 1,
                              'total_count': 1},
-                    'TR': { 'suggestion': True,
-                            'suggestions': [True],
+                    'TR': { 'suggestion': 'True',
+                            'suggestions': ['True'],
                             'suggestions_size': 1,
                             'total_count': 5}},
-  'Date Type': { '': { 'suggestion': Timestamp('2016-09-10 00:00:00'),
-                       'suggestions': [ Timestamp('2016-09-10 00:00:00'),
-                                        Timestamp('2015-08-10 00:00:00'),
-                                        Timestamp('2014-06-24 00:00:00'),
-                                        Timestamp('2013-06-24 00:00:00'),
-                                        Timestamp('2012-05-10 00:00:00'),
-                                        Timestamp('2011-04-10 00:00:00')],
+  'Date Type': { '': { 'suggestion': '2016-09-10',
+                       'suggestions': [ '2016-09-10',
+                                        '2015-08-10',
+                                        '2014-06-24',
+                                        '2013-06-24',
+                                        '2012-05-10',
+                                        '2011-04-10'],
                        'suggestions_size': 6,
                        'total_count': 6}},
-  'NullType': { 'NN': { 'suggestion': None,
-                        'suggestions': [None],
+  'NullType': { 'NN': { 'suggestion': 'None',
+                        'suggestions': ['None'],
                         'suggestions_size': 1,
                         'total_count': 6}},
-  'age': { '': { 'suggestion': 5000000,
-                 'suggestions': [5000000],
+  'age': { '': { 'suggestion': '5000000',
+                 'suggestions': ['5000000'],
                  'suggestions_size': 1,
                  'total_count': 6}},
   'date arrival': { '': { 'suggestion': '1980/04/10',
@@ -432,20 +505,24 @@ def test_string_clustering_all_metaphone(self):
                          'suggestions': ['Leader'],
                          'suggestions_size': 1,
                          'total_count': 1},
-                'NN': { 'suggestion': None,
-                        'suggestions': [None],
+                'NN': { 'suggestion': 'None',
+                        'suggestions': ['None'],
                         'suggestions_size': 1,
                         'total_count': 1},
                 'SKRT': { 'suggestion': 'Security',
                           'suggestions': ['Security'],
                           'suggestions_size': 1,
                           'total_count': 1}},
-  'height(ft)': { '': { 'suggestion': -28.0,
-                        'suggestions': [-28.0, 17.0, 26.0, 13.0, 300.0],
+  'height(ft)': { '': { 'suggestion': '-28.0',
+                        'suggestions': [ '-28.0',
+                                         '17.0',
+                                         '26.0',
+                                         '13.0',
+                                         '300.0'],
                         'suggestions_size': 5,
                         'total_count': 5},
-                  'NN': { 'suggestion': nan,
-                          'suggestions': [nan],
+                  'NN': { 'suggestion': 'nan',
+                          'suggestions': ['nan'],
                           'suggestions_size': 1,
                           'total_count': 1}},
   'last date seen': { '': { 'suggestion': '2016/09/10',
@@ -464,288 +541,286 @@ def test_string_clustering_all_metaphone(self):
                                                  '33.670666,-117.841553'],
                                 'suggestions_size': 4,
                                 'total_count': 4},
-                          'NN': { 'suggestion': None,
-                                  'suggestions': [None],
+                          'NN': { 'suggestion': 'None',
+                                  'suggestions': ['None'],
                                   'suggestions_size': 1,
                                   'total_count': 2}},
-  'names': { 'BMBLB ': { 'suggestion': 'bumbl#ebéé  ',
+  'names': { 'BMBLB': { 'suggestion': 'bumblebee',
+                        'suggestions': ['bumblebee'],
+                        'suggestions_size': 1,
+                        'total_count': 1},
+             'BMBLB ': { 'suggestion': 'bumbl#ebéé  ',
                          'suggestions': ['bumbl#ebéé  '],
                          'suggestions_size': 1,
                          'total_count': 1},
-             'IRNHT': { 'suggestion': 'ironhide&',
-                        'suggestions': ['ironhide&'],
-                        'suggestions_size': 1,
-                        'total_count': 1},
-             'JS': { 'suggestion': 'Jazz',
-                     'suggestions': ['Jazz'],
-                     'suggestions_size': 1,
-                     'total_count': 1},
-             'MKTRN': { 'suggestion': 'Megatron',
-                        'suggestions': ['Megatron'],
-                        'suggestions_size': 1,
-                        'total_count': 1},
-             'MTRPLKS': { 'suggestion': 'Metroplex_)^$',
-                          'suggestions': ['Metroplex_)^$'],
-                          'suggestions_size': 1,
-                          'total_count': 1},
+             'MTRP LKS': { 'suggestion': 'métrop´le-x',
+                           'suggestions': ['métrop´le-x'],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+             'MTRPLKS': { 'suggestion': 'Metroplex',
+                          'suggestions': ['Metroplex', 'Metroplex_)^$'],
+                          'suggestions_size': 2,
+                          'total_count': 2},
              'OPTMS': { 'suggestion': 'Optimus',
                         'suggestions': ['Optimus'],
                         'suggestions_size': 1,
                         'total_count': 1}},
-  'rank': { '': { 'suggestion': 10,
-                  'suggestions': [10, 7, 8],
+  'rank': { '': { 'suggestion': '10',
+                  'suggestions': ['10', '7', '8'],
                   'suggestions_size': 3,
                   'total_count': 6}},
-  'timestamp': { '': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                       'suggestions': [Timestamp('2014-06-24 00:00:00')],
+  'timestamp': { '': { 'suggestion': '2014-06-24',
+                       'suggestions': ['2014-06-24'],
                        'suggestions_size': 1,
                        'total_count': 6}},
-  'weight(t)': { '': { 'suggestion': 4.3,
-                       'suggestions': [4.3, 2.0, 4.0, 1.8, 5.7],
+  'weight(t)': { '': { 'suggestion': '4.3',
+                       'suggestions': ['4.3', '2.0', '4.0', '1.8', '5.7'],
                        'suggestions_size': 5,
                        'total_count': 5},
-                 'NN': { 'suggestion': nan,
-                         'suggestions': [nan],
+                 'NN': { 'suggestion': 'nan',
+                         'suggestions': ['nan'],
                          'suggestions_size': 1,
                          'total_count': 1}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
     def test_string_clustering_all_ngram_fingerprint(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols='*', algorithm='ngram_fingerprint')
-        expected = { 'Cybertronian': { 'alfalsse': { 'suggestion': False,
-                                  'suggestions': [False],
-                                  'suggestions_size': 1,
-                                  'total_count': 1},
-                    'rutrue': { 'suggestion': True,
-                                'suggestions': [True],
-                                'suggestions_size': 1,
-                                'total_count': 5}},
-  'Date Type': { '010410112041': { 'suggestion': Timestamp('2011-04-10 00:00:00'),
-                                   'suggestions': [ Timestamp('2011-04-10 00:00:00')],
-                                   'suggestions_size': 1,
-                                   'total_count': 1},
-                 '010510122051': { 'suggestion': Timestamp('2012-05-10 00:00:00'),
-                                   'suggestions': [ Timestamp('2012-05-10 00:00:00')],
-                                   'suggestions_size': 1,
-                                   'total_count': 1},
-                 '01061320243062': { 'suggestion': Timestamp('2013-06-24 00:00:00'),
-                                     'suggestions': [ Timestamp('2013-06-24 00:00:00')],
-                                     'suggestions_size': 1,
-                                     'total_count': 1},
-                 '01061420244062': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                                     'suggestions': [ Timestamp('2014-06-24 00:00:00')],
-                                     'suggestions_size': 1,
-                                     'total_count': 1},
-                 '01081015205081': { 'suggestion': Timestamp('2015-08-10 00:00:00'),
-                                     'suggestions': [ Timestamp('2015-08-10 00:00:00')],
-                                     'suggestions_size': 1,
-                                     'total_count': 1},
-                 '01091016206091': { 'suggestion': Timestamp('2016-09-10 00:00:00'),
-                                     'suggestions': [ Timestamp('2016-09-10 00:00:00')],
-                                     'suggestions_size': 1,
-                                     'total_count': 1}},
-  'NullType': { 'nenoon': { 'suggestion': None,
-                            'suggestions': [None],
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_string_clustering_all_nysiis(self):
+        df = self.df.copy()
+        result = df.string_clustering(cols='*', algorithm='nysiis')
+        expected = { 'Cybertronian': { 'FALS': { 'suggestion': 'False',
+                              'suggestions': ['False'],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+                    'TR': { 'suggestion': 'True',
+                            'suggestions': ['True'],
                             'suggestions_size': 1,
-                            'total_count': 6}},
-  'age': { '0050': { 'suggestion': 5000000,
-                     'suggestions': [5000000],
-                     'suggestions_size': 1,
-                     'total_count': 6}},
-  'date arrival': { '00041019418098': { 'suggestion': '1980/04/10',
-                                        'suggestions': ['1980/04/10'],
-                                        'suggestions_size': 1,
-                                        'total_count': 6}},
-  'function': { 'addeeaerle': { 'suggestion': 'Leader',
-                                'suggestions': ['Leader'],
+                            'total_count': 5}},
+  'Date Type': { '201-04-10': { 'suggestion': '2011-04-10',
+                                'suggestions': ['2011-04-10'],
                                 'suggestions_size': 1,
                                 'total_count': 1},
-                'agesgeionaonpisp': { 'suggestion': 'Espionage',
-                                      'suggestions': ['Espionage'],
-                                      'suggestions_size': 1,
-                                      'total_count': 1},
-                'aneneufiieirlinantrssttetlut': { 'suggestion': 'First '
-                                                                'Lieutenant',
-                                                  'suggestions': [ 'First '
-                                                                   'Lieutenant'],
-                                                  'suggestions_size': 1,
-                                                  'total_count': 1},
-                'atbaesioleonsttatitltt': { 'suggestion': 'Battle Station',
-                                            'suggestions': ['Battle Station'],
-                                            'suggestions_size': 1,
-                                            'total_count': 1},
-                'cuecitrisetyur': { 'suggestion': 'Security',
-                                    'suggestions': ['Security'],
+                 '2012-05-10': { 'suggestion': '2012-05-10',
+                                 'suggestions': ['2012-05-10'],
+                                 'suggestions_size': 1,
+                                 'total_count': 1},
+                 '2013-06-24': { 'suggestion': '2013-06-24',
+                                 'suggestions': ['2013-06-24'],
+                                 'suggestions_size': 1,
+                                 'total_count': 1},
+                 '2014-06-24': { 'suggestion': '2014-06-24',
+                                 'suggestions': ['2014-06-24'],
+                                 'suggestions_size': 1,
+                                 'total_count': 1},
+                 '2015-08-10': { 'suggestion': '2015-08-10',
+                                 'suggestions': ['2015-08-10'],
+                                 'suggestions_size': 1,
+                                 'total_count': 1},
+                 '2016-09-10': { 'suggestion': '2016-09-10',
+                                 'suggestions': ['2016-09-10'],
+                                 'suggestions_size': 1,
+                                 'total_count': 1}},
+  'NullType': { 'NAN': { 'suggestion': 'None',
+                         'suggestions': ['None'],
+                         'suggestions_size': 1,
+                         'total_count': 6}},
+  'age': { '50': { 'suggestion': '5000000',
+                   'suggestions': ['5000000'],
+                   'suggestions_size': 1,
+                   'total_count': 6}},
+  'date arrival': { '1980/04/10': { 'suggestion': '1980/04/10',
+                                    'suggestions': ['1980/04/10'],
                                     'suggestions_size': 1,
-                                    'total_count': 1},
-                'nenoon': { 'suggestion': None,
-                            'suggestions': [None],
+                                    'total_count': 6}},
+  'function': { 'BATL': { 'suggestion': 'Battle Station',
+                          'suggestions': ['Battle Station'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                'ESPANAG': { 'suggestion': 'Espionage',
+                             'suggestions': ['Espionage'],
+                             'suggestions_size': 1,
+                             'total_count': 1},
+                'FARST': { 'suggestion': 'First Lieutenant',
+                           'suggestions': ['First Lieutenant'],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+                'LADAR': { 'suggestion': 'Leader',
+                           'suggestions': ['Leader'],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+                'NAN': { 'suggestion': 'None',
+                         'suggestions': ['None'],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+                'SACARATY': { 'suggestion': 'Security',
+                              'suggestions': ['Security'],
+                              'suggestions_size': 1,
+                              'total_count': 1}},
+  'height(ft)': { '-28.0': { 'suggestion': '-28.0',
+                             'suggestions': ['-28.0'],
+                             'suggestions_size': 1,
+                             'total_count': 1},
+                  '13.0': { 'suggestion': '13.0',
+                            'suggestions': ['13.0'],
                             'suggestions_size': 1,
-                            'total_count': 1}},
-  'height(ft)': { '0030': { 'suggestion': 300.0,
-                            'suggestions': [300.0],
+                            'total_count': 1},
+                  '17.0': { 'suggestion': '17.0',
+                            'suggestions': ['17.0'],
                             'suggestions_size': 1,
                             'total_count': 1},
-                  '1330': { 'suggestion': 13.0,
-                            'suggestions': [13.0],
+                  '26.0': { 'suggestion': '26.0',
+                            'suggestions': ['26.0'],
                             'suggestions_size': 1,
                             'total_count': 1},
-                  '1770': { 'suggestion': 17.0,
-                            'suggestions': [17.0],
+                  '30.0': { 'suggestion': '300.0',
+                            'suggestions': ['300.0'],
                             'suggestions_size': 1,
                             'total_count': 1},
-                  '2660': { 'suggestion': 26.0,
-                            'suggestions': [26.0],
+                  'NAN': { 'suggestion': 'nan',
+                           'suggestions': ['nan'],
+                           'suggestions_size': 1,
+                           'total_count': 1}},
+  'last date seen': { '201/04/10': { 'suggestion': '2011/04/10',
+                                     'suggestions': ['2011/04/10'],
+                                     'suggestions_size': 1,
+                                     'total_count': 1},
+                      '2012/05/10': { 'suggestion': '2012/05/10',
+                                      'suggestions': ['2012/05/10'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1},
+                      '2013/06/10': { 'suggestion': '2013/06/10',
+                                      'suggestions': ['2013/06/10'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1},
+                      '2014/07/10': { 'suggestion': '2014/07/10',
+                                      'suggestions': ['2014/07/10'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1},
+                      '2015/08/10': { 'suggestion': '2015/08/10',
+                                      'suggestions': ['2015/08/10'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1},
+                      '2016/09/10': { 'suggestion': '2016/09/10',
+                                      'suggestions': ['2016/09/10'],
+                                      'suggestions_size': 1,
+                                      'total_count': 1}},
+  'last position seen': { '10.642707,-71.612534': { 'suggestion': '10.642707,-71.612534',
+                                                    'suggestions': [ '10.642707,-71.612534'],
+                                                    'suggestions_size': 1,
+                                                    'total_count': 1},
+                          '19.42735,-9.201': { 'suggestion': '19.442735,-99.201111',
+                                               'suggestions': [ '19.442735,-99.201111'],
+                                               'suggestions_size': 1,
+                                               'total_count': 1},
+                          '3.6706,-17.84153': { 'suggestion': '33.670666,-117.841553',
+                                                'suggestions': [ '33.670666,-117.841553'],
+                                                'suggestions_size': 1,
+                                                'total_count': 1},
+                          '37.789563,-12.40356': { 'suggestion': '37.789563,-122.400356',
+                                                   'suggestions': [ '37.789563,-122.400356'],
+                                                   'suggestions_size': 1,
+                                                   'total_count': 1},
+                          'NAN': { 'suggestion': 'None',
+                                   'suggestions': ['None'],
+                                   'suggestions_size': 1,
+                                   'total_count': 2}},
+  'names': { 'BANBL#ABÉ': { 'suggestion': 'bumbl#ebéé  ',
+                            'suggestions': ['bumbl#ebéé  '],
                             'suggestions_size': 1,
                             'total_count': 1},
-                  '2880': { 'suggestion': -28.0,
-                            'suggestions': [-28.0],
+             'BANBLABY': { 'suggestion': 'bumblebee',
+                           'suggestions': ['bumblebee'],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+             'MATRAPLAX': { 'suggestion': 'Metroplex',
+                            'suggestions': ['Metroplex'],
                             'suggestions_size': 1,
                             'total_count': 1},
-                  'anna': { 'suggestion': nan,
-                            'suggestions': [nan],
-                            'suggestions_size': 1,
-                            'total_count': 1}},
-  'last date seen': { '010410112041': { 'suggestion': '2011/04/10',
-                                        'suggestions': ['2011/04/10'],
-                                        'suggestions_size': 1,
-                                        'total_count': 1},
-                      '010510122051': { 'suggestion': '2012/05/10',
-                                        'suggestions': ['2012/05/10'],
-                                        'suggestions_size': 1,
-                                        'total_count': 1},
-                      '01061013203061': { 'suggestion': '2013/06/10',
-                                          'suggestions': ['2013/06/10'],
-                                          'suggestions_size': 1,
-                                          'total_count': 1},
-                      '01071014204071': { 'suggestion': '2014/07/10',
-                                          'suggestions': ['2014/07/10'],
-                                          'suggestions_size': 1,
-                                          'total_count': 1},
-                      '01081015205081': { 'suggestion': '2015/08/10',
-                                          'suggestions': ['2015/08/10'],
-                                          'suggestions_size': 1,
-                                          'total_count': 1},
-                      '01091016206091': { 'suggestion': '2016/09/10',
-                                          'suggestions': ['2016/09/10'],
-                                          'suggestions_size': 1,
-                                          'total_count': 1}},
-  'last position seen': { '000312222431353740566377788995': { 'suggestion': '37.789563,-122.400356',
-                                                              'suggestions': [ '37.789563,-122.400356'],
-                                                              'suggestions_size': 1,
-                                                              'total_count': 1},
-                          '01111920273542445973929499': { 'suggestion': '19.442735,-99.201111',
-                                                          'suggestions': [ '19.442735,-99.201111'],
-                                                          'suggestions_size': 1,
-                                                          'total_count': 1},
-                          '060710121625273442536164707177': { 'suggestion': '10.642707,-71.612534',
-                                                              'suggestions': [ '10.642707,-71.612534'],
-                                                              'suggestions_size': 1,
-                                                              'total_count': 1},
-                          '061115173336415355616667707884': { 'suggestion': '33.670666,-117.841553',
-                                                              'suggestions': [ '33.670666,-117.841553'],
-                                                              'suggestions_size': 1,
-                                                              'total_count': 1},
-                          'nenoon': { 'suggestion': None,
-                                      'suggestions': [None],
-                                      'suggestions_size': 1,
-                                      'total_count': 2}},
-  'names': { 'ateggameonrotr': { 'suggestion': 'Megatron',
-                                 'suggestions': ['Megatron'],
-                                 'suggestions_size': 1,
-                                 'total_count': 1},
-             'azjazz': { 'suggestion': 'Jazz',
-                         'suggestions': ['Jazz'],
-                         'suggestions_size': 1,
-                         'total_count': 1},
-             'beblbuebeelembum': { 'suggestion': 'bumbl#ebéé  ',
-                                   'suggestions': ['bumbl#ebéé  '],
-                                   'suggestions_size': 1,
-                                   'total_count': 1},
-             'dehiidirnhonro': { 'suggestion': 'ironhide&',
-                                 'suggestions': ['ironhide&'],
-                                 'suggestions_size': 1,
-                                 'total_count': 1},
-             'etexlemeopplrotr': { 'suggestion': 'Metroplex_)^$',
-                                   'suggestions': ['Metroplex_)^$'],
-                                   'suggestions_size': 1,
-                                   'total_count': 1},
-             'immuoppttius': { 'suggestion': 'Optimus',
-                               'suggestions': ['Optimus'],
-                               'suggestions_size': 1,
-                               'total_count': 1}},
-  'rank': { '': { 'suggestion': 7,
-                  'suggestions': [7, 8],
-                  'suggestions_size': 2,
-                  'total_count': 4},
-            '10': { 'suggestion': 10,
-                    'suggestions': [10],
+             'MATRAPLAX_)^$': { 'suggestion': 'Metroplex_)^$',
+                                'suggestions': ['Metroplex_)^$'],
+                                'suggestions_size': 1,
+                                'total_count': 1},
+             'MÉTRAP´LA-X': { 'suggestion': 'métrop´le-x',
+                              'suggestions': ['métrop´le-x'],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+             'OPTAN': { 'suggestion': 'Optimus',
+                        'suggestions': ['Optimus'],
+                        'suggestions_size': 1,
+                        'total_count': 1}},
+  'rank': { '10': { 'suggestion': '10',
+                    'suggestions': ['10'],
                     'suggestions_size': 1,
-                    'total_count': 2}},
-  'timestamp': { '01061420244062': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                                     'suggestions': [ Timestamp('2014-06-24 00:00:00')],
-                                     'suggestions_size': 1,
-                                     'total_count': 6}},
-  'weight(t)': { '18': { 'suggestion': 1.8,
-                         'suggestions': [1.8],
-                         'suggestions_size': 1,
-                         'total_count': 1},
-                 '20': { 'suggestion': 2.0,
-                         'suggestions': [2.0],
-                         'suggestions_size': 1,
-                         'total_count': 1},
-                 '40': { 'suggestion': 4.0,
-                         'suggestions': [4.0],
-                         'suggestions_size': 1,
-                         'total_count': 1},
-                 '43': { 'suggestion': 4.3,
-                         'suggestions': [4.3],
-                         'suggestions_size': 1,
-                         'total_count': 1},
-                 '57': { 'suggestion': 5.7,
-                         'suggestions': [5.7],
-                         'suggestions_size': 1,
-                         'total_count': 1},
-                 'anna': { 'suggestion': nan,
-                           'suggestions': [nan],
-                           'suggestions_size': 1,
-                           'total_count': 1}}}
+                    'total_count': 2},
+            '7': { 'suggestion': '7',
+                   'suggestions': ['7'],
+                   'suggestions_size': 1,
+                   'total_count': 2},
+            '8': { 'suggestion': '8',
+                   'suggestions': ['8'],
+                   'suggestions_size': 1,
+                   'total_count': 2}},
+  'timestamp': { '2014-06-24': { 'suggestion': '2014-06-24',
+                                 'suggestions': ['2014-06-24'],
+                                 'suggestions_size': 1,
+                                 'total_count': 6}},
+  'weight(t)': { '1.8': { 'suggestion': '1.8',
+                          'suggestions': ['1.8'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                 '2.0': { 'suggestion': '2.0',
+                          'suggestions': ['2.0'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                 '4.0': { 'suggestion': '4.0',
+                          'suggestions': ['4.0'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                 '4.3': { 'suggestion': '4.3',
+                          'suggestions': ['4.3'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                 '5.7': { 'suggestion': '5.7',
+                          'suggestions': ['5.7'],
+                          'suggestions_size': 1,
+                          'total_count': 1},
+                 'NAN': { 'suggestion': 'nan',
+                          'suggestions': ['nan'],
+                          'suggestions_size': 1,
+                          'total_count': 1}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
-    def test_string_clustering_all_nysiis(self):
-        df = self.df
-        result = df.string_clustering(cols='*', algorithm='nysiis')
-        # The following value does not represent a correct output of the operation
-        expected = self.dict
-        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
-
     def test_string_clustering_all_soundex(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols='*', algorithm='soundex')
-        expected = { 'Cybertronian': { 'F420': { 'suggestion': False,
-                              'suggestions': [False],
+        expected = { 'Cybertronian': { 'F420': { 'suggestion': 'False',
+                              'suggestions': ['False'],
                               'suggestions_size': 1,
                               'total_count': 1},
-                    'T600': { 'suggestion': True,
-                              'suggestions': [True],
+                    'T600': { 'suggestion': 'True',
+                              'suggestions': ['True'],
                               'suggestions_size': 1,
                               'total_count': 5}},
-  'Date Type': { '2000': { 'suggestion': Timestamp('2016-09-10 00:00:00'),
-                           'suggestions': [ Timestamp('2016-09-10 00:00:00'),
-                                            Timestamp('2015-08-10 00:00:00'),
-                                            Timestamp('2014-06-24 00:00:00'),
-                                            Timestamp('2013-06-24 00:00:00'),
-                                            Timestamp('2012-05-10 00:00:00'),
-                                            Timestamp('2011-04-10 00:00:00')],
+  'Date Type': { '2000': { 'suggestion': '2016-09-10',
+                           'suggestions': [ '2016-09-10',
+                                            '2015-08-10',
+                                            '2014-06-24',
+                                            '2013-06-24',
+                                            '2012-05-10',
+                                            '2011-04-10'],
                            'suggestions_size': 6,
                            'total_count': 6}},
-  'NullType': { 'N500': { 'suggestion': None,
-                          'suggestions': [None],
+  'NullType': { 'N500': { 'suggestion': 'None',
+                          'suggestions': ['None'],
                           'suggestions_size': 1,
                           'total_count': 6}},
-  'age': { '5000': { 'suggestion': 5000000,
-                     'suggestions': [5000000],
+  'age': { '5000': { 'suggestion': '5000000',
+                     'suggestions': ['5000000'],
                      'suggestions_size': 1,
                      'total_count': 6}},
   'date arrival': { '1000': { 'suggestion': '1980/04/10',
@@ -768,32 +843,32 @@ def test_string_clustering_all_soundex(self):
                           'suggestions': ['Leader'],
                           'suggestions_size': 1,
                           'total_count': 1},
-                'N500': { 'suggestion': None,
-                          'suggestions': [None],
+                'N500': { 'suggestion': 'None',
+                          'suggestions': ['None'],
                           'suggestions_size': 1,
                           'total_count': 1},
                 'S263': { 'suggestion': 'Security',
                           'suggestions': ['Security'],
                           'suggestions_size': 1,
                           'total_count': 1}},
-  'height(ft)': { '-000': { 'suggestion': -28.0,
-                            'suggestions': [-28.0],
+  'height(ft)': { '-000': { 'suggestion': '-28.0',
+                            'suggestions': ['-28.0'],
                             'suggestions_size': 1,
                             'total_count': 1},
-                  '1000': { 'suggestion': 17.0,
-                            'suggestions': [17.0, 13.0],
+                  '1000': { 'suggestion': '17.0',
+                            'suggestions': ['17.0', '13.0'],
                             'suggestions_size': 2,
                             'total_count': 2},
-                  '2000': { 'suggestion': 26.0,
-                            'suggestions': [26.0],
+                  '2000': { 'suggestion': '26.0',
+                            'suggestions': ['26.0'],
                             'suggestions_size': 1,
                             'total_count': 1},
-                  '3000': { 'suggestion': 300.0,
-                            'suggestions': [300.0],
+                  '3000': { 'suggestion': '300.0',
+                            'suggestions': ['300.0'],
                             'suggestions_size': 1,
                             'total_count': 1},
-                  'N500': { 'suggestion': nan,
-                            'suggestions': [nan],
+                  'N500': { 'suggestion': 'nan',
+                            'suggestions': ['nan'],
                             'suggestions_size': 1,
                             'total_count': 1}},
   'last date seen': { '2000': { 'suggestion': '2016/09/10',
@@ -815,526 +890,386 @@ def test_string_clustering_all_soundex(self):
                                                      '33.670666,-117.841553'],
                                     'suggestions_size': 2,
                                     'total_count': 2},
-                          'N500': { 'suggestion': None,
-                                    'suggestions': [None],
+                          'N500': { 'suggestion': 'None',
+                                    'suggestions': ['None'],
                                     'suggestions_size': 1,
                                     'total_count': 2}},
   'names': { 'B514': { 'suggestion': 'bumbl#ebéé  ',
-                       'suggestions': ['bumbl#ebéé  '],
-                       'suggestions_size': 1,
-                       'total_count': 1},
-             'I653': { 'suggestion': 'ironhide&',
-                       'suggestions': ['ironhide&'],
-                       'suggestions_size': 1,
-                       'total_count': 1},
-             'J200': { 'suggestion': 'Jazz',
-                       'suggestions': ['Jazz'],
-                       'suggestions_size': 1,
-                       'total_count': 1},
-             'M236': { 'suggestion': 'Megatron',
-                       'suggestions': ['Megatron'],
-                       'suggestions_size': 1,
-                       'total_count': 1},
-             'M361': { 'suggestion': 'Metroplex_)^$',
-                       'suggestions': ['Metroplex_)^$'],
-                       'suggestions_size': 1,
-                       'total_count': 1},
+                       'suggestions': ['bumbl#ebéé  ', 'bumblebee'],
+                       'suggestions_size': 2,
+                       'total_count': 2},
+             'M361': { 'suggestion': 'Metroplex',
+                       'suggestions': [ 'Metroplex',
+                                        'métrop´le-x',
+                                        'Metroplex_)^$'],
+                       'suggestions_size': 3,
+                       'total_count': 3},
              'O135': { 'suggestion': 'Optimus',
                        'suggestions': ['Optimus'],
                        'suggestions_size': 1,
                        'total_count': 1}},
-  'rank': { '1000': { 'suggestion': 10,
-                      'suggestions': [10],
+  'rank': { '1000': { 'suggestion': '10',
+                      'suggestions': ['10'],
                       'suggestions_size': 1,
                       'total_count': 2},
-            '7000': { 'suggestion': 7,
-                      'suggestions': [7],
+            '7000': { 'suggestion': '7',
+                      'suggestions': ['7'],
                       'suggestions_size': 1,
                       'total_count': 2},
-            '8000': { 'suggestion': 8,
-                      'suggestions': [8],
+            '8000': { 'suggestion': '8',
+                      'suggestions': ['8'],
                       'suggestions_size': 1,
                       'total_count': 2}},
-  'timestamp': { '2000': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                           'suggestions': [Timestamp('2014-06-24 00:00:00')],
+  'timestamp': { '2000': { 'suggestion': '2014-06-24',
+                           'suggestions': ['2014-06-24'],
                            'suggestions_size': 1,
                            'total_count': 6}},
-  'weight(t)': { '1000': { 'suggestion': 1.8,
-                           'suggestions': [1.8],
+  'weight(t)': { '1000': { 'suggestion': '1.8',
+                           'suggestions': ['1.8'],
                            'suggestions_size': 1,
                            'total_count': 1},
-                 '2000': { 'suggestion': 2.0,
-                           'suggestions': [2.0],
+                 '2000': { 'suggestion': '2.0',
+                           'suggestions': ['2.0'],
                            'suggestions_size': 1,
                            'total_count': 1},
-                 '4000': { 'suggestion': 4.3,
-                           'suggestions': [4.3, 4.0],
+                 '4000': { 'suggestion': '4.3',
+                           'suggestions': ['4.3', '4.0'],
                            'suggestions_size': 2,
                            'total_count': 2},
-                 '5000': { 'suggestion': 5.7,
-                           'suggestions': [5.7],
+                 '5000': { 'suggestion': '5.7',
+                           'suggestions': ['5.7'],
                            'suggestions_size': 1,
                            'total_count': 1},
-                 'N500': { 'suggestion': nan,
-                           'suggestions': [nan],
+                 'N500': { 'suggestion': 'nan',
+                           'suggestions': ['nan'],
                            'suggestions_size': 1,
                            'total_count': 1}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
     def test_string_clustering_multiple_double_metaphone(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='double_metaphone')
-        expected = { 'Cybertronian': { ('FLS', ''): { 'suggestion': False,
-                                   'suggestions': [False],
-                                   'suggestions_size': 1,
-                                   'total_count': 1},
-                    ('TR', ''): { 'suggestion': True,
-                                  'suggestions': [True],
-                                  'suggestions_size': 1,
-                                  'total_count': 5}},
-  'NullType': { ('NN', ''): { 'suggestion': None,
-                              'suggestions': [None],
-                              'suggestions_size': 1,
-                              'total_count': 6}},
-  'timestamp': { ('', ''): { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                             'suggestions': [Timestamp('2014-06-24 00:00:00')],
-                             'suggestions_size': 1,
-                             'total_count': 6}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
 
     def test_string_clustering_multiple_fingerprint(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='fingerprint')
-        expected = { 'Cybertronian': { 'false': { 'suggestion': False,
-                               'suggestions': [False],
-                               'suggestions_size': 1,
-                               'total_count': 1},
-                    'true': { 'suggestion': True,
-                              'suggestions': [True],
-                              'suggestions_size': 1,
-                              'total_count': 5}},
-  'NullType': { 'none': { 'suggestion': None,
-                          'suggestions': [None],
-                          'suggestions_size': 1,
-                          'total_count': 6}},
-  'timestamp': { '20140624': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                               'suggestions': [ Timestamp('2014-06-24 00:00:00')],
-                               'suggestions_size': 1,
-                               'total_count': 6}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
 
     def test_string_clustering_multiple_levenshtein(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='levenshtein')
-        expected = { 'timestamp': { '20140624': { 'suggestion': '20140624',
-                               'suggestions': ['20140624'],
-                               'suggestions_size': 1,
-                               'total_count': 6}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
 
     def test_string_clustering_multiple_match_rating_codex(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='match_rating_codex')
-        expected = { 'Cybertronian': { 'FLS': { 'suggestion': False,
-                             'suggestions': [False],
+        expected = { 'Cybertronian': { 'FLS': { 'suggestion': 'False',
+                             'suggestions': ['False'],
                              'suggestions_size': 1,
                              'total_count': 1},
-                    'TR': { 'suggestion': True,
-                            'suggestions': [True],
+                    'TR': { 'suggestion': 'True',
+                            'suggestions': ['True'],
                             'suggestions_size': 1,
                             'total_count': 5}},
-  'NullType': { 'N': { 'suggestion': None,
-                       'suggestions': [None],
+  'NullType': { 'N': { 'suggestion': 'None',
+                       'suggestions': ['None'],
                        'suggestions_size': 1,
                        'total_count': 6}},
-  'timestamp': { '201-24': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                             'suggestions': [Timestamp('2014-06-24 00:00:00')],
+  'timestamp': { '201-24': { 'suggestion': '2014-06-24',
+                             'suggestions': ['2014-06-24'],
                              'suggestions_size': 1,
                              'total_count': 6}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
     def test_string_clustering_multiple_metaphone(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='metaphone')
-        expected = { 'Cybertronian': { 'FLS': { 'suggestion': False,
-                             'suggestions': [False],
+        expected = { 'Cybertronian': { 'FLS': { 'suggestion': 'False',
+                             'suggestions': ['False'],
                              'suggestions_size': 1,
                              'total_count': 1},
-                    'TR': { 'suggestion': True,
-                            'suggestions': [True],
+                    'TR': { 'suggestion': 'True',
+                            'suggestions': ['True'],
                             'suggestions_size': 1,
                             'total_count': 5}},
-  'NullType': { 'NN': { 'suggestion': None,
-                        'suggestions': [None],
+  'NullType': { 'NN': { 'suggestion': 'None',
+                        'suggestions': ['None'],
                         'suggestions_size': 1,
                         'total_count': 6}},
-  'timestamp': { '': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                       'suggestions': [Timestamp('2014-06-24 00:00:00')],
+  'timestamp': { '': { 'suggestion': '2014-06-24',
+                       'suggestions': ['2014-06-24'],
                        'suggestions_size': 1,
                        'total_count': 6}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
     def test_string_clustering_multiple_ngram_fingerprint(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='ngram_fingerprint')
-        expected = { 'Cybertronian': { 'alfalsse': { 'suggestion': False,
-                                  'suggestions': [False],
-                                  'suggestions_size': 1,
-                                  'total_count': 1},
-                    'rutrue': { 'suggestion': True,
-                                'suggestions': [True],
-                                'suggestions_size': 1,
-                                'total_count': 5}},
-  'NullType': { 'nenoon': { 'suggestion': None,
-                            'suggestions': [None],
-                            'suggestions_size': 1,
-                            'total_count': 6}},
-  'timestamp': { '01061420244062': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                                     'suggestions': [ Timestamp('2014-06-24 00:00:00')],
-                                     'suggestions_size': 1,
-                                     'total_count': 6}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
 
     def test_string_clustering_multiple_nysiis(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='nysiis')
-        expected = { 'Cybertronian': { 'FALS': { 'suggestion': False,
-                              'suggestions': [False],
+        expected = { 'Cybertronian': { 'FALS': { 'suggestion': 'False',
+                              'suggestions': ['False'],
                               'suggestions_size': 1,
                               'total_count': 1},
-                    'TR': { 'suggestion': True,
-                            'suggestions': [True],
+                    'TR': { 'suggestion': 'True',
+                            'suggestions': ['True'],
                             'suggestions_size': 1,
                             'total_count': 5}},
-  'NullType': { 'NAN': { 'suggestion': None,
-                         'suggestions': [None],
+  'NullType': { 'NAN': { 'suggestion': 'None',
+                         'suggestions': ['None'],
                          'suggestions_size': 1,
                          'total_count': 6}},
-  'timestamp': { '2014-06-24': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                                 'suggestions': [ Timestamp('2014-06-24 00:00:00')],
+  'timestamp': { '2014-06-24': { 'suggestion': '2014-06-24',
+                                 'suggestions': ['2014-06-24'],
                                  'suggestions_size': 1,
                                  'total_count': 6}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
     def test_string_clustering_multiple_soundex(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['NullType', 'Cybertronian', 'timestamp'], algorithm='soundex')
-        expected = { 'Cybertronian': { 'F420': { 'suggestion': False,
-                              'suggestions': [False],
+        expected = { 'Cybertronian': { 'F420': { 'suggestion': 'False',
+                              'suggestions': ['False'],
                               'suggestions_size': 1,
                               'total_count': 1},
-                    'T600': { 'suggestion': True,
-                              'suggestions': [True],
+                    'T600': { 'suggestion': 'True',
+                              'suggestions': ['True'],
                               'suggestions_size': 1,
                               'total_count': 5}},
-  'NullType': { 'N500': { 'suggestion': None,
-                          'suggestions': [None],
+  'NullType': { 'N500': { 'suggestion': 'None',
+                          'suggestions': ['None'],
                           'suggestions_size': 1,
                           'total_count': 6}},
-  'timestamp': { '2000': { 'suggestion': Timestamp('2014-06-24 00:00:00'),
-                           'suggestions': [Timestamp('2014-06-24 00:00:00')],
+  'timestamp': { '2000': { 'suggestion': '2014-06-24',
+                           'suggestions': ['2014-06-24'],
                            'suggestions_size': 1,
                            'total_count': 6}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
     def test_string_clustering_numeric_double_metaphone(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['rank'], algorithm='double_metaphone')
-        expected = { 'rank': { ('', ''): { 'suggestion': 10,
-                        'suggestions': [10, 7, 8],
-                        'suggestions_size': 3,
-                        'total_count': 6}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
 
     def test_string_clustering_numeric_fingerprint(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['rank'], algorithm='fingerprint')
-        expected = { 'rank': { '10': { 'suggestion': 10,
-                    'suggestions': [10],
-                    'suggestions_size': 1,
-                    'total_count': 2},
-            '7': { 'suggestion': 7,
-                   'suggestions': [7],
-                   'suggestions_size': 1,
-                   'total_count': 2},
-            '8': { 'suggestion': 8,
-                   'suggestions': [8],
-                   'suggestions_size': 1,
-                   'total_count': 2}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
 
     def test_string_clustering_numeric_levenshtein(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['rank'], algorithm='levenshtein')
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
+
+    def test_string_clustering_numeric_match_rating_codex(self):
+        df = self.df.copy()
+        result = df.string_clustering(cols=['rank'], algorithm='match_rating_codex')
         expected = { 'rank': { '10': { 'suggestion': '10',
                     'suggestions': ['10'],
                     'suggestions_size': 1,
-                    'total_count': 6},
+                    'total_count': 2},
             '7': { 'suggestion': '7',
                    'suggestions': ['7'],
                    'suggestions_size': 1,
-                   'total_count': 6},
+                   'total_count': 2},
             '8': { 'suggestion': '8',
                    'suggestions': ['8'],
                    'suggestions_size': 1,
-                   'total_count': 6}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
-
-    def test_string_clustering_numeric_match_rating_codex(self):
-        df = self.df
-        result = df.string_clustering(cols=['rank'], algorithm='match_rating_codex')
-        expected = { 'rank': { '10': { 'suggestion': 10,
-                    'suggestions': [10],
-                    'suggestions_size': 1,
-                    'total_count': 2},
-            '7': { 'suggestion': 7,
-                   'suggestions': [7],
-                   'suggestions_size': 1,
-                   'total_count': 2},
-            '8': { 'suggestion': 8,
-                   'suggestions': [8],
-                   'suggestions_size': 1,
                    'total_count': 2}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
     def test_string_clustering_numeric_metaphone(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['rank'], algorithm='metaphone')
-        expected = { 'rank': { '': { 'suggestion': 10,
-                  'suggestions': [10, 7, 8],
+        expected = { 'rank': { '': { 'suggestion': '10',
+                  'suggestions': ['10', '7', '8'],
                   'suggestions_size': 3,
                   'total_count': 6}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
     def test_string_clustering_numeric_ngram_fingerprint(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['rank'], algorithm='ngram_fingerprint')
-        expected = { 'rank': { '': { 'suggestion': 7,
-                  'suggestions': [7, 8],
-                  'suggestions_size': 2,
-                  'total_count': 4},
-            '10': { 'suggestion': 10,
-                    'suggestions': [10],
-                    'suggestions_size': 1,
-                    'total_count': 2}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
 
     def test_string_clustering_numeric_nysiis(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['rank'], algorithm='nysiis')
-        expected = { 'rank': { '10': { 'suggestion': 10,
-                    'suggestions': [10],
+        expected = { 'rank': { '10': { 'suggestion': '10',
+                    'suggestions': ['10'],
                     'suggestions_size': 1,
                     'total_count': 2},
-            '7': { 'suggestion': 7,
-                   'suggestions': [7],
+            '7': { 'suggestion': '7',
+                   'suggestions': ['7'],
                    'suggestions_size': 1,
                    'total_count': 2},
-            '8': { 'suggestion': 8,
-                   'suggestions': [8],
+            '8': { 'suggestion': '8',
+                   'suggestions': ['8'],
                    'suggestions_size': 1,
                    'total_count': 2}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
     def test_string_clustering_numeric_soundex(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['rank'], algorithm='soundex')
-        expected = { 'rank': { '1000': { 'suggestion': 10,
-                      'suggestions': [10],
+        expected = { 'rank': { '1000': { 'suggestion': '10',
+                      'suggestions': ['10'],
                       'suggestions_size': 1,
                       'total_count': 2},
-            '7000': { 'suggestion': 7,
-                      'suggestions': [7],
+            '7000': { 'suggestion': '7',
+                      'suggestions': ['7'],
                       'suggestions_size': 1,
                       'total_count': 2},
-            '8000': { 'suggestion': 8,
-                      'suggestions': [8],
+            '8000': { 'suggestion': '8',
+                      'suggestions': ['8'],
                       'suggestions_size': 1,
                       'total_count': 2}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
     def test_string_clustering_string_double_metaphone(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['names'], algorithm='double_metaphone')
-        expected = { 'names': { ('APTMS', ''): { 'suggestion': 'Optimus',
-                              'suggestions': ['Optimus'],
-                              'suggestions_size': 1,
-                              'total_count': 1},
-             ('ARNT', ''): { 'suggestion': 'ironhide&',
-                             'suggestions': ['ironhide&'],
-                             'suggestions_size': 1,
-                             'total_count': 1},
-             ('JS', 'AS'): { 'suggestion': 'Jazz',
-                             'suggestions': ['Jazz'],
-                             'suggestions_size': 1,
-                             'total_count': 1},
-             ('MKTRN', ''): { 'suggestion': 'Megatron',
-                              'suggestions': ['Megatron'],
-                              'suggestions_size': 1,
-                              'total_count': 1},
-             ('MTRPLKSKSKSKSKS', ''): { 'suggestion': 'Metroplex_)^$',
-                                        'suggestions': ['Metroplex_)^$'],
-                                        'suggestions_size': 1,
-                                        'total_count': 1},
-             ('PMPLLP', ''): { 'suggestion': 'bumbl#ebéé  ',
-                               'suggestions': ['bumbl#ebéé  '],
-                               'suggestions_size': 1,
-                               'total_count': 1}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
 
     def test_string_clustering_string_fingerprint(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['names'], algorithm='fingerprint')
-        expected = { 'names': { 'bumblebee': { 'suggestion': 'bumbl#ebéé  ',
-                            'suggestions': ['bumbl#ebéé  '],
-                            'suggestions_size': 1,
-                            'total_count': 1},
-             'ironhide': { 'suggestion': 'ironhide&',
-                           'suggestions': ['ironhide&'],
-                           'suggestions_size': 1,
-                           'total_count': 1},
-             'jazz': { 'suggestion': 'Jazz',
-                       'suggestions': ['Jazz'],
-                       'suggestions_size': 1,
-                       'total_count': 1},
-             'megatron': { 'suggestion': 'Megatron',
-                           'suggestions': ['Megatron'],
-                           'suggestions_size': 1,
-                           'total_count': 1},
-             'metroplex': { 'suggestion': 'Metroplex_)^$',
-                            'suggestions': ['Metroplex_)^$'],
-                            'suggestions_size': 1,
-                            'total_count': 1},
-             'optimus': { 'suggestion': 'Optimus',
-                          'suggestions': ['Optimus'],
-                          'suggestions_size': 1,
-                          'total_count': 1}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+        # The following value does not represent a correct output of the operation
+        expected = self.dict
+        self.assertTrue(result.equals(expected, decimal=True, assertion=True))
 
     def test_string_clustering_string_levenshtein(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['names'], algorithm='levenshtein')
-        expected = { 'names': { 'bumblebee': { 'suggestion': 'bumblebee',
-                            'suggestions': ['bumblebee', 'ironhide'],
-                            'suggestions_size': 2,
-                            'total_count': 6},
-             'ironhide': { 'suggestion': 'ironhide',
-                           'suggestions': ['ironhide', 'optimus'],
-                           'suggestions_size': 2,
-                           'total_count': 6},
-             'jazz': { 'suggestion': 'jazz',
-                       'suggestions': ['jazz', 'optimus'],
-                       'suggestions_size': 2,
-                       'total_count': 6},
-             'megatron': { 'suggestion': 'megatron',
-                           'suggestions': ['megatron', 'metroplex'],
-                           'suggestions_size': 2,
-                           'total_count': 6},
-             'metroplex': { 'suggestion': 'metroplex',
-                            'suggestions': ['metroplex', 'megatron'],
-                            'suggestions_size': 2,
-                            'total_count': 6},
-             'optimus': { 'suggestion': 'optimus',
-                          'suggestions': ['optimus', 'ironhide'],
-                          'suggestions_size': 2,
-                          'total_count': 6}}}
-        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
-
-    def test_string_clustering_string_match_rating_codex(self):
-        df = self.df
-        result = df.string_clustering(cols=['names'], algorithm='match_rating_codex')
         # The following value does not represent a correct output of the operation
         expected = self.dict
         self.assertTrue(result.equals(expected, decimal=True, assertion=True))
 
-    def test_string_clustering_string_metaphone(self):
-        df = self.df
-        result = df.string_clustering(cols=['names'], algorithm='metaphone')
-        expected = { 'names': { 'BMBLB ': { 'suggestion': 'bumbl#ebéé  ',
+    def test_string_clustering_string_match_rating_codex(self):
+        df = self.df.copy()
+        result = df.string_clustering(cols=['names'], algorithm='match_rating_codex')
+        expected = { 'names': { 'BMB#BÉ': { 'suggestion': 'bumbl#ebéé  ',
                          'suggestions': ['bumbl#ebéé  '],
                          'suggestions_size': 1,
                          'total_count': 1},
-             'IRNHT': { 'suggestion': 'ironhide&',
-                        'suggestions': ['ironhide&'],
+             'BMBLB': { 'suggestion': 'bumblebee',
+                        'suggestions': ['bumblebee'],
                         'suggestions_size': 1,
                         'total_count': 1},
-             'JS': { 'suggestion': 'Jazz',
-                     'suggestions': ['Jazz'],
-                     'suggestions_size': 1,
-                     'total_count': 1},
-             'MKTRN': { 'suggestion': 'Megatron',
-                        'suggestions': ['Megatron'],
-                        'suggestions_size': 1,
-                        'total_count': 1},
-             'MTRPLKS': { 'suggestion': 'Metroplex_)^$',
-                          'suggestions': ['Metroplex_)^$'],
-                          'suggestions_size': 1,
-                          'total_count': 1},
+             'MTR)^$': { 'suggestion': 'Metroplex_)^$',
+                         'suggestions': ['Metroplex_)^$'],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+             'MTRPLX': { 'suggestion': 'Metroplex',
+                         'suggestions': ['Metroplex'],
+                         'suggestions_size': 1,
+                         'total_count': 1},
+             'MÉTL-X': { 'suggestion': 'métrop´le-x',
+                         'suggestions': ['métrop´le-x'],
+                         'suggestions_size': 1,
+                         'total_count': 1},
              'OPTMS': { 'suggestion': 'Optimus',
                         'suggestions': ['Optimus'],
                         'suggestions_size': 1,
                         'total_count': 1}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
-    def test_string_clustering_string_ngram_fingerprint(self):
-        df = self.df
-        result = df.string_clustering(cols=['names'], algorithm='ngram_fingerprint')
-        expected = { 'names': { 'ateggameonrotr': { 'suggestion': 'Megatron',
-                                 'suggestions': ['Megatron'],
-                                 'suggestions_size': 1,
-                                 'total_count': 1},
-             'azjazz': { 'suggestion': 'Jazz',
-                         'suggestions': ['Jazz'],
+    def test_string_clustering_string_metaphone(self):
+        df = self.df.copy()
+        result = df.string_clustering(cols=['names'], algorithm='metaphone')
+        expected = { 'names': { 'BMBLB': { 'suggestion': 'bumblebee',
+                        'suggestions': ['bumblebee'],
+                        'suggestions_size': 1,
+                        'total_count': 1},
+             'BMBLB ': { 'suggestion': 'bumbl#ebéé  ',
+                         'suggestions': ['bumbl#ebéé  '],
                          'suggestions_size': 1,
                          'total_count': 1},
-             'beblbuebeelembum': { 'suggestion': 'bumbl#ebéé  ',
-                                   'suggestions': ['bumbl#ebéé  '],
-                                   'suggestions_size': 1,
-                                   'total_count': 1},
-             'dehiidirnhonro': { 'suggestion': 'ironhide&',
-                                 'suggestions': ['ironhide&'],
-                                 'suggestions_size': 1,
-                                 'total_count': 1},
-             'etexlemeopplrotr': { 'suggestion': 'Metroplex_)^$',
-                                   'suggestions': ['Metroplex_)^$'],
-                                   'suggestions_size': 1,
-                                   'total_count': 1},
-             'immuoppttius': { 'suggestion': 'Optimus',
-                               'suggestions': ['Optimus'],
-                               'suggestions_size': 1,
-                               'total_count': 1}}}
+             'MTRP LKS': { 'suggestion': 'métrop´le-x',
+                           'suggestions': ['métrop´le-x'],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+             'MTRPLKS': { 'suggestion': 'Metroplex',
+                          'suggestions': ['Metroplex', 'Metroplex_)^$'],
+                          'suggestions_size': 2,
+                          'total_count': 2},
+             'OPTMS': { 'suggestion': 'Optimus',
+                        'suggestions': ['Optimus'],
+                        'suggestions_size': 1,
+                        'total_count': 1}}}
         self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
 
-    def test_string_clustering_string_nysiis(self):
-        df = self.df
-        result = df.string_clustering(cols=['names'], algorithm='nysiis')
+    def test_string_clustering_string_ngram_fingerprint(self):
+        df = self.df.copy()
+        result = df.string_clustering(cols=['names'], algorithm='ngram_fingerprint')
         # The following value does not represent a correct output of the operation
         expected = self.dict
         self.assertTrue(result.equals(expected, decimal=True, assertion=True))
 
+    def test_string_clustering_string_nysiis(self):
+        df = self.df.copy()
+        result = df.string_clustering(cols=['names'], algorithm='nysiis')
+        expected = { 'names': { 'BANBL#ABÉ': { 'suggestion': 'bumbl#ebéé  ',
+                            'suggestions': ['bumbl#ebéé  '],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+             'BANBLABY': { 'suggestion': 'bumblebee',
+                           'suggestions': ['bumblebee'],
+                           'suggestions_size': 1,
+                           'total_count': 1},
+             'MATRAPLAX': { 'suggestion': 'Metroplex',
+                            'suggestions': ['Metroplex'],
+                            'suggestions_size': 1,
+                            'total_count': 1},
+             'MATRAPLAX_)^$': { 'suggestion': 'Metroplex_)^$',
+                                'suggestions': ['Metroplex_)^$'],
+                                'suggestions_size': 1,
+                                'total_count': 1},
+             'MÉTRAP´LA-X': { 'suggestion': 'métrop´le-x',
+                              'suggestions': ['métrop´le-x'],
+                              'suggestions_size': 1,
+                              'total_count': 1},
+             'OPTAN': { 'suggestion': 'Optimus',
+                        'suggestions': ['Optimus'],
+                        'suggestions_size': 1,
+                        'total_count': 1}}}
+        self.assertTrue(results_equal(result, expected, decimal=5, assertion=True))
+
     def test_string_clustering_string_soundex(self):
-        df = self.df
+        df = self.df.copy()
         result = df.string_clustering(cols=['names'], algorithm='soundex')
         expected = { 'names': { 'B514': { 'suggestion': 'bumbl#ebéé  ',
-                       'suggestions': ['bumbl#ebéé  '],
-                       'suggestions_size': 1,
-                       'total_count': 1},
-             'I653': { 'suggestion': 'ironhide&',
-                       'suggestions': ['ironhide&'],
-                       'suggestions_size': 1,
-                       'total_count': 1},
-             'J200': { 'suggestion': 'Jazz',
-                       'suggestions': ['Jazz'],
-                       'suggestions_size': 1,
-                       'total_count': 1},
-             'M236': { 'suggestion': 'Megatron',
-                       'suggestions': ['Megatron'],
-                       'suggestions_size': 1,
-                       'total_count': 1},
-             'M361': { 'suggestion': 'Metroplex_)^$',
-                       'suggestions': ['Metroplex_)^$'],
-                       'suggestions_size': 1,
-                       'total_count': 1},
+                       'suggestions': ['bumbl#ebéé  ', 'bumblebee'],
+                       'suggestions_size': 2,
+                       'total_count': 2},
+             'M361': { 'suggestion': 'Metroplex',
+                       'suggestions': [ 'Metroplex',
+                                        'métrop´le-x',
+                                        'Metroplex_)^$'],
+                       'suggestions_size': 3,
+                       'total_count': 3},
              'O135': { 'suggestion': 'Optimus',
                        'suggestions': ['Optimus'],
                        'suggestions_size': 1,