|
1 |
| -import numpy as np |
2 | 1 | import pandas as pd
|
3 | 2 | import pytest
|
4 | 3 |
|
5 | 4 | from utils.constants import BASE_FILEPATH
|
6 |
| -from utils.linkage import ( |
7 |
| - calculate_row_similarity, |
8 |
| - calculate_string_similarity, |
9 |
| - deduplicate_perfect_matches, |
10 |
| - row_matches, |
11 |
| -) |
| 5 | +from utils.linkage import deduplicate_perfect_matches |
12 | 6 |
|
13 | 7 | """
|
14 | 8 | Module for testing functions in linkage.py
|
15 | 9 | """
|
16 | 10 |
|
17 |
| -# Creating a test for calculate_row_similarity and row_matches |
18 |
| - |
19 |
| -# to put in data: |
20 |
| -d = { |
21 |
| - "name": ["bob von rosevich", "anantarya smith", "bob j vonrosevich"], |
22 |
| - "address": [ |
23 |
| - "3 Blue Drive, Chicago", |
24 |
| - "4 Blue Drive, Chicago", |
25 |
| - "8 Fancy Way, Chicago", |
26 |
| - ], |
27 |
| -} |
28 |
| - |
29 |
| -test_df = pd.DataFrame(data=d) |
30 |
| - |
31 |
| - |
32 |
| -@pytest.fixture |
33 |
| -def row_similarity_scen_1(): |
34 |
| - return test_df |
35 |
| - |
36 |
| - |
37 |
| -@pytest.fixture |
38 |
| -def row_similarity_scen_2(): |
39 |
| - return test_df |
40 |
| - |
41 |
| - |
42 |
| -def test_row_similarity_scen_1(row_similarity_scen_1): |
43 |
| - wrong = calculate_row_similarity( |
44 |
| - row_similarity_scen_1.iloc[[0]], |
45 |
| - row_similarity_scen_1.iloc[[1]], |
46 |
| - np.array([0.8, 0.2]), |
47 |
| - calculate_string_similarity, |
48 |
| - ) |
49 |
| - right = calculate_row_similarity( |
50 |
| - row_similarity_scen_1.iloc[[0]], |
51 |
| - row_similarity_scen_1.iloc[[2]], |
52 |
| - np.array([0.8, 0.2]), |
53 |
| - calculate_string_similarity, |
54 |
| - ) |
55 |
| - |
56 |
| - assert right > wrong |
57 |
| - |
58 |
| - |
59 |
| -def test_row_similarity_scen_2(row_similarity_scen_2): |
60 |
| - wrong = calculate_row_similarity( |
61 |
| - row_similarity_scen_2.iloc[[0]], |
62 |
| - row_similarity_scen_2.iloc[[1]], |
63 |
| - np.array([0.2, 0.8]), |
64 |
| - calculate_string_similarity, |
65 |
| - ) |
66 |
| - right = calculate_row_similarity( |
67 |
| - row_similarity_scen_2.iloc[[0]], |
68 |
| - row_similarity_scen_2.iloc[[2]], |
69 |
| - np.array([0.2, 0.8]), |
70 |
| - calculate_string_similarity, |
71 |
| - ) |
72 |
| - |
73 |
| - assert right < wrong |
74 |
| - |
75 |
| - |
76 |
| -d2 = { |
77 |
| - "name": [ |
78 |
| - "bob von rosevich", |
79 |
| - "anantarya smith", |
80 |
| - "bob j vonrosevich", |
81 |
| - "missy elliot", |
82 |
| - "mr johnson", |
83 |
| - "quarantin directino", |
84 |
| - "missy eliot", |
85 |
| - "joseph johnson", |
86 |
| - ], |
87 |
| - "address": [ |
88 |
| - "3 Blue Drive, Chicago", |
89 |
| - "4 Blue Drive, Chicago", |
90 |
| - "8 Fancy Way, Chicago", |
91 |
| - "8 Fancy Way, Evanston", |
92 |
| - "17 Regular Road, Chicago", |
93 |
| - "42 Hollywood Boulevard, Chicago", |
94 |
| - "8 Fancy Way, Evanston", |
95 |
| - "17 Regular Road, Chicago", |
96 |
| - ], |
97 |
| -} |
98 |
| -test_df2 = pd.DataFrame(data=d2) |
99 |
| - |
100 |
| - |
101 |
| -@pytest.fixture |
102 |
| -def row_match_scen1(): |
103 |
| - return test_df2 |
104 |
| - |
105 |
| - |
106 |
| -def test_row_matches(row_match_scen1): |
107 |
| - res = row_matches( |
108 |
| - row_match_scen1, np.array([0.8, 0.2]), 0.9, calculate_string_similarity |
109 |
| - ) |
110 |
| - |
111 |
| - assert res == {0: [2], 1: [], 2: [], 3: [6], 4: [], 5: [], 6: [], 7: []} |
112 |
| - |
113 | 11 |
|
114 | 12 | # Test for dedupe function
|
115 | 13 | @pytest.fixture
|
|
0 commit comments