Merge pull request #39 from Code4GovTech/api_markdown_fix

testcases & markdown handler changes
Code4GovTech · Jul 24, 2024 · c385f15 · c385f15
2 parents 47550b2 + ca8b54a
commit c385f15
Show file tree

Hide file tree

Showing 2 changed files with 167 additions and 1 deletion.
diff --git a/tests.py b/tests.py
@@ -0,0 +1,122 @@
+import unittest
+from v2_utils import remove_unmatched_tags
+from app import app
+import json,random
+
+
+class CustomTestResult(unittest.TextTestResult):
+    def addSuccess(self, test):
+        super().addSuccess(test)
+        print(f"{test._testMethodName} - passed")
+
+
+class CustomTestRunner(unittest.TextTestRunner):
+    resultclass = CustomTestResult
+
+
+class TestRemoveUnmatchedTags(unittest.TestCase):
+    """
+    Static input/expected-output test cases for the markdown handler function remove_unmatched_tags
+    """
+    def test_remove_unmatched_tags_basic(self):
+        input_text = "<div>Test content</p></div>"
+        expected_output = "<div>Test content</div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+
+    def test_remove_unmatched_tags_unmatched_opening(self):
+        input_text = "<div>Test content"
+        expected_output = "<div>Test content</div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+
+    def test_remove_unmatched_tags_unmatched_closing(self):
+        input_text = "<div><span><p>Test content</div>"
+        expected_output = "<div><span><p>Test content</p></span></div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+
+    def test_remove_unmatched_tags_nested_tags(self):
+        input_text = "<div><p>Test content</p></p></div>"
+        expected_output = "<div><p>Test content</p></div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+
+    def test_remove_unmatched_tags_unmatched_nested_opening(self):
+        input_text = "<div><p>Test content</div>"
+        expected_output = "<div><p>Test content</p></div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+
+    def test_remove_unmatched_tags_unmatched_nested_closing(self):
+        input_text = "<div>Test content</p></div>"
+        expected_output = "<div>Test content</div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+
+    def test_remove_unmatched_tags_multiple_unmatched_tags(self):
+        input_text = "<div>Test</div><p>Content</p><span>Here"
+        expected_output = "<div>Test</div><p>Content</p><span>Here</span>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+
+    def test_remove_unmatched_tags_text_with_no_tags(self):
+        input_text = "Plain text with no tags"
+        expected_output = "Plain text with no tags"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+
+    def test_remove_unmatched_tags_empty_string(self):
+        input_text = ""
+        expected_output = ""
+        self.assertEqual(len(remove_unmatched_tags(input_text)),len(expected_output))
+
+
+class TestIssuesEndpoints(unittest.TestCase):
+
+    def setUp(self):
+        self.app = app.test_client()
+        self.app.testing = True
+        self.issues_data = None  # To store issues data for use in subsequent tests
+
+        # Fetch issues data during setup
+        self._fetch_issues_data()
+
+    def _fetch_issues_data(self):
+        # Validate the /issues endpoint and store the issues data
+        response = self.app.get('/issues')
+        self.assertEqual(response.status_code, 200)
+
+        data = json.loads(response.data)
+        self.issues_data = data.get('issues', [])
+        self.assertTrue(len(self.issues_data) > 0, "No issues found in response")
+
+    def test_get_issues_success(self):
+        # Check if issues data is correctly fetched
+        self.assertIsNotNone(self.issues_data, "Issues data is not populated")
+
+    def test_get_issues_detail_success(self):
+        # Ensure the /issues endpoint was successfully called and issues data is available
+        if not self.issues_data:
+            self.skipTest("Skipping detail test as /issues endpoint did not return data")
+
+        # Pick a random org entry from the /issues response and use its first issue to form the endpoint URL
+
+        index = random.randrange(1,len(self.issues_data)-1)
+        sample_issue = self.issues_data[index]['issues'][0]
+        issue_id = sample_issue['id']
+        orgname = self.issues_data[index]['org_name']
+
+        endpoint = f'/v2/issues/{orgname}/{issue_id}'
+
+        response = self.app.get(endpoint)
+        self.assertEqual(response.status_code, 200)
+
+    def test_get_repo_detail_success(self):
+        # Ensure the /issues endpoint was successfully called and issues data is available
+        if not self.issues_data:
+            self.skipTest("Skipping detail test as /issues endpoint did not return data")
+
+        # Pick a random org entry from the /issues response to form the endpoint URL
+        index = random.randrange(1,len(self.issues_data)-1)
+        orgname = self.issues_data[index]['org_name']
+        endpoint = f'/issues/{orgname}'        
+        response = self.app.get(endpoint)
+        self.assertEqual(response.status_code, 200)
+
+
+
+if __name__ == '__main__':
+    unittest.main(testRunner=CustomTestRunner())
diff --git a/v2_utils.py b/v2_utils.py
@@ -27,15 +27,53 @@ def define_link_data(usernames):
         logging.info(f"{e}---define_link_data")
         return []
 
+def preprocess_nested_tags(text):
+    try:        
+        segments = re.split(r'(<[^>]+>)', text)
+        tag_stack = []
+        corrected_segments = []
+
+        for segment in segments:
+            if re.match(r'<[^/][^>]*>', segment):  # Opening tag
+                tag_stack.append(segment)
+                corrected_segments.append(segment)
+            elif re.match(r'</[^>]+>', segment):  # Closing tag
+                if tag_stack and tag_stack[-1][1:].split()[0] == segment[2:].split()[0]:
+                    tag_stack.pop()
+                    corrected_segments.append(segment)
+                else:
+                    continue  # Ignore unmatched closing tag
+            else:
+                corrected_segments.append(segment)
+
+        while tag_stack:
+            open_tag = tag_stack.pop()
+            tag_name = re.match(r'<([^ ]+)', open_tag).group(1)
+            corrected_segments.append(f'</{tag_name}>')
+
+        return ''.join(corrected_segments)
+
+    except Exception as e:
+        print(e,"error in preprocess_nested_tags function")
+        return text
+
+
 
 def remove_unmatched_tags(text):
     try:
+        # Preprocess text to handle unmatched nested tags
+        text = preprocess_nested_tags(text)
+
         # Remove unmatched closing tags at the beginning of the string
         text = re.sub(r'^\s*</[^>]+>\s*', '', text)
         # Regex pattern to find matched or unmatched tags
         pattern = re.compile(r'(<([^>]+)>.*?</\2>)|(<[^/][^>]*>.*?)(?=<[^/][^>]*>|$)', re.DOTALL)
         matches = pattern.findall(text)
 
+        # If the pattern finds no matches (e.g. plain text without HTML tags), return the text as-is
+        if matches == []:
+            return text
+
         cleaned_text = ''
         open_tags = []
 
@@ -55,12 +93,18 @@ def remove_unmatched_tags(text):
             tag = open_tags.pop()
             cleaned_text += f'</{tag}>'
 
+        # Collapse runs of repeated angle brackets into a single one (e.g. '>>' -> '>', '<<' -> '<')
+        cleaned_text = re.sub(r'>+', '>', cleaned_text)
+        cleaned_text = re.sub(r'<+', '<', cleaned_text)
+
         return cleaned_text
 
     except Exception as e:
         print(e)
         return text
-
+
+
+
 
 
 def week_data_formatter(html_content, type):