Skip to content

Commit

Permalink
Merge pull request #39 from Code4GovTech/api_markdown_fix
Browse files Browse the repository at this point in the history
testcases & markdown handler changes
  • Loading branch information
karntrehan authored Jul 24, 2024
2 parents 47550b2 + ca8b54a commit c385f15
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 1 deletion.
122 changes: 122 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import unittest
from v2_utils import remove_unmatched_tags
from app import app
import json,random


class CustomTestResult(unittest.TextTestResult):
    """Text test result that also prints a line for every passing test."""

    def addSuccess(self, test):
        """Record the success, then echo ``<method name> - passed`` to stdout."""
        super().addSuccess(test)
        method_name = test._testMethodName
        print(f"{method_name} - passed")


class CustomTestRunner(unittest.TextTestRunner):
    """Text test runner whose results are collected by ``CustomTestResult``."""

    # unittest instantiates this class to collect the outcome of each test.
    resultclass = CustomTestResult


class TestRemoveUnmatchedTags(unittest.TestCase):
    """Fixed input/output pairs for the markdown handler ``remove_unmatched_tags``."""

    def _assert_cleaned(self, raw, cleaned):
        """Assert that running the handler on *raw* yields exactly *cleaned*."""
        self.assertEqual(remove_unmatched_tags(raw), cleaned)

    def test_remove_unmatched_tags_basic(self):
        # A stray closing tag inside a matched pair is expected to disappear.
        self._assert_cleaned(
            "<div>Test content</p></div>",
            "<div>Test content</div>",
        )

    def test_remove_unmatched_tags_unmatched_opening(self):
        # An opening tag that is never closed is expected to gain its closer.
        self._assert_cleaned(
            "<div>Test content",
            "<div>Test content</div>",
        )

    def test_remove_unmatched_tags_unmatched_closing(self):
        # Inner elements left open are expected to be closed innermost-first.
        self._assert_cleaned(
            "<div><span><p>Test content</div>",
            "<div><span><p>Test content</p></span></div>",
        )

    def test_remove_unmatched_tags_nested_tags(self):
        # A duplicated closing tag is expected to be dropped.
        self._assert_cleaned(
            "<div><p>Test content</p></p></div>",
            "<div><p>Test content</p></div>",
        )

    def test_remove_unmatched_tags_unmatched_nested_opening(self):
        # A nested element left open is expected to be closed before its parent.
        self._assert_cleaned(
            "<div><p>Test content</div>",
            "<div><p>Test content</p></div>",
        )

    def test_remove_unmatched_tags_unmatched_nested_closing(self):
        # Same input as the basic case: the stray </p> is expected to be removed.
        self._assert_cleaned(
            "<div>Test content</p></div>",
            "<div>Test content</div>",
        )

    def test_remove_unmatched_tags_multiple_unmatched_tags(self):
        # Only the trailing unclosed <span> is expected to change.
        self._assert_cleaned(
            "<div>Test</div><p>Content</p><span>Here",
            "<div>Test</div><p>Content</p><span>Here</span>",
        )

    def test_remove_unmatched_tags_text_with_no_tags(self):
        # Text without any tags is expected to come back untouched.
        self._assert_cleaned(
            "Plain text with no tags",
            "Plain text with no tags",
        )

    def test_remove_unmatched_tags_empty_string(self):
        # Only the length of the result is compared for the empty input.
        raw = ""
        cleaned = ""
        self.assertEqual(len(remove_unmatched_tags(raw)), len(cleaned))


class TestIssuesEndpoints(unittest.TestCase):
    """Smoke tests for the issues endpoints, driven by the ``/issues`` listing.

    ``setUp`` fetches ``/issues`` before every test; the detail tests then
    build their URLs from a randomly chosen entry of that listing.
    """

    def setUp(self):
        """Create a test client and load the ``/issues`` listing."""
        self.app = app.test_client()
        self.app.testing = True
        self.issues_data = None  # Filled in by _fetch_issues_data()

        # Fetch issues data during setup
        self._fetch_issues_data()

    def _fetch_issues_data(self):
        """Call ``/issues``, check it succeeds and store its ``issues`` list."""
        response = self.app.get('/issues')
        self.assertEqual(response.status_code, 200)

        data = json.loads(response.data)
        self.issues_data = data.get('issues', [])
        self.assertTrue(len(self.issues_data) > 0, "No issues found in response")

    def _pick_org(self, require_issues=False):
        """Return a random entry of the listing, skipping the test if none fits.

        Every entry is eligible, including the first and the last one, and a
        listing with a single entry works. With ``require_issues`` only
        entries whose ``issues`` value is non-empty are considered.
        """
        if not self.issues_data:
            self.skipTest("Skipping detail test as /issues endpoint did not return data")

        candidates = list(self.issues_data)
        if require_issues:
            # NOTE(review): entries are assumed to be dicts with 'org_name' and
            # 'issues' keys, as the lookups in the tests below imply.
            candidates = [org for org in candidates if org.get('issues')]
        if not candidates:
            self.skipTest("Skipping detail test as no organisation in /issues response has issues")

        return random.choice(candidates)

    def test_get_issues_success(self):
        """The listing fetched in ``setUp`` is populated."""
        self.assertIsNotNone(self.issues_data, "Issues data is not populated")

    def test_get_issues_detail_success(self):
        """``/v2/issues/<org>/<id>`` answers 200 for an issue taken from the listing."""
        org = self._pick_org(require_issues=True)
        sample_issue = org['issues'][0]
        issue_id = sample_issue['id']
        orgname = org['org_name']

        endpoint = f'/v2/issues/{orgname}/{issue_id}'

        response = self.app.get(endpoint)
        self.assertEqual(response.status_code, 200)

    def test_get_repo_detail_success(self):
        """``/issues/<org>`` answers 200 for an organisation taken from the listing."""
        org = self._pick_org()
        orgname = org['org_name']
        endpoint = f'/issues/{orgname}'
        response = self.app.get(endpoint)
        self.assertEqual(response.status_code, 200)



if __name__ == "__main__":
    # Run every test in this module through the runner that echoes each pass.
    runner = CustomTestRunner()
    unittest.main(testRunner=runner)
46 changes: 45 additions & 1 deletion v2_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,53 @@ def define_link_data(usernames):
logging.info(f"{e}---define_link_data")
return []

# HTML void elements never take a closing tag, so they are never tracked as open.
_VOID_TAGS = frozenset({
    "area", "base", "br", "col", "embed", "hr", "img", "input",
    "link", "meta", "param", "source", "track", "wbr",
})
# Splits text into alternating plain-text and "<...>" segments.
_TAG_SPLIT_RE = re.compile(r'(<[^>]+>)')
# Opening tag: captures the bare element name, without attributes, "/" or ">".
_OPEN_TAG_RE = re.compile(r'<([A-Za-z][^\s/>]*)[^>]*>')
# Closing tag: captures everything between "</" and ">".
_CLOSE_TAG_RE = re.compile(r'</([^>]+)>')


def preprocess_nested_tags(text):
    """Balance the HTML-like tags in *text*.

    The text is scanned left to right while a stack of open element names is
    kept:

    * a closing tag is kept only when it closes the innermost open element
      (names compared case-insensitively); any other closing tag is dropped;
    * void elements (``<br>``, ``<img ...>``), self-closing tags (``<x/>``)
      and ``<...>`` segments that do not start with a tag name (comments,
      doctype) are copied through without being tracked as open;
    * elements still open at the end get their closing tags appended,
      innermost first.

    Args:
        text: Markup to balance.

    Returns:
        The balanced markup. If anything goes wrong (for example *text* is
        not a string) the error is printed and *text* is returned unchanged.
    """
    try:
        balanced = []
        open_names = []  # Names of the currently open elements, outermost first

        for segment in _TAG_SPLIT_RE.split(text):
            closing = _CLOSE_TAG_RE.fullmatch(segment)
            if closing:
                # Compare bare names so "<div class='x'>" pairs with "</div>".
                name_parts = closing.group(1).split()
                closing_name = name_parts[0].lower() if name_parts else ''
                if open_names and open_names[-1].lower() == closing_name:
                    open_names.pop()
                    balanced.append(segment)
                # Otherwise: unmatched closing tag, dropped.
                continue

            opening = _OPEN_TAG_RE.fullmatch(segment)
            if opening:
                name = opening.group(1)
                is_standalone = segment.endswith('/>') or name.lower() in _VOID_TAGS
                if not is_standalone:
                    open_names.append(name)

            # Opening tags, standalone tags and plain text are all kept as-is.
            balanced.append(segment)

        # Close whatever is still open, innermost element first.
        balanced.extend(f'</{name}>' for name in reversed(open_names))

        return ''.join(balanced)

    except Exception as e:
        # Best effort: never let malformed input break the caller.
        print(e, "error in preprocess_nested_tags function")
        return text



def remove_unmatched_tags(text):
try:
# Preprocess text to handle unmatched nested tags
text = preprocess_nested_tags(text)

# Remove unmatched closing tags at the beginning of the string
text = re.sub(r'^\s*</[^>]+>\s*', '', text)
# Regex pattern to find matched or unmatched tags
pattern = re.compile(r'(<([^>]+)>.*?</\2>)|(<[^/][^>]*>.*?)(?=<[^/][^>]*>|$)', re.DOTALL)
matches = pattern.findall(text)

#If get text without html tags
if matches == []:
return text

cleaned_text = ''
open_tags = []

Expand All @@ -55,12 +93,18 @@ def remove_unmatched_tags(text):
tag = open_tags.pop()
cleaned_text += f'</{tag}>'

# Remove extra unmatched angle brackets
cleaned_text = re.sub(r'>+', '>', cleaned_text)
cleaned_text = re.sub(r'<+', '<', cleaned_text)

return cleaned_text

except Exception as e:
print(e)
return text






def week_data_formatter(html_content, type):
Expand Down

0 comments on commit c385f15

Please sign in to comment.