From 21ba8986318d2e4c6ac7a2134435812a7e78b8f2 Mon Sep 17 00:00:00 2001 From: Advik Kabra <64316822+advikkabra@users.noreply.github.com> Date: Mon, 19 Feb 2024 05:24:02 +0530 Subject: [PATCH] Refactor string attributes and add constant string implementations (#2524) --- integration_tests/test_str_01.py | 36 ---------- integration_tests/test_str_attributes.py | 50 ++++++++++++++ src/lpython/semantics/python_ast_to_asr.cpp | 75 +++++++++++++++------ src/runtime/lpython_builtin.py | 4 +- 4 files changed, 104 insertions(+), 61 deletions(-) diff --git a/integration_tests/test_str_01.py b/integration_tests/test_str_01.py index be31827fac..1d65fa9efc 100644 --- a/integration_tests/test_str_01.py +++ b/integration_tests/test_str_01.py @@ -37,23 +37,6 @@ def test_str_slice(): # TODO: # assert a[0:5:-1] == "" -def test_str_isalpha(): - a: str = "helloworld" - b: str = "hj kl" - c: str = "a12(){}A" - d: str = " " - e: str = "" - res: bool = a.isalpha() - res2: bool = b.isalpha() - res3: bool = c.isalpha() - res4: bool = d.isalpha() - res5: bool = e.isalpha() - assert res == True - assert res2 == False - assert res3 == False - assert res4 == False - assert res5 == False - def test_str_title(): a: str = "hello world" @@ -69,23 +52,6 @@ def test_str_title(): assert res3 == "Hello World" assert res4 == "{Hel1O}World" -def test_str_istitle(): - a: str = "Hello World" - b: str = "Hj'kl" - c: str = "hELlo wOrlD" - d: str = " Hello" - e: str = " " - res: bool = a.istitle() - res2: bool = b.istitle() - res3: bool = c.istitle() - res4: bool = d.istitle() - res5: bool = e.istitle() - assert res == True - assert res2 == False - assert res3 == False - assert res4 == True - assert res5 == False - def test_str_repeat(): a: str a = "Xyz" @@ -160,8 +126,6 @@ def check(): test_str_join_empty_list() test_constant_str_subscript() test_str_title() - test_str_istitle() - test_str_isalpha() test_str_split() check() diff --git a/integration_tests/test_str_attributes.py b/integration_tests/test_str_attributes.py index f19c7a1712..fe522434f5 100755 --- a/integration_tests/test_str_attributes.py +++ b/integration_tests/test_str_attributes.py @@ -306,6 +306,53 @@ def is_ascii(): assert s.isascii() == True +def is_alpha(): + a: str = "helloworld" + b: str = "hj kl" + c: str = "a12(){}A" + d: str = " " + e: str = "" + res: bool = a.isalpha() + res2: bool = b.isalpha() + res3: bool = c.isalpha() + res4: bool = d.isalpha() + res5: bool = e.isalpha() + assert res == True + assert res2 == False + assert res3 == False + assert res4 == False + assert res5 == False + + assert "helloworld".isalpha() == True + assert "hj kl".isalpha() == False + assert "a12(){}A".isalpha() == False + assert " ".isalpha() == False + assert "".isalpha() == False + + +def is_title(): + a: str = "Hello World" + b: str = "Hj'kl" + c: str = "hELlo wOrlD" + d: str = " Hello" + e: str = " " + res: bool = a.istitle() + res2: bool = b.istitle() + res3: bool = c.istitle() + res4: bool = d.istitle() + res5: bool = e.istitle() + assert res == True + assert res2 == False + assert res3 == False + assert res4 == True + assert res5 == False + + assert "Hello World".istitle() == True + assert "Hj'kl".istitle() == False + assert "hELlo wOrlD".istitle() == False + assert " Hello".istitle() == True + assert " ".istitle() == False + def is_space(): assert "\n".isspace() == True assert " ".isspace() == True @@ -320,6 +367,7 @@ def is_space(): assert s.isspace() == False + def check(): capitalize() lower() @@ -335,6 +383,8 @@ def check(): is_upper() is_decimal() is_ascii() + is_alpha() + is_title() is_space() diff --git a/src/lpython/semantics/python_ast_to_asr.cpp b/src/lpython/semantics/python_ast_to_asr.cpp index 6c3fe7ea9d..4e11fe01ed 100644 --- a/src/lpython/semantics/python_ast_to_asr.cpp +++ b/src/lpython/semantics/python_ast_to_asr.cpp @@ -6577,26 +6577,6 @@ class BodyVisitor : public CommonVisitor { arg.loc = loc; arg.m_value = s_var; fn_args.push_back(al, arg); - } else if (attr_name == "isalpha") { - if (args.size() != 0) { - throw SemanticError("str.isalpha() takes no arguments", - loc); - } - fn_call_name = "_lpython_str_isalpha"; - ASR::call_arg_t arg; - arg.loc = loc; - arg.m_value = s_var; - fn_args.push_back(al, arg); - } else if (attr_name == "istitle") { - if (args.size() != 0) { - throw SemanticError("str.istitle() takes no arguments", - loc); - } - fn_call_name = "_lpython_str_istitle"; - ASR::call_arg_t arg; - arg.loc = loc; - arg.m_value = s_var; - fn_args.push_back(al, arg); } else if (attr_name == "title") { if (args.size() != 0) { throw SemanticError("str.title() takes no arguments", @@ -6813,7 +6793,7 @@ class BodyVisitor : public CommonVisitor { /* String Validation Methods i.e all "is" based functions are handled here */ - std::vector validation_methods{"lower", "upper", "decimal", "ascii", "space"}; // Database of validation methods supported + std::vector validation_methods{"lower", "upper", "decimal", "ascii", "space", "alpha", "title"}; // Database of validation methods supported std::string method_name = attr_name.substr(2); if(std::find(validation_methods.begin(),validation_methods.end(), method_name) == validation_methods.end()) { @@ -7116,7 +7096,7 @@ class BodyVisitor : public CommonVisitor { * islower() method is limited to English Alphabets currently * TODO: We can support other characters from Unicode Library */ - std::vector validation_methods{"lower", "upper", "decimal", "ascii", "space"}; // Database of validation methods supported + std::vector validation_methods{"lower", "upper", "decimal", "ascii", "space", "alpha", "title"}; // Database of validation methods supported std::string method_name = attr_name.substr(2); if(std::find(validation_methods.begin(),validation_methods.end(), method_name) == validation_methods.end()) { throw SemanticError("String method not implemented: " + attr_name, loc); @@ -7214,6 +7194,57 @@ we will have to use something else. tmp = ASR::make_LogicalConstant_t(al, loc, is_space, ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4))); return; + } else if (attr_name == "isalpha") { + /* + * Specification - + Return True if all characters in the string are alphabets, + and there is at least one character in the string. + */ + bool is_alpha = (s_var.size() != 0); + for (auto &i : s_var) { + if (!((i >= 'A' && i <= 'Z') || (i >= 'a' && i <= 'z'))) { + is_alpha = false; + break; + } + } + tmp = ASR::make_LogicalConstant_t(al, loc, is_alpha, + ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4))); + return; + } else if (attr_name == "istitle") { + /* + * Specification - + Returns True if all words in the string are in title case, + and there is at least one character in the string. + */ + bool is_title = (s_var.size() != 0); + + bool in_word = false; // Represents if we are in a word or not + bool is_alpha_present = false; + for (auto &i : s_var) { + if (i >= 'A' && i <= 'Z') { + is_alpha_present = true; + if (in_word) { + // We have come across an uppercase character in the middle of a word + is_title = false; + break; + } else { + in_word = true; + } + } else if (i >= 'a' && i <= 'z') { + is_alpha_present = true; + if (!in_word) { + //We have come across a lowercase character at the start of a word + is_title = false; + break; + } + } else { + in_word = false; + } + } + is_title = is_title && is_alpha_present; + tmp = ASR::make_LogicalConstant_t(al, loc, is_title, + ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4))); + return; } else { throw SemanticError("'str' object has no attribute '" + attr_name + "'", loc); } diff --git a/src/runtime/lpython_builtin.py b/src/runtime/lpython_builtin.py index 6bf35acbd3..843624d011 100644 --- a/src/runtime/lpython_builtin.py +++ b/src/runtime/lpython_builtin.py @@ -764,9 +764,7 @@ def _lpython_str_istitle(s: str) -> bool: ch: str only_whitespace: bool = True for ch in s: - if (ch == ' ' or ch == '\t' or ch == '\n') and word_start: - continue # Found a space character at the start of a word - elif ch.isalpha() and (ord('A') <= ord(ch) and ord(ch) <= ord('Z')): + if ch.isalpha() and (ord('A') <= ord(ch) and ord(ch) <= ord('Z')): only_whitespace = False if word_start: word_start = False