Skip to content

Commit

Permalink
o'clock fix (#13)
Browse files Browse the repository at this point in the history
* Implement extra function to fix and enhance o'clock behaviour, 9 tests failing

* Bug fix to solve 9 previous test failures.

* Implement function to convert word numbers to digit numbers + add tests

* Modify default value + add tests to improve coverage
  • Loading branch information
umerhasan17 authored Feb 20, 2020
1 parent 1393a66 commit 6a601be
Showing 1 changed file with 54 additions and 3 deletions.
57 changes: 54 additions & 3 deletions timefhuman/categorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def categorize(tokens, now):
[7/17/2018, 3 pm, '-', 7/19/2018, 2 pm]
"""
tokens = list(tokens)
tokens = convert_words_to_numbers(tokens)
tokens = convert_day_of_week(tokens, now)
tokens = convert_relative_days(tokens, now)
tokens = convert_time_of_day(tokens)
Expand All @@ -37,6 +38,24 @@ def categorize(tokens, now):
return tokens


# TODO: add conversions for thirty, forty-five
# TODO: maybe set default for seven o'clock to 7 pm not am?
def convert_words_to_numbers(tokens):
    """Replace spelled-out numbers with their digit strings.

    Only the words "zero" through "twelve" are recognised, matched
    case-insensitively. Tokens that are not strings, or that are not one of
    the recognised words, are left untouched.

    The list passed in is modified in place and the same list object is
    returned, so callers may use either the argument or the return value.

    >>> convert_words_to_numbers(['five', "o'clock"])
    ['5', "o'clock"]
    >>> convert_words_to_numbers(['seven', "o'clock"])
    ['7', "o'clock"]
    >>> convert_words_to_numbers(['Twelve', 'pm'])
    ['12', 'pm']
    """
    number_words = ("zero", "one", "two", "three", "four", "five", "six",
                    "seven", "eight", "nine", "ten", "eleven", "twelve")
    # A word's position in the tuple is its numeric value; build the lookup
    # once instead of scanning the sequence twice for every token.
    digits_by_word = {word: str(value)
                      for value, word in enumerate(number_words)}

    for index, token in enumerate(tokens):
        # Non-string tokens have no .lower(); pass them through unchanged
        # rather than raising AttributeError.
        if not isinstance(token, str):
            continue
        digits = digits_by_word.get(token.lower())
        if digits is not None:
            tokens[index] = digits
    return tokens


# TODO: "monday next week"
def convert_day_of_week(tokens, now=datetime.datetime.now()):
"""Convert day-of-week vernacular into date-like string.
Expand Down Expand Up @@ -90,7 +109,6 @@ def convert_relative_days(tokens, now=datetime.datetime.now()):
return tokens



# TODO: convert to new token-based system
def extract_weeks_offset(tokens, end=None, key_tokens=(
'next', 'previous', 'last', 'upcoming', 'past', 'prev')):
Expand Down Expand Up @@ -316,6 +334,35 @@ def extract_hour_minute(string, time_of_day=None):
return TimeToken(relative_hour=hour, minute=minute, time_of_day=time_of_day)


def extract_hour_minute_token(tokens, time_of_day=None):
    """Locate and parse the hour/minute token near the end of ``tokens``.

    ``maybe_substitute_hour_minute`` calls this with the tokens that precede
    an am/pm marker, so the time is expected to be either the last token or
    the one before it (e.g. ``['3', "o'clock"]``).

    Candidates are tried from the end: ``tokens[-1]`` first, then
    ``tokens[-2]``. Each one is handed to ``extract_hour_minute`` along with
    ``time_of_day``; the first call that does not raise wins. A
    ``ValueError`` or ``IndexError`` for a candidate (for instance because
    the list is too short to have that position) moves on to the next one.

    Returns a pair ``(offset, value)`` where ``offset`` is the negative
    position (``-1`` or ``-2``) of the token that was parsed and ``value`` is
    whatever ``extract_hour_minute`` returned for it. If neither candidate
    can be used, the fallback ``(-1, 12)`` is returned; note that the
    fallback value is the plain integer ``12``.

    Tests for this helper are included in maybe_substitute_hour_minute.
    """
    # How many trailing tokens to inspect.
    lookback = 2
    for offset in range(-1, -lookback - 1, -1):
        try:
            parsed = extract_hour_minute(tokens[offset], time_of_day)
        except (ValueError, IndexError):
            # Either the candidate could not be parsed, or there is no
            # token at this position; try the next one back.
            continue
        return offset, parsed
    # Nothing usable was found: default return value.
    return -1, 12


def maybe_substitute_hour_minute(tokens):
"""Attempt to extract hour and minute.
Expand All @@ -340,15 +387,19 @@ def maybe_substitute_hour_minute(tokens):
['7/17/18', 3 pm]
>>> maybe_substitute_hour_minute(['3', 'p.m.', '-', '4', 'p.m.'])
[3 pm, '-', 4 pm]
>>> maybe_substitute_hour_minute(['5', "o'clock", 'pm'])
[5 pm]
>>> maybe_substitute_hour_minute(['12', "o'clock", 'pm'])
[12 pm]
"""
remove_dots = lambda token: token.replace('.', '')
temp_tokens = clean_tokens(tokens, remove_dots)

for time_of_day in ('am', 'pm'):
while time_of_day in temp_tokens:
index = temp_tokens.index(time_of_day)
time_token = extract_hour_minute(temp_tokens[index-1], time_of_day)
tokens = tokens[:index-1] + [time_token] + tokens[index+1:]
(unchanged_index, time_token) = extract_hour_minute_token(temp_tokens[:index], time_of_day)
tokens = tokens[:index+unchanged_index] + [time_token] + tokens[index+1:]
temp_tokens = clean_tokens(tokens, remove_dots)

tokens = [extract_hour_minute(token, None)
Expand Down

0 comments on commit 6a601be

Please sign in to comment.