Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NL: Switch NL place-detector default to dc #2963

Merged
merged 4 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions server/integration_tests/nlnext_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def run_sequence(self,
detector='hybrid',
check_place_detection=False,
expected_detectors=[],
place_detector='ner',
place_detector='dc',
failure=''):
if detector == 'heuristic':
detection_method = 'Heuristic Based'
Expand Down Expand Up @@ -213,7 +213,8 @@ def test_place_detection_e2e_ner(self):
'show me the population of mexico city',
'counties in the US with the most poverty',
],
check_place_detection=True)
check_place_detection=True,
place_detector='ner')

# This test uses DC's Recognize Places API.
# TODO: "US" is not detected by RecognizePlaces.
Expand All @@ -227,8 +228,7 @@ def test_place_detection_e2e_dc(self):
'show me the population of mexico city',
'counties in the US with the most poverty',
],
check_place_detection=True,
place_detector='dc')
check_place_detection=True)

def test_international(self):
self.run_sequence('international', [
Expand All @@ -249,7 +249,7 @@ def test_demo_usa_map_types(self):
# Shows map of tracts.
'household median income across tracts of Placer County',
# Shows map of ZCTAs.
'how many people are unemployed in zip codes of washington state?'
'how many people are unemployed in zip codes of washington?'
])

def test_sdg(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,19 @@
"items": [
{
"displayName": "California",
"nlQuery": "how many people are unemployed in zip codes of california state?"
"nlQuery": "how many people are unemployed in zip codes of california?"
}
]
},
"NL_RELATED_QUERY_TYPE": {
"items": [
{
"displayName": "Show highest",
"nlQuery": "how many people are unemployed in zip codes of washington state? - show highest"
"nlQuery": "how many people are unemployed in zip codes of washington? - show highest"
},
{
"displayName": "Show lowest",
"nlQuery": "how many people are unemployed in zip codes of washington state? - show lowest"
"nlQuery": "how many people are unemployed in zip codes of washington? - show lowest"
}
]
}
Expand Down
9 changes: 6 additions & 3 deletions server/lib/nl/detection/place.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ def detect_from_query_ner(cleaned_query: str, orig_query: str,
# Uses NER to detect place names, recons to DCIDs, produces PlaceDetection object.
#
def detect_from_query_dc(orig_query: str, debug_logs: Dict) -> PlaceDetection:
query_items = dc.recognize_places(orig_query)
# Recognize Places uses comma as a signal for contained-in-place.
query = utils.remove_punctuations(orig_query, include_comma=True)

query_items = dc.recognize_places(query)

nonplace_query_parts = []
places_str = []
Expand Down Expand Up @@ -131,15 +134,15 @@ def detect_from_query_dc(orig_query: str, debug_logs: Dict) -> PlaceDetection:
# Set PlaceDetection.
query_without_place_substr = ' '.join(nonplace_query_parts)
place_detection = PlaceDetection(
query_original=orig_query,
query_original=query,
query_without_place_substr=query_without_place_substr,
query_places_mentioned=places_str,
places_found=resolved_places,
main_place=main_place)
_set_query_detection_debug_logs(place_detection, debug_logs)
# This only makes sense for this flow.
debug_logs["query_transformations"] = {
"place_detection_input": orig_query,
"place_detection_input": query,
"place_detection_with_places_removed": query_without_place_substr,
}
return place_detection
Expand Down
16 changes: 9 additions & 7 deletions server/routes/nl/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def data():
if not current_app.config.get('NL_BAD_WORDS'):
logging.error('Missing NL_BAD_WORDS config!')
flask.abort(404)
nl_bad_words = current_app.config['NL_BAD_WORDS']

disaster_config = current_app.config['NL_DISASTER_CONFIG']
if current_app.config['LOCAL']:
Expand All @@ -82,27 +83,28 @@ def data():
'detector', default=RequestedDetectorType.Heuristic.value, type=str)

place_detector_type = request.args.get('place_detector',
default='ner',
default='dc',
type=str).lower()
if place_detector_type not in [PlaceDetectorType.NER, PlaceDetectorType.DC]:
logging.error(f'Unknown place_detector {place_detector_type}')
place_detector_type = PlaceDetectorType.NER
else:
place_detector_type = PlaceDetectorType(place_detector_type)

query = str(escape(shared_utils.remove_punctuations(original_query)))
if not query:
return _abort('Received an empty query, please type a few words :)',
original_query, context_history)

#
# Check offensive words
#
if not bad_words.is_safe(original_query, current_app.config['NL_BAD_WORDS']):
if (not bad_words.is_safe(original_query, nl_bad_words) or
not bad_words.is_safe(query, nl_bad_words)):
return _abort(
'The query was rejected due to the ' +
'presence of inappropriate words.', original_query, context_history)

query = str(escape(shared_utils.remove_punctuations(original_query)))
if not query:
return _abort('Received an empty query, please type a few words :)',
original_query, context_history)

counters = ctr.Counters()
query_detection_debug_logs = {}
query_detection_debug_logs["original_query"] = query
Expand Down
7 changes: 5 additions & 2 deletions shared/lib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,12 @@ def combine_stop_words() -> Set[str]:
return stop_words


def remove_punctuations(s):
def remove_punctuations(s, include_comma=False):
s = s.replace('\'s', '')
s = re.sub(r'[^\w\s]', ' ', s)
if include_comma:
s = re.sub(r'[^\w\s,]', ' ', s)
else:
s = re.sub(r'[^\w\s]', ' ', s)
return " ".join(s.split())


Expand Down
8 changes: 3 additions & 5 deletions static/js/apps/nl_interface/app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ export function App(): JSX.Element {
const [detector, setDetector] = useState(
getUrlTokenOrDefault(NL_URL_PARAMS.DETECTOR, NL_DETECTOR_VALS.HEURISTIC)
);
const [placeDetector, setPlaceDetector] = useState(
const placeDetector = useRef<string>(
getUrlTokenOrDefault(
NL_URL_PARAMS.PLACE_DETECTOR,
NL_PLACE_DETECTOR_VALS.NER
NL_PLACE_DETECTOR_VALS.DC
)
);
const urlPrompts = useRef(getUrlPrompts());
Expand Down Expand Up @@ -190,7 +190,7 @@ export function App(): JSX.Element {
query={q}
indexType={indexType}
detector={detector}
placeDetector={placeDetector}
placeDetector={placeDetector.current}
contextHistory={getContextHistory(i)}
addContextCallback={addContext}
showData={false}
Expand Down Expand Up @@ -221,10 +221,8 @@ export function App(): JSX.Element {
}}
indexType={indexType}
detector={detector}
placeDetector={placeDetector}
setIndexType={setIndexType}
setDetector={setDetector}
setPlaceDetector={setPlaceDetector}
/>
{isStartState && <QueryHistory onItemClick={onQueryItemClick} />}
</div>
Expand Down
31 changes: 0 additions & 31 deletions static/js/apps/nl_interface/nl_options.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ interface NLOptionsPropType {
setIndexType: (idx: string) => void;
detector: string;
setDetector: (v: string) => void;
placeDetector: string;
setPlaceDetector: (v: string) => void;
}

export function NLOptions(props: NLOptionsPropType): JSX.Element {
Expand Down Expand Up @@ -109,35 +107,6 @@ export function NLOptions(props: NLOptionsPropType): JSX.Element {
</Label>
</FormGroup>
</div>
<div className="nl-options-label">Place:</div>
<div className="nl-options-input-radio">
<FormGroup>
<Label>
<Input
checked={props.placeDetector === NL_PLACE_DETECTOR_VALS.NER}
id="nl-place-ner"
type="radio"
value={NL_PLACE_DETECTOR_VALS.NER}
onChange={() => {
props.setPlaceDetector(NL_PLACE_DETECTOR_VALS.NER);
}}
/>
NER
</Label>
<Label>
<Input
checked={props.placeDetector === NL_PLACE_DETECTOR_VALS.DC}
id="nl-place-dc"
type="radio"
value={NL_PLACE_DETECTOR_VALS.DC}
onChange={() => {
props.setPlaceDetector(NL_PLACE_DETECTOR_VALS.DC);
}}
/>
DC
</Label>
</FormGroup>
</div>
</div>
);
}
4 changes: 0 additions & 4 deletions static/js/apps/nl_interface/query_search.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ interface QuerySearchPropType {
setIndexType: (idx: string) => void;
detector: string;
setDetector: (v: string) => void;
placeDetector: string;
setPlaceDetector: (v: string) => void;
}

export function QuerySearch(props: QuerySearchPropType): JSX.Element {
Expand Down Expand Up @@ -104,8 +102,6 @@ export function QuerySearch(props: QuerySearchPropType): JSX.Element {
setIndexType={props.setIndexType}
detector={props.detector}
setDetector={props.setDetector}
placeDetector={props.placeDetector}
setPlaceDetector={props.setPlaceDetector}
/>
</Container>
</div>
Expand Down