Skip to content

Commit

Permalink
NL: Switch NL place-detector default to dc (#2963)
Browse files Browse the repository at this point in the history
Also, remove the check-box from frontend. The diffs are pretty clean
(except for an underlying bug -
#2962).

Finally, check for bad-words also from punctuation-removed query.
  • Loading branch information
pradh authored Jul 19, 2023
1 parent 0f35e26 commit 844333f
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 60 deletions.
10 changes: 5 additions & 5 deletions server/integration_tests/nlnext_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def run_sequence(self,
detector='hybrid',
check_place_detection=False,
expected_detectors=[],
place_detector='ner',
place_detector='dc',
failure=''):
if detector == 'heuristic':
detection_method = 'Heuristic Based'
Expand Down Expand Up @@ -213,7 +213,8 @@ def test_place_detection_e2e_ner(self):
'show me the population of mexico city',
'counties in the US with the most poverty',
],
check_place_detection=True)
check_place_detection=True,
place_detector='ner')

# This test uses DC's Recognize Places API.
# TODO: "US" is not detected by RecognizePlaces.
Expand All @@ -227,8 +228,7 @@ def test_place_detection_e2e_dc(self):
'show me the population of mexico city',
'counties in the US with the most poverty',
],
check_place_detection=True,
place_detector='dc')
check_place_detection=True)

def test_international(self):
self.run_sequence('international', [
Expand All @@ -249,7 +249,7 @@ def test_demo_usa_map_types(self):
# Shows map of tracts.
'household median income across tracts of Placer County',
# Shows map of ZCTAs.
'how many people are unemployed in zip codes of washington state?'
'how many people are unemployed in zip codes of washington?'
])

def test_sdg(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,19 @@
"items": [
{
"displayName": "California",
"nlQuery": "how many people are unemployed in zip codes of california state?"
"nlQuery": "how many people are unemployed in zip codes of california?"
}
]
},
"NL_RELATED_QUERY_TYPE": {
"items": [
{
"displayName": "Show highest",
"nlQuery": "how many people are unemployed in zip codes of washington state? - show highest"
"nlQuery": "how many people are unemployed in zip codes of washington? - show highest"
},
{
"displayName": "Show lowest",
"nlQuery": "how many people are unemployed in zip codes of washington state? - show lowest"
"nlQuery": "how many people are unemployed in zip codes of washington? - show lowest"
}
]
}
Expand Down
9 changes: 6 additions & 3 deletions server/lib/nl/detection/place.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ def detect_from_query_ner(cleaned_query: str, orig_query: str,
# Uses NER to detect place names, recons to DCIDs, produces PlaceDetection object.
#
def detect_from_query_dc(orig_query: str, debug_logs: Dict) -> PlaceDetection:
query_items = dc.recognize_places(orig_query)
# Recognize Places uses comma as a signal for contained-in-place.
query = utils.remove_punctuations(orig_query, include_comma=True)

query_items = dc.recognize_places(query)

nonplace_query_parts = []
places_str = []
Expand Down Expand Up @@ -131,15 +134,15 @@ def detect_from_query_dc(orig_query: str, debug_logs: Dict) -> PlaceDetection:
# Set PlaceDetection.
query_without_place_substr = ' '.join(nonplace_query_parts)
place_detection = PlaceDetection(
query_original=orig_query,
query_original=query,
query_without_place_substr=query_without_place_substr,
query_places_mentioned=places_str,
places_found=resolved_places,
main_place=main_place)
_set_query_detection_debug_logs(place_detection, debug_logs)
# This only makes sense for this flow.
debug_logs["query_transformations"] = {
"place_detection_input": orig_query,
"place_detection_input": query,
"place_detection_with_places_removed": query_without_place_substr,
}
return place_detection
Expand Down
16 changes: 9 additions & 7 deletions server/routes/nl/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def data():
if not current_app.config.get('NL_BAD_WORDS'):
logging.error('Missing NL_BAD_WORDS config!')
flask.abort(404)
nl_bad_words = current_app.config['NL_BAD_WORDS']

disaster_config = current_app.config['NL_DISASTER_CONFIG']
if current_app.config['LOCAL']:
Expand All @@ -82,27 +83,28 @@ def data():
'detector', default=RequestedDetectorType.Heuristic.value, type=str)

place_detector_type = request.args.get('place_detector',
default='ner',
default='dc',
type=str).lower()
if place_detector_type not in [PlaceDetectorType.NER, PlaceDetectorType.DC]:
logging.error(f'Unknown place_detector {place_detector_type}')
place_detector_type = PlaceDetectorType.NER
else:
place_detector_type = PlaceDetectorType(place_detector_type)

query = str(escape(shared_utils.remove_punctuations(original_query)))
if not query:
return _abort('Received an empty query, please type a few words :)',
original_query, context_history)

#
# Check offensive words
#
if not bad_words.is_safe(original_query, current_app.config['NL_BAD_WORDS']):
if (not bad_words.is_safe(original_query, nl_bad_words) or
not bad_words.is_safe(query, nl_bad_words)):
return _abort(
'The query was rejected due to the ' +
'presence of inappropriate words.', original_query, context_history)

query = str(escape(shared_utils.remove_punctuations(original_query)))
if not query:
return _abort('Received an empty query, please type a few words :)',
original_query, context_history)

counters = ctr.Counters()
query_detection_debug_logs = {}
query_detection_debug_logs["original_query"] = query
Expand Down
7 changes: 5 additions & 2 deletions shared/lib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,12 @@ def combine_stop_words() -> Set[str]:
return stop_words


def remove_punctuations(s):
def remove_punctuations(s, include_comma=False):
s = s.replace('\'s', '')
s = re.sub(r'[^\w\s]', ' ', s)
if include_comma:
s = re.sub(r'[^\w\s,]', ' ', s)
else:
s = re.sub(r'[^\w\s]', ' ', s)
return " ".join(s.split())


Expand Down
8 changes: 3 additions & 5 deletions static/js/apps/nl_interface/app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ export function App(): JSX.Element {
const [detector, setDetector] = useState(
getUrlTokenOrDefault(NL_URL_PARAMS.DETECTOR, NL_DETECTOR_VALS.HEURISTIC)
);
const [placeDetector, setPlaceDetector] = useState(
const placeDetector = useRef<string>(
getUrlTokenOrDefault(
NL_URL_PARAMS.PLACE_DETECTOR,
NL_PLACE_DETECTOR_VALS.NER
NL_PLACE_DETECTOR_VALS.DC
)
);
const urlPrompts = useRef(getUrlPrompts());
Expand Down Expand Up @@ -190,7 +190,7 @@ export function App(): JSX.Element {
query={q}
indexType={indexType}
detector={detector}
placeDetector={placeDetector}
placeDetector={placeDetector.current}
contextHistory={getContextHistory(i)}
addContextCallback={addContext}
showData={false}
Expand Down Expand Up @@ -221,10 +221,8 @@ export function App(): JSX.Element {
}}
indexType={indexType}
detector={detector}
placeDetector={placeDetector}
setIndexType={setIndexType}
setDetector={setDetector}
setPlaceDetector={setPlaceDetector}
/>
{isStartState && <QueryHistory onItemClick={onQueryItemClick} />}
</div>
Expand Down
31 changes: 0 additions & 31 deletions static/js/apps/nl_interface/nl_options.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ interface NLOptionsPropType {
setIndexType: (idx: string) => void;
detector: string;
setDetector: (v: string) => void;
placeDetector: string;
setPlaceDetector: (v: string) => void;
}

export function NLOptions(props: NLOptionsPropType): JSX.Element {
Expand Down Expand Up @@ -109,35 +107,6 @@ export function NLOptions(props: NLOptionsPropType): JSX.Element {
</Label>
</FormGroup>
</div>
<div className="nl-options-label">Place:</div>
<div className="nl-options-input-radio">
<FormGroup>
<Label>
<Input
checked={props.placeDetector === NL_PLACE_DETECTOR_VALS.NER}
id="nl-place-ner"
type="radio"
value={NL_PLACE_DETECTOR_VALS.NER}
onChange={() => {
props.setPlaceDetector(NL_PLACE_DETECTOR_VALS.NER);
}}
/>
NER
</Label>
<Label>
<Input
checked={props.placeDetector === NL_PLACE_DETECTOR_VALS.DC}
id="nl-place-dc"
type="radio"
value={NL_PLACE_DETECTOR_VALS.DC}
onChange={() => {
props.setPlaceDetector(NL_PLACE_DETECTOR_VALS.DC);
}}
/>
DC
</Label>
</FormGroup>
</div>
</div>
);
}
4 changes: 0 additions & 4 deletions static/js/apps/nl_interface/query_search.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ interface QuerySearchPropType {
setIndexType: (idx: string) => void;
detector: string;
setDetector: (v: string) => void;
placeDetector: string;
setPlaceDetector: (v: string) => void;
}

export function QuerySearch(props: QuerySearchPropType): JSX.Element {
Expand Down Expand Up @@ -104,8 +102,6 @@ export function QuerySearch(props: QuerySearchPropType): JSX.Element {
setIndexType={props.setIndexType}
detector={props.detector}
setDetector={props.setDetector}
placeDetector={props.placeDetector}
setPlaceDetector={props.setPlaceDetector}
/>
</Container>
</div>
Expand Down

0 comments on commit 844333f

Please sign in to comment.