Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support meta-data for suspicion reasons #41

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 24 additions & 15 deletions osmcha/changeset.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ class InvalidChangesetError(Exception):
pass


class Reason:
"""Details and meta-data for a suspicion reason."""
def __init__(self, identifier, label=None, source="osmcha"):
self.identifier = identifier
self.label = label if label else identifier
self.source = source


def get_user_details(user_id):
"""Get information about number of changesets, blocks and mapping days of a
user, using both the OSM API and the Mapbox comments APIself.
Expand All @@ -56,7 +64,7 @@ def get_user_details(user_id):
changesets = [i for i in xml_data if i.tag == 'changesets'][0]
blocks = [i for i in xml_data if i.tag == 'blocks'][0]
if int(changesets.get('count')) <= 5:
reasons.append('New mapper')
reasons.append(Reason('New mapper'))
elif int(changesets.get('count')) <= 30:
url = MAPBOX_USERS_API.format(
user_id=requests.compat.quote(user_id)
Expand All @@ -67,9 +75,9 @@ def get_user_details(user_id):
user_request.json().get('extra').get('mapping_days')
)
if mapping_days <= 5:
reasons.append('New mapper')
reasons.append(Reason('New mapper'))
if int(blocks[0].get('count')) > 1:
reasons.append('User has multiple blocks')
reasons.append(Reason('User has multiple blocks'))
except Exception as e:
message = 'Could not verify user of the changeset: {}, {}'
print(message.format(user_id, str(e)))
Expand Down Expand Up @@ -239,7 +247,6 @@ def filter(self):
if get_bounds(ch).intersects(self.area)
]


class Analyse(object):
"""Analyse a changeset and define if it is suspect."""
def __init__(self, changeset, create_threshold=200, modify_threshold=200,
Expand Down Expand Up @@ -299,6 +306,7 @@ def set_fields(self, changeset):
'%Y-%m-%dT%H:%M:%SZ'
)
self.suspicion_reasons = []
self.detailed_reasons = {}
self.is_suspect = False
self.powerfull_editor = False
self.warning_tags = [
Expand All @@ -307,7 +315,8 @@ def set_fields(self, changeset):

def label_suspicious(self, reason):
"""Add suspicion reason and set the suspicious flag."""
self.suspicion_reasons.append(reason)
self.suspicion_reasons.append(reason.identifier)
self.detailed_reasons[reason.identifier] = reason
self.is_suspect = True

def full_analysis(self):
Expand All @@ -318,12 +327,12 @@ def full_analysis(self):
self.verify_warning_tags()

if self.review_requested == 'yes':
self.label_suspicious('Review requested')
self.label_suspicious(Reason('Review requested'))

def verify_warning_tags(self):
for tag in self.warning_tags:
if tag in self.enabled_warnings.keys():
self.label_suspicious(self.enabled_warnings.get(tag))
self.label_suspicious(Reason(self.enabled_warnings.get(tag), None, self.editor))

def verify_user(self):
"""Verify if the changeset was made by a inexperienced mapper (anyone
Expand All @@ -338,21 +347,21 @@ def verify_words(self):
"""
if self.comment:
if find_words(self.comment, self.suspect_words, self.excluded_words):
self.label_suspicious('suspect_word')
self.label_suspicious(Reason('suspect_word'))

if self.source:
for word in self.illegal_sources:
if word in self.source.lower():
if word == 'yandex' and 'yandex panorama' in self.source.lower():
pass
else:
self.label_suspicious('suspect_word')
self.label_suspicious(Reason('suspect_word'))
break

if self.imagery_used:
for word in self.illegal_sources:
if word in self.imagery_used.lower():
self.label_suspicious('suspect_word')
self.label_suspicious(Reason('suspect_word'))
break

self.suspicion_reasons = list(set(self.suspicion_reasons))
Expand Down Expand Up @@ -383,10 +392,10 @@ def verify_editor(self):
'mapwith.ai'
]
if self.host.split('://')[-1].split('/')[0] not in trusted_hosts:
self.label_suspicious('Unknown iD instance')
self.label_suspicious(Reason('Unknown iD instance'))
else:
self.powerfull_editor = True
self.label_suspicious('Software editor was not declared')
self.label_suspicious(Reason('Software editor was not declared'))

def count(self):
"""Count the number of elements created, modified and deleted by the
Expand All @@ -404,14 +413,14 @@ def count(self):
if (self.create / len(actions) > self.percentage and
self.create > self.create_threshold and
(self.powerfull_editor or self.create > self.top_threshold)):
self.label_suspicious('possible import')
self.label_suspicious(Reason('possible import'))
elif (self.modify / len(actions) > self.percentage and
self.modify > self.modify_threshold):
self.label_suspicious('mass modification')
self.label_suspicious(Reason('mass modification'))
elif ((self.delete / len(actions) > self.percentage and
self.delete > self.delete_threshold) or
self.delete > self.top_threshold):
self.label_suspicious('mass deletion')
self.label_suspicious(Reason('mass deletion'))
except ZeroDivisionError:
print('It seems this changeset was redacted')

Expand Down
12 changes: 11 additions & 1 deletion osmcha/scripts/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,19 @@ def cli(id):
'Created: %s. Modified: %s. Deleted: %s' % (ch.create, ch.modify, ch.delete)
)
if ch.is_suspect:
reasonDescriptions = [formatted_reason(reason) for reason in ch.detailed_reasons.values()]
click.echo('The changeset {} is suspect! Reasons: {}'.format(
id,
', '.join(ch.suspicion_reasons)
', '.join(reasonDescriptions)
))
else:
click.echo('The changeset %s is not suspect!' % id)

def formatted_reason(reason):
"""
Generate a formatted description of the reason using its label and appending
the source of the reason if it was not from osmcha (e.g. if it originated
from an editor warning)
"""
sourceLabel = ' ({})'.format(reason.source) if reason.source != 'osmcha' else ''
return '{}{}'.format(reason.label, sourceLabel)
13 changes: 9 additions & 4 deletions tests/test_mod.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from shapely.geometry import Polygon

from osmcha.changeset import ChangesetList
from osmcha.changeset import Reason
from osmcha.changeset import Analyse
from osmcha.changeset import WORDS
from osmcha.changeset import find_words
Expand Down Expand Up @@ -100,8 +101,9 @@ def test_analyse_label_suspicious():
])
}
ch = Analyse(ch_dict)
ch.label_suspicious('some reason')
ch.label_suspicious(Reason('some reason'))
assert 'some reason' in ch.suspicion_reasons
assert ch.detailed_reasons['some reason'].identifier == 'some reason'
assert ch.is_suspect


Expand Down Expand Up @@ -601,10 +603,11 @@ def test_get_dict():
assert 'is_suspect' in ch.get_dict().keys()
assert 'powerfull_editor' in ch.get_dict().keys()
assert 'suspicion_reasons' in ch.get_dict().keys()
assert 'detailed_reasons' in ch.get_dict().keys()
assert 'create' in ch.get_dict().keys()
assert 'modify' in ch.get_dict().keys()
assert 'delete' in ch.get_dict().keys()
assert len(ch.get_dict().keys()) == 15
assert len(ch.get_dict().keys()) == 16

# An iD changeset with warnings:
ch = Analyse(72783703)
Expand All @@ -621,10 +624,11 @@ def test_get_dict():
assert 'is_suspect' in ch.get_dict().keys()
assert 'powerfull_editor' in ch.get_dict().keys()
assert 'suspicion_reasons' in ch.get_dict().keys()
assert 'detailed_reasons' in ch.get_dict().keys()
assert 'create' in ch.get_dict().keys()
assert 'modify' in ch.get_dict().keys()
assert 'delete' in ch.get_dict().keys()
assert len(ch.get_dict().keys()) == 15
assert len(ch.get_dict().keys()) == 16

# A JOSM changeset
ch = Analyse(46315321)
Expand All @@ -641,10 +645,11 @@ def test_get_dict():
assert 'is_suspect' in ch.get_dict().keys()
assert 'powerfull_editor' in ch.get_dict().keys()
assert 'suspicion_reasons' in ch.get_dict().keys()
assert 'detailed_reasons' in ch.get_dict().keys()
assert 'create' in ch.get_dict().keys()
assert 'modify' in ch.get_dict().keys()
assert 'delete' in ch.get_dict().keys()
assert len(ch.get_dict().keys()) == 15
assert len(ch.get_dict().keys()) == 16


def test_changeset_without_tags():
Expand Down