Skip to content

Commit

Permalink
Make output stable for some scripts
Browse files (browse the repository at this point in the history)
  • Loading branch information
cuihaoleo committed Jun 21, 2023
1 parent 6d6dd7b commit 7d79a1b
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 7 deletions.
4 changes: 2 additions & 2 deletions analyses/term-definitions/check-misleading-definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ def main():
writer = csv.DictWriter(fout, fieldnames=["type", "parent", "child"])
writer.writeheader()

for u, v in bad_datatype_links:
for u, v in sorted(bad_datatype_links):
writer.writerow(dict(type="data", parent=u, child=v))

for u, v in bad_entity_links:
for u, v in sorted(bad_entity_links):
writer.writerow(dict(type="entity", parent=u, child=v))

if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion docs/usenix-artifact-evaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -290,8 +290,8 @@ This step will create a `misleading_definitions.csv` file in each privacy policy
```
$ cat dedup/f98d62977bb0ac2e5ebe3252295b577be43a35815482bd1c07334f76c064a3b2/misleading_definitions.csv
type,parent,child
data,non-personal information,ip address
data,non-personal information,device information
data,non-personal information,ip address
```

Step 3. Run `check-self-defined-terms.py` to aggregate non-standard terms found in privacy policies into a CSV file:
Expand Down
8 changes: 4 additions & 4 deletions evals/tuples/export_policylint_tuples.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,24 +44,24 @@ def main():
logging.info("Processing %s ...", d)

app_id = os.path.basename(os.path.realpath(d))
app_tuples = defaultdict(set)
app_tuples = defaultdict(list)

for sentence, entity, datatype in con.execute("""
SELECT sentenceId, entity, data FROM AppPolicySentences S, Policy P
WHERE S.policyId == P.policyId AND P.collect == "collect" AND S.appId == ?
""", (app_id,)):
if datatype in DATATYPE_MAPPING:
if entity == "we":
app_tuples[("we", datatype)].add(sentence)
app_tuples[("we", datatype)].append(sentence)
else:
app_tuples[("3rd-party", datatype)].add(sentence)
app_tuples[("3rd-party", datatype)].append(sentence)

for (entity, datatype), all_text in app_tuples.items():
writer.writerow({
"app_id": app_id,
"entity": entity,
"datatype": datatype,
"text": "\n".join(json.dumps(s) for s in all_text),
"text": "\n".join(json.dumps(s) for s in dict.fromkeys(all_text)),
})


Expand Down

0 comments on commit 7d79a1b

Please sign in to comment.