-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbilou2bio.py
executable file
·30 lines (25 loc) · 911 Bytes
/
bilou2bio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/usr/bin/env python3
# coding=utf-8
#
# Copyright 2018 Institute of Formal and Applied Linguistics, Faculty of
# Mathematics and Physics, Charles University, Czech Republic.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Converts CoNLL file from BILOU to BIO encoding."""
import sys
def bilou_to_bio(label):
    """Return the BIO equivalent of a single BILOU label.

    Only the leading marker is rewritten: a ``U-`` prefix becomes ``B-`` and
    an ``L-`` prefix becomes ``I-``. Every other label is returned unchanged.
    The remainder of the label is copied verbatim, so a type that itself
    contains ``U-`` or ``L-`` (e.g. ``U-EU-ORG``) is not corrupted.
    """
    if label.startswith("U-"):
        return "B-" + label[2:]
    if label.startswith("L-"):
        return "I-" + label[2:]
    return label


if __name__ == "__main__":
    # Stream stdin to stdout line by line; nothing is buffered.
    for line in sys.stdin:
        line = line.rstrip("\r\n")
        if not line:
            # Empty input lines are reproduced as empty output lines.
            print()
        else:
            # Each non-empty line must hold exactly four whitespace-separated
            # columns; any other count raises ValueError from the unpacking.
            form, lemma, tag, label = line.split()
            print("\t".join([form, lemma, tag, bilou_to_bio(label)]))