Skip to content

Commit

Permalink
add train script and data for thu 2013 conf
Browse files Browse the repository at this point in the history
  • Loading branch information
hailiang-wang committed Mar 19, 2018
1 parent 1b1c038 commit a577e97
Show file tree
Hide file tree
Showing 11 changed files with 205,056 additions and 66 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ sftp-config.json
*.iws
*.idea
app/ml/build
admin/localrc
26 changes: 26 additions & 0 deletions admin/eager.ewt.test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#! /bin/bash
###########################################
#
###########################################

# constants
# Absolute directory that contains this script. Every expansion is quoted so
# that a checkout path containing whitespace does not get word-split.
baseDir=$(cd "$(dirname "$0")" && pwd)
. "$baseDir/util.sh"


#######################
# variables
#######################
PY="$baseDir/../app/eager.py"
MODEL="$baseDir/../tmp/eager.ewt.model"
TEST_DATA="$baseDir/../data/UD_English-EWT/en-ud-test.conllu"
TEST_RESULT="$baseDir/../tmp/en-ud-test.results"
LOG_VERBOSITY=0 # info

# functions


# main
# Continue only when BASH_SOURCE is empty or equals $0 (i.e. the file is being
# executed); when it is sourced, return after defining the variables above.
[ -z "${BASH_SOURCE[0]}" ] || [ "${BASH_SOURCE[0]}" = "$0" ] || return
set -x
# `test` is the function defined in util.sh, not the shell builtin.
test "$PY" "$LOG_VERBOSITY" "$MODEL" "$TEST_DATA" "$TEST_RESULT"
26 changes: 26 additions & 0 deletions admin/eager.ewt.train.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#! /bin/bash
###########################################
#
###########################################

# constants
# Absolute directory that contains this script. Every expansion is quoted so
# that a checkout path containing whitespace does not get word-split.
baseDir=$(cd "$(dirname "$0")" && pwd)
. "$baseDir/util.sh"


#######################
# variables
#######################
PY="$baseDir/../app/eager.py"
TRAIN_DATA="$baseDir/../data/UD_English-EWT/en-ud-train.conllu"
MODEL="$baseDir/../tmp/eager.ewt.model"
EPOCH=10
LOG_VERBOSITY=0 # info

# functions


# main
# Continue only when BASH_SOURCE is empty or equals $0 (i.e. the file is being
# executed); when it is sourced, return after defining the variables above.
[ -z "${BASH_SOURCE[0]}" ] || [ "${BASH_SOURCE[0]}" = "$0" ] || return
set -x
# `train` is the function defined in util.sh.
train "$PY" "$LOG_VERBOSITY" "$MODEL" "$TRAIN_DATA" "$EPOCH"
26 changes: 26 additions & 0 deletions admin/eager.thu.test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#! /bin/bash
###########################################
#
###########################################

# constants
# Absolute directory that contains this script. Every expansion is quoted so
# that a checkout path containing whitespace does not get word-split.
baseDir=$(cd "$(dirname "$0")" && pwd)
. "$baseDir/util.sh"


#######################
# variables
#######################
PY="$baseDir/../app/eager.py"
MODEL="$baseDir/../tmp/eager.thu.model"
TEST_DATA="$baseDir/../data/evsam05/THU/dev.conllu"
# Previously tmp/en-ud-test.results, the same file the EWT test script writes,
# so a THU run silently overwrote the EWT results. Use a THU-specific file.
TEST_RESULT="$baseDir/../tmp/thu-dev.results"
LOG_VERBOSITY=0 # info

# functions


# main
# Continue only when BASH_SOURCE is empty or equals $0 (i.e. the file is being
# executed); when it is sourced, return after defining the variables above.
[ -z "${BASH_SOURCE[0]}" ] || [ "${BASH_SOURCE[0]}" = "$0" ] || return
set -x
# `test` is the function defined in util.sh, not the shell builtin.
test "$PY" "$LOG_VERBOSITY" "$MODEL" "$TEST_DATA" "$TEST_RESULT"
26 changes: 26 additions & 0 deletions admin/eager.thu.train.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#! /bin/bash
###########################################
#
###########################################

# constants
# Absolute directory that contains this script. Every expansion is quoted so
# that a checkout path containing whitespace does not get word-split.
baseDir=$(cd "$(dirname "$0")" && pwd)
. "$baseDir/util.sh"


#######################
# variables
#######################
PY="$baseDir/../app/eager.py"
TRAIN_DATA="$baseDir/../data/evsam05/THU/train.conllu"
MODEL="$baseDir/../tmp/eager.thu.model"
EPOCH=10
LOG_VERBOSITY=0 # info

# functions


# main
# Continue only when BASH_SOURCE is empty or equals $0 (i.e. the file is being
# executed); when it is sourced, return after defining the variables above.
[ -z "${BASH_SOURCE[0]}" ] || [ "${BASH_SOURCE[0]}" = "$0" ] || return
set -x
# `train` is the function defined in util.sh.
train "$PY" "$LOG_VERBOSITY" "$MODEL" "$TRAIN_DATA" "$EPOCH"
33 changes: 0 additions & 33 deletions admin/test.sh

This file was deleted.

33 changes: 0 additions & 33 deletions admin/train.sh

This file was deleted.

22 changes: 22 additions & 0 deletions admin/util.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#! /bin/bash
###########################################
#
###########################################

# Run a Python entry script in test mode.
#   $1  path of the Python script to run
#   $2  value for --verbosity
#   $3  value for --model
#   $4  value for --test_data
#   $5  value for --test_results
# Arguments are quoted so values containing whitespace reach python intact.
# NOTE: once this file is sourced, this function shadows the shell builtin
# `test`; the `[` builtin is unaffected.
function test(){
    python "$1" \
        --verbosity="$2" \
        --test=True \
        --model="$3" \
        --test_data="$4" \
        --test_results="$5"
}

# Run a Python entry script in training mode.
#   $1  path of the Python script to run
#   $2  value for --verbosity
#   $3  value for --model
#   $4  value for --train_data
#   $5  value for --epoch
# Arguments are quoted so values containing whitespace reach python intact.
function train(){
    python "$1" \
        --verbosity="$2" \
        --model="$3" \
        --train=True \
        --train_data="$4" \
        --epoch="$5"
}
86 changes: 86 additions & 0 deletions app/pio/thu_2013_data_format_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#===============================================================================
#
# Copyright (c) 2017 <> All Rights Reserved
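# The data format provided by Tsinghua University for the 2nd Conference on Natural Language Processing and Chinese Computing (NLP&CC 2013) is not consistent with the CoNLL-U format.
# This program converts it to the CoNLL-U format.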
# File: /Users/hain/ai/text-dependency-parser/app/pio/thu_2013_data_format_converter.py
# Author: Hai Liang Wang
# Date: 2018-03-19:15:04:09
#
#===============================================================================

"""
Convert THU (NLP&CC 2013) dependency data files to a 10-column layout.

Each non-blank input line must hold 8 tab-separated columns; ``conv`` appends
two ``_`` placeholder columns to it and copies blank lines through unchanged.
"""
from __future__ import print_function
from __future__ import division

# Module metadata.
__copyright__ = "Copyright (c) 2017 . All Rights Reserved"
__author__ = "Hai Liang Wang"
__date__ = "2018-03-19:15:04:09"


import io
import os
import sys
curdir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(curdir)

if sys.version_info[0] < 3:
reload(sys)
sys.setdefaultencoding("utf-8")
# raise "Must be using Python 3"

# Get ENV
ENVIRON = os.environ.copy()



def conv(from_, to_):
    """Convert an 8-column tab-separated file into a 10-column one.

    Every non-blank line of ``from_`` is stripped of surrounding whitespace,
    checked to contain exactly 8 tab-separated columns, and emitted with two
    ``_`` placeholder columns appended. Blank (whitespace-only) lines are
    copied through unchanged. Nothing is written if validation fails.

    Args:
        from_: path of the input file (read as UTF-8).
        to_: path of the output file (written as UTF-8, overwritten).

    Raises:
        ValueError: if a non-blank line does not have exactly 8 columns.
    """
    result = []
    # io.open gives the same explicit-UTF-8 text handling on Python 2 and 3.
    with io.open(from_, "r", encoding="utf-8") as fin:
        for lineno, line in enumerate(fin, 1):
            row = line.strip()
            if not row:
                result.append(line)
                continue
            columns = row.split("\t")
            # Raise instead of assert: asserts are stripped under ``python -O``.
            if len(columns) != 8:
                raise ValueError(
                    "wrong text length at line %d of %s: expected 8 columns, got %d"
                    % (lineno, from_, len(columns))
                )
            result.append(row + "\t_\t_\n")

    with io.open(to_, "w", encoding="utf-8") as fout:
        fout.writelines(result)
    print("done %s" % to_)


import unittest

# run testcase: python /Users/hain/ai/text-dependency-parser/app/pio/thu_2013_data_format_converter.py Test.test_convert
class Test(unittest.TestCase):
    """Runs ``conv`` over the THU data files as a unittest case."""

    def test_convert(self):
        """Convert ``dev.conll`` and ``train.conll`` into sibling ``.conllu`` files.

        The files are looked up in ``data/evsam05/THU`` two directories above
        this module; ``conv`` raises if one of them is missing or malformed.
        """
        print("test_convert")
        data_dir = os.path.join(
            curdir, os.path.pardir, os.path.pardir, "data", "evsam05", "THU"
        )
        for name in ("dev.conll", "train.conll"):
            source = os.path.join(data_dir, name)
            # "x.conll" -> "x.conllu": the output is written next to the input.
            conv(source, "%su" % source)

def test():
    """Hand control to ``unittest.main()`` to run this module's test cases."""
    unittest.main()


if __name__ == "__main__":
    test()
Loading

0 comments on commit a577e97

Please sign in to comment.