-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #117 from ucokzeko/new/add-assembler
NEW: Introduce assembler
- Loading branch information
Showing
96 changed files
with
1,181 additions
and
911 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
FROM python:3.6.5 | ||
|
||
# Set the working directory to /app | ||
WORKDIR /app | ||
|
||
# Copy the current directory contents into the container at /app | ||
COPY . /app | ||
|
||
# Update pip3 version to latest version | ||
RUN pip3 install --upgrade pip | ||
|
||
# Install any needed packages specified in requirements.txt | ||
RUN pip3 install --trusted-host pypi.python.org -r requirements.txt | ||
|
||
# Install surround into the image | ||
RUN python3 setup.py install |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,2 @@ | ||
include surround/*.yaml | ||
include surround/runner/web/upload.html | ||
graft templates |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
docker build --tag=surround_dev . |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,59 @@ | ||
import subprocess | ||
import logging | ||
import os | ||
from surround import Validator, Filter, Estimator, SurroundData, Assembler, Config | ||
|
||
process = subprocess.Popen(['python3', 'examples/dump-output/dump_output.py', '-c=examples/dump-output/config.yaml']) | ||
process.wait() | ||
if not (process.returncode is None) and process.returncode > 0: | ||
raise Exception('Failed to run sample dump output app. Error code: ' + str(process.returncode)) | ||
hello_file_path = "/stages/WriteHello/Output.txt" | ||
world_file_path = "/stages/WriteWorld/Output.txt" | ||
|
||
class WriteHello(Filter):
    """Filter stage that sets the data's text to "Hello" and can dump it to a file."""

    def __init__(self, dir_path):
        """Store the base directory used to build the output file path.

        Args:
            dir_path: Directory string that is prefixed to ``hello_file_path``.
        """
        self.dir_path = dir_path

    def dump_output(self, surround_data, config):
        """Write ``surround_data.text`` to this stage's output file.

        Args:
            surround_data: Object whose ``text`` attribute is written out.
            config: Unused by this method.
        """
        # Plain concatenation is intentional: hello_file_path starts with "/",
        # so os.path.join would discard dir_path.
        # The context manager closes the handle even if write() raises.
        with open(self.dir_path + hello_file_path, "w") as text_file:
            text_file.write(surround_data.text)

    def operate(self, surround_data, config):
        """Set ``surround_data.text`` to ``"Hello"``.

        Args:
            surround_data: Object whose ``text`` attribute is overwritten.
            config: Unused by this method.
        """
        surround_data.text = "Hello"
|
||
|
||
class WriteWorld(Estimator):
    """Estimator stage that sets the data's text to "World" and can dump it to a file."""

    def __init__(self, dir_path):
        """Store the base directory used to build the output file path.

        Args:
            dir_path: Directory string that is prefixed to ``world_file_path``.
        """
        self.dir_path = dir_path

    def dump_output(self, surround_data, config):
        """Write ``surround_data.text`` to this stage's output file.

        Args:
            surround_data: Object whose ``text`` attribute is written out.
            config: Unused by this method.
        """
        # Plain concatenation is intentional: world_file_path starts with "/",
        # so os.path.join would discard dir_path.
        # The context manager closes the handle even if write() raises.
        with open(self.dir_path + world_file_path, "w") as text_file:
            text_file.write(surround_data.text)

    def estimate(self, surround_data, config):
        """Set ``surround_data.text`` to ``"World"``.

        Args:
            surround_data: Object whose ``text`` attribute is overwritten.
            config: Unused by this method.
        """
        surround_data.text = "World"

    def fit(self, surround_data, config):
        """Print a notice only; this example performs no training."""
        print("Not training implementation")
|
||
|
||
class BasicData(SurroundData):
    """Data object for this example; it carries a single ``text`` field."""

    # Unset by default; the stages defined in this file assign a string to it.
    text = None
|
||
|
||
class ValidateData(Validator):
    """Validator that requires the incoming data to have no text set yet."""

    def validate(self, surround_data, config):
        """Reject data whose ``text`` attribute has already been populated.

        Args:
            surround_data: Object whose ``text`` attribute is inspected.
            config: Unused by this method.

        Raises:
            ValueError: If ``surround_data.text`` is not ``None``.
        """
        # Compare against None explicitly so that falsy-but-set values such as
        # "" are rejected too, which is what the error message states.
        if surround_data.text is not None:
            raise ValueError("'text' is not None")
|
||
if __name__ == "__main__":
    # Script entry point: configure logging, assemble the example stages,
    # run them, then print back what the stages dumped to disk.
    logging.basicConfig(level=logging.INFO)

    # Directory containing this script; config and output paths hang off it.
    path = os.path.dirname(os.path.realpath(__file__))

    app_config = Config()
    app_config.read_config_files([path + "/config.yaml"])
    assembler = Assembler("Dump output example", ValidateData())
    assembler.set_config(app_config)
    # WriteWorld is the estimator; WriteHello is passed in the accompanying
    # list (presumably filters run before the estimator -- confirm against
    # the Assembler API).
    assembler.set_estimator(WriteWorld(path), [WriteHello(path)])
    assembler.run(BasicData())

    # Read the dumped files inside context managers so the handles are closed
    # deterministically instead of being left to garbage collection.
    with open(path + hello_file_path, "r") as hello_file:
        print("Hello output.txt contains '%s'" % hello_file.read())
    with open(path + world_file_path, "r") as world_file:
        print("World output.txt contains '%s'" % world_file.read())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,23 @@ | ||
# File system runner example | ||
This example takes a csv file as input, whose path is specified in the argument -f0 and extracts the word count of a | ||
This example takes a csv file as input, whose path is specified in the argument -f0 and extracts the word count of a | ||
client's complaint (column 'Consumer complaint narrative'). | ||
If specified in the config file (argument -c), the company's name (column 'Company') is extracted as well. | ||
The word count and (optionally) the company's name are saved in a file created in the folder specified in the -o | ||
(output folder) argument. | ||
|
||
- The command line arguments are parsed and validated in the `FileSystemRunner` constructor (i.e., the `__init__()` method). | ||
The settings specified in the config file are assigned to the surround object. | ||
|
||
- The `transform()` method of the same class pre-processes the input data. In this case, the method is implemented in | ||
the `CustomFileSystemRunner` class to read each row of the csv file, create an instance of `BasicData`, call the | ||
`surround.process()` method and save the output of all processed rows in `output.txt`. | ||
|
||
|
||
- `CSVLoader` inherits from `Loader`. It is responsible for loading your data before it is processed. | ||
|
||
- `CSVValidator` inherits from `Validator` and validates that `inputs` is properly loaded. | ||
|
||
- `BasicData` inherits from `SurroundData` and consists of three fields: **row_dict** (the row as read from the csv file), | ||
**word_count** and **company**. | ||
|
||
- The `surround.process()` method calls the `operate()` method of the stage `ProcessCSV`, where the word_count | ||
- The `assembler.run()` method calls the `estimate()` method of the stage `ProcessCSV`, where the word_count | ||
and company values are extracted from the row_dict field, and set on their corresponding fields in the `BasicData` object. | ||
|
||
## Run | ||
The easiest way to run the example is by running `main.py` from surround's root folder. The arguments needed are pre-set in `main.py`: | ||
```bash | ||
python3 examples/file-adapter/main.py | ||
``` | ||
To use different arguments, we can either modify them in `main.py`, or run `file_adapter.py` from the example's folder and specify | ||
the arguments in the command line: | ||
```bash | ||
python3 file_adapter.py -f0=data/input.csv -c=config.yaml -o data/ | ||
``` | ||
|
||
|
||
To use different arguments, modify `config.yaml` to set different `input` and `output` values. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,6 @@ | ||
Surround: | ||
Loader: | ||
input: "data/input.csv" | ||
output: "data/output.txt" | ||
ProcessCSV: | ||
include_company: True |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.