Commit d6294fd

initial import

khimaros committed Dec 9, 2022
Showing 8 changed files with 1,127 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
__pycache__/**
output/**
transformable.toml
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions MODELS.md
@@ -0,0 +1,10 @@
# MODELS

notes for working with transformer models

## KNOWN WORKING

- xlnet-large-cased [text-generation]
- distilbert-base-uncased-distilled-squad [question-answering]
- facebook/blenderbot-400M-distill [conversational]

102 changes: 102 additions & 0 deletions README.md
@@ -0,0 +1,102 @@
# TRANSFORMABLE

simple command line utility for working with text via HuggingFace transformer pipelines (supports CPU inference)

## FEATURES

- works with or without a GPU for [almost any](MODELS.md) transformer model
- by default runs completely offline, on your local machine
- automate an unlimited sequence of generation tasks for set-and-forget use
- simple TOML configuration file for storing parameters and prompts
- can be configured to auto-download models from HuggingFace
- simple and fun command line interface

## USAGE

clone this repository.

install python requirements:

```shell
pip install -r requirements.txt
```

install git-lfs for large file storage support.

clone huggingface repositories into `~/src/huggingface.co/`:

```shell
git clone --recurse-submodules \
https://huggingface.co/distilbert-base-uncased-distilled-squad/ \
~/src/huggingface.co/distilbert-base-uncased-distilled-squad/
```

generate a few responses using the default model (`distilbert-base-uncased-distilled-squad`):

```shell
python ./transformable.py -c ''
```

enable automatic fetching of a model from huggingface and use a manual seed:

```shell
python ./transformable.py \
--download_models \
--model=xlnet-large-cased \
--seed=31911 \
'The meaning of life is'
```

automatically downloaded models are stored in `~/.cache/huggingface/`.

to dump the configuration for inspection, append the `--dump` flag to any command.

to repeat tasks (useful with random seed), use the `--repeat` flag.
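why a seed matters for `--repeat`: a fixed seed reproduces the same sampling sequence on every repetition, while no seed varies the outputs. a minimal stdlib sketch (illustration only, not the project's code; real inference seeds torch rather than `random`):

```python
import random

def sample(seed=None):
    # stand-in for one generation task
    rng = random.Random(seed)
    return [rng.randrange(1000) for _ in range(3)]

# a fixed seed reproduces the same sequence on every repetition
assert sample(31911) == sample(31911)

# with no seed, each repetition samples differently
print(sample(), sample())
```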

for more detailed usage, see help:

```shell
python ./transformable.py --help
```

## CONFIGURATION

transformable tasks can be configured in TOML format.

by default, tasks will be read from `./transformable.toml` if it exists.

the config file uses the same keys as the flag names above.

see [transformable.example.toml](transformable.example.toml) for an example.

to execute a task from the config:

```shell
python ./transformable.py -c ./transformable.example.toml -t introductions
```

by default, the TOML section is used as the output name.
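for example, a section named `introductions` might yield an output file like `introductions-0.txt` under the configured `output_dir`. the naming function below is purely hypothetical:

```python
from pathlib import Path

def output_path(output_dir: str, task_name: str, index: int = 0) -> Path:
    # hypothetical scheme: <output_dir>/<section>-<index>.txt
    return Path(output_dir).expanduser() / f"{task_name}-{index}.txt"

print(output_path("./output/", "introductions"))
```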

you can override config options with flags:

```shell
python ./transformable.py \
-c ./transformable.example.toml \
-t home-planet \
-x 'Most humans are currently living on Mars.'
```

it is also possible to run multiple tasks in sequence:

```shell
python ./transformable.py \
-c ./transformable.example.toml \
-t introductions \
-t meaning-of-life
```

to run all tasks from the config in sequence:

```shell
python ./transformable.py -c ./transformable.example.toml -a
```
19 changes: 19 additions & 0 deletions ROADMAP.md
@@ -0,0 +1,19 @@
# ROADMAP

## BACKLOG

```
[ ] support additional --kind with various models
```

## COMPLETE

```
[x] add `--repeat` flag for repeating tasks
[x] flag to run all tasks from config file
[x] run multiple tasks from configuration file
[x] override config values using command line flags
[x] add support for reading prompts from a file
[x] add support for manually defining seed value
[x] add a flag to fetch models from huggingface
```
9 changes: 9 additions & 0 deletions requirements.txt
@@ -0,0 +1,9 @@
sentencepiece
torch
torchvision
torchaudio
transformers
scipy
ftfy
accelerate
tensorboard
toml
64 changes: 64 additions & 0 deletions transformable.example.toml
@@ -0,0 +1,64 @@
[DEFAULT]

# Default values are commented out below.
# Uncomment and change to set config wide defaults.

# number of outputs to produce per prompt
#num_outputs = 1

# output directory for generated text
#output_dir = "./output/"

# directory to look for model repositories
#models_dir = "~/src/huggingface.co/"

# kind of pipeline (text-generation, question-answering, conversational)
#kind = "text-generation"

# transformer model to use for inference
#model = "distilbert-base-uncased-distilled-squad"

# allow automatic downloading of models
#download_models = false

# seed to use for generator
#seed = 0

# number of repetitions for task
#repeat = 1


[introductions]

# kind of pipeline (text-generation, question-answering, conversational)
kind = "conversational"

# transformer model to use for inference
model = "facebook/blenderbot-400M-distill"

# prompts to generate text from
prompts = ["Hi, my name is Amy, what is your name?"]


[home-planet]

# kind of pipeline (text-generation, question-answering, conversational)
kind = "question-answering"

# context to use for answering the question
context = "Planet earth is the third planet from the sun in our solar system. Most humans who were ever born currently live there."

# prompts to generate text from
prompts = ["I am a human, which planet am I from?"]


[meaning-of-life]

# kind of pipeline (text-generation, question-answering, conversational)
kind = "text-generation"

# transformer model to use for inference
model = "xlnet-large-cased"

# prompts to generate text from
prompts = ["The meaning of life is"]
