diff --git a/README.md b/README.md index 705c144..d0d000a 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,14 @@ Turkish LM Tuner is a library for fine-tuning Turkish language models on various ## Installation -You can use the following command to install the library: +You can install `turkish-lm-tuner` via PyPI: + +```bash + +pip install turkish-lm-tuner +``` + +Alternatively, you can use the following command to install the library: ```bash diff --git a/docs/index.md b/docs/index.md index f9fd717..8493c5c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -16,9 +16,17 @@ Turkish LM Tuner is a library for fine-tuning Turkish language models on various ## Installation -You can use the following command to install the library: +You can install `turkish-lm-tuner` via PyPI: ```bash + +pip install turkish-lm-tuner +``` + +Alternatively, you can use the following command to install the library: + +```bash + pip install git+https://github.com/boun-tabi-LMG/turkish-lm-tuner.git ``` diff --git a/docs/tutorials/finetuning.ipynb b/docs/tutorials/finetuning.ipynb index 22ea134..97dc806 100644 --- a/docs/tutorials/finetuning.ipynb +++ b/docs/tutorials/finetuning.ipynb @@ -22,7 +22,7 @@ "The library can be installed as follows:\n", "\n", "```bash\n", - "pip install git+https://github.com/boun-tabi-LMG/turkish-lm-tuner.git\n", + "pip install turkish-lm-tuner\n", "```" ] }, @@ -53,7 +53,7 @@ "task = \"summarization\"\n", "task_mode = '' # either '', '[NLU]', '[NLG]', '[S2S]'\n", "task_format=\"conditional_generation\"\n", - "model_name = \"boun-tabi-lmt/TURNA\"\n", + "model_name = \"boun-tabi-LMG/TURNA\"\n", "max_input_length = 764\n", "max_target_length = 128\n", "\n", @@ -121,8 +121,8 @@ "\n", "model_trainer = TrainerForConditionalGeneration(\n", " model_name=model_name, task=task,\n", - " optimizer_params=optimizer_params,\n", " training_params=training_params,\n", + " optimizer_params=optimizer_params,\n", " model_save_path=\"turna_summarization_tr_news\",\n", " max_input_length=max_input_length,\n", " max_target_length=max_target_length, \n", diff --git a/docs/tutorials/getting-started.ipynb b/docs/tutorials/getting-started.ipynb index 6b65e05..79b5ed2 100644 --- a/docs/tutorials/getting-started.ipynb +++ b/docs/tutorials/getting-started.ipynb @@ -20,7 +20,7 @@ "`turkish-lm-tuner` can be installed as follows:\n", "\n", "```bash\n", - "pip install git+https://github.com/boun-tabi-LMG/turkish-lm-tuner.git\n", + "pip install turkish-lm-tuner\n", "```\n", "\n" ] @@ -42,7 +42,7 @@ "dataset_name = \"tr_news\"\n", "task = \"summarization\"\n", "task_format = \"conditional_generation\"\n", - "model_name = \"boun-tabi-lmt/TURNA\"\n", + "model_name = \"boun-tabi-LMG/TURNA\"\n", "max_input_length = 764\n", "max_target_length = 128\n", "\n", @@ -76,13 +76,13 @@ " 'optimizer_type': 'adafactor',\n", " 'scheduler': False\n", "}\n", + "model_save_path = \"turna_summarization_tr_news\"\n", "\n", "# Finetuning the model\n", - "model_trainer = TrainerForConditionalGeneration(model_name, task, optimizer_params, training_params, \"turna_summarization_tr_news\", max_input_length, max_target_length, dataset_processor.dataset.postprocess_data)\n", + "model_trainer = TrainerForConditionalGeneration(model_name, task, training_params, optimizer_params, model_save_path, max_input_length, max_target_length, dataset_processor.dataset.postprocess_data)\n", "trainer, model = model_trainer.train_and_evaluate(train_dataset, eval_dataset, None)\n", "\n", "# Save the model\n", - "model_save_path = \"turna_summarization_tr_news\"\n", "model.save_pretrained(model_save_path)\n", "dataset_processor.tokenizer.save_pretrained(model_save_path)\n", "```\n" diff --git a/docs/tutorials/inference.ipynb b/docs/tutorials/inference.ipynb index 3a9c31d..cbe7a56 100644 --- a/docs/tutorials/inference.ipynb +++ b/docs/tutorials/inference.ipynb @@ -2,33 +2,33 @@ "cells": [ { "cell_type": "markdown", - "source": [ - "# Inference with `turkish-lm-tuner`" - ], "metadata": { "id": "JeY6-b1PObTg" - } + }, + "source": [ + "# Inference with `turkish-lm-tuner`" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "Qzs86vgGNxMa" + }, "source": [ "## Installation\n", "The library can be installed as follows:\n" - ], - "metadata": { - "id": "Qzs86vgGNxMa" - } + ] }, { "cell_type": "code", - "source": [ - "!pip install git+https://github.com/boun-tabi-LMG/turkish-lm-tuner.git" - ], + "execution_count": null, "metadata": { "id": "Qt1tuXI_MP6a" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "!pip install turkish-lm-tuner" + ] }, { "cell_type": "markdown", @@ -49,258 +49,258 @@ }, { "cell_type": "markdown", - "source": [ - "#### Text Generation" - ], "metadata": { "id": "E62WwFMDOCpj" - } + }, + "source": [ + "#### Text Generation" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "enGIbwrRN7F5" + }, + "outputs": [], "source": [ "from turkish_lm_tuner import TextPredictor\n", "predictor = TextPredictor(model_name='boun-tabi-LMG/TURNA', task='generation')\n", "predictor.predict('[S2S] Bir varmış, bir yokmuş, evvel zaman içinde, kalbur saman içinde, uzak diyarların birinde bir turna')" - ], - "metadata": { - "id": "enGIbwrRN7F5" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "#### Named Entity Recognition" - ], "metadata": { "id": "1ga8TmP6OABA" - } + }, + "source": [ + "#### Named Entity Recognition" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8GZ-YQlFN-hi" + }, + "outputs": [], "source": [ "from turkish_lm_tuner import TextPredictor\n", "predictor = TextPredictor(model_name='boun-tabi-LMG/turna_ner_milliyet', task='ner')\n", "predictor.predict('Ecevit, Irak hükümetinin de Ankara Büyükelçiliği için agreman istediğini belirtti.')" - ], - "metadata": { - "id": "8GZ-YQlFN-hi" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "#### Text Summarization" - ], "metadata": { "id": "fmatSgmJOWTp" - } + }, + "source": [ + "#### Text Summarization" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_ivo7sbgXFxq" + }, + "outputs": [], "source": [ "from turkish_lm_tuner import TextPredictor\n", "predictor = TextPredictor(model_name='boun-tabi-LMG/turna_summarization_tr_news', task='summarization')\n", "predictor.predict('Kalp krizi geçirenlerin yaklaşık üçte birinin kısa bir süre önce grip atlattığı düşünülüyor. Peki grip virüsü ne yapıyor da kalp krizine yol açıyor? Karpuz şöyle açıkladı: Grip virüsü kanın yapışkanlığını veya pıhtılaşmasını artırıyor.')" - ], - "metadata": { - "id": "_ivo7sbgXFxq" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "#### Text Categorization\n" - ], "metadata": { "id": "15i0u_j7R3e3" - } + }, + "source": [ + "#### Text Categorization\n" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8LDS7dAPR2b2" + }, + "outputs": [], "source": [ "from turkish_lm_tuner import TextPredictor\n", "predictor = TextPredictor(model_name='boun-tabi-LMG/turna_classification_ttc4900', task='categorization')\n", "predictor.predict('anadolu_efes e 18 lik star ! beko_basketbol_ligi nde iddialı bir kadroyla sezona giren anadolu_efes transfer harekatına devam ediyor avrupa basınında yer alan iddialara göre lacivert beyazlıların son hedefi kk zagreb de forma giyen 1994 doğumlu dario saric hırvat oyuncunun anadolu_efes ile kesin anlaşmaya vardığı iddia edilirken efes in bu transfer için kk zagreb e 550 bin euro ödeyeceği ifade edildi saric in sezon sonuna kadar şu anda kiralık olarak formasını giydiği kk split te kalacağı ve sezon sonunda anadolu_efes e katılacağı belirtildi hırvat basketbolunun gelecek vaadeden isimlerinden biri olarak gösterilen saric 2 05 boyunda ve kısa forvet pozisyonunda görev yapıyor yıldız basketbolcu 2012 18 yaş altı avrupa_basketbol_şampiyonasında hırvatistan ı şampiyonluğa taşımıştı final karşılaşmasında litvanya potasına 39 sayı bırakan saric turnuvayı 25 6 sayı 10 1 ribaund ve 3 3 asist ortalamasıyla tamamlamıştı')" - ], - "metadata": { - "id": "8LDS7dAPR2b2" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "#### Natural Language Inference" - ], "metadata": { "id": "0ZdtTXfKTnJX" - } + }, + "source": [ + "#### Natural Language Inference" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PyArZhLATp2y" + }, + "outputs": [], "source": [ "from turkish_lm_tuner import TextPredictor\n", "predictor = TextPredictor(model_name='boun-tabi-LMG/turna_nli_nli_tr', task='nli')\n", "hypothesis = \"Temple Bar'da çok sanatçı var.\"\n", "premise = \"Temple Bar herhangi bir müzisyen veya sanatçıdan tamamen yoksundur.\"\n", "predictor.predict(f\"hipotez: {hypothesis} önerme: {premise}\")" - ], - "metadata": { - "id": "PyArZhLATp2y" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "3ncVZ4poTrWL" + }, "source": [ "#### Sentiment Classification\n", "\n", "Models for sentiment classification were fine-tuned on two separate datasets:\n", "- [Product Reviews](https://huggingface.co/datasets/turkish_product_reviews), which contains two labels: \"negatif\" and \"pozitif\"\n", "- [Tweet Sentiment](https://ieeexplore.ieee.org/document/8554037), which contains three labels: \"olumsuz\", \"nötr\", and \"olumlu\"" - ], - "metadata": { - "id": "3ncVZ4poTrWL" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a_pEu75UTw0e" + }, + "outputs": [], "source": [ "from turkish_lm_tuner import TextPredictor\n", "predictor = TextPredictor(model_name='boun-tabi-LMG/turna_classification_17bintweet_sentiment', task='sentiment')\n", "predictor.predict('sonunda bugün kurtuldum senden')" - ], - "metadata": { - "id": "a_pEu75UTw0e" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6SgUeohDUJ73" + }, + "outputs": [], "source": [ "from turkish_lm_tuner import TextPredictor\n", "predictor = TextPredictor(model_name='boun-tabi-LMG/turna_classification_tr_product_reviews', task='sentiment')\n", "predictor.predict('Bu kadar iyi bir ürün kullanmadım!')" - ], - "metadata": { - "id": "6SgUeohDUJ73" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "#### Part-of-speech Tagging" - ], "metadata": { "id": "dfngLbTmWm_F" - } + }, + "source": [ + "#### Part-of-speech Tagging" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ALeDh3UcW8lp" + }, + "outputs": [], "source": [ "from turkish_lm_tuner import TextPredictor\n", "predictor = TextPredictor(model_name='boun-tabi-LMG/turna_pos_boun', task='pos_tagging')\n", "predictor.predict('Çünkü her kişinin bir başka yolu, bir başka yöntemi olmak gerektir.')" - ], - "metadata": { - "id": "ALeDh3UcW8lp" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "#### Text Paraphrasing" - ], "metadata": { "id": "FO9IV2PaXJkL" - } + }, + "source": [ + "#### Text Paraphrasing" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GT68kbguXLq4" + }, + "outputs": [], "source": [ "from turkish_lm_tuner import TextPredictor\n", "predictor = TextPredictor(model_name='boun-tabi-LMG/turna_paraphrasing_tatoeba', task='paraphrasing')\n", "predictor.predict('Kalp krizi geçirenlerin yaklaşık üçte birinin kısa bir süre önce grip atlattığı düşünülüyor. ')" - ], - "metadata": { - "id": "GT68kbguXLq4" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "#### News Title Generation" - ], "metadata": { "id": "GeUiA8q2XaAw" - } + }, + "source": [ + "#### News Title Generation" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q-RZtJJdXeJE" + }, + "outputs": [], "source": [ "from turkish_lm_tuner import TextPredictor\n", "predictor = TextPredictor(model_name='boun-tabi-LMG/turna_title_generation_mlsum', task='title_generation')\n", "predictor.predict('Kalp krizi geçirenlerin yaklaşık üçte birinin kısa bir süre önce grip atlattığı düşünülüyor. Peki grip virüsü ne yapıyor da kalp krizine yol açıyor? Karpuz şöyle açıkladı: Grip virüsü kanın yapışkanlığını veya pıhtılaşmasını artırıyor.')" - ], - "metadata": { - "id": "Q-RZtJJdXeJE" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "#### Semantic Textual Similarity" - ], "metadata": { "id": "qHnmClyhXr6B" - } + }, + "source": [ + "#### Semantic Textual Similarity" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wz6Eos6wXv8Q" + }, + "outputs": [], "source": [ "from turkish_lm_tuner import TextPredictor\n", "predictor = TextPredictor(model_name='boun-tabi-LMG/turna_semantic_similarity_stsb_tr', task='sts')\n", "first_text = \"Bugün okula gitmedim.\"\n", "second_text = \"Ben okula gitmedim bugün.\"\n", "predictor.predict(f\"ilk cümle: {first_text} ikinci cümle: {second_text}\")" - ], - "metadata": { - "id": "wz6Eos6wXv8Q" - }, - "execution_count": null, - "outputs": [] + ] } ], "metadata": { - "language_info": { - "name": "python" - }, "colab": { "provenance": [] }, "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/mkdocs.yml b/mkdocs.yml index 4b12aa7..f7da17c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -16,6 +16,7 @@ nav: - Getting Started: tutorials/getting-started.ipynb - Finetuning: tutorials/finetuning.ipynb - Evaluation: tutorials/evaluation.ipynb + - Inference: tutorials/inference.ipynb - License: license.md