diff --git a/playground/GPT DEV_1_become_one_with_data.ipynb b/playground/GPT DEV_1_become_one_with_data.ipynb new file mode 100644 index 0000000..77d86e4 --- /dev/null +++ b/playground/GPT DEV_1_become_one_with_data.ipynb @@ -0,0 +1,303 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "- [A Recipe for Training Neural Networks\n", + "](https://karpathy.github.io/2019/04/25/recipe/)\n", + "- [Harvard CS197 AI Research Experiences](https://docs.google.com/document/d/1uvAbEhbgS_M-uDMTzmOWRlYxqCkogKRXdbKYYT98ooc/edit#heading=h.2z3yllpny6or)\n", + "- [Unit tests for machine learning 
research](https://semla.polymtl.ca/wp-content/uploads/2022/11/Pablo-Unit-tests-for-ML-code-SEMLA-talk.pdf)\n", + "- [CS 329S: Machine Learning Systems Design](https://stanford-cs329s.github.io/syllabus.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Become one with the data" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "length of dataset in characters: 1115394\n", + "First Citizen:\n", + "Before we proceed any further, hear me speak.\n", + "\n", + "All:\n", + "Speak, speak.\n", + "\n", + "First Citizen:\n", + "You\n" + ] + } + ], + "source": [ + "# !wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt\n", + "\n", + "with open('input.txt', 'r', encoding='utf-8') as f:\n", + "    text = f.read()\n", + "\n", + "print(\"length of dataset in characters: \", len(text))\n", + "print(text[:100])\n", + "train_data = text[:int(len(text)*0.9)]\n", + "val_data = text[int(len(text)*0.9):]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['First', ' ', 'Citizen:', '\\n', 'Before', ' ', 'we', ' ', 'proceed', ' ', 'any', ' ', 'further,', ' ', 'hear', ' ', 'me', ' ', 'speak.']\n", + "[(' ', 169892), ('\\n', 40000), ('', 7242), ('the', 5437), ('I', 4403)]\n", + "[('open;', 1), ('standing,', 1), ('moving,', 1), ('sleep--die,', 1), (\"wink'st\", 1)]\n", + "splitted 419785 unique_word 25673\n" + ] + } + ], + "source": [ + "import re\n", + "\n", + "def split_string(input_string):\n", + "    # Use a regex to split on newlines (\n) and spaces ( ), keeping the separators in the result\n", + "    split_list = re.split(r'(\s)', input_string)\n", + "    return split_list\n", + "\n", + "first_period_index = text.index('.')\n", + "print(split_string(text[:first_period_index+1]))\n", + "unique_words = list(set(split_string(text)))\n", + "\n", + "word_count_dict = {}\n", + "for word in split_string(text):\n", + "    if word in word_count_dict:\n", + "        word_count_dict[word] += 1\n", + "    else:\n", + "        word_count_dict[word] = 1\n", + "# Sort in descending order of count\n", + "word_count_dict = dict(sorted(word_count_dict.items(), key=lambda x: -x[1]))\n", + "# Show the top and bottom 5 entries\n", + "print(list(word_count_dict.items())[:5])\n", + "print(list(word_count_dict.items())[-5:])\n", + "print('splitted', len(split_string(text)), 'unique_word', len(unique_words))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5962, 22307, 25, 198, 8421, 356, 5120, 597, 2252, 11, 3285, 502, 2740, 13]\n", + "['First', ' Citizen', ':', '\\n', 'Before', ' we', ' proceed', ' any', ' further', ',', ' hear', ' me', ' speak', '.']\n", + "[(198, {'count': 39996, 'token_id': '\\n'}), (11, {'count': 19777, 'token_id': ','}), (25, {'count': 10291, 'token_id': ':'}), (13, {'count': 7811, 'token_id': '.'}), (262, {'count': 5370, 'token_id': ' the'})]\n", + "[(16558, {'count': 1, 'token_id': ' sphere'}), (31960, {'count': 1, 'token_id': ' Wond'}), (22194, {'count': 1, 'token_id': ' possesses'}), (29708, {'count': 1, 'token_id': ' eyel'}), (30757, {'count': 1, 'token_id': 'stroke'})]\n", + "splitted 338025 unique_token 11706 vocab_size 50257\n" + ] + } + ], + "source": [ + "import tiktoken\n", + "enc = tiktoken.get_encoding(\"gpt2\")\n", + "encoded_ids = enc.encode(text[:first_period_index+1])\n", + "decoded_text = [enc.decode([encoded_id]) for 
encoded_id in encoded_ids]\n", + "print(encoded_ids)\n", + "print(decoded_text)\n", + "\n", + "\n", + "unique_tokens = list(set(enc.encode(text)))\n", + "\n", + "token_count_dict = {}\n", + "for token in enc.encode(text):\n", + "    if token in token_count_dict:\n", + "        token_count_dict[token]['count'] += 1\n", + "    else:\n", + "        token_count_dict[token] = {'count': 1, 'token_id': enc.decode([token])}\n", + "# Sort in descending order of count\n", + "token_count_dict = dict(sorted(token_count_dict.items(), key=lambda x: -x[1]['count']))\n", + "# Show the top and bottom 5 entries\n", + "print(list(token_count_dict.items())[:5])\n", + "print(list(token_count_dict.items())[-5:])\n", + "print('splitted', len(enc.encode(text)), 'unique_token', len(unique_tokens), 'vocab_size', enc.n_vocab)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "params of unigram 50257\n", + "2525766049 126937424324593\n" + ] + } + ], + "source": [ + "from ngram import Ngram\n", + "vocab = list(range(enc.n_vocab))\n", + "unigram = Ngram(1, vocab)\n", + "tokens = enc.encode(text)\n", + "unigram.train(tokens)\n", + "print('params of unigram', len(unigram.ngram)) \n", + "\n", + "\n", + "print(enc.n_vocab ** 2, enc.n_vocab ** 3)\n", + "# bigram = Ngram(2, vocab)\n", + "# bigram.train(tokens)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('\\n', 39997), (',', 19778), (':', 10292), ('.', 7812), (' the', 5371)]\n", + "[('ominated', 1), (' regress', 1), (' Collider', 1), (' informants', 1), ('<|endoftext|>', 1)]\n" + ] + } + ], + "source": [ + "# Show the top and bottom 5 entries\n", + "unigram_info = unigram.ngram\n", + "unigram_info = dict(sorted(unigram_info.items(), key=lambda x: -x[1]))\n", + "top_unigram = list(unigram_info.items())[:5]\n", + "bottom_unigram = list(unigram_info.items())[-5:]\n", + "print([(enc.decode([token[0]]), count) for token, count in top_unigram])\n", + "print([(enc.decode([token[0]]), count) for token, count in bottom_unigram])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "50257" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "enc.n_vocab" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input\n", + "torch.Size([4, 8])\n", + "tensor([[ 198, 30313, 262, 22397, 282, 290, 884, 3790],\n", + "        [ 4151, 438, 198, 10418, 329, 511, 11989, 11],\n", + "        [ 3355, 322, 12105, 287, 3426, 6729, 198, 3886],\n", + "        [ 290, 15581, 8636, 13, 198, 198, 35510, 4221]])\n", + "[['\\n', 'Except', ' the', ' marsh', 'al', ' and', ' such', ' officers'], [' eye', '--', '\\n', 'Men', ' for', ' their', ' sons', ','], [' wall', 'ow', ' naked', ' in', ' December', ' snow', '\\n', 'By'], [' and', ' noble', ' estimate', '.', '\\n', '\\n', 'NOR', 'TH']]\n", + "target\n", + "torch.Size([4, 8])\n", + "tensor([[30313, 262, 22397, 282, 290, 884, 3790, 198],\n", + "        [ 438, 198, 10418, 329, 511, 11989, 11, 17743],\n", + "        [ 322, 12105, 287, 3426, 6729, 198, 3886, 3612],\n", + "        [15581, 8636, 13, 198, 198, 35510, 4221, 5883]])\n", + "[['Except', ' the', ' marsh', 'al', ' and', ' such', ' officers', '\\n'], ['--', '\\n', 'Men', ' for', ' their', ' sons', ',', ' wives'], ['ow', ' naked', ' in', ' December', ' snow', '\\n', 'By', ' 
thinking'], [' noble', ' estimate', '.', '\\n', '\\n', 'NOR', 'TH', 'UM']]\n", + "input: ['\\n'] target: 'Except'\n", + "input: ['\\n', 'Except'] target: ' the'\n", + "input: ['\\n', 'Except', ' the'] target: ' marsh'\n", + "input: ['\\n', 'Except', ' the', ' marsh'] target: 'al'\n", + "input: ['\\n', 'Except', ' the', ' marsh', 'al'] target: ' and'\n", + "input: ['\\n', 'Except', ' the', ' marsh', 'al', ' and'] target: ' such'\n", + "input: ['\\n', 'Except', ' the', ' marsh', 'al', ' and', ' such'] target: ' officers'\n", + "input: ['\\n', 'Except', ' the', ' marsh', 'al', ' and', ' such', ' officers'] target: '\\n'\n" + ] + } + ], + "source": [ + "import torch\n", + "seed = 1337\n", + "torch.manual_seed(seed) \n", + "batch_size = 4\n", + "context_length = 8\n", + "data = torch.tensor(enc.encode(text), dtype=torch.long)\n", + "n = int(0.9*len(data)) # first 90% will be train, rest val\n", + "train_data = data[:n]\n", + "val_data = data[n:]\n", + "\n", + "\n", + "def get_batch(split):\n", + " data = train_data if split == 'train' else val_data\n", + " index = torch.randint(len(data) - context_length, (batch_size,))\n", + " x = torch.stack([data[i:i+context_length] for i in index])\n", + " y = torch.stack([data[i+1:i+1+context_length] for i in index])\n", + " return x, y\n", + "\n", + "\n", + "x, y = get_batch('train')\n", + "print('input')\n", + "print(x.shape)\n", + "print(x)\n", + "print([[enc.decode([token])for token in sequence] for sequence in x])\n", + "print('target')\n", + "print(y.shape)\n", + "print(y)\n", + "print([[enc.decode([token])for token in sequence] for sequence in y])\n", + "\n", + "for t in range(context_length):\n", + " context = x[0, :t+1]\n", + " target = y[0, t]\n", + " print('input: ', [enc.decode([token]) for token in context], 'target: ', repr(enc.decode([target])))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/playground/GPT DEV_2_e2epipeline_baseline.ipynb b/playground/GPT DEV_2_e2epipeline_baseline.ipynb new file mode 100644 index 0000000..e42834d --- /dev/null +++ b/playground/GPT DEV_2_e2epipeline_baseline.ipynb @@ -0,0 +1,417 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "- [A Recipe for Training Neural Networks\n", + "](https://karpathy.github.io/2019/04/25/recipe/)\n", + "- [Harvard CS197 AI Research Experiences](https://docs.google.com/document/d/1uvAbEhbgS_M-uDMTzmOWRlYxqCkogKRXdbKYYT98ooc/edit#heading=h.2z3yllpny6or)\n", + "- [Unit tests for machine learning research](https://semla.polymtl.ca/wp-content/uploads/2022/11/Pablo-Unit-tests-for-ML-code-SEMLA-talk.pdf)\n", + "- [CS 329S: Machine Learning Systems Design](https://stanford-cs329s.github.io/syllabus.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up the end-to-end training/evaluation skeleton + get dumb baselines" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "from torch.nn import functional as F\n", + "\n", + "torch.manual_seed(1337)\n", + "\n", + "class BigramLanguageModel(nn.Module):\n", + " def 
__init__(self, vocab_size):\n", + " super().__init__()\n", + " # self.bigram_table = nn.Embedding(vocab_size, vocab_size)\n", + " self.token_embedding_table = nn.Embedding(vocab_size, 16)\n", + " self.linear = nn.Linear(16, vocab_size)\n", + " print('number of parameters:', sum(p.numel() for p in self.parameters()))\n", + " \n", + " def forward(self, token_indexes):\n", + " # token_index: (batch_size, sequence_length)\n", + " # logits = self.bigram_table(token_indexes)\n", + "\n", + " embedding = self.token_embedding_table(token_indexes)\n", + " logits = self.linear(embedding)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " return logits\n", + "\n", + " def loss_per_token(self, token_indexes, targets):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " # targets: (batch_size, sequence_length)\n", + " batch_size, sequence_length, vocab_size = logits.shape\n", + " loss = F.cross_entropy(\n", + " logits.view(batch_size*sequence_length, vocab_size),\n", + " targets.view(batch_size*sequence_length),\n", + " reduction='none'\n", + " )\n", + " # loss: (batch_size*sequence_length)\n", + " return loss.view(batch_size, sequence_length)\n", + " \n", + " def loss(self, token_indexes, targets):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " # targets: (batch_size, sequence_length)\n", + " batch_size, sequence_length, vocab_size = logits.shape\n", + " loss = F.cross_entropy(\n", + " logits.view(batch_size*sequence_length, vocab_size),\n", + " targets.view(batch_size*sequence_length)\n", + " )\n", + " # loss: scalar\n", + " return loss\n", + " \n", + " def generate(self, token_indexes, max_new_tokens):\n", + " # token_indexes: (batch_size, sequence_length)\n", + " batch_size, sequence_length = token_indexes.shape\n", + " for _ in range(max_new_tokens):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " next_token_logits = logits[:, -1, :]\n", + " # next_token_logits: (batch_size, vocab_size)\n", + " next_token_probs = F.softmax(next_token_logits, dim=-1)\n", + " # next_token_probs: (batch_size, vocab_size)\n", + " next_token = torch.multinomial(next_token_probs, num_samples=1)\n", + " # next_token: (batch_size, 1)\n", + " token_indexes = torch.cat([token_indexes, next_token], dim=1)\n", + " # token_indexes: (batch_size, sequence_length+1)\n", + " return token_indexes\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def rand_int_test(cls, low, high, shape, kwargs):\n", + " layer = cls(**kwargs).cuda()\n", + " random_input = torch.randint(low, high, shape).cuda()\n", + " print('input shape:', random_input.shape)\n", + " output = layer(random_input)\n", + " print('output shape:', output.shape)\n", + " return output" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "number of parameters: 8448\n", + "input shape: torch.Size([4, 1024])\n", + "output shape: torch.Size([4, 1024, 256])\n" + ] + } + ], + "source": [ + "test_cls = BigramLanguageModel\n", + "batch_size = 4\n", + "context_length = 1024\n", + "vocab_size = 256\n", + "\n", + "kwargs = {'vocab_size': vocab_size}\n", + "output = rand_int_test(test_cls, 0, vocab_size, (batch_size, 
context_length), kwargs)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "number of parameters: 1658481\n", + "random guess loss: 10.82490511970208\n", + "tensor(10.9950, device='cuda:0', grad_fn=)\n", + "torch.Size([4, 1024]) tensor(10.9950, device='cuda:0', grad_fn=)\n", + "tensor([[11.4311, 11.6552, 10.2010, ..., 10.5417, 10.6344, 11.4137],\n", + " [11.0913, 10.3161, 10.8965, ..., 11.6884, 11.4491, 10.4440],\n", + " [12.3048, 10.9655, 10.6260, ..., 10.9756, 11.2433, 10.6060],\n", + " [10.5069, 10.6218, 11.0385, ..., 11.9397, 10.6035, 10.4034]],\n", + " device='cuda:0', grad_fn=)\n" + ] + } + ], + "source": [ + "from data import get_batch, enc\n", + "import tiktoken\n", + "import math\n", + "\n", + "x, y = get_batch(batch_size, context_length, 'train')\n", + "vocab_size = tiktoken.get_encoding(\"gpt2\").n_vocab\n", + "model = BigramLanguageModel(vocab_size).cuda()\n", + "loss = model.loss(x.cuda(), y.cuda())\n", + "print('random guess loss:', -math.log(1/vocab_size))\n", + "print(loss)\n", + "loss_per_token = model.loss_per_token(x.cuda(), y.cuda())\n", + "print(loss_per_token.shape, loss_per_token.mean())\n", + "print(loss_per_token)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input [' must', '\\n', 'In', ' that']\n", + "output [' must', '\\n', 'In', ' that', ' Calculator', ' HPV', 'Empty', ' LW', ' Seconds', ' Infinite', ' payoff', 'ste']\n", + "Gold label [' must', '\\n', 'In', ' that', ' be', ' made', ' more', ' bitter', '.', ' Fear', ' o', \"'\", 'ers', 'h', 'ades', ' me', ':', '\\n', 'Good', ' expedition', ' be', ' my', ' friend', ',', ' and', ' comfort', '\\n', 'The', ' gracious', ' queen', ',', ' part', ' of', ' his', ' theme', ',', ' but', ' nothing', '\\n', 'Of', ' his', ' ill', '-', 'ta', \"'\", 'en', ' suspicion', '!', ' Come', ',', ' Cam', 'illo', ';', '\\n', 'I', ' will', ' respect', ' thee', ' as', ' a', ' father', ' if', '\\n', 'Th', 'ou', ' bear', \"'s\", 't', ' my', ' life', ' off', ' hence', ':', ' let', ' us', ' avoid', '.', '\\n', '\\n', 'C', 'AM', 'ILL', 'O', ':', '\\n', 'It', ' is', ' in', ' mine', ' authority', ' to', ' command', '\\n', 'The', ' keys', ' of', ' all', ' the', ' post', 'ern', 's', ':', ' please', ' your', ' high', 'ness', '\\n', 'To', ' take', ' the', ' urgent', ' hour', '.', ' Come', ',', ' sir', ',', ' away', '.', '\\n', '\\n', 'HER', 'M', 'ION', 'E', ':', '\\n', 'Take', ' the', ' boy', ' to', ' you', ':', ' he', ' so', ' troubles', ' me', ',', '\\n', \"'\", 'T', 'is', ' past', ' enduring', '.', '\\n', '\\n', 'First', ' Lady', ':', '\\n', 'Come', ',', ' my', ' gracious', ' lord', ',', '\\n', 'Sh', 'all', ' I', ' be', ' your', ' play', 'f', 'ellow', '?', '\\n', '\\n', 'M', 'AM', 'ILL', 'I', 'US', ':', '\\n', 'No', ',', ' I', \"'ll\", ' none', ' of', ' you', '.', '\\n', '\\n', 'First', ' Lady', ':', '\\n', 'Why', ',', ' my', ' sweet', ' lord', '?', '\\n', '\\n', 'M', 'AM', 'ILL', 'I', 'US', ':', '\\n', 'You', \"'ll\", ' kiss', ' me', ' hard', ' and', ' speak', ' to', ' me', ' as', ' if', '\\n', 'I', ' were', ' a', ' baby', ' still', '.', ' I', ' love', ' you', ' better', '.', '\\n', '\\n', 'Second', ' Lady', ':', '\\n', 'And', ' why', ' so', ',', ' my', ' lord', '?', '\\n', '\\n', 'M', 'AM', 'ILL', 'I', 'US', ':', '\\n', 'Not', ' for', ' because', '\\n', 'Your', ' brow', 's', ' are', ' black', 'er', ';', ' yet', ' black', ' 
brow', 's', ',', ' they', ' say', ',', '\\n', 'Bec', 'ome', ' some', ' women', ' best', ',', ' so', ' that', ' there', ' be', ' not', '\\n', 'Too', ' much', ' hair', ' there', ',', ' but', ' in', ' a', ' semic', 'irc', 'le', '\\n', 'Or', ' a', ' half', '-', 'moon', ' made', ' with', ' a', ' pen', '.', '\\n', '\\n', 'Second', ' Lady', ':', '\\n', 'Who', ' taught', ' you', ' this', '?', '\\n', '\\n', 'M', 'AM', 'ILL', 'I', 'US', ':', '\\n', 'I', ' learnt', ' it', ' out', ' of', ' women', \"'s\", ' faces', '.', ' Pr', 'ay', ' now', '\\n', 'What', ' colour', ' are', ' your', ' eyebrows', '?', '\\n', '\\n', 'First', ' Lady', ':', '\\n', 'Blue', ',', ' my', ' lord', '.', '\\n', '\\n', 'M', 'AM', 'ILL', 'I', 'US', ':', '\\n', 'N', 'ay', ',', ' that', \"'s\", ' a', ' mock', ':', ' I', ' have', ' seen', ' a', ' lady', \"'s\", ' nose', '\\n', 'That', ' has', ' been', ' blue', ',', ' but', ' not', ' her', ' eyebrows', '.', '\\n', '\\n', 'First', ' Lady', ':', '\\n', 'H', 'ark', ' ye', ';', '\\n', 'The', ' queen', ' your', ' mother', ' rounds', ' ap', 'ace', ':', ' we', ' shall', '\\n', 'Present', ' our', ' services', ' to', ' a', ' fine', ' new', ' prince', '\\n', 'One', ' of', ' these', ' days', ';', ' and', ' then', ' you', \"'\", 'ld', ' want', 'on', ' with', ' us', ',', '\\n', 'If', ' we', ' would', ' have', ' you', '.', '\\n', '\\n', 'Second', ' Lady', ':', '\\n', 'She', ' is', ' spread', ' of', ' late', '\\n', 'Int', 'o', ' a', ' good', 'ly', ' bulk', ':', ' good', ' time', ' encounter', ' her', '!', '\\n', '\\n', 'HER', 'M', 'ION', 'E', ':', '\\n', 'What', ' wisdom', ' stir', 's', ' amongst', ' you', '?', ' Come', ',', ' sir', ',', ' now', '\\n', 'I', ' am', ' for', ' you', ' again', ':', ' pray', ' you', ',', ' sit', ' by', ' us', ',', '\\n', 'And', ' tell', \" '\", 's', ' a', ' tale', '.', '\\n', '\\n', 'M', 'AM', 'ILL', 'I', 'US', ':', '\\n', 'M', 'erry', ' or', ' sad', ' shall', \"'t\", ' be', '?', '\\n', '\\n', 'HER', 'M', 'ION', 'E', ':', '\\n', 'As', ' merry', ' as', ' you', ' will', '.', '\\n', '\\n', 'M', 'AM', 'ILL', 'I', 'US', ':', '\\n', 'A', ' sad', ' tale', \"'s\", ' best', ' for', ' winter', ':', ' I', ' have', ' one', '\\n', 'Of', ' sprites', ' and', ' goblins', '.', '\\n', '\\n', 'HER', 'M', 'ION', 'E', ':', '\\n', 'Let', \"'s\", ' have', ' that', ',', ' good', ' sir', '.', '\\n', 'Come', ' on', ',', ' sit', ' down', ':', ' come', ' on', ',', ' and', ' do', ' your', ' best', '\\n', 'To', ' fright', ' me', ' with', ' your', ' sprites', ';', ' you', \"'re\", ' powerful', ' at', ' it', '.', '\\n', '\\n', 'M', 'AM', 'ILL', 'I', 'US', ':', '\\n', 'There', ' was', ' a', ' man', '--', '\\n', '\\n', 'HER', 'M', 'ION', 'E', ':', '\\n', 'N', 'ay', ',', ' come', ',', ' sit', ' down', ';', ' then', ' on', '.', '\\n', '\\n', 'M', 'AM', 'ILL', 'I', 'US', ':', '\\n', 'D', 'w', 'elt', ' by', ' a', ' church', 'yard', ':', ' I', ' will', ' tell', ' it', ' softly', ';', '\\n', 'Y', 'ond', ' cr', 'ickets', ' shall', ' not', ' hear', ' it', '.', '\\n', '\\n', 'HER', 'M', 'ION', 'E', ':', '\\n', 'Come', ' on', ',', ' then', ',', '\\n', 'And', ' give', \"'t\", ' me', ' in', ' mine', ' ear', '.', '\\n', '\\n', 'LE', 'ONT', 'ES', ':', '\\n', 'Was', ' he', ' met', ' there', '?', ' his', ' train', '?', ' Cam', 'illo', ' with', ' him', '?', '\\n', '\\n', 'First', ' Lord', ':', '\\n', 'Behind', ' the', ' tu', 'ft', ' of', ' p', 'ines', ' I', ' met', ' them', ';', ' never', '\\n', 'S', 'aw', ' I', ' men', ' sc', 'our', ' so', ' on', ' their', ' way', ':', ' I', ' eyed', ' them', '\\n', 'Even', ' to', ' 
their', ' ships', '.', '\\n', '\\n', 'LE', 'ONT', 'ES', ':', '\\n', 'How', ' bl', 'est', ' am', ' I', '\\n', 'In', ' my', ' just', ' cens', 'ure', ',', ' in', ' my', ' true', ' opinion', '!', '\\n', 'Al', 'ack', ',', ' for', ' lesser', ' knowledge', '!', ' how', ' acc', 'ursed', '\\n', 'In', ' being', ' so', ' bl', 'est', '!', ' There', ' may', ' be', ' in', ' the', ' cup', '\\n', 'A', ' spider', ' steep', \"'d\", ',', ' and', ' one', ' may', ' drink', ',', ' depart', ',', '\\n', 'And', ' yet', ' partake', ' no', ' venom', ',', ' for', ' his', ' knowledge', '\\n', 'Is', ' not', ' infected', ':', ' but', ' if', ' one', ' present', '\\n', 'The', ' abhor', 'r', \"'d\", ' ingredient', ' to', ' his', ' eye', ',', ' make', ' known', '\\n', 'How', ' he', ' hath', ' drunk', ',', ' he', ' cracks', ' his', ' gorge', ',', ' his', ' sides', ',', '\\n', 'With', ' violent', ' he', 'fts', '.', ' I', ' have', ' drunk', ',', '\\n', 'and', ' seen', ' the', ' spider', '.', '\\n', 'Cam', 'illo', ' was', ' his', ' help', ' in', ' this', ',', ' his', ' p', 'ander', ':', '\\n', 'There', ' is', ' a', ' plot', ' against', ' my', ' life', ',', ' my', ' crown', ';', '\\n', 'All', \"'s\", ' true', ' that', ' is', ' mist', 'r', 'usted', ':', ' that', ' false', ' villain', '\\n', 'Wh', 'om', ' I', ' employ', \"'d\", ' was', ' pre', '-', 'employ', \"'d\", ' by', ' him', ':', '\\n', 'He', ' has', ' discover', \"'d\", ' my', ' design', ',', ' and', ' I', '\\n', 'Rem', 'ain', ' a', ' pinch', \"'d\", ' thing', ';', ' yea', ',', ' a', ' very', ' trick', '\\n', 'For', ' them', ' to', ' play', ' at', ' will', '.', ' How', ' came', ' the', ' post', 'ern', 's', '\\n', 'So', ' easily', ' open', '?', '\\n', '\\n', 'First', ' Lord', ':', '\\n', 'By', ' his', ' great', ' authority', ';', '\\n', 'Which', ' often', ' hath', ' no', ' less', ' prevail', \"'d\", ' than', ' so', '\\n', 'On', ' your', ' command', '.', '\\n', '\\n', 'LE', 'ONT', 'ES', ':', '\\n', 'I', ' know', \"'t\", ' too', ' well', '.', '\\n', 'Give', ' me', ' the', ' boy', ':', ' I', ' am', ' glad', ' you', ' did', ' not', ' nurse']\n" + ] + } + ], + "source": [ + "input_tokens = x[0, :4].unsqueeze(0).cuda()\n", + "max_new_token = 8\n", + "generated_tokens = model.generate(input_tokens, max_new_token)\n", + "print('input', [enc.decode([i.item()]) for i in input_tokens[0]])\n", + "print('output', [enc.decode([i.item()]) for i in generated_tokens[0]])\n", + "print('Gold label', [enc.decode([i.item()]) for i in x[0]])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "steps: 0 loss: 11.002180099487305\n", + "steps: 100 loss: 10.177762985229492\n", + "steps: 200 loss: 9.035687446594238\n", + "steps: 300 loss: 7.741647243499756\n", + "steps: 400 loss: 6.681490898132324\n", + "steps: 499 loss: 6.057015895843506\n" + ] + } + ], + "source": [ + "optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)\n", + "batch_size = 32\n", + "context_length = 1024\n", + "iterations = 500\n", + "for steps in range(iterations):\n", + " x, y = get_batch(batch_size, context_length, 'train')\n", + " # print(x[0], y[0])\n", + " x, y = x.cuda(), y.cuda()\n", + " loss = model.loss(x, y)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " if steps % 100 == 0:\n", + " print('steps:', steps, 'loss:', loss.item())\n", + "print('steps:', steps, 'loss:', loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ 
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "input ['I', ' have', ' a', ' motion']\n", + "output ['I', ' have', ' a', ' motion', ' we', ' Glou', ' depletion', 'Pont', 'ndra', ' vividly', 'object', 'Red']\n", + "Gold label ['I', ' have', ' a', ' motion', ' much', ' imports', ' your', ' good', ';', '\\n', 'Whe', 'ret', 'o', ' if', ' you', \"'ll\", ' a', ' willing', ' ear', ' incl', 'ine', ',', '\\n', 'What', \"'s\", ' mine', ' is', ' yours', ' and', ' what', ' is', ' yours', ' is', ' mine', '.', '\\n', 'So', ',', ' bring', ' us', ' to', ' our', ' palace', ';', ' where', ' we', \"'ll\", ' show', '\\n', 'What', \"'s\", ' yet', ' behind', ',', ' that', \"'s\", ' meet', ' you', ' all', ' should', ' know', '.', '\\n', '\\n', 'SL', 'Y', ':', '\\n', 'I', \"'ll\", ' p', 'hee', 'ze', ' you', ',', ' in', ' faith', '.', '\\n', '\\n', 'Host', 'ess', ':', '\\n', 'A', ' pair', ' of', ' stocks', ',', ' you', ' rogue', '!', '\\n', '\\n', 'SL', 'Y', ':', '\\n', 'Ye', ' are', ' a', ' baggage', ':', ' the', ' S', 'lys', ' are', ' no', ' rog', 'ues', ';', ' look', ' in', '\\n', 'the', ' chron', 'icles', ';', ' we', ' came', ' in', ' with', ' Richard', ' Conquer', 'or', '.', '\\n', 'Therefore', ' p', 'aucas', ' pall', 'ab', 'ris', ';', ' let', ' the', ' world', ' slide', ':', ' s', 'essa', '!', '\\n', '\\n', 'Host', 'ess', ':', '\\n', 'You', ' will', ' not', ' pay', ' for', ' the', ' glasses', ' you', ' have', ' burst', '?', '\\n', '\\n', 'SL', 'Y', ':', '\\n', 'No', ',', ' not', ' a', ' den', 'ier', '.', ' Go', ' by', ',', ' Jer', 'on', 'im', 'y', ':', ' go', ' to', ' thy', ' cold', '\\n', 'bed', ',', ' and', ' warm', ' thee', '.', '\\n', '\\n', 'Host', 'ess', ':', '\\n', 'I', ' know', ' my', ' remedy', ';', ' I', ' must', ' go', ' fetch', ' the', '\\n', 'third', '--', 'borough', '.', '\\n', '\\n', 'SL', 'Y', ':', '\\n', 'Third', ',', ' or', ' fourth', ',', ' or', ' fifth', ' borough', ',', ' I', \"'ll\", ' answer', ' him', '\\n', 'by', ' law', ':', ' I', \"'ll\", ' not', ' bud', 'ge', ' an', ' inch', ',', ' boy', ':', ' let', ' him', ' come', ',', '\\n', 'and', ' kindly', '.', '\\n', '\\n', 'Lord', ':', '\\n', 'Hun', 'ts', 'man', ',', ' I', ' charge', ' thee', ',', ' tender', ' well', ' my', ' h', 'ounds', ':', '\\n', 'Br', 'ach', ' Mer', 'rim', 'an', ',', ' the', ' poor', ' cur', ' is', ' emb', 'oss', \"'d\", ';', '\\n', 'And', ' couple', ' Cl', 'owder', ' with', ' the', ' deep', '--', 'mouth', \"'d\", ' br', 'ach', '.', '\\n', 'S', 'aw', \"'s\", 't', ' thou', ' not', ',', ' boy', ',', ' how', ' Silver', ' made', ' it', ' good', '\\n', 'At', ' the', ' hedge', '-', 'cor', 'ner', ',', ' in', ' the', ' cold', 'est', ' fault', '?', '\\n', 'I', ' would', ' not', ' lose', ' the', ' dog', ' for', ' twenty', ' pound', '.', '\\n', '\\n', 'First', ' Hunts', 'man', ':', '\\n', 'Why', ',', ' Bel', 'man', ' is', ' as', ' good', ' as', ' he', ',', ' my', ' lord', ';', '\\n', 'He', ' cried', ' upon', ' it', ' at', ' the', ' me', 'rest', ' loss', '\\n', 'And', ' twice', ' to', '-', 'day', ' pick', \"'d\", ' out', ' the', ' dull', 'est', ' scent', ':', '\\n', 'Trust', ' me', ',', ' I', ' take', ' him', ' for', ' the', ' better', ' dog', '.', '\\n', '\\n', 'Lord', ':', '\\n', 'Th', 'ou', ' art', ' a', ' fool', ':', ' if', ' Echo', ' were', ' as', ' fleet', ',', '\\n', 'I', ' would', ' esteem', ' him', ' worth', ' a', ' dozen', ' such', '.', '\\n', 'But', ' sup', ' them', ' well', ' and', ' look', ' unto', ' them', ' all', ':', '\\n', 'To', '-', 'morrow', ' I', ' intend', ' to', ' hunt', ' again', '.', 
'\\n', '\\n', 'First', ' Hunts', 'man', ':', '\\n', 'I', ' will', ',', ' my', ' lord', '.', '\\n', '\\n', 'Lord', ':', '\\n', 'What', \"'s\", ' here', '?', ' one', ' dead', ',', ' or', ' drunk', '?', ' See', ',', ' d', 'oth', ' he', ' breathe', '?', '\\n', '\\n', 'Second', ' Hunts', 'man', ':', '\\n', 'He', ' breat', 'hes', ',', ' my', ' lord', '.', ' Were', ' he', ' not', ' warm', \"'d\", ' with', ' ale', ',', '\\n', 'This', ' were', ' a', ' bed', ' but', ' cold', ' to', ' sleep', ' so', ' sound', 'ly', '.', '\\n', '\\n', 'Lord', ':', '\\n', 'O', ' monstrous', ' beast', '!', ' how', ' like', ' a', ' sw', 'ine', ' he', ' lies', '!', '\\n', 'G', 'rim', ' death', ',', ' how', ' foul', ' and', ' lo', 'ath', 'some', ' is', ' th', 'ine', ' image', '!', '\\n', 'S', 'irs', ',', ' I', ' will', ' practise', ' on', ' this', ' drunken', ' man', '.', '\\n', 'What', ' think', ' you', ',', ' if', ' he', ' were', ' convey', \"'d\", ' to', ' bed', ',', '\\n', 'Wra', 'pp', \"'d\", ' in', ' sweet', ' clothes', ',', ' rings', ' put', ' upon', ' his', ' fingers', ',', '\\n', 'A', ' most', ' delicious', ' banquet', ' by', ' his', ' bed', ',', '\\n', 'And', ' brave', ' attendants', ' near', ' him', ' when', ' he', ' wakes', ',', '\\n', 'Would', ' not', ' the', ' begg', 'ar', ' then', ' forget', ' himself', '?', '\\n', '\\n', 'First', ' Hunts', 'man', ':', '\\n', 'Bel', 'ieve', ' me', ',', ' lord', ',', ' I', ' think', ' he', ' cannot', ' choose', '.', '\\n', '\\n', 'Second', ' Hunts', 'man', ':', '\\n', 'It', ' would', ' seem', ' strange', ' unto', ' him', ' when', ' he', ' w', 'aked', '.', '\\n', '\\n', 'Lord', ':', '\\n', 'Even', ' as', ' a', ' flattering', ' dream', ' or', ' worthless', ' fancy', '.', '\\n', 'Then', ' take', ' him', ' up', ' and', ' manage', ' well', ' the', ' j', 'est', ':', '\\n', 'C', 'arry', ' him', ' gently', ' to', ' my', ' faire', 'st', ' chamber', '\\n', 'And', ' hang', ' it', ' round', ' with', ' all', ' my', ' want', 'on', ' pictures', ':', '\\n', 'Bal', 'm', ' his', ' foul', ' head', ' in', ' warm', ' distilled', ' waters', '\\n', 'And', ' burn', ' sweet', ' wood', ' to', ' make', ' the', ' lodging', ' sweet', ':', '\\n', 'Pro', 'c', 'ure', ' me', ' music', ' ready', ' when', ' he', ' wakes', ',', '\\n', 'To', ' make', ' a', ' d', 'ul', 'c', 'et', ' and', ' a', ' heavenly', ' sound', ';', '\\n', 'And', ' if', ' he', ' chance', ' to', ' speak', ',', ' be', ' ready', ' straight', '\\n', 'And', ' with', ' a', ' low', ' sub', 'missive', ' reverence', '\\n', 'Say', \" '\", 'What', ' is', ' it', ' your', ' honour', ' will', ' command', \"?'\", '\\n', 'Let', ' one', ' attend', ' him', ' with', ' a', ' silver', ' basin', '\\n', 'Full', ' of', ' rose', '-', 'water', ' and', ' best', 'rew', \"'d\", ' with', ' flowers', ',', '\\n', 'Another', ' bear', ' the', ' e', 'wer', ',', ' the', ' third', ' a', ' diaper', ',', '\\n', 'And', ' say', \" '\", 'Will', \"'t\", ' please', ' your', ' lords', 'hip', ' cool', ' your', ' hands', \"?'\", '\\n', 'Some', ' one', ' be', ' ready', ' with', ' a', ' costly', ' suit', '\\n', 'And', ' ask', ' him', ' what', ' apparel', ' he', ' will', ' wear', ';', '\\n', 'Another', ' tell', ' him', ' of', ' his', ' h', 'ounds', ' and', ' horse', ',', '\\n', 'And', ' that', ' his', ' lady', ' mourn', 's', ' at', ' his', ' disease', ':', '\\n', 'Pers', 'u', 'ade', ' him', ' that', ' he', ' hath', ' been', ' lun', 'atic', ';', '\\n', 'And', ' when', ' he', ' says', ' he', ' is', ',', ' say', ' that', ' he', ' dreams', ',', '\\n', 'For', ' he', ' is', ' nothing', ' but', ' 
a', ' mighty', ' lord', '.', '\\n', 'This', ' do', ' and', ' do', ' it', ' kindly', ',', ' gentle', ' sir', 's', ':', '\\n', 'It', ' will', ' be', ' past', 'ime', ' passing', ' excellent', ',', '\\n', 'If', ' it', ' be', ' husband', 'ed', ' with', ' modesty', '.', '\\n', '\\n', 'First', ' Hunts', 'man', ':', '\\n', 'My', ' lord', ',', ' I', ' warrant', ' you', ' we', ' will', ' play', ' our', ' part', ',', '\\n', 'As', ' he', ' shall', ' think', ' by', ' our', ' true', ' diligence', '\\n', 'He', ' is', ' no', ' less', ' than', ' what', ' we', ' say', ' he', ' is', '.', '\\n', '\\n', 'Lord', ':', '\\n', 'Take', ' him', ' up', ' gently', ' and', ' to', ' bed', ' with', ' him', ';', '\\n', 'And', ' each', ' one', ' to', ' his', ' office', ' when', ' he', ' wakes', '.', '\\n', 'Sir', 'rah', ',', ' go', ' see', ' what', ' trumpet', \" '\", 'tis', ' that', ' sounds', ':', '\\n', 'Bel', 'ike', ',', ' some', ' noble', ' gentleman', ' that', ' means', ',', '\\n', 'T', 'rave', 'lling', ' some']\n" + ] + } + ], + "source": [ + "input_tokens = x[0, :4].unsqueeze(0).cuda()\n", + "max_new_token = 8\n", + "generated_tokens = model.generate(input_tokens, max_new_token)\n", + "print('input', [enc.decode([i.item()]) for i in input_tokens[0]])\n", + "print('output', [enc.decode([i.item()]) for i in generated_tokens[0]])\n", + "print('Gold label', [enc.decode([i.item()]) for i in x[0]])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "seen tokens: 16384000\n" + ] + } + ], + "source": [ + "print('seen tokens: ', batch_size * context_length * iterations)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor(10.8249, device='cuda:0')\n", + "48\n", + "tensor(4.5572, device='cuda:0')\n" + ] + } + ], + "source": [ + "from ngram import Ngram\n", + "from data import text, enc\n", + "import torch\n", + "vocab = list(range(enc.n_vocab))\n", + "context_lengh = 16\n", + "ngram = Ngram(2, vocab)\n", + "inputs = [enc.encode(text)[:context_lengh]]\n", + "targets = torch.LongTensor([enc.encode(text)[1:context_lengh+1]]).cuda()\n", + "loss = ngram.loss(inputs, targets)\n", + "print(loss)\n", + "epochs = (batch_size * context_length * iterations) // len(enc.encode(text))\n", + "ngram = Ngram(2, vocab)\n", + "print(epochs)\n", + "for epoch in range(epochs):\n", + " ngram.train(enc.encode(text))\n", + "loss = ngram.loss(inputs, targets)\n", + "print(loss)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor(10.8249, device='cuda:0')\n", + "tensor(3.4560, device='cuda:0')\n" + ] + } + ], + "source": [ + "ngram = Ngram(2, vocab, 1e-3)\n", + "loss = ngram.loss(inputs, targets)\n", + "print(loss)\n", + "ngram.train(enc.encode(text))\n", + "loss = ngram.loss(inputs, targets)\n", + "print(loss)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor(10.8249, device='cuda:0')\n", + "tensor(2.3174, device='cuda:0')\n" + ] + } + ], + "source": [ + "ngram = Ngram(4, vocab, 1e-5)\n", + "loss = ngram.loss(inputs, targets)\n", + "print(loss)\n", + "ngram.train(enc.encode(text))\n", + "loss = ngram.loss(inputs, targets)\n", + "print(loss)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, 
+ "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.09853373047652528" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.exp(-loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# ngram.ngram" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/playground/GPT DEV_2_e2epipeline_baseline_char.ipynb b/playground/GPT DEV_2_e2epipeline_baseline_char.ipynb new file mode 100644 index 0000000..9f23a55 --- /dev/null +++ b/playground/GPT DEV_2_e2epipeline_baseline_char.ipynb @@ -0,0 +1,935 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "- [A Recipe for Training Neural Networks\n", + "](https://karpathy.github.io/2019/04/25/recipe/)\n", + "- [Harvard CS197 AI Research Experiences](https://docs.google.com/document/d/1uvAbEhbgS_M-uDMTzmOWRlYxqCkogKRXdbKYYT98ooc/edit#heading=h.2z3yllpny6or)\n", + "- [Unit tests for machine learning research](https://semla.polymtl.ca/wp-content/uploads/2022/11/Pablo-Unit-tests-for-ML-code-SEMLA-talk.pdf)\n", + "- [CS 329S: Machine Learning Systems Design](https://stanford-cs329s.github.io/syllabus.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up the end-to-end training/evaluation skeleton + get dumb baselines" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "from torch.nn import functional as F\n", + "\n", + "torch.manual_seed(1337)\n", + "\n", + "class BigramLanguageModel(nn.Module):\n", + " def __init__(self, vocab_size):\n", + " super().__init__()\n", + " self.bigram_table = nn.Embedding(vocab_size, vocab_size)\n", + " # self.token_embedding_table = nn.Embedding(vocab_size, 16)\n", + " # self.linear = nn.Linear(16, vocab_size)\n", + " print('number of parameters:', sum(p.numel() for p in self.parameters()))\n", + " \n", + " def forward(self, token_indexes):\n", + " # token_index: (batch_size, sequence_length)\n", + " logits = self.bigram_table(token_indexes)\n", + "\n", + " # embedding = self.token_embedding_table(token_indexes)\n", + " # logits = self.linear(embedding)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " return logits\n", + "\n", + " def loss_per_token(self, token_indexes, targets):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " # targets: (batch_size, sequence_length)\n", + " batch_size, sequence_length, vocab_size = logits.shape\n", + " loss = F.cross_entropy(\n", + " logits.view(batch_size*sequence_length, vocab_size),\n", + " targets.view(batch_size*sequence_length),\n", + " reduction='none'\n", + " )\n", + " # loss: (batch_size*sequence_length)\n", + " return loss.view(batch_size, sequence_length)\n", + " \n", + " def loss(self, token_indexes, targets):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, 
sequence_length, vocab_size)\n", + " # targets: (batch_size, sequence_length)\n", + " batch_size, sequence_length, vocab_size = logits.shape\n", + " loss = F.cross_entropy(\n", + " logits.view(batch_size*sequence_length, vocab_size),\n", + " targets.view(batch_size*sequence_length)\n", + " )\n", + " # loss: scalar\n", + " return loss\n", + " \n", + " def generate(self, token_indexes, max_new_tokens):\n", + " # token_indexes: (batch_size, sequence_length)\n", + " batch_size, sequence_length = token_indexes.shape\n", + " for _ in range(max_new_tokens):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " next_token_logits = logits[:, -1, :]\n", + " # next_token_logits: (batch_size, vocab_size)\n", + " next_token_probs = F.softmax(next_token_logits, dim=-1)\n", + " # next_token_probs: (batch_size, vocab_size)\n", + " next_token = torch.multinomial(next_token_probs, num_samples=1)\n", + " # next_token: (batch_size, 1)\n", + " token_indexes = torch.cat([token_indexes, next_token], dim=1)\n", + " # token_indexes: (batch_size, sequence_length+1)\n", + " return token_indexes\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "65\n", + "['\\n', ' ', '!', '$', '&', \"'\", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']\n", + "[20, 43, 50, 50, 53, 1, 51, 63, 1, 52, 39, 51, 43, 1, 47, 57, 1, 49, 43, 52, 53]\n", + "['H', 'e', 'l', 'l', 'o', ' ', 'm', 'y', ' ', 'n', 'a', 'm', 'e', ' ', 'i', 's', ' ', 'k', 'e', 'n', 'o']\n" + ] + } + ], + "source": [ + "from data_char import text, CharTokenizer\n", + "\n", + "tokenizer = CharTokenizer(text)\n", + "print(tokenizer.n_vocab)\n", + "print(tokenizer.vocab)\n", + "print(tokenizer.encode('Hello my name is keno'))\n", + "print(tokenizer.decode(tokenizer.encode('Hello my name is keno')))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def rand_int_test(cls, low, high, shape, kwargs):\n", + " layer = cls(**kwargs).cuda()\n", + " random_input = torch.randint(low, high, shape).cuda()\n", + " print('input shape:', random_input.shape)\n", + " output = layer(random_input)\n", + " print('output shape:', output.shape)\n", + " return output" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "number of parameters: 65536\n", + "input shape: torch.Size([4, 1024])\n", + "output shape: torch.Size([4, 1024, 256])\n" + ] + } + ], + "source": [ + "test_cls = BigramLanguageModel\n", + "batch_size = 4\n", + "context_length = 1024\n", + "vocab_size = 256\n", + "\n", + "kwargs = {'vocab_size': vocab_size}\n", + "output = rand_int_test(test_cls, 0, vocab_size, (batch_size, context_length), kwargs)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "number of parameters: 4225\n", + "random guess loss: 4.174387269895637\n", + "tensor(4.7202, device='cuda:0', grad_fn=)\n", + "torch.Size([4, 1024]) tensor(4.7202, device='cuda:0', grad_fn=)\n", + "tensor([[4.6262, 5.8574, 5.4585, ..., 5.5064, 5.7386, 3.9034],\n", 
+ " [5.4831, 3.7372, 4.9155, ..., 3.7848, 4.7020, 4.1312],\n", + " [4.7929, 3.7371, 5.3320, ..., 4.7322, 3.9989, 4.3654],\n", + " [4.2844, 5.6883, 4.1599, ..., 5.5120, 5.1844, 3.5611]],\n", + " device='cuda:0', grad_fn=)\n" + ] + } + ], + "source": [ + "from data_char import get_batch, enc\n", + "import math\n", + "\n", + "x, y = get_batch(batch_size, context_length, 'train')\n", + "vocab_size = enc.n_vocab\n", + "model = BigramLanguageModel(vocab_size).cuda()\n", + "loss = model.loss(x.cuda(), y.cuda())\n", + "print('random guess loss:', -math.log(1/vocab_size))\n", + "print(loss)\n", + "loss_per_token = model.loss_per_token(x.cuda(), y.cuda())\n", + "print(loss_per_token.shape, loss_per_token.mean())\n", + "print(loss_per_token)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input [['y'], [' '], ['s'], ['t']]\n", + "output [['y'], [' '], ['s'], ['t'], ['?'], ['v'], ['d'], ['x'], ['x'], ['b'], ['D'], ['y']]\n", + "Gold label [['y'], [' '], ['s'], ['t'], ['i'], ['n'], ['g'], [' '], ['t'], ['o'], [' '], ['h'], ['u'], ['r'], ['t'], [','], ['\\n'], ['Y'], ['e'], ['t'], [' '], ['l'], ['o'], ['o'], ['k'], [' '], ['t'], ['o'], [' '], ['h'], ['a'], ['v'], ['e'], [' '], ['t'], ['h'], ['e'], ['m'], [' '], ['b'], ['u'], ['z'], ['z'], [' '], ['t'], ['o'], [' '], ['o'], ['f'], ['f'], ['e'], ['n'], ['d'], [' '], ['t'], ['h'], ['i'], ['n'], ['e'], [' '], ['e'], ['a'], ['r'], ['s'], ['.'], ['\\n'], ['F'], ['i'], ['r'], ['s'], ['t'], [' '], ['w'], ['i'], ['l'], ['l'], [' '], ['I'], [' '], ['s'], ['e'], ['e'], [' '], ['t'], ['h'], ['e'], [' '], ['c'], ['o'], ['r'], ['o'], ['n'], ['a'], ['t'], ['i'], ['o'], ['n'], [';'], ['\\n'], ['A'], ['n'], ['d'], [' '], ['t'], ['h'], ['e'], ['n'], [' '], ['t'], ['o'], [' '], ['B'], ['r'], ['i'], ['t'], ['t'], ['a'], ['n'], ['y'], [' '], ['I'], [\"'\"], ['l'], ['l'], [' '], ['c'], ['r'], ['o'], ['s'], ['s'], [' '], ['t'], ['h'], ['e'], [' '], ['s'], ['e'], ['a'], [','], ['\\n'], ['T'], ['o'], [' '], ['e'], ['f'], ['f'], ['e'], ['c'], ['t'], [' '], ['t'], ['h'], ['i'], ['s'], [' '], ['m'], ['a'], ['r'], ['r'], ['i'], ['a'], ['g'], ['e'], [','], [' '], ['s'], ['o'], [' '], ['i'], ['t'], [' '], ['p'], ['l'], ['e'], ['a'], ['s'], ['e'], [' '], ['m'], ['y'], [' '], ['l'], ['o'], ['r'], ['d'], ['.'], ['\\n'], ['\\n'], ['E'], ['D'], ['W'], ['A'], ['R'], ['D'], [':'], ['\\n'], ['E'], ['v'], ['e'], ['n'], [' '], ['a'], ['s'], [' '], ['t'], ['h'], ['o'], ['u'], [' '], ['w'], ['i'], ['l'], ['t'], [','], [' '], ['s'], ['w'], ['e'], ['e'], ['t'], [' '], ['W'], ['a'], ['r'], ['w'], ['i'], ['c'], ['k'], [','], [' '], ['l'], ['e'], ['t'], [' '], ['i'], ['t'], [' '], ['b'], ['e'], [';'], ['\\n'], ['F'], ['o'], ['r'], [' '], ['i'], ['n'], [' '], ['t'], ['h'], ['y'], [' '], ['s'], ['h'], ['o'], ['u'], ['l'], ['d'], ['e'], ['r'], [' '], ['d'], ['o'], [' '], ['I'], [' '], ['b'], ['u'], ['i'], ['l'], ['d'], [' '], ['m'], ['y'], [' '], ['s'], ['e'], ['a'], ['t'], [','], ['\\n'], ['A'], ['n'], ['d'], [' '], ['n'], ['e'], ['v'], ['e'], ['r'], [' '], ['w'], ['i'], ['l'], ['l'], [' '], ['I'], [' '], ['u'], ['n'], ['d'], ['e'], ['r'], ['t'], ['a'], ['k'], ['e'], [' '], ['t'], ['h'], ['e'], [' '], ['t'], ['h'], ['i'], ['n'], ['g'], ['\\n'], ['W'], ['h'], ['e'], ['r'], ['e'], ['i'], ['n'], [' '], ['t'], ['h'], ['y'], [' '], ['c'], ['o'], ['u'], ['n'], ['s'], ['e'], ['l'], [' '], ['a'], ['n'], ['d'], [' '], ['c'], ['o'], ['n'], ['s'], ['e'], ['n'], ['t'], [' '], ['i'], ['s'], [' '], ['w'], 
['a'], ['n'], ['t'], ['i'], ['n'], ['g'], ['.'], ['\\n'], ['R'], ['i'], ['c'], ['h'], ['a'], ['r'], ['d'], [','], [' '], ['I'], [' '], ['w'], ['i'], ['l'], ['l'], [' '], ['c'], ['r'], ['e'], ['a'], ['t'], ['e'], [' '], ['t'], ['h'], ['e'], ['e'], [' '], ['D'], ['u'], ['k'], ['e'], [' '], ['o'], ['f'], [' '], ['G'], ['l'], ['o'], ['u'], ['c'], ['e'], ['s'], ['t'], ['e'], ['r'], [','], ['\\n'], ['A'], ['n'], ['d'], [' '], ['G'], ['e'], ['o'], ['r'], ['g'], ['e'], [','], [' '], ['o'], ['f'], [' '], ['C'], ['l'], ['a'], ['r'], ['e'], ['n'], ['c'], ['e'], [':'], [' '], ['W'], ['a'], ['r'], ['w'], ['i'], ['c'], ['k'], [','], [' '], ['a'], ['s'], [' '], ['o'], ['u'], ['r'], ['s'], ['e'], ['l'], ['f'], [','], ['\\n'], ['S'], ['h'], ['a'], ['l'], ['l'], [' '], ['d'], ['o'], [' '], ['a'], ['n'], ['d'], [' '], ['u'], ['n'], ['d'], ['o'], [' '], ['a'], ['s'], [' '], ['h'], ['i'], ['m'], [' '], ['p'], ['l'], ['e'], ['a'], ['s'], ['e'], ['t'], ['h'], [' '], ['b'], ['e'], ['s'], ['t'], ['.'], ['\\n'], ['\\n'], ['R'], ['I'], ['C'], ['H'], ['A'], ['R'], ['D'], [':'], ['\\n'], ['L'], ['e'], ['t'], [' '], ['m'], ['e'], [' '], ['b'], ['e'], [' '], ['D'], ['u'], ['k'], ['e'], [' '], ['o'], ['f'], [' '], ['C'], ['l'], ['a'], ['r'], ['e'], ['n'], ['c'], ['e'], [','], [' '], ['G'], ['e'], ['o'], ['r'], ['g'], ['e'], [' '], ['o'], ['f'], [' '], ['G'], ['l'], ['o'], ['u'], ['c'], ['e'], ['s'], ['t'], ['e'], ['r'], [';'], ['\\n'], ['F'], ['o'], ['r'], [' '], ['G'], ['l'], ['o'], ['u'], ['c'], ['e'], ['s'], ['t'], ['e'], ['r'], [\"'\"], ['s'], [' '], ['d'], ['u'], ['k'], ['e'], ['d'], ['o'], ['m'], [' '], ['i'], ['s'], [' '], ['t'], ['o'], ['o'], [' '], ['o'], ['m'], ['i'], ['n'], ['o'], ['u'], ['s'], ['.'], ['\\n'], ['\\n'], ['W'], ['A'], ['R'], ['W'], ['I'], ['C'], ['K'], [':'], ['\\n'], ['T'], ['u'], ['t'], [','], [' '], ['t'], ['h'], ['a'], ['t'], [\"'\"], ['s'], [' '], ['a'], [' '], ['f'], ['o'], ['o'], ['l'], ['i'], ['s'], ['h'], [' '], ['o'], ['b'], ['s'], ['e'], ['r'], ['v'], ['a'], ['t'], ['i'], ['o'], ['n'], [':'], ['\\n'], ['R'], ['i'], ['c'], ['h'], ['a'], ['r'], ['d'], [','], [' '], ['b'], ['e'], [' '], ['D'], ['u'], ['k'], ['e'], [' '], ['o'], ['f'], [' '], ['G'], ['l'], ['o'], ['u'], ['c'], ['e'], ['s'], ['t'], ['e'], ['r'], ['.'], [' '], ['N'], ['o'], ['w'], [' '], ['t'], ['o'], [' '], ['L'], ['o'], ['n'], ['d'], ['o'], ['n'], [','], ['\\n'], ['T'], ['o'], [' '], ['s'], ['e'], ['e'], [' '], ['t'], ['h'], ['e'], ['s'], ['e'], [' '], ['h'], ['o'], ['n'], ['o'], ['u'], ['r'], ['s'], [' '], ['i'], ['n'], [' '], ['p'], ['o'], ['s'], ['s'], ['e'], ['s'], ['s'], ['i'], ['o'], ['n'], ['.'], ['\\n'], ['3'], [' '], ['K'], ['I'], ['N'], ['G'], [' '], ['H'], ['E'], ['N'], ['R'], ['Y'], [' '], ['V'], ['I'], ['\\n'], ['\\n'], ['F'], ['i'], ['r'], ['s'], ['t'], [' '], ['K'], ['e'], ['e'], ['p'], ['e'], ['r'], [':'], ['\\n'], ['U'], ['n'], ['d'], ['e'], ['r'], [' '], ['t'], ['h'], ['i'], ['s'], [' '], ['t'], ['h'], ['i'], ['c'], ['k'], ['-'], ['g'], ['r'], ['o'], ['w'], ['n'], [' '], ['b'], ['r'], ['a'], ['k'], ['e'], [' '], ['w'], ['e'], [\"'\"], ['l'], ['l'], [' '], ['s'], ['h'], ['r'], ['o'], ['u'], ['d'], [' '], ['o'], ['u'], ['r'], ['s'], ['e'], ['l'], ['v'], ['e'], ['s'], [';'], ['\\n'], ['F'], ['o'], ['r'], [' '], ['t'], ['h'], ['r'], ['o'], ['u'], ['g'], ['h'], [' '], ['t'], ['h'], ['i'], ['s'], [' '], ['l'], ['a'], ['u'], ['n'], ['d'], [' '], ['a'], ['n'], ['o'], ['n'], [' '], ['t'], ['h'], ['e'], [' '], ['d'], ['e'], ['e'], ['r'], [' '], ['w'], ['i'], ['l'], ['l'], [' '], ['c'], ['o'], ['m'], ['e'], [';'], 
['\\n'], ['A'], ['n'], ['d'], [' '], ['i'], ['n'], [' '], ['t'], ['h'], ['i'], ['s'], [' '], ['c'], ['o'], ['v'], ['e'], ['r'], ['t'], [' '], ['w'], ['i'], ['l'], ['l'], [' '], ['w'], ['e'], [' '], ['m'], ['a'], ['k'], ['e'], [' '], ['o'], ['u'], ['r'], [' '], ['s'], ['t'], ['a'], ['n'], ['d'], [','], ['\\n'], ['C'], ['u'], ['l'], ['l'], ['i'], ['n'], ['g'], [' '], ['t'], ['h'], ['e'], [' '], ['p'], ['r'], ['i'], ['n'], ['c'], ['i'], ['p'], ['a'], ['l'], [' '], ['o'], ['f'], [' '], ['a'], ['l'], ['l'], [' '], ['t'], ['h'], ['e'], [' '], ['d'], ['e'], ['e'], ['r'], ['.'], ['\\n'], ['\\n'], ['S'], ['e'], ['c'], ['o'], ['n'], ['d'], [' '], ['K'], ['e'], ['e'], ['p'], ['e'], ['r'], [':'], ['\\n'], ['I'], [\"'\"], ['l'], ['l'], [' '], ['s'], ['t'], ['a'], ['y'], [' '], ['a'], ['b'], ['o'], ['v'], ['e'], [' '], ['t'], ['h'], ['e'], [' '], ['h'], ['i'], ['l'], ['l'], [','], [' '], ['s'], ['o'], [' '], ['b'], ['o'], ['t'], ['h'], [' '], ['m'], ['a'], ['y'], [' '], ['s'], ['h'], ['o'], ['o'], ['t'], ['.'], ['\\n'], ['\\n'], ['F'], ['i'], ['r'], ['s'], ['t'], [' '], ['K'], ['e'], ['e'], ['p'], ['e'], ['r'], [':'], ['\\n'], ['T'], ['h'], ['a'], ['t'], [' '], ['c'], ['a'], ['n'], ['n']]\n" + ] + } + ], + "source": [ + "input_tokens = x[0, :4].unsqueeze(0).cuda()\n", + "max_new_token = 8\n", + "generated_tokens = model.generate(input_tokens, max_new_token)\n", + "print('input', [enc.decode([i.item()]) for i in input_tokens[0]])\n", + "print('output', [enc.decode([i.item()]) for i in generated_tokens[0]])\n", + "print('Gold label', [enc.decode([i.item()]) for i in x[0]])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "steps: 0 loss: 4.718399524688721\n", + "steps: 100 loss: 4.5681352615356445\n", + "steps: 200 loss: 4.431594371795654\n", + "steps: 300 loss: 4.299322605133057\n", + "steps: 400 loss: 4.155538558959961\n", + "steps: 500 loss: 4.031984329223633\n", + "steps: 600 loss: 3.921776533126831\n", + "steps: 700 loss: 3.828066825866699\n", + "steps: 800 loss: 3.71785569190979\n", + "steps: 900 loss: 3.628220558166504\n", + "steps: 999 loss: 3.539994239807129\n" + ] + } + ], + "source": [ + "optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)\n", + "batch_size = 32\n", + "context_length = 1024\n", + "iterations = 1000\n", + "for steps in range(iterations):\n", + " x, y = get_batch(batch_size, context_length, 'train')\n", + " # print(x[0], y[0])\n", + " x, y = x.cuda(), y.cuda()\n", + " loss = model.loss(x, y)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " if steps % 100 == 0:\n", + " print('steps:', steps, 'loss:', loss.item())\n", + "print('steps:', steps, 'loss:', loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input [['e'], ['s'], [' '], ['i']]\n", + "output [['e'], ['s'], [' '], ['i'], ['R'], ['Z'], [\"'\"], ['\\n'], ['d'], ['c'], [\"'\"], ['?']]\n", + "Gold label [['e'], ['s'], [' '], ['i'], ['n'], [' '], ['a'], [' '], ['m'], ['i'], ['l'], ['e'], ['-'], ['a'], ['.'], ['\\n'], ['\\n'], ['F'], ['L'], ['O'], ['R'], ['I'], ['Z'], ['E'], ['L'], [':'], ['\\n'], ['T'], ['h'], ['e'], ['s'], ['e'], [' '], ['y'], ['o'], ['u'], ['r'], [' '], ['u'], ['n'], ['u'], ['s'], ['u'], ['a'], ['l'], [' '], ['w'], ['e'], ['e'], ['d'], ['s'], [' '], ['t'], ['o'], [' '], ['e'], ['a'], ['c'], ['h'], [' '], ['p'], ['a'], ['r'], ['t'], [' '], 
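The training cell above starts from a loss of about 4.72. A quick sanity check is to compare that step-0 value against the uniform baseline -ln(1/vocab_size); assuming the 65-symbol character vocabulary implied by the 0–64 keys in the n-gram table further down, the baseline is about 4.1744, which is exactly the loss the untrained n-gram reports in a later cell. A minimal sketch (the `uniform_baseline_loss` helper and the hard-coded numbers are illustrative, not part of the notebook):

```python
import math

def uniform_baseline_loss(vocab_size: int) -> float:
    """Cross-entropy of a model that assigns equal probability to every token."""
    return -math.log(1.0 / vocab_size)

baseline = uniform_baseline_loss(65)          # 65-character vocabulary (keys 0..64 in the table below)
print(f"uniform baseline: {baseline:.4f}")    # ~4.1744, matches the untrained n-gram loss

observed_step0 = 4.718399524688721            # step-0 loss printed by the cell above
print(f"gap to baseline:  {observed_step0 - baseline:.4f}")
```

A step-0 loss far above this baseline usually points at an initialisation or scaling problem rather than a data problem.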
['o'], ['f'], [' '], ['y'], ['o'], ['u'], ['\\n'], ['D'], ['o'], [' '], ['g'], ['i'], ['v'], ['e'], [' '], ['a'], [' '], ['l'], ['i'], ['f'], ['e'], [':'], [' '], ['n'], ['o'], [' '], ['s'], ['h'], ['e'], ['p'], ['h'], ['e'], ['r'], ['d'], ['e'], ['s'], ['s'], [','], [' '], ['b'], ['u'], ['t'], [' '], ['F'], ['l'], ['o'], ['r'], ['a'], ['\\n'], ['P'], ['e'], ['e'], ['r'], ['i'], ['n'], ['g'], [' '], ['i'], ['n'], [' '], ['A'], ['p'], ['r'], ['i'], ['l'], [\"'\"], ['s'], [' '], ['f'], ['r'], ['o'], ['n'], ['t'], ['.'], [' '], ['T'], ['h'], ['i'], ['s'], [' '], ['y'], ['o'], ['u'], ['r'], [' '], ['s'], ['h'], ['e'], ['e'], ['p'], ['-'], ['s'], ['h'], ['e'], ['a'], ['r'], ['i'], ['n'], ['g'], ['\\n'], ['I'], ['s'], [' '], ['a'], ['s'], [' '], ['a'], [' '], ['m'], ['e'], ['e'], ['t'], ['i'], ['n'], ['g'], [' '], ['o'], ['f'], [' '], ['t'], ['h'], ['e'], [' '], ['p'], ['e'], ['t'], ['t'], ['y'], [' '], ['g'], ['o'], ['d'], ['s'], [','], ['\\n'], ['A'], ['n'], ['d'], [' '], ['y'], ['o'], ['u'], [' '], ['t'], ['h'], ['e'], [' '], ['q'], ['u'], ['e'], ['e'], ['n'], [' '], ['o'], ['n'], [\"'\"], ['t'], ['.'], ['\\n'], ['\\n'], ['P'], ['E'], ['R'], ['D'], ['I'], ['T'], ['A'], [':'], ['\\n'], ['S'], ['i'], ['r'], [','], [' '], ['m'], ['y'], [' '], ['g'], ['r'], ['a'], ['c'], ['i'], ['o'], ['u'], ['s'], [' '], ['l'], ['o'], ['r'], ['d'], [','], ['\\n'], ['T'], ['o'], [' '], ['c'], ['h'], ['i'], ['d'], ['e'], [' '], ['a'], ['t'], [' '], ['y'], ['o'], ['u'], ['r'], [' '], ['e'], ['x'], ['t'], ['r'], ['e'], ['m'], ['e'], ['s'], [' '], ['i'], ['t'], [' '], ['n'], ['o'], ['t'], [' '], ['b'], ['e'], ['c'], ['o'], ['m'], ['e'], ['s'], [' '], ['m'], ['e'], [':'], ['\\n'], ['O'], [','], [' '], ['p'], ['a'], ['r'], ['d'], ['o'], ['n'], [','], [' '], ['t'], ['h'], ['a'], ['t'], [' '], ['I'], [' '], ['n'], ['a'], ['m'], ['e'], [' '], ['t'], ['h'], ['e'], ['m'], ['!'], [' '], ['Y'], ['o'], ['u'], ['r'], [' '], ['h'], ['i'], ['g'], ['h'], [' '], ['s'], ['e'], ['l'], ['f'], [','], ['\\n'], ['T'], ['h'], ['e'], [' '], ['g'], ['r'], ['a'], ['c'], ['i'], ['o'], ['u'], ['s'], [' '], ['m'], ['a'], ['r'], ['k'], [' '], ['o'], [\"'\"], [' '], ['t'], ['h'], ['e'], [' '], ['l'], ['a'], ['n'], ['d'], [','], [' '], ['y'], ['o'], ['u'], [' '], ['h'], ['a'], ['v'], ['e'], [' '], ['o'], ['b'], ['s'], ['c'], ['u'], ['r'], ['e'], ['d'], ['\\n'], ['W'], ['i'], ['t'], ['h'], [' '], ['a'], [' '], ['s'], ['w'], ['a'], ['i'], ['n'], [\"'\"], ['s'], [' '], ['w'], ['e'], ['a'], ['r'], ['i'], ['n'], ['g'], [','], [' '], ['a'], ['n'], ['d'], [' '], ['m'], ['e'], [','], [' '], ['p'], ['o'], ['o'], ['r'], [' '], ['l'], ['o'], ['w'], ['l'], ['y'], [' '], ['m'], ['a'], ['i'], ['d'], [','], ['\\n'], ['M'], ['o'], ['s'], ['t'], [' '], ['g'], ['o'], ['d'], ['d'], ['e'], ['s'], ['s'], ['-'], ['l'], ['i'], ['k'], ['e'], [' '], ['p'], ['r'], ['a'], ['n'], ['k'], [\"'\"], ['d'], [' '], ['u'], ['p'], [':'], [' '], ['b'], ['u'], ['t'], [' '], ['t'], ['h'], ['a'], ['t'], [' '], ['o'], ['u'], ['r'], [' '], ['f'], ['e'], ['a'], ['s'], ['t'], ['s'], ['\\n'], ['I'], ['n'], [' '], ['e'], ['v'], ['e'], ['r'], ['y'], [' '], ['m'], ['e'], ['s'], ['s'], [' '], ['h'], ['a'], ['v'], ['e'], [' '], ['f'], ['o'], ['l'], ['l'], ['y'], [' '], ['a'], ['n'], ['d'], [' '], ['t'], ['h'], ['e'], [' '], ['f'], ['e'], ['e'], ['d'], ['e'], ['r'], ['s'], ['\\n'], ['D'], ['i'], ['g'], ['e'], ['s'], ['t'], [' '], ['i'], ['t'], [' '], ['w'], ['i'], ['t'], ['h'], [' '], ['a'], [' '], ['c'], ['u'], ['s'], ['t'], ['o'], ['m'], [','], [' '], ['I'], [' '], ['s'], ['h'], ['o'], ['u'], 
['l'], ['d'], [' '], ['b'], ['l'], ['u'], ['s'], ['h'], ['\\n'], ['T'], ['o'], [' '], ['s'], ['e'], ['e'], [' '], ['y'], ['o'], ['u'], [' '], ['s'], ['o'], [' '], ['a'], ['t'], ['t'], ['i'], ['r'], ['e'], ['d'], [','], [' '], ['s'], ['w'], ['o'], ['r'], ['n'], [','], [' '], ['I'], [' '], ['t'], ['h'], ['i'], ['n'], ['k'], [','], ['\\n'], ['T'], ['o'], [' '], ['s'], ['h'], ['o'], ['w'], [' '], ['m'], ['y'], ['s'], ['e'], ['l'], ['f'], [' '], ['a'], [' '], ['g'], ['l'], ['a'], ['s'], ['s'], ['.'], ['\\n'], ['\\n'], ['F'], ['L'], ['O'], ['R'], ['I'], ['Z'], ['E'], ['L'], [':'], ['\\n'], ['I'], [' '], ['b'], ['l'], ['e'], ['s'], ['s'], [' '], ['t'], ['h'], ['e'], [' '], ['t'], ['i'], ['m'], ['e'], ['\\n'], ['W'], ['h'], ['e'], ['n'], [' '], ['m'], ['y'], [' '], ['g'], ['o'], ['o'], ['d'], [' '], ['f'], ['a'], ['l'], ['c'], ['o'], ['n'], [' '], ['m'], ['a'], ['d'], ['e'], [' '], ['h'], ['e'], ['r'], [' '], ['f'], ['l'], ['i'], ['g'], ['h'], ['t'], [' '], ['a'], ['c'], ['r'], ['o'], ['s'], ['s'], ['\\n'], ['T'], ['h'], ['y'], [' '], ['f'], ['a'], ['t'], ['h'], ['e'], ['r'], [\"'\"], ['s'], [' '], ['g'], ['r'], ['o'], ['u'], ['n'], ['d'], ['.'], ['\\n'], ['\\n'], ['P'], ['E'], ['R'], ['D'], ['I'], ['T'], ['A'], [':'], ['\\n'], ['N'], ['o'], ['w'], [' '], ['J'], ['o'], ['v'], ['e'], [' '], ['a'], ['f'], ['f'], ['o'], ['r'], ['d'], [' '], ['y'], ['o'], ['u'], [' '], ['c'], ['a'], ['u'], ['s'], ['e'], ['!'], ['\\n'], ['T'], ['o'], [' '], ['m'], ['e'], [' '], ['t'], ['h'], ['e'], [' '], ['d'], ['i'], ['f'], ['f'], ['e'], ['r'], ['e'], ['n'], ['c'], ['e'], [' '], ['f'], ['o'], ['r'], ['g'], ['e'], ['s'], [' '], ['d'], ['r'], ['e'], ['a'], ['d'], [';'], [' '], ['y'], ['o'], ['u'], ['r'], [' '], ['g'], ['r'], ['e'], ['a'], ['t'], ['n'], ['e'], ['s'], ['s'], ['\\n'], ['H'], ['a'], ['t'], ['h'], [' '], ['n'], ['o'], ['t'], [' '], ['b'], ['e'], ['e'], ['n'], [' '], ['u'], ['s'], ['e'], ['d'], [' '], ['t'], ['o'], [' '], ['f'], ['e'], ['a'], ['r'], ['.'], [' '], ['E'], ['v'], ['e'], ['n'], [' '], ['n'], ['o'], ['w'], [' '], ['I'], [' '], ['t'], ['r'], ['e'], ['m'], ['b'], ['l'], ['e'], ['\\n'], ['T'], ['o'], [' '], ['t'], ['h'], ['i'], ['n'], ['k'], [' '], ['y'], ['o'], ['u'], ['r'], [' '], ['f'], ['a'], ['t'], ['h'], ['e'], ['r'], [','], [' '], ['b'], ['y'], [' '], ['s'], ['o'], ['m'], ['e'], [' '], ['a'], ['c'], ['c'], ['i'], ['d'], ['e'], ['n'], ['t'], [','], ['\\n'], ['S'], ['h'], ['o'], ['u'], ['l'], ['d'], [' '], ['p'], ['a'], ['s'], ['s'], [' '], ['t'], ['h'], ['i'], ['s'], [' '], ['w'], ['a'], ['y'], [' '], ['a'], ['s'], [' '], ['y'], ['o'], ['u'], [' '], ['d'], ['i'], ['d'], [':'], [' '], ['O'], [','], [' '], ['t'], ['h'], ['e'], [' '], ['F'], ['a'], ['t'], ['e'], ['s'], ['!'], ['\\n'], ['H'], ['o'], ['w'], [' '], ['w'], ['o'], ['u'], ['l'], ['d'], [' '], ['h'], ['e'], [' '], ['l'], ['o'], ['o'], ['k'], [','], [' '], ['t'], ['o'], [' '], ['s'], ['e'], ['e'], [' '], ['h'], ['i'], ['s'], [' '], ['w'], ['o'], ['r'], ['k'], [' '], ['s'], ['o'], [' '], ['n'], ['o'], ['b'], ['l'], ['e'], ['\\n'], ['V'], ['i'], ['l'], ['e'], ['l'], ['y'], [' '], ['b'], ['o'], ['u'], ['n'], ['d'], [' '], ['u'], ['p'], ['?'], [' '], ['W'], ['h'], ['a'], ['t'], [' '], ['w'], ['o'], ['u'], ['l'], ['d'], [' ']]\n" + ] + } + ], + "source": [ + "input_tokens = x[0, :4].unsqueeze(0).cuda()\n", + "max_new_token = 8\n", + "generated_tokens = model.generate(input_tokens, max_new_token)\n", + "print('input', [enc.decode([i.item()]) for i in input_tokens[0]])\n", + "print('output', [enc.decode([i.item()]) for i in 
generated_tokens[0]])\n", + "print('Gold label', [enc.decode([i.item()]) for i in x[0]])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "seen tokens: 32768000\n" + ] + } + ], + "source": [ + "print('seen tokens: ', batch_size * context_length * iterations)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor(4.1744, device='cuda:0')\n", + "29\n", + "after 1 epoch: tensor(2.6940, device='cuda:0')\n" + ] + } + ], + "source": [ + "from ngram import Ngram\n", + "from data_char import text, enc\n", + "import torch\n", + "vocab = list(range(enc.n_vocab))\n", + "context_lengh = 16\n", + "ngram = Ngram(2, vocab)\n", + "inputs = [enc.encode(text)[:context_lengh]]\n", + "targets = torch.LongTensor([enc.encode(text)[1:context_lengh+1]]).cuda()\n", + "loss = ngram.loss(inputs, targets)\n", + "print(loss)\n", + "epochs = (batch_size * context_length * iterations) // len(enc.encode(text))\n", + "print(epochs)\n", + "ngram.train(enc.encode(text))\n", + "loss = ngram.loss(inputs, targets)\n", + "print('after 1 epoch:', loss)\n", + "# for epoch in range(epochs-1):\n", + "# ngram.train(enc.encode(text))\n", + "# loss = ngram.loss(inputs, targets)\n", + "# print(loss)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1115394" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(text)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'First Citi'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6\n", + "{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5}\n", + "{0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f'}\n", + "tensor(4.1744, device='cuda:0')\n", + "tensor(2.9666, device='cuda:0')\n" + ] + } + ], + "source": [ + "train_text = 'abcdefabcdedfabcdedf'\n", + "check_enc = CharTokenizer(train_text)\n", + "print(check_enc.n_vocab)\n", + "print(check_enc.encoder)\n", + "print(check_enc.decoder)\n", + "ngram = Ngram(2, list(range(enc.n_vocab)))\n", + "loss = ngram.loss([check_enc.encode(train_text)[:-1]], torch.LongTensor([check_enc.encode(train_text)[1:]]).cuda())\n", + "print(loss)\n", + "ngram.train(check_enc.encode(train_text))\n", + "loss = ngram.loss([check_enc.encode(train_text)[:-1]], torch.LongTensor([check_enc.encode(train_text)[1:]]).cuda())\n", + "print(loss)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 3, 5, 0, 1, 2, 3, 4, 3, 5]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "check_enc.encode(train_text)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defaultdict(.()>,\n", + " {'0-0': 1,\n", + " '0-1': 4,\n", + " '0-2': 1,\n", + " '0-3': 1,\n", + " '0-4': 1,\n", + " '0-5': 1,\n", + " '0-6': 1,\n", + " '0-7': 1,\n", + " '0-8': 1,\n", + " '0-9': 1,\n", + " '0-10': 
1,\n", + " '0-11': 1,\n", + " '0-12': 1,\n", + " '0-13': 1,\n", + " '0-14': 1,\n", + " '0-15': 1,\n", + " '0-16': 1,\n", + " '0-17': 1,\n", + " '0-18': 1,\n", + " '0-19': 1,\n", + " '0-20': 1,\n", + " '0-21': 1,\n", + " '0-22': 1,\n", + " '0-23': 1,\n", + " '0-24': 1,\n", + " '0-25': 1,\n", + " '0-26': 1,\n", + " '0-27': 1,\n", + " '0-28': 1,\n", + " '0-29': 1,\n", + " '0-30': 1,\n", + " '0-31': 1,\n", + " '0-32': 1,\n", + " '0-33': 1,\n", + " '0-34': 1,\n", + " '0-35': 1,\n", + " '0-36': 1,\n", + " '0-37': 1,\n", + " '0-38': 1,\n", + " '0-39': 1,\n", + " '0-40': 1,\n", + " '0-41': 1,\n", + " '0-42': 1,\n", + " '0-43': 1,\n", + " '0-44': 1,\n", + " '0-45': 1,\n", + " '0-46': 1,\n", + " '0-47': 1,\n", + " '0-48': 1,\n", + " '0-49': 1,\n", + " '0-50': 1,\n", + " '0-51': 1,\n", + " '0-52': 1,\n", + " '0-53': 1,\n", + " '0-54': 1,\n", + " '0-55': 1,\n", + " '0-56': 1,\n", + " '0-57': 1,\n", + " '0-58': 1,\n", + " '0-59': 1,\n", + " '0-60': 1,\n", + " '0-61': 1,\n", + " '0-62': 1,\n", + " '0-63': 1,\n", + " '0-64': 1,\n", + " '1-0': 1,\n", + " '1-1': 1,\n", + " '1-2': 4,\n", + " '1-3': 1,\n", + " '1-4': 1,\n", + " '1-5': 1,\n", + " '1-6': 1,\n", + " '1-7': 1,\n", + " '1-8': 1,\n", + " '1-9': 1,\n", + " '1-10': 1,\n", + " '1-11': 1,\n", + " '1-12': 1,\n", + " '1-13': 1,\n", + " '1-14': 1,\n", + " '1-15': 1,\n", + " '1-16': 1,\n", + " '1-17': 1,\n", + " '1-18': 1,\n", + " '1-19': 1,\n", + " '1-20': 1,\n", + " '1-21': 1,\n", + " '1-22': 1,\n", + " '1-23': 1,\n", + " '1-24': 1,\n", + " '1-25': 1,\n", + " '1-26': 1,\n", + " '1-27': 1,\n", + " '1-28': 1,\n", + " '1-29': 1,\n", + " '1-30': 1,\n", + " '1-31': 1,\n", + " '1-32': 1,\n", + " '1-33': 1,\n", + " '1-34': 1,\n", + " '1-35': 1,\n", + " '1-36': 1,\n", + " '1-37': 1,\n", + " '1-38': 1,\n", + " '1-39': 1,\n", + " '1-40': 1,\n", + " '1-41': 1,\n", + " '1-42': 1,\n", + " '1-43': 1,\n", + " '1-44': 1,\n", + " '1-45': 1,\n", + " '1-46': 1,\n", + " '1-47': 1,\n", + " '1-48': 1,\n", + " '1-49': 1,\n", + " '1-50': 1,\n", + " '1-51': 1,\n", + " '1-52': 1,\n", + " '1-53': 1,\n", + " '1-54': 1,\n", + " '1-55': 1,\n", + " '1-56': 1,\n", + " '1-57': 1,\n", + " '1-58': 1,\n", + " '1-59': 1,\n", + " '1-60': 1,\n", + " '1-61': 1,\n", + " '1-62': 1,\n", + " '1-63': 1,\n", + " '1-64': 1,\n", + " '2-0': 1,\n", + " '2-1': 1,\n", + " '2-2': 1,\n", + " '2-3': 4,\n", + " '2-4': 1,\n", + " '2-5': 1,\n", + " '2-6': 1,\n", + " '2-7': 1,\n", + " '2-8': 1,\n", + " '2-9': 1,\n", + " '2-10': 1,\n", + " '2-11': 1,\n", + " '2-12': 1,\n", + " '2-13': 1,\n", + " '2-14': 1,\n", + " '2-15': 1,\n", + " '2-16': 1,\n", + " '2-17': 1,\n", + " '2-18': 1,\n", + " '2-19': 1,\n", + " '2-20': 1,\n", + " '2-21': 1,\n", + " '2-22': 1,\n", + " '2-23': 1,\n", + " '2-24': 1,\n", + " '2-25': 1,\n", + " '2-26': 1,\n", + " '2-27': 1,\n", + " '2-28': 1,\n", + " '2-29': 1,\n", + " '2-30': 1,\n", + " '2-31': 1,\n", + " '2-32': 1,\n", + " '2-33': 1,\n", + " '2-34': 1,\n", + " '2-35': 1,\n", + " '2-36': 1,\n", + " '2-37': 1,\n", + " '2-38': 1,\n", + " '2-39': 1,\n", + " '2-40': 1,\n", + " '2-41': 1,\n", + " '2-42': 1,\n", + " '2-43': 1,\n", + " '2-44': 1,\n", + " '2-45': 1,\n", + " '2-46': 1,\n", + " '2-47': 1,\n", + " '2-48': 1,\n", + " '2-49': 1,\n", + " '2-50': 1,\n", + " '2-51': 1,\n", + " '2-52': 1,\n", + " '2-53': 1,\n", + " '2-54': 1,\n", + " '2-55': 1,\n", + " '2-56': 1,\n", + " '2-57': 1,\n", + " '2-58': 1,\n", + " '2-59': 1,\n", + " '2-60': 1,\n", + " '2-61': 1,\n", + " '2-62': 1,\n", + " '2-63': 1,\n", + " '2-64': 1,\n", + " '3-0': 1,\n", + " '3-1': 1,\n", + " '3-2': 1,\n", + " '3-3': 1,\n", 
+ " '3-4': 4,\n", + " '3-5': 3,\n", + " '3-6': 1,\n", + " '3-7': 1,\n", + " '3-8': 1,\n", + " '3-9': 1,\n", + " '3-10': 1,\n", + " '3-11': 1,\n", + " '3-12': 1,\n", + " '3-13': 1,\n", + " '3-14': 1,\n", + " '3-15': 1,\n", + " '3-16': 1,\n", + " '3-17': 1,\n", + " '3-18': 1,\n", + " '3-19': 1,\n", + " '3-20': 1,\n", + " '3-21': 1,\n", + " '3-22': 1,\n", + " '3-23': 1,\n", + " '3-24': 1,\n", + " '3-25': 1,\n", + " '3-26': 1,\n", + " '3-27': 1,\n", + " '3-28': 1,\n", + " '3-29': 1,\n", + " '3-30': 1,\n", + " '3-31': 1,\n", + " '3-32': 1,\n", + " '3-33': 1,\n", + " '3-34': 1,\n", + " '3-35': 1,\n", + " '3-36': 1,\n", + " '3-37': 1,\n", + " '3-38': 1,\n", + " '3-39': 1,\n", + " '3-40': 1,\n", + " '3-41': 1,\n", + " '3-42': 1,\n", + " '3-43': 1,\n", + " '3-44': 1,\n", + " '3-45': 1,\n", + " '3-46': 1,\n", + " '3-47': 1,\n", + " '3-48': 1,\n", + " '3-49': 1,\n", + " '3-50': 1,\n", + " '3-51': 1,\n", + " '3-52': 1,\n", + " '3-53': 1,\n", + " '3-54': 1,\n", + " '3-55': 1,\n", + " '3-56': 1,\n", + " '3-57': 1,\n", + " '3-58': 1,\n", + " '3-59': 1,\n", + " '3-60': 1,\n", + " '3-61': 1,\n", + " '3-62': 1,\n", + " '3-63': 1,\n", + " '3-64': 1,\n", + " '4-0': 1,\n", + " '4-1': 1,\n", + " '4-2': 1,\n", + " '4-3': 3,\n", + " '4-4': 1,\n", + " '4-5': 2,\n", + " '4-6': 1,\n", + " '4-7': 1,\n", + " '4-8': 1,\n", + " '4-9': 1,\n", + " '4-10': 1,\n", + " '4-11': 1,\n", + " '4-12': 1,\n", + " '4-13': 1,\n", + " '4-14': 1,\n", + " '4-15': 1,\n", + " '4-16': 1,\n", + " '4-17': 1,\n", + " '4-18': 1,\n", + " '4-19': 1,\n", + " '4-20': 1,\n", + " '4-21': 1,\n", + " '4-22': 1,\n", + " '4-23': 1,\n", + " '4-24': 1,\n", + " '4-25': 1,\n", + " '4-26': 1,\n", + " '4-27': 1,\n", + " '4-28': 1,\n", + " '4-29': 1,\n", + " '4-30': 1,\n", + " '4-31': 1,\n", + " '4-32': 1,\n", + " '4-33': 1,\n", + " '4-34': 1,\n", + " '4-35': 1,\n", + " '4-36': 1,\n", + " '4-37': 1,\n", + " '4-38': 1,\n", + " '4-39': 1,\n", + " '4-40': 1,\n", + " '4-41': 1,\n", + " '4-42': 1,\n", + " '4-43': 1,\n", + " '4-44': 1,\n", + " '4-45': 1,\n", + " '4-46': 1,\n", + " '4-47': 1,\n", + " '4-48': 1,\n", + " '4-49': 1,\n", + " '4-50': 1,\n", + " '4-51': 1,\n", + " '4-52': 1,\n", + " '4-53': 1,\n", + " '4-54': 1,\n", + " '4-55': 1,\n", + " '4-56': 1,\n", + " '4-57': 1,\n", + " '4-58': 1,\n", + " '4-59': 1,\n", + " '4-60': 1,\n", + " '4-61': 1,\n", + " '4-62': 1,\n", + " '4-63': 1,\n", + " '4-64': 1,\n", + " '5-0': 3,\n", + " '5-1': 1,\n", + " '5-2': 1,\n", + " '5-3': 1,\n", + " '5-4': 1,\n", + " '5-5': 1,\n", + " '5-6': 1,\n", + " '5-7': 1,\n", + " '5-8': 1,\n", + " '5-9': 1,\n", + " '5-10': 1,\n", + " '5-11': 1,\n", + " '5-12': 1,\n", + " '5-13': 1,\n", + " '5-14': 1,\n", + " '5-15': 1,\n", + " '5-16': 1,\n", + " '5-17': 1,\n", + " '5-18': 1,\n", + " '5-19': 1,\n", + " '5-20': 1,\n", + " '5-21': 1,\n", + " '5-22': 1,\n", + " '5-23': 1,\n", + " '5-24': 1,\n", + " '5-25': 1,\n", + " '5-26': 1,\n", + " '5-27': 1,\n", + " '5-28': 1,\n", + " '5-29': 1,\n", + " '5-30': 1,\n", + " '5-31': 1,\n", + " '5-32': 1,\n", + " '5-33': 1,\n", + " '5-34': 1,\n", + " '5-35': 1,\n", + " '5-36': 1,\n", + " '5-37': 1,\n", + " '5-38': 1,\n", + " '5-39': 1,\n", + " '5-40': 1,\n", + " '5-41': 1,\n", + " '5-42': 1,\n", + " '5-43': 1,\n", + " '5-44': 1,\n", + " '5-45': 1,\n", + " '5-46': 1,\n", + " '5-47': 1,\n", + " '5-48': 1,\n", + " '5-49': 1,\n", + " '5-50': 1,\n", + " '5-51': 1,\n", + " '5-52': 1,\n", + " '5-53': 1,\n", + " '5-54': 1,\n", + " '5-55': 1,\n", + " '5-56': 1,\n", + " '5-57': 1,\n", + " '5-58': 1,\n", + " '5-59': 1,\n", + " '5-60': 1,\n", + " '5-61': 1,\n", + " 
'5-62': 1,\n", + " '5-63': 1,\n", + " '5-64': 1})" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ngram.ngram" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor(4.1744, device='cuda:0')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor(2.6793, device='cuda:0')\n" + ] + } + ], + "source": [ + "ngram = Ngram(2, vocab, 1e-3)\n", + "loss = ngram.loss(inputs, targets)\n", + "print(loss)\n", + "ngram.train(enc.encode(text))\n", + "loss = ngram.loss(inputs, targets)\n", + "print(loss)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor(4.1744, device='cuda:0')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor(1.2322, device='cuda:0')\n" + ] + } + ], + "source": [ + "ngram = Ngram(4, vocab, 1e-3)\n", + "loss = ngram.loss(inputs, targets)\n", + "print(loss)\n", + "ngram.train(enc.encode(text))\n", + "loss = ngram.loss(inputs, targets)\n", + "print(loss)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "# ngram.ngram" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/playground/GPT DEV_3_attention_check_overfit.ipynb b/playground/GPT DEV_3_attention_check_overfit.ipynb new file mode 100644 index 0000000..c633d3b --- /dev/null +++ b/playground/GPT DEV_3_attention_check_overfit.ipynb @@ -0,0 +1,989 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[[15., 57., 92.],\n", + " [ 0., 95., 53.],\n", + " [15., 10., 34.],\n", + " [90., 12., 20.]],\n", + "\n", + " [[97., 86., 90.],\n", + " [38., 51., 64.],\n", + " [ 9., 15., 13.],\n", + " [46., 22., 50.]]])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import torch\n", + "torch.manual_seed(1337)\n", + "\n", + "batch_size = 2\n", + "sequence_length = 4\n", + "d_model = 3\n", + "representations = torch.randint(0, 100, (batch_size, sequence_length, d_model)).float()\n", + "representations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## generilize a special case\n", + "> This is a bit more of a general coding tip but I’ve often seen people create bugs when they bite off more than they can chew, writing a relatively general functionality from scratch. I like to write a very specific function to what I’m doing right now, get that to work, and then generalize it later making sure that I get the same result. 
Often this applies to vectorizing code, where I almost always write out the fully loopy version first and only then transform it to vectorized code one loop at a time.\n", + "\n", + "https://karpathy.github.io/2019/04/25/recipe/" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[[15.0000, 57.0000, 92.0000],\n", + " [ 7.5000, 76.0000, 72.5000],\n", + " [10.0000, 54.0000, 59.6667],\n", + " [30.0000, 43.5000, 49.7500]],\n", + "\n", + " [[97.0000, 86.0000, 90.0000],\n", + " [67.5000, 68.5000, 77.0000],\n", + " [48.0000, 50.6667, 55.6667],\n", + " [47.5000, 43.5000, 54.2500]]])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aggregated_representations = torch.empty((batch_size, sequence_length, d_model))\n", + "\n", + "for batch_idx in range(batch_size):\n", + " for sequence_idx in range(sequence_length):\n", + " aggregated_representations[batch_idx, sequence_idx] = torch.mean(representations[batch_idx, :sequence_idx+1], dim=0)\n", + "aggregated_representations" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[[1.0000, 0.0000, 0.0000, 0.0000],\n", + " [0.5000, 0.5000, 0.0000, 0.0000],\n", + " [0.3333, 0.3333, 0.3333, 0.0000],\n", + " [0.2500, 0.2500, 0.2500, 0.2500]],\n", + "\n", + " [[1.0000, 0.0000, 0.0000, 0.0000],\n", + " [0.5000, 0.5000, 0.0000, 0.0000],\n", + " [0.3333, 0.3333, 0.3333, 0.0000],\n", + " [0.2500, 0.2500, 0.2500, 0.2500]]])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "attention_score = torch.tril(torch.ones((batch_size, sequence_length, sequence_length)))\n", + "attention_score = attention_score / torch.sum(attention_score, dim=2, keepdim=True)\n", + "attention_score" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[[15.0000, 57.0000, 92.0000],\n", + " [ 7.5000, 76.0000, 72.5000],\n", + " [10.0000, 54.0000, 59.6667],\n", + " [30.0000, 43.5000, 49.7500]],\n", + "\n", + " [[97.0000, 86.0000, 90.0000],\n", + " [67.5000, 68.5000, 77.0000],\n", + " [48.0000, 50.6667, 55.6667],\n", + " [47.5000, 43.5000, 54.2500]]])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "attention_score @ representations" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[[-0.0476, -inf, -inf, -inf],\n", + " [-1.1081, -1.8002, -inf, -inf],\n", + " [ 0.1662, 1.2055, 0.1883, -inf],\n", + " [-0.1585, -0.6300, -0.2221, 0.6924]],\n", + "\n", + " [[ 1.1490, -inf, -inf, -inf],\n", + " [ 0.1526, 0.3843, -inf, -inf],\n", + " [-0.7296, -1.5580, -0.3950, -inf],\n", + " [-1.7097, -0.0826, -0.0495, -1.4480]]])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tril = torch.tril(torch.ones((sequence_length, sequence_length)))\n", + "qk_dot_product = torch.randn((batch_size, sequence_length, sequence_length))\n", + "qk_dot_product = qk_dot_product.masked_fill(tril == 0, float('-inf'))\n", + "qk_dot_product" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[[1.0000, 0.0000, 0.0000, 0.0000],\n", + " [0.6664, 0.3336, 0.0000, 
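The two cells above compute the same causal average twice, first with explicit loops and then with a row-normalised lower-triangular matrix, following the advice quoted above. The notebook compares the two outputs only by eye; a compressed sketch of an explicit equivalence check (same shapes and seed as the cells above, `atol` chosen arbitrarily):

```python
import torch

torch.manual_seed(1337)
batch_size, sequence_length, d_model = 2, 4, 3
representations = torch.randint(0, 100, (batch_size, sequence_length, d_model)).float()

# Loopy reference: mean over every position up to and including the current one.
loopy = torch.empty_like(representations)
for b in range(batch_size):
    for t in range(sequence_length):
        loopy[b, t] = representations[b, :t + 1].mean(dim=0)

# Vectorised version: row-normalised lower-triangular weights, broadcast over the batch.
weights = torch.tril(torch.ones(sequence_length, sequence_length))
weights = weights / weights.sum(dim=1, keepdim=True)
vectorised = weights @ representations

assert torch.allclose(loopy, vectorised, atol=1e-5), "loop and matmul versions disagree"
print("loop and matmul versions match")
```

Turning the eyeball comparison into an assert makes the check repeatable when the uniform weights are later replaced by learned attention scores.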
0.0000],\n", + " [0.2062, 0.5830, 0.2108, 0.0000],\n", + " [0.2039, 0.1273, 0.1913, 0.4775]],\n", + "\n", + " [[1.0000, 0.0000, 0.0000, 0.0000],\n", + " [0.4424, 0.5576, 0.0000, 0.0000],\n", + " [0.3528, 0.1541, 0.4931, 0.0000],\n", + " [0.0791, 0.4024, 0.4159, 0.1027]]])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.softmax(qk_dot_product, dim=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "d_head = 16\n", + "k = torch.randn(batch_size, sequence_length, d_head)\n", + "q = torch.randn(batch_size, sequence_length, d_head)\n", + "v = torch.randn(batch_size, sequence_length, d_head)\n", + "\n", + "qk_dot_product = q @ k.transpose(-2, -1)\n", + "scaled_qk_dot_product = qk_dot_product / (d_head ** 0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor(0.8750)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k.var()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor(1.1169)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q.var()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(tensor(13.3547), tensor(0.8347))" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qk_dot_product.var(), scaled_qk_dot_product.var()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([0.1925, 0.1426, 0.2351, 0.1426, 0.2872])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.softmax(torch.tensor([0.1, -0.2, 0.3, -0.2, 0.5]), dim=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([0.0326, 0.0030, 0.1615, 0.0030, 0.8000])" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.softmax(torch.tensor([0.1, -0.2, 0.3, -0.2, 0.5])* 8, dim=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "from torch.nn import functional as F\n", + "\n", + "torch.manual_seed(1337)\n", + "\n", + "class Head(nn.Module):\n", + " def __init__(self, d_model, d_head):\n", + " super().__init__()\n", + " self.key = nn.Linear(d_model, d_head, bias=False)\n", + " self.query = nn.Linear(d_model, d_head, bias=False)\n", + " self.value = nn.Linear(d_model, d_head, bias=False)\n", + " self.back_to_d_model = nn.Linear(d_head, d_model)\n", + "\n", + " self.register_buffer('mask', torch.tril(torch.ones((sequence_length, sequence_length))))\n", + " \n", + " \n", + " def forward(self, x):\n", + " # x: (batch_size, sequence_length, d_model)\n", + " k = self.key(x)\n", + " q = self.query(x)\n", + " v = self.value(x)\n", + "\n", + " qk_dot_product = q @ k.transpose(-2, -1) / (d_head ** 0.5)\n", + " qk_dot_product = qk_dot_product.masked_fill(self.mask == 0, float('-inf'))\n", + " attention_score = torch.softmax(qk_dot_product, dim=-1)\n", + " out = attention_score @ 
v\n", + " out = self.back_to_d_model(out)\n", + " return out\n", + "\n", + "\n", + "class AttentionLM(nn.Module):\n", + " def __init__(self, vocab_size, sequence_length, d_model, d_head):\n", + " super().__init__()\n", + " self.embed = nn.Embedding(vocab_size, d_model)\n", + " self.pos_embed = nn.Embedding(sequence_length, d_model)\n", + " self.head = Head(d_model, d_head)\n", + " self.ln = nn.LayerNorm(d_model)\n", + " self.unembed = nn.Linear(d_model, vocab_size)\n", + " print('number of parameters:', sum(p.numel() for p in self.parameters()))\n", + " \n", + " \n", + " def forward(self, token_indexes):\n", + " # token_indexes: (batch_size, sequence_length)\n", + " batch_size, sequence_length = token_indexes.size()\n", + " token_embed = self.embed(token_indexes)\n", + " pos_embed = self.pos_embed(torch.arange(sequence_length).to(token_embed.device))\n", + " x = token_embed + pos_embed\n", + " x = self.head(x)\n", + " x = self.ln(x)\n", + " logits = self.unembed(x)\n", + "\n", + " return logits\n", + " \n", + " def loss_per_token(self, token_indexes, targets):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " # targets: (batch_size, sequence_length)\n", + " batch_size, sequence_length, vocab_size = logits.shape\n", + " loss = F.cross_entropy(\n", + " logits.view(batch_size*sequence_length, vocab_size),\n", + " targets.view(batch_size*sequence_length),\n", + " reduction='none'\n", + " )\n", + " # loss: (batch_size*sequence_length)\n", + " return loss.view(batch_size, sequence_length)\n", + " \n", + " def loss(self, token_indexes, targets):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " # targets: (batch_size, sequence_length)\n", + " batch_size, sequence_length, vocab_size = logits.shape\n", + " loss = F.cross_entropy(\n", + " logits.view(batch_size*sequence_length, vocab_size),\n", + " targets.view(batch_size*sequence_length)\n", + " )\n", + " # loss: scalar\n", + " return loss\n", + " \n", + " def generate(self, token_indexes, max_new_tokens):\n", + " # token_indexes: (batch_size, sequence_length)\n", + " batch_size, sequence_length = token_indexes.shape\n", + " for _ in range(max_new_tokens):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " next_token_logits = logits[:, -1, :]\n", + " # next_token_logits: (batch_size, vocab_size)\n", + " next_token_probs = F.softmax(next_token_logits, dim=-1)\n", + " # next_token_probs: (batch_size, vocab_size)\n", + " next_token = torch.multinomial(next_token_probs, num_samples=1)\n", + " # next_token: (batch_size, 1)\n", + " token_indexes = torch.cat([token_indexes, next_token], dim=1)\n", + " # token_indexes: (batch_size, sequence_length+1)\n", + " return token_indexes\n", + "\n", + "\n", + "class BigramLanguageModel(nn.Module):\n", + " def __init__(self, vocab_size, d_model):\n", + " super().__init__()\n", + " # self.bigram_table = nn.Embedding(vocab_size, vocab_size)\n", + " self.token_embedding_table = nn.Embedding(vocab_size, d_model)\n", + " self.ln = nn.LayerNorm(d_model)\n", + " self.linear = nn.Linear(d_model, vocab_size)\n", + " print('number of parameters:', sum(p.numel() for p in self.parameters()))\n", + " \n", + " def forward(self, token_indexes):\n", + " # token_index: (batch_size, sequence_length)\n", + " # logits = self.bigram_table(token_indexes)\n", + "\n", + " embedding = self.token_embedding_table(token_indexes)\n", + " embedding = self.ln(embedding)\n", + " logits 
= self.linear(embedding)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " return logits\n", + "\n", + " def loss_per_token(self, token_indexes, targets):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " # targets: (batch_size, sequence_length)\n", + " batch_size, sequence_length, vocab_size = logits.shape\n", + " loss = F.cross_entropy(\n", + " logits.view(batch_size*sequence_length, vocab_size),\n", + " targets.view(batch_size*sequence_length),\n", + " reduction='none'\n", + " )\n", + " # loss: (batch_size*sequence_length)\n", + " return loss.view(batch_size, sequence_length)\n", + " \n", + " \n", + " def loss(self, token_indexes, targets):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " # targets: (batch_size, sequence_length)\n", + " batch_size, sequence_length, vocab_size = logits.shape\n", + " loss = F.cross_entropy(\n", + " logits.view(batch_size*sequence_length, vocab_size),\n", + " targets.view(batch_size*sequence_length)\n", + " )\n", + " # loss: scalar\n", + " return loss\n", + " \n", + " def generate(self, token_indexes, max_new_tokens):\n", + " # token_indexes: (batch_size, sequence_length)\n", + " batch_size, sequence_length = token_indexes.shape\n", + " for _ in range(max_new_tokens):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " next_token_logits = logits[:, -1, :]\n", + " # next_token_logits: (batch_size, vocab_size)\n", + " next_token_probs = F.softmax(next_token_logits, dim=-1)\n", + " # next_token_probs: (batch_size, vocab_size)\n", + " next_token = torch.multinomial(next_token_probs, num_samples=1)\n", + " # next_token: (batch_size, 1)\n", + " token_indexes = torch.cat([token_indexes, next_token], dim=1)\n", + " # token_indexes: (batch_size, sequence_length+1)\n", + " return token_indexes\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "number of parameters: 1085249\n", + "steps: 0 loss: 4.328359603881836\n", + "steps: 100 loss: 1.9020140171051025\n", + "steps: 200 loss: 0.9388523697853088\n", + "steps: 299 loss: 0.45989322662353516\n", + "validation loss: 8.844975471496582\n" + ] + } + ], + "source": [ + "from data_char import enc, get_batch\n", + "vocab_size = enc.n_vocab\n", + "sequence_length = 1024\n", + "d_model = 768\n", + "d_head = 64\n", + "\n", + "model = AttentionLM(vocab_size, sequence_length, d_model, d_head).cuda()\n", + "optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)\n", + "batch_size = 32\n", + "context_length = 1024\n", + "iterations = 300\n", + "x, y = get_batch(batch_size, context_length, 'train')\n", + "for steps in range(iterations):\n", + " # print(x[0], y[0])\n", + " x, y = x.cuda(), y.cuda()\n", + " loss = model.loss(x, y)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " if steps % 100 == 0:\n", + " print('steps:', steps, 'loss:', loss.item())\n", + " # break\n", + "print('steps:', steps, 'loss:', loss.item())\n", + "\n", + "with torch.no_grad():\n", + " val_x, val_y = get_batch(1, context_length, 'val')\n", + " val_x, val_y = val_x.cuda(), val_y.cuda()\n", + " loss = model.loss(val_x, val_y)\n", + " print('validation loss:', loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'loss per 
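The training cell above calls get_batch once outside the loop, so every step reuses the same 32 sequences: this is the classic "overfit a single batch" check, and the gap between roughly 0.46 on the memorised batch and 8.8 on a held-out batch is the expected outcome. A sketch of the same check wrapped into a reusable helper (it assumes the `model.loss` and `get_batch` interfaces used above; the 0.5 threshold is an arbitrary choice):

```python
import torch

def overfit_single_batch(model, get_batch, steps=300, lr=1e-3,
                         batch_size=32, context_length=1024, target=0.5):
    """Train on one fixed batch; a healthy model should memorise it easily.
    Failure usually means a wiring, masking or label-alignment bug."""
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    x, y = get_batch(batch_size, context_length, 'train')   # fixed batch, reused every step
    x, y = x.cuda(), y.cuda()
    for _ in range(steps):
        loss = model.loss(x, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"loss on the memorised batch after {steps} steps: {loss.item():.4f}")
    return loss.item() < target

# Usage mirroring the cell above:
# overfit_single_batch(AttentionLM(vocab_size, sequence_length, d_model, d_head).cuda(), get_batch)
```

Note that the held-out loss here even exceeds the ~4.17 uniform baseline, which is what confident memorisation of a single batch looks like on unseen data.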
token')" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjAAAAHHCAYAAAChjmJTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAABwZElEQVR4nO3dd3hT1f8H8HeatGlLF7TQsvfesiwgIiBTBNyKioobVMSJC3CB4hZE/bkVBZWvWzZlyN6yd6EgUFb3Ts7vj5L03pt7k5s0bZrk/XqePk+b3Nx7cpPmfvI5n3OOQQghQERERORHQnzdACIiIiJ3MYAhIiIiv8MAhoiIiPwOAxgiIiLyOwxgiIiIyO8wgCEiIiK/wwCGiIiI/A4DGCIiIvI7DGCIiIjI7zCAIfJTX331FQwGA1JTU33dlIAzZcoUGAwGnDt3ztdNISINDGCIKKDk5eVhypQpWLFiha+bQkQViAEMEQWUvLw8TJ06lQEMUYBjAENEfqegoABWq9XXzSAiH2IAQxRgPvroI7Rt2xZmsxl16tTBuHHjkJGRIdvm4MGDuP7665GUlITw8HDUq1cPt9xyCzIzM+3bLFmyBL1790ZcXByioqLQsmVLPPfccy6PbzAYMH78eMyZMwctW7ZEeHg4unTpglWrVjlse/LkSdxzzz1ITEyE2WxG27Zt8cUXX8i2WbFiBQwGA+bOnYsXXngBdevWRWRkJLKyshz2l5qaipo1awIApk6dCoPBAIPBgClTpti3Wb58Oa644gpUq1YNcXFxGDFiBPbu3evyeR07dgzNmjVDu3btcObMGQBARkYGJkyYgPr168NsNqNZs2Z44403ZMFVamoqDAYD3nrrLXz66ado2rQpzGYzunXrhk2bNrk8LhGpM/m6AUTkPVOmTMHUqVMxYMAAPPTQQ9i/fz9mz56NTZs2Yc2aNQgNDUVRUREGDRqEwsJCPPLII0hKSsLJkyfx559/IiMjA7Gxsdi9ezeuueYadOjQAS+//DLMZjMOHTqENWvW6GrHypUrMW/ePDz66KMwm8346KOPMHjwYGzcuBHt2rUDAJw5cwaXX365PeCpWbMmFixYgLFjxyIrKwsTJkyQ7fOVV15BWFgYnnzySRQWFiIsLMzhuDVr1sTs2bPx0EMPYdSoUbjuuusAAB06dAAALF26FEOGDEGTJk0wZcoU5Ofn48MPP0SvXr2wdetWNGrUSPX5HD58GP369UONGjWwZMkSJCQkIC8vD1deeSVOnjyJBx54AA0aNMDatWsxadIknDp1Cu+9955sH99//z2ys7PxwAMPwGAw4M0338R1112HI0eOIDQ0VNd5JSIJQUR+6csvvxQAxNGjR4UQQqSnp4uwsDAxcOBAYbFY7NvNnDlTABBffPGFEEKIbdu2CQDip59+0tz3u+++KwCIs2fPut0uAAKA2Lx5s/22Y8eOifDwcDFq1Cj7bWPHjhW1a9cW586dkz3+lltuEbGxsSIvL08IIURKSooAIJo0aWK/zZmzZ88KAGLy5MkO93Xq1EnUqlVLnD9/3n7bjh07REhIiLjzzjvtt02ePNn+/Pfu3Svq1KkjunXrJi5cuGDf5pVXXhHVqlUTBw4ckB3j2WefFUajURw/flwIIcTRo0cFABEfHy97/G+//SYAiD/++MPlcyIiR+xCIgoQS5cuRVFRESZMmICQkLJ/7fvuuw8xMTH466+/AACxsbEAgEWLFiEvL091X3FxcQCA3377zaNak+TkZHTp0sX+d4MGDTBixAgsWrQIFosFQgjMnz8fw4cPhxAC586ds/8MGjQImZmZ2Lp1q2yfY8aMQUREhNttsTl16hS2b9+Ou+66CzVq1LDf3qFDB1x99dX4+++/HR6za9cuXHnllWjUqBGWLl2K6tWr2+/76aefcMUVV6B69eqy9g8YMAAWi8Why+zmm2+WPf6KK64AABw5csTj50QUzBjAEAWIY8eOAQBatmwpuz0sLAxNmjSx39+4cWNMnDgRn332GRISEjBo0CDMmjVLVv9y8803o1evXrj33nuRmJiIW265BT/++KPuYKZ58+YOt7Vo0QJ5eXk4e/Yszp49i4yMDHz66aeoWbOm7Ofuu+8GAKSnp8se37hxY/0nQ4XW+QGA1q1b49y5c8jNzZXdPnz4cERHR2PRokWIiYmR3Xfw4EEsXLjQof0DBgxQbX+DBg1kf9uCmYsXL5breREFK9bAEAWht99+G3fddRd+++03LF68GI8++iimTZuG9evXo169eoiIiMCqVauQkpKCv/76CwsXLsS8efPQr18/LF68GEajsVzHtwVCt99+O8aMGaO6ja1uxaY82RdPXX/99fj6668xZ84cPPDAA7L7rFYrrr76ajz99NOqj23RooXsb61zJoTwTmOJggwDGKIA0bBhQwDA/v370aRJE/vtRUVFOHr0qD0zYNO+fXu0b98eL7zwAtauXYtevXrh448/xquvvgoACAkJQf/+/dG/f3+88847eP311/H8888jJSXFYV9KBw8edLjtwIEDiIyMtI8Sio6OhsVicbkvdxkMBtXbpedHad++fUhISEC1atVkt8+YMQMmkwkPP/wwoqOjcdttt9nva9q0KXJycrzefiLSh11IRAFiwIABCAsLwwcffCD7Vv/5558jMzMTw4YNAwBkZWWhpKRE9tj27dsjJCQEhYWFAIALFy447L9Tp04AYN/GmXXr1slqWNLS0vDbb79h4MCBMBqNMBqNuP766zF//nzs2rXL4fFnz551/YQ1REZGAoDD0PHatWujU6dO+Prrr2X37dq1C4sXL8bQoUMd9mUwGPDpp5/ihhtuwJgxY/D777/b77vpppuwbt06LFq0yOFxGRkZDueYiLyLGRiiAFGzZk1MmjQJU6dOxeDBg3Httddi//79+Oijj9CtWzfcfvvtAErnQRk/fjxuvPFGtGjRAiUlJfj222/tQQUAvPzyy1i1ahWGDRuGhg0bIj09HR999BHq1auH3r17u2xLu3btMGjQINkwaqB0bhab6dOnIyUlBT169MB9992HNm3a4MKFC9i6dSuWLl2qGkTpERERgTZt2mDevHlo0aIFatSogXbt2qFdu3aYMWMGhgwZguTkZIwdO9Y+jDo2NlY2V4xUSEgIvvvuO4wcORI33XQT/v77b/Tr1w9PPfUUfv/9d1xzzTW466670KVLF+Tm5mLnzp34+eefkZqaioSEBI+eAxHp4NtBUETkKeUwapuZM2eKVq1aidDQUJGYmCgeeughcfHiRfv9R44cEffcc49o2rSpCA8PFzVq1BBXXXWVWLp0qX2bZcuWiREjRog6deqIsLAwUadOHXHrrbc6DBlWA0CMGzdOfPfdd6J58+bCbDaLzp07i5SUFIdtz5w5I8aNGyfq168vQkNDRVJSkujfv7/
49NNP7dvYhlE7G/attHbtWtGlSxcRFhbmMKR66dKlolevXiIiIkLExMSI4cOHiz179sgeLx1GbZOXlyeuvPJKERUVJdavXy+EECI7O1tMmjRJNGvWTISFhYmEhATRs2dP8dZbb4mioiIhRNkw6hkzZqieK7Xh3kTkmkEIVpARkfcYDAaMGzcOM2fO9HVTiCiAsQaGiIiI/A4DGCIiIvI7DGCIiIjI7/g0gJkyZYp9tVjbT6tWrXzZJCIqJyEE61+IqML5fBh127ZtsXTpUvvfJpPPm0RERERVnM+jBZPJhKSkJF83g4iIiPyIzwOYgwcPok6dOggPD0dycjKmTZvmsOiZTWFhoWwWUKvVigsXLiA+Pl5z+nAiIiKqWoQQyM7ORp06dRAS4lk1i0/ngVmwYAFycnLQsmVLnDp1ClOnTsXJkyexa9cuREdHO2w/ZcoU2UyeRERE5L/S0tJQr149jx5bpSayy8jIQMOGDfHOO+9g7NixDvcrMzCZmZlo0KAB0tLSHJa6JyIioqopKysL9evXR0ZGBmJjYz3ah8+7kKTi4uLQokULHDp0SPV+s9kMs9nscHtMTAwDGCIiIj9TnvKPKjUPTE5ODg4fPozatWv7uilERERUhfk0gHnyySexcuVKpKamYu3atRg1ahSMRiNuvfVWXzaLiIiIqjifdiGdOHECt956K86fP4+aNWuid+/eWL9+PWrWrOnLZhEREVEV59MAZu7cub48PBEREfmpKlUDQ0RERKQHAxgiIiLyOwxgiIiIyO8wgCEiIiK/wwCGiIiI/A4DGCIiIvI7DGCIiIjI7zCAISIiIr/DAIaIiIj8DgMYIiIi8jsMYIiIiMjvMIAhIiIiv8MAhoiIiPwOAxgiIiLyOwxgiIiIyO8wgCEiIiK/wwCGiIiI/A4DGCIiIvI7DGCIiIjI7zCAISIiIr/DAIaIiIj8DgMYIiIi8jsMYIiIiMjvMIAhIiIiv8MAhoiIiPwOAxgiIiLyOwxgiIiIyO8wgCEiIiK/wwCGiIiI/A4DGCIiIvI7DGCIiIjI7zCAISIiIr/DAIaIiIj8DgMYIiIi8jsMYIiIiMjvMIAhIiIiv8MAhoiIiPwOAxgiIiLyOwxgiIiIyO8wgCEiIiK/wwCGiIiI/A4DGCIiIvI7DGCIiIjI7zCAISIiIr/DAIaIiIj8DgMYIiIi8jsMYIiIiMjvMIAhIiIiv8MAhoiIiPwOAxgiIiLyOwxgiIiIyO8wgCEiIiK/wwCGiIiI/A4DGCIiIvI7DGCIiIjI7zCAISIiIr/DAIaIiIj8DgMYIiIi8jsMYIiIiMjvMIAhIiIiv8MAhoiIiPwOAxgiIiLyOwxgiIiIyO9UmQBm+vTpMBgMmDBhgq+bQkRERFVclQhgNm3ahE8++QQdOnTwdVOIiIjID/g8gMnJycHo0aPxf//3f6hevbqvm0NERER+wOcBzLhx4zBs2DAMGDDA5baFhYXIysqS/RAREVHwMfny4HPnzsXWrVuxadMmXdtPmzYNU6dOreBWERERUVXnswxMWloaHnvsMcyZMwfh4eG6HjNp0iRkZmbaf9LS0iq4lURERFQVGYQQwhcH/vXXXzFq1CgYjUb7bRaLBQaDASEhISgsLJTdpyYrKwuxsbHIzMxETExMRTeZiIiIvMAb12+fdSH1798fO3fulN129913o1WrVnjmmWdcBi9EREQUvHwWwERHR6Ndu3ay26pVq4b4+HiH24mIiIikfD4KiYiIiMhdPh2FpLRixQpfN4GIiIj8ADMwRERE5HcYwBAREZHfYQBDREREfocBDBEREfkdBjBERETkdxjAEBERkd9hAENERER+hwEMERER+R0GMEREROR3GMAQERGR32EAQ0RERH6HAQwRERH5HQYwRERE5HcYwBAREZHfYQBDREREfocBDBEREfkdBjBERETkdxjAEBERkd9hAENERER+hwEMERER+R0GMEREROR3GMAQERGR32EAQ0RERH6HAQwRERH5HQYwRERE5HcYwBAREZHfYQBDREREfocBDBEREfkdBjBERETkdwIigMktLPF1E4iIiKgSBUQAczIjz9dNICIiokoUEAFMZh4zMERERMEkIAKYjPwiXzeBiIiIKlFABDCZecW+bgIRERFVosAIYAoYwBAREQWTwAhg8lkDQ0REFEwCIoDJYg0MERFRUAmIACYzn11IREREwSQgApgSq/B1E4iIiKgSBUQAIwQDGCIiomASEAEMEzBERETBJSACGAsjGCIioqASEAEM4xciIqLgEhABDBjAEBERBZWACGDYhURERBRcAiKAsXIUEhERUVAJiACG8QsREVFwCYgAhhkYIiKi4BIQAYyFAQwREVFQCYgAhjW8REREwSUgAhguJUBERBRcAiKAYRcSERFRcAmIAIbxCxERUXAJkACGEQwREVEwCYgAhsOoiYiIgktABDBcSoCIiCi4BEQAwwQMERFRcAmIAIZdSERERMElIAIYDqMmIiIKLgERwFitvm4BERERVaaACGA4jJqIiCi4BEQAw0FIREREwSVAAhhGMERERMHEpwHM7Nmz0aFDB8TExCAmJgbJyclYsGCB2/thFxIREVFw8WkAU69ePUyfPh1btmzB5s2b0a9fP4wYMQK7d+92az/sQiIiIgouJl8efPjw4bK/X3vtNcyePRvr169H27Ztde+HM/ESEREFF58GMFIWiwU//fQTcnNzkZycrLpNYWEhCgsL7X9nZWUBAKxgAENERBRMfF7Eu3PnTkRFRcFsNuPBBx/EL7/8gjZt2qhuO23aNMTGxtp/6tevD4BLCRAREQUbnwcwLVu2xPbt27FhwwY89NBDGDNmDPbs2aO67aRJk5CZmWn/SUtLA8BRSERERMHG511IYWFhaNasGQCgS5cu2LRpE95//3188sknDtuazWaYzWaH2y2ciZeIiCio+DwDo2S1WmV1LnowAUNERBRcfJqBmTRpEoYMGYIGDRogOzsb33//PVasWIFFixa5tR/OA0NERBRcfBrApKen484778SpU6cQGxuLDh06YNGiRbj66qvd2g9XoyYiIgouPg1gPv/8c6/sR4jSLIzBYPDK/oiIiKhqq3I1MJ5iEoaIiCh4BEwAw6HUREREwSNgAhjWwRAREQWPgAlgGL8QEREFj4AJYNiFREREFDwCJoDhitRERETBI2ACGMYvREREwSNgAhjOxktERBQ8AiaAYQaGiIgoeARMAMMaGCIiouARMAEMu5CIiIiCR8AEMEzAEBERBY+ACWA4Ey8REVHwCJgAxsoUDBERUdAImACGCRgiIqLgETABDJcSICIiCh4BE8CwBoaIiCh4BEwAwxoYIiKi4BEwAUyRxerrJhAREVElCZgApqDY4usmEBERUSXxKID5+uuv8ddff9n/fvrppxEXF4eePXvi2LFjXmucOwqKmYEhIiIKFh4FMK+//joiIiIAAOvWrcOsWbPw5ptvIiEhAY8//rhXG6hXfhEzMERERMHC5MmD0tLS0KxZMwDAr7/+iuuvvx73338/evXqhb59+3qzfb
oVlDCAISIiChYeZWCioqJw/vx5AMDixYtx9dVXAwDCw8ORn5/vvda5gRkYIiKi4OFRBubqq6/Gvffei86dO+PAgQMYOnQoAGD37t1o1KiRN9unW0EJa2CIiIiChUcZmFmzZiE5ORlnz57F/PnzER8fDwDYsmULbr31Vq82UK8CZmCIiIiChkcZmLi4OMycOdPh9qlTp5a7QZ7iMGoiIqLg4VEGZuHChfjnn3/sf8+aNQudOnXCbbfdhosXL3qtce7IZwBDREQUNDwKYJ566ilkZWUBAHbu3IknnngCQ4cOxdGjRzFx4kSvNlCvgmIrsgqKfXJsIiIiqlweBTBHjx5FmzZtAADz58/HNddcg9dffx2zZs3CggULvNpAvb7bcAwdpizGxysP++T4REREVHk8CmDCwsKQl5cHAFi6dCkGDhwIAKhRo4Y9M1PZii6NQpq+YJ9Pjk9ERESVx6Mi3t69e2PixIno1asXNm7ciHnz5gEADhw4gHr16nm1gURERERKHmVgZs6cCZPJhJ9//hmzZ89G3bp1AQALFizA4MGDvdpAIiIiIiWPMjANGjTAn3/+6XD7u+++W+4GEREREbniUQADABaLBb/++iv27t0LAGjbti2uvfZaGI1GrzWOiIiISI1HAcyhQ4cwdOhQnDx5Ei1btgQATJs2DfXr18dff/2Fpk2berWRRERERFIe1cA8+uijaNq0KdLS0rB161Zs3boVx48fR+PGjfHoo496u41EREREMh5lYFauXIn169ejRo0a9tvi4+Mxffp09OrVy2uNIyIiIlLjUQbGbDYjOzvb4facnByEhYWVu1FEREREzngUwFxzzTW4//77sWHDBgghIITA+vXr8eCDD+Laa6/1dhuJiIiIZDwKYD744AM0bdoUycnJCA8PR3h4OHr27IlmzZrhvffe83ITiYiIiOQ8qoGJi4vDb7/9hkOHDtmHUbdu3RrNmjXzauOIiIiI1OgOYFytMp2SkmL//Z133vG8RUREREQu6A5gtm3bpms7g8HgcWM8FWoMgaXSj0pERES+ojuAkWZYqpowkwH5vm4EERERVRqPinirGqMPsj5ERETkOwERwJhCGMAQEREFk4AIYEIYwBAREQWVgAhgjAxgiIiIggoDGCIiIvI7DGCIiIjI7zCAISIiIr8TEAEMRyEREREFl4AIYIwhAfE0iIiISKeAuPIzAUNERBRcAiKAYQaGiIgouATElZ81MERERMElIAIYjkIiIiIKLoERwHAxRyIioqASGAGMkQEMERFRMAmMACYgngURERHpFRCXfnYhERERBZfACGBYxEtERBRUfBrATJs2Dd26dUN0dDRq1aqFkSNHYv/+/W7vh8OoiYiIgotPA5iVK1di3LhxWL9+PZYsWYLi4mIMHDgQubm5bu2HE9kREREFF5MvD75w4ULZ31999RVq1aqFLVu2oE+fPrr3wwQMERFRcPFpAKOUmZkJAKhRo4bq/YWFhSgsLLT/nZWVBYAZGCIiomBTZa78VqsVEyZMQK9evdCuXTvVbaZNm4bY2Fj7T/369QGwBoaIiCjYVJkAZty4cdi1axfmzp2ruc2kSZOQmZlp/0lLSwPAieyIiIiCTZXoQho/fjz+/PNPrFq1CvXq1dPczmw2w2w2O9zOeWCIiIiCi08zMEIIjB8/Hr/88guWL1+Oxo0be7Qf5TwwmXnF3mgeERERVVE+DWDGjRuH7777Dt9//z2io6Nx+vRpnD59Gvn5+W7tRxnAdHx5MeZsOObNphIREVEV4tMAZvbs2cjMzETfvn1Ru3Zt+8+8efPc2o/aTLzP/7LLW80kIiKiKsanNTBCCK/shzUwREREwaXKjEIqDw6jJiIiCi4BEcBwIjsiIqLgEhBXfsYvREREwSUgLv0mTmRHREQUVAIigAk3BcTTICIiIp0C4sofHmr0dROIiIioEjGAISIiIr8TEAGMWSOAeXjOlkpuCREREVWGgAhgtDIwf+887bXJ8oiIiKjqCIwAxqj9NEqsDGCIiIgCTWAEMGHaT6OwxFqJLSEiIqLKEBgBjJMi3sJiSyW2hIiIiCpDYAQwJicBjI4MjBACVnY1ERER+Y2ACGDMoeXrQrr3680Y9N4qFFvY3UREROQPAiKAcdqFVOK6C2nZvnQcTM/B9rQML7aKiIiIKkrgBzDFzKoQEREFmoAIYEKdDKN+6bddyMgr0rUfThlDRETkHwIigAGAnx9MxrNDWjncvuNEJp77ZaeufXDSOyIiIv8QMAFM10Y1MKhtkv3vpJhw+++bUy9qPk4atDB8ISIi8g8BE8AAQKjRYP+9mrmsLsbZSCQmXYiIiPxPQAUwxhBpAGOy/16gmMwuI68Ij/ywDSsPnIVVmoFhMENEROQXAiqAiZCMRooMc8zA5BaWYN/pLLyxcD/+2PEfxnyxEdL56wQ7kYiIiPyCyfUm/iMuMgzPD20NgwHYdjzD4f7B769C2oV8mCSZGlnQwviFiIjILwRUBgYA7uvTBPde0QRhJvlTO3ouF2kX8gHIV6gu4DwxREREfifgAhgbaT0MALyxYJ/qdh2nLrb/zgQMERGRfwjcAMZgcL2RAot4iYiI/EPABjAhigxMQnSYj1pCRERE3hawAYxydYH4amaXj+EoJCIiIv8QuAGMogvJFOK6S4ldSERERP4hYAMYgyKAKba6jk4YvxAREfmHgA1glKOQSiwcLk1ERBQogieA0ZGBsbIPiYiIyC8EbAATYlBmYHR0ITGAISIi8gsBG8AoRyGVWF13IenYhIiIiKqAgA1glBmYYh0ZGIuXMjB7/svCpP/9izNZBQCAZXvPYPz3W5GZX+yV/RMREQW7gFrMUUoZwGTkFbl8jLe6kIZ+sBoAkHouDz/cfznGfr0ZAJAQZcaUa9t65RhERETBLGAzMMoi3gW7Trt8jLcHKu0/ky37+3RmgXcPQEREFKSCJoDRw9ujkJQZHY5yIiIi8o6ADWCUXUh6eD2AcfE3EREReSaAAxj3H1PRGRIO0yYiIvKOgA1gPOlCqujJehm/EBEReUfABjBVoQtJifELERGRdwRsAONJBsbbXTzK3bELiYiIyDsCNoDxpAbG211IjqOQvLt/IiKiYBW4AUwlDaO2WAUKii0Vtn8iIiJyFLABjNGLNTBnsgqwaPdpWFRSKEPfX40OUxYjt7DE7eMRERGRZwI2gPEoA6PRx9PvrRV44NstWKiYzffDZQex/0w2iixW7EjLcHiccm/MwBAREXlHwAYwnmRglOs9CiHw2eojyC0q7SL692SG7P63lxwo+0PtcA77c7tJREREpCJwF3P0IDSTFt1arQLdX1+Kczlli0A2TYjSPp6OgMm2+2Pnc1EnLgKhxoCNH4mIiCpUwF5BPZkHRlrj8u/JTFnwAgDCyUwueo6XU1iCpXvO4MoZKzDmi41ut4+IiIhKBWwA49lijmW/m1Qe72yYtW1ztUJfm50nM3HvN5sBAGsPn3e7fURERFQqcAOYco5CUgtELE6KWGyHe+5/O+23seSFiIioYgRsAFPeUUjFKukWi5MUjO2h8
zanuX3cyrTnvyz89e8pXzfDQUGxBUUlpefXahV4Z8kBpOxL93GriIioqgrYIl5p+FItzGgfSeSMLQgRQqBILYBxklIpUblTCFHllg8Y+sFqAEDN6GR0b1zDx60pVVBsQdvJi1CjWhg2PT8Ai3afxgfLDgIAUqcP83HriIioKgrYDIxB0oWUEG3W9RiLEFiw8xQ6v7IEK/efdbhfmqFRzhmjVftSVZcPOHAm29dNsDuUngOLVeBsdiEA4MTFfB+3iIiIqrqADWCkqkeG6dpOCIGH5mxFRl4xPll1xOF+aQ2Msh6mxOqYsRFwXtTrSy/8uguFJfqWQKhMQgino72IiIiAAA5gpF1IYSZ9T9NVsCG9X7mt1mOragADAIt3n/F1ExwIwQn/iIjItYANYKTCdE4Y5yrWkHUhOWRgNAKYSrgaexokVcXgyiqYfyEiIteCI4DRyMC0rh0j+9vVWkUWJ8Os1YKBvCIL1h46p6uNu05m4q4vN2LPf1m6trdZeeAs2k1ehF+2nXDrcYDzifl8xcoMDBER6RCwAYx0GphQo/qQ6j7NE2R/ay3maGORFfHK79PKwNz/7Ran+7S56ZN1WLH/LG7+dJ2u7W3GfLER+cUWPD5vh8ttq2LGRcnKGhgiItIhYAMYqTCTUfV25Wy9rq7vshoYoczAOJmmV4e8S8O8swtK3HqcOzMOq81tU9WwBoaIiPQI2ABGTwbGpKiNKU8Xkto8MJXBnRmHlQFMVQwUXL0GREREQAAHMFJq6xoBjhd/VxdPaReTcoI6X3XPuLPqtq+CLHdYq+Dkf0REVPX4NIBZtWoVhg8fjjp16sBgMODXX3+tkONorRRtUmRmXA+jlvyucxRSRStPBqYqYhEvERHp4dMAJjc3Fx07dsSsWbMq9Dha13hlZsZVAGMVAhdzi/DviQyHbV/4dRfWHtY34sib3FnzSW15hKpGCFFlZy8mIqKqw6drIQ0ZMgRDhgypkH0bJFPZGTQiGGUBbPqlqey1lFituPnTdThwJgdv3djR4f7b/m+D2+3cnHoBGXnFLrc7lJ6N2rERqGaWv2TuFfHKI4OqmOlg8EJERHr41WKOhYWFKCwsCzKysvTNmaJ1iVdmYI5fyHO6H4sVOHAmBwDw7bpUXcd25YaPXQ+b3pR6ATd+vA514yKw5tl+svukXUgr9qfj8ibxCA9VH3VV4icZGA6jJiIiV/yqiHfatGmIjY21/9SvX197Y0lsolUDo+x+SXMRwGw4ct7+++Gzua4b7IJWl9Xx8/J2/PXvKQDAyQzHRQ7P5xbZf7/ry02Y+sduzeNV1S4kaSbIKio+C/PtulTMWLSvYg9CREQVyq8CmEmTJiEzM9P+k5aWprltz6bxaJxQDQPbJEKrl0V5c2GJ8wv8kXNlQUtOoXvztajJK1Lfx9ivN2HniUy8+OsuXMwt0hyV8/2G4w63/bBR+5w4dCG50daKJB39JTQmghFCoMjF66PXi7/txqyUw9h7yr1Zj4mIqOrwqy4ks9kMs9msb1uTEcsmXgmDAZj6xx71jdwYwVMR8ovUV4M+mJ6D4TP/AQBkFRQjLiJUdbtpC/a6dbyqOgpJGsBYhTywsloFQkIMGP/9NizdewZrnu2HFfvP4uTFfDw2oHm5jpvrhSCUiIh8w68yMO4KCTHAYDBoxil6wpfHB7TAsPa1vdoum0d+2OZym/2nszUzJe6GX1U3gJH+LhRdSqV//LXzFApLrPhl60k8+dMOvLv0AHadzHT/WJKDaRV3E7nrf1tPYMwXG5GZ77ogn4i8w6cBTE5ODrZv347t27cDAI4ePYrt27fj+HHHrpHyMLh9qS+TGGNG10bVvdiaMhuOXnC5jfIiW1BswYfLDuLAmWy3L8DKLiRf+2z1Ebyz5ICsi0y5FpKzehh3l10A5PP1MH4hb5n44w6sPHAWH6Uc8nVTiIKGT7uQNm/ejKuuusr+98SJEwEAY8aMwVdffeW142hdqPTM+BobEerT4tcQg7wk5M2F+/HFmqP4MOUQIsPURxtpKfZSDYm7MvKKkHYhH+3rxdpvs1oFXv2rtAusYY1I++3KEhjl7MjS19KdIeQ2/rCgJfkvZmCIKo9PA5i+fftWyrTxWpc5PUeOiQiVjfSpbMrga9Hu0wCAohIrqrkbwCgCscpad6jvWyuQkVeMufdfjobxkTiXXYQWSVH2+6XFzFbFRHbKJkqzSEYP8oclkkU3mYAhIvJfflXE6ymt2Wr1XL9jI0I9+qbvLSEGg6xL5VRm2VBqt7uQFNkHayVlI2wT9S3dcwaf/XMUAPDnI73L2qEYRi3lsGSDJAgzurMQlG1/rIGhClQVJ4ckClQBXcRro3WZ0pOBiI0IlU0WF6sxIqii/HsiE4XFZRdt6QXe3ctvgWLUkzI4qGjSo207ftH+u3IYtbwGRt5GaRDmzjpQ9sdXsTogIiLyTHAEMBoXOum1MSEqTHWbmIhQWQbnpweTvdo2PX7ackL1dnev38q5azzNwBw/n+dRLYn0fEsf75CBkXYhKcp2CovLgjBPEijS47IehojIfwVJAKN+u/TyFRsRigGtEx22iVasPRQdXjV63Uqfk/oTK7FYcTG3CMv3nZF1uSjnPfHkAv7Hjv/QZ0YKHvlhq9uPlWZTLLJgpqyNpYs5amdgPll1xO3jSklrYCqrBoiIiLyvalyNK5hWCYu0gLiwxIpqZsei2JAQg+wCGx1euV1IWgzQDswGvbcKBoMBh9Jz8MqItrgjuREAIEcx86+yN+XAmWyEm4xoEB8JLZ+sOgwA+HvnaU+bDkB+7qXdOlbFKCRn3VyeBCDMwBARBYbgyMBoZCqk17/cwhKHYcn/e7gngNJFHG3CTVXjlFkFcFZj9ezDZ3NxKL100cnFe87Yb1dmYKRdSOdzCjHw3VXoMyPF6XE9KZy1kWY/pMGDdAkH5Sik0ont1AMNTwIQ6TwwlVXETMDeU1kY9dEarD10ztdNqVBciJSo8lSNq3EF08rASL/B5xZaEBkmT0hVu/S3NANj8mTsrg/VqFZW25NbqF3E+69kVtvDZ3MwK+WQ6lpNyhW8bQqKLdicesHpitfStYwssgyMPICRdylpT2bnSfwhDXpKGMBUmrFfbcK24xm47bMNvm4KEQUI/7oae0qjr0V6/SqyWB1GGEWElmZk/PlCF1+tbO0oZRGv9GJ+OrPA/vudn2/EjEX78cIvu5BTWIIxX2zEvE2lsyNLh5S/vXi//fenf/4XN3y8Dh8u156JVJZpkRxbGtgIIQ9urELIMjdSnswhVCLprlLrnjpxMQ/ZBZyMzNvO5fhuLiUiCkxBEcBo1sAo0r3VI+UBTHhY6ekp8eOht/tOZyH1XC6enf8vlki6kwB5EHFKEsCczCida+Z/207ihw3HsfLAWTwzfycAeQbmw+WHsPNEaebm9x3/AShdHkCLNNMiDQodMzCSNgpAI37xKAMjK+JV7OBkRj56v5GC7q8tc3/HlUQIgcNnc/yv+4tT7hCRlwVFAKOnBgYoHTItFa6RgXl1ZDvvNa6CrT18Hn3fWoG5m9Ic7pNmILTq
aZRZCmUXWpYiWxFxqdtNCIF5m47j3xMZ9vukmRZp0CK9/dqZa7DvdJb9b6tVOwNT3hoY5eM3p5auTZVfrL5KuC8JIXDkUtde/7dX4uU/NVZYr6L8JX45m12Imz5Zh1+3nfR1U4jIheAIYHSuhaQMYGxdSBbFBfT2yxt6r3E+JP0WL61dkS5R8Naism6i33f851ADowwCz+UU4rv1x7DiUtbm2plr7PdJu5Ckk/Mp15radjyjrI1CaGZg1LqQUvalywIgJWejkGwBq9a+fWnupjT0e3sl3lp8AADw1dpU3zbITf4y6fH0Bfuw8egFTJi33ddNISIXgiOA0bj9mg51ZBfrGMUcL6GXsg1aSxH4O61C2lzJjL3SjMWjP2zD2sOOo0g+vTS02uaFX3fhyNlch+2kmZaCEovq7UpWAc0MjFUA+UUWFFzKmOw7nYW7v9qEwe+t1tyfVg1MfpHFHrACQF6R/ixMZQzHnumktqiy/bb9pL3LUK/yrAhfmTLzWatD5C+CIoCJi3Scu2Xbi1ejUUI1TBzYEgBw3WV1ESOZ42XDc/3tv4/u0RBNa1bDI/2aVXxjK5E08VGoc6XqgmL5dpn5xXj9730O25lVhptLjyHdj3KRSSmrELL6HKkiiwUdX16MDlMXo8Rixf7T2S7br5aB+XTVYbR+aSGW7S2rEVIWPGv5ZdsJtJ+yCCsPnNW1vSeKSqz2uqTKsnzfGbz8xx6HUWW5hSV4bO52PPrDNreKnf0lA1NeVSxxRxr+y8i3f/Eh/xUUE9nd1K0+Vh88J5sTpfql4cX39GqEXs3i0axmFAwGA9rWiYHZFIJa0WWjd2IjQrHsib6yfVYLM8oyFf5IOZGfJ9SGWgPy7hgbaaZFerx1h89r7n/LsYt4+ud/Ve87n1Nk32dGfrGuTIjaTLy2AOzrdcfs92UXFCMxJtzl/h6ftwMAcNeXG3F02jCX23vioxWVn32556vNAIBmtaJwW48G9tulGbmLucW6J3YMkviF/MC+01kY/N5qNIyPxMqnrvJ1c6gcgiIDYzYZ8emdXfHZnV0ByGs8DAYDWiXFwGQMgTHEgD/G98bPD/Z0uVJxlM4lBdrVjcHzQ1t73vgK9Mu2k/ZRQ4UlngVjysnxbNTmi5EWx0q//WQVaGc75qw/pnmflMUqdA13l80D42R0WbaTNqmpyG/eC3eVb9Zjd/y98xTu+2az/e//FJkfad1UdqFnw80/XnkYqedy8ebCfTiXo148rsfGoxdwKN111s0d5X0dgyXT5M9ss4gfO5/n45ZQeQVFAGPTv3UtzLv/cqx6WjvqDgkx6Kp5qR0boeuY8+5Pxn19muhuY2VKzy7Eq3/txaH0bKd1KM64k4WSBju607dOrgjSJQiKLVZZRkmrCFe+bIH21UpvF5K3nczIxy/bTsgCBVfBtJqNRy/g/aUHHbJSO09kYtRHa7Dp0ogrpYfnbJUNt1dONSCtG3InyJM+h+kL9mHErDX4aMVhzeyaK2kX8nDTJ+sw4J1VHj2+orALiajyBFUAYzAY0KNJPOKjzK43duHdmzuhVVI03r25o9PtlMsTVEUXcos97kLSutCrBSjSwli9x9uRlqF5X5GiEFharqHVnSSvgdE+7h2fb8T3G46j2GLFuZxCFBRbkJ6tXovjTWO+2IjH5+2QLVrpyZf6mz5Zh3eXHsCPm+XD52//fAO2Hc/AjR+v07Uf5QVZGlhl5Xs+4V/mpcduPX7Ro8ennncsEvcGxh+Bj0mywBEUNTAVoXFCNSyc0AdAWR2E0shOdTz69uwLhcWeBTBaFzG1ACZXUi9T6IUCOunw62s+/EcWIK06eBZ9W9RyyKbJ12Ny/pyf+2UnVh04i4W7y7pwNj7XH7V01MZ4yraG1RdrjuKhvk3Lvb/Dl/Znk+lm0KG8oEu76dzZl9Z/gcnDtbWMkv8rIYTL/7Nj53PxwbJDePDKJmieGO3RMW2sVoHjF/LQMD7Sb/6/iQJRUGVgKsqXd3VDp/pxDre/Us4J7xKiwvDaqIqfNE8I4XENjNZFLF8lIJJ+m/c04yMlHf2kHPZ8z1ebMW+zyuR9bq5GLQ1eAGDdEe2CY2+STixYnmtkeTMKygyMxc0A5lB6DlYfPKsZwYQZ1e84k1WAOz7fgEW71et/pIGDntqnu7/ahPlbT+AGnZknZyb9byf6vrUC312qz6pqcwYRBQsGMF5wVata+HVcL4fbQzy48jzav7n9917NEtAqyfW3RemCjZ7yNKDQuoi5qnHxRgDjitpsqrKZeEXp2kfuCHOymGexxYrjFVAYWK4AppzXVmUNjLRuSE8X0oB3VuKOzzdq1suEaqzuPvWP3Vh98Bwe+HaL6v0mSeCjp37LNi+Rs6BLCKGrNssWGL+z5MClx0n24fLR5GtMmgUOBjAVyN0A5qVr2qBLw+r2vyNCjbqG8pZ3nj2BCghgXGR0KmMOBrVv5tIMwit/7kHvN1Lc2qdy6QSpCfO2o8+MFMxcfhAzFu1z+zmmXXA/+HF1DKsQ+L9VRzD++60OGae1h87h+tlrsf90Nnb/l4lndBTUSvdR5IU1wkI1AkKtpS1spP9bnhagKz35079Y62RIvxa1RUF9pTImVfR3/jKpIrnGGpgK5G6kf0/vxrKZbsNDjagV7TqAKW8/vFUIj2tSMvI0AhgXo5MqI4BRmyBPOTGbuy5qPF8A+OvfUwBgn+7fGBKCiVe30L3vge+qj6jR+sB99c89+HJtKj65vQsGtEnU3O9rf+8FAIzqXFd2+22fbQAA3P/tZtSoFiZbwsFOWcQrpF1w5Q8ctAIY1zGBNJDyTgAzf+sJ1dsX7T6NzPxi3NS1vmoLnI1mq0zP/bITi3adxuLH+3hloEJF+WlzGprUrIYuDWv4uil+oaDYojqvFjEDU6E8iSukXRQRYUaEaaTYpZrWrOb+gSRKLMLjDMxxjayBqwURK6MLqVglQ6CnXsIZZcDm7DWWLmSph9Y50zrGZ/8chcUq7AGKGj2TFZ7NLtTM/ggAmXnF9v1IY4WD6Tk4lVm+GYKVNTAlFivmbDiGg4riYyXpHD7eysBoeeDbLXj6538dzpHt1HohjvOK7zccx/ncItWFWyuC1SocMmWrDpzFl2uOaj5mw5HzeOrnf3H97PLXIgWDT1cdRqsX5bOEUxkGMBXIWRfSs0Na4fbLGzjcLv1GGuEi6q4Vbcb8h3qiQY1IzxuJ0ghf7cJeM1r/tzhp7Q6gXsSrPGZFU8vAlDfFnplfhH2ns3Drp+uxOfWCZgYBkBcWHz2Xi/UeFgC7ioPrVdeek0j6uqpNLgiUXoi13qsr9qej48uL8dpfe/HrtpM4L5l4bsX+s0ietlzz2HqKW5Xn77ft/+H5X3a5LBCWdtu4ysDYMmOekA4b18o2VpUMjE1ldSM9PGcrur22VDan0J1fbMTUP/Zg41H1eYZ2nsyslLY54081MLaBCk/8pD7SNdgxgKlA0ovCWzfK54uJrxaGV0e2x63dS4OY9nVjAcg/0G1zyHx9T3fV/fdtWRNdGlaHyclFVI9cjeUAnBWsKimLjV3XZuj
[image/png base64 data omitted: matplotlib figure titled 'loss per token' (x-axis: token index, y-axis: loss), rendered by this cell's plotting source below]", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib import pyplot as plt\n", + "loss_per_token = model.loss_per_token(x,y)\n", + "loss = model.loss(x,y)\n", + "loss_per_token.mean().item(), loss.item()\n", + "# print(loss_per_token.shape)\n", + "\n", + "# plot by points\n", + "plt.plot(loss_per_token.mean(dim=0).detach().cpu().numpy())\n", + "# x-axis 0 ~ 1024\n", + "plt.xlim(0, 1024)\n", + "plt.ylim(0, 5)\n", + "plt.xlabel('token index')\n", + "plt.ylabel('loss')\n", + "plt.title('loss per token')" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "number of parameters: 101441\n", + "steps: 0 loss: 4.318515300750732\n", + "steps: 100 loss: 2.431689500808716\n", + "steps: 200 loss: 2.423933744430542\n", + "steps: 299 loss: 2.4220685958862305\n", + "validation loss: 2.4799563884735107\n" + ] + } + ], + "source": [ + "model = BigramLanguageModel(vocab_size, d_model).cuda()\n", + "optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)\n", + "batch_size = 32\n", + "context_length = 1024\n", + "iterations = 300\n", + "x, y = get_batch(batch_size, context_length, 'train')\n", + "\n", + "for steps in range(iterations):\n", + " # print(x[0], y[0])\n", + " x, y = x.cuda(), y.cuda()\n", + " loss = model.loss(x, y)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " if steps % 100 == 0:\n", + " print('steps:', steps, 'loss:', loss.item())\n", + "print('steps:', steps, 'loss:', loss.item())\n", + "with torch.no_grad():\n", + " val_x, val_y = get_batch(1, context_length, 'val')\n", + " val_x, val_y = val_x.cuda(), val_y.cuda()\n", + " loss = model.loss(val_x, val_y)\n", + " print('validation loss:', loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'loss per token')" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjAAAAHHCAYAAAChjmJTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB31UlEQVR4nO3dd3gU5doG8Ht300kjBBJC771JkyZIERQRxWNBVOwNVA7HhuUInwXsjWIXj4ooNhSl9957b4FQQ0sjpO77/RF2mZmd2ZndzGYzcP+uy0uyOzv77tRnnrfZhBACRERERBZiD3YBiIiIiHzFAIaIiIgshwEMERERWQ4DGCIiIrIcBjBERERkOQxgiIiIyHIYwBAREZHlMIAhIiIiy2EAQ0RERJbDAIbIoiZPngybzYbU1NRgF+WyM3r0aNhsNpw+fTrYRSEiDQxgiOiykpubi9GjR2PRokXBLgoRBRADGCK6rOTm5mLMmDEMYIgucwxgiMhy8vLy4HQ6g10MIgoiBjBEl5mJEyeiWbNmCA8PR0pKCoYNG4aMjAzZMnv37sWtt96K5ORkREREoHr16rjzzjuRmZnpXmbu3Lno2rUr4uPjER0djUaNGuHFF1/U/X6bzYbhw4fjhx9+QKNGjRAREYG2bdtiyZIlHssePXoUDzzwAJKSkhAeHo5mzZrh66+/li2zaNEi2Gw2TJ06FS+//DKqVauGqKgoZGVleawvNTUVlStXBgCMGTMGNpsNNpsNo0ePdi+zYMECdOvWDRUqVEB8fDwGDhyInTt36v6uQ4cOoX79+mjevDlOnjwJAMjIyMCIESNQo0YNhIeHo379+njrrbdkwVVqaipsNhveffddfP7556hXrx7Cw8PRvn17rF27Vvd7iUhdSLALQETmGT16NMaMGYPevXvj8ccfx+7duzFp0iSsXbsWy5cvR2hoKAoKCtC3b1/k5+fjySefRHJyMo4ePYoZM2YgIyMDcXFx2L59O2688Ua0bNkS//d//4fw8HDs27cPy5cvN1SOxYsX46effsJTTz2F8PBwTJw4Ef369cOaNWvQvHlzAMDJkydx9dVXuwOeypUrY+bMmXjwwQeRlZWFESNGyNb52muvISwsDM888wzy8/MRFhbm8b2VK1fGpEmT8Pjjj+OWW27BoEGDAAAtW7YEAMybNw/XX3896tati9GjR+PChQv45JNP0KVLF2zYsAG1a9dW/T379+9Hz549kZCQgLlz5yIxMRG5ubno3r07jh49ikcffRQ1a9bEihUrMGrUKBw/fhwffvihbB1TpkxBdnY2Hn30UdhsNrz99tsYNGgQDhw4gNDQUEPblYgkBBFZ0jfffCMAiIMHDwohhEhPTxdhYWHiuuuuE8XFxe7lxo8fLwCIr7/+WgghxMaNGwUAMW3aNM11f/DBBwKAOHXqlM/lAiAAiHXr1rlfO3TokIiIiBC33HKL+7UHH3xQVK1aVZw+fVr2+TvvvFPExcWJ3NxcIYQQCxcuFABE3bp13a95c+rUKQFAvPrqqx7vtW7dWlSpUkWcOXPG/drmzZuF3W4X9957r/u1V1991f37d+7cKVJSUkT79u3F2bNn3cu89tprokKFCmLPnj2y73jhhReEw+EQhw8fFkIIcfDgQQFAVKpUSfb56dOnCwDir7/+0v1NROSJVUhEl4l58+ahoKAAI0aMgN1+6dR++OGHERsbi7///hsAEBcXBwCYPXs2cnNzVdcVHx8PAJg+fbpfbU06deqEtm3buv+uWbMmBg4ciNmzZ6O4uBhCCPz6668YMGAAhBA4ffq0+7++ffsiMzMTGzZskK1z6NChiIyM9LksLsePH8emTZtw3333ISEhwf16y5Yt0adPH/zzzz8en9m2bRu6d++O2rVrY968eahYsaL7vWnTpqFbt26oWLGirPy9e/dGcXGxR5XZHXfcIft8t27dAAAHDhzw+zcRXckYwBBdJg4dOgQAaNSokez1sLAw1K1b1/1+nTp1MHLkSHz55ZdITExE3759MWHCBFn7lzvuuANdunTBQw89hKSkJNx55534+eefDQczDRo08HitYcOGyM3NxalTp3Dq1ClkZGTg888/R+XKlWX/3X///QCA9PR02efr1KljfGOo0No+ANCkSROcPn0a58+fl70+YMAAxMTEYPbs2YiNjZW9t3fvXsyaNcuj/L1791Ytf82aNWV/u4KZc+fOlep3EV2p2AaG6Ar03nvv4b777sP06dMxZ84cPPXUUxg7dixWrVqF6tWrIzIyEkuWLMHChQvx999/Y9asWfjpp5/Qs2dPzJkzBw6Ho1Tf7wqE7r77bgwdOlR1GVe7FZfSZF/8deutt+Lbb7/FDz/8gEcffVT2ntPpRJ8+ffDcc8+pfrZhw4ayv7W2mRDCnMISXWEYwBBdJmrVqgUA2L17N+rWret+vaCgAAcPHnRnBlxatGiBFi1a4OWXX8aKFSvQpUsXfPrpp3j99dcBAHa7Hb169UKvXr3w/vvv480338RLL72EhQsXeqxLae/evR6v7dmzB1FRUe5eQjExMSguLtZdl69sNpvq69Lto7Rr1y4kJiaiQoUKstffeecdhISE4IknnkBMTAzuuusu93v16tVDTk6O6eUnImNYhUR0mejduzfCwsLw8ccfy57qv/rqK2RmZqJ///4AgKysLBQVFck+26JFC9jtduTn5wMAzp4967H+1q1bA4B7GW9Wrlwpa8OSlpaG6dOn47rrroPD4YDD4cCtt96KX3/9Fdu2bfP4/KlTp/R/sIaoqCgA8Og6XrVqVbRu3Rrffvut7L1t27Zhzpw5uOGGGzzWZbPZ8Pnnn+Nf//oXhg4dij///NP93u23346VK1di9uzZHp/LyMjw2MZEZC5mYIguE5UrV8aoUaMwZswY9OvXDzfddBN2796NiRMnon379rj77rsBlIyDMnz4cNx2221o2LAhioqK8N1337mDCgD4v//7PyxZsgT9+/dHrVq1kJ6ejokTJ6J69ero2rWrblmaN2+Ovn37yrpRAyVjs7iMGzcOCxcuRMeOHfHwww+jadOmOHv2LDZs2IB58+apBlFGREZGomnTpvjpp5/QsGFDJCQkoHnz5mjevDneeecdXH/99ejUqRMefPBBdzfquLg42VgxUna7Hd9//z1uvvlm3H777fjnn3/Qs2dPPPvss/jzzz9x44034r777kPbtm1x/vx5bN26Fb/88gtSU1ORmJjo128gIgOC2wmKiPyl7EbtMn78eNG4cWMRGhoqkpKSxOOPPy7OnTvnfv/AgQPigQceEPXq1RMREREiISFBXHvttWLevHnuZebPny8GDhwoUlJSRFhYmEhJSRGDBw/26DKsBoAYNmyY+P7770WDBg1EeHi4aNOmjVi4cKHHsidPnhTDhg0TNWrUEKGhoSI5OVn06tVLfP755+5lXN2ovXX7VlqxYoVo27atCAsL8+hSPW/ePNGlSxcRGRkpYmNjxYABA8SOHTtkn5d2o3bJzc0V3bt3F9HR0WLVqlVCCCGys7PFqFGjRP369UVYWJhITEwUnT
t3Fu+++64oKCgQQlzqRv3OO++obiu17t5EpM8mBFuQEZF5bDYbhg0bhvHjxwe7KER0GWMbGCIiIrIcBjBERERkOQxgiIiIyHKCGsCMHj3aPVus67/GjRsHs0hEVEpCCLZ/IaKAC3o36mbNmmHevHnuv0NCgl4kIiIiKueCHi2EhIQgOTk52MUgIiIiCwl6ALN3716kpKQgIiICnTp1wtixYz0mPXPJz8+XjQLqdDpx9uxZVKpUSXP4cCIiIipfhBDIzs5GSkoK7Hb/WrMEdRyYmTNnIicnB40aNcLx48cxZswYHD16FNu2bUNMTIzH8qNHj5aN5ElERETWlZaWhurVq/v12XI1kF1GRgZq1aqF999/Hw8++KDH+8oMTGZmJmrWrIm0tDSPqe6JiIiofMrKykKNGjWQkZGBuLg4v9YR9Cokqfj4eDRs2BD79u1TfT88PBzh4eEer8fGxjKAISIispjSNP8oV+PA5OTkYP/+/ahatWqwi0JERETlWFADmGeeeQaLFy9GamoqVqxYgVtuuQUOhwODBw8OZrGIiIionAtqFdKRI0cwePBgnDlzBpUrV0bXrl2xatUqVK5cOZjFIiIionIuqAHM1KlTg/n1REREZFHlqg0MERERkREMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVlOuQlgxo0bB5vNhhEjRgS7KERERFTOlYsAZu3atfjss8/QsmXLYBeFiIiILCDoAUxOTg6GDBmCL774AhUrVgx2cYiIiMgCgh7ADBs2DP3790fv3r11l83Pz0dWVpbsPyIiIrryhATzy6dOnYoNGzZg7dq1hpYfO3YsxowZE+BSERERUXkXtAxMWloann76afzwww+IiIgw9JlRo0YhMzPT/V9aWlqAS0lERETlkU0IIYLxxX/88QduueUWOBwO92vFxcWw2Wyw2+3Iz8+XvacmKysLcXFxyMzMRGxsbKCLTERERCYw4/4dtCqkXr16YevWrbLX7r//fjRu3BjPP/+8bvBCREREV66gBTAxMTFo3ry57LUKFSqgUqVKHq8TERERSQW9FxIRERGRr4LaC0lp0aJFwS4CERERWQAzMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCuUPlFxcEuAhERkd8YwFyBVu4/g0Yvz8L4BXuDXRQiIiK/MIC5Ar30+1YAwLtz9gS5JERERP5hAENERESWwwCGiIiILIcBzBVIBLsAREREpRTUAGbSpElo2bIlYmNjERsbi06dOmHmzJnBLBIRERFZQFADmOrVq2PcuHFYv3491q1bh549e2LgwIHYvn17MItFRERE5VxQA5gBAwbghhtuQIMGDdCwYUO88cYbiI6OxqpVq4JZLCIispCjGRew5uDZYBeDylhIsAvgUlxcjGnTpuH8+fPo1KmT6jL5+fnIz893/52VlVVWxSMiumJ9Mn8vKoSH4IGudYJdFFVdxi0AAPw1vCtaVI9TXebPzceQEheBdrUTZK+fzy9CVJgDNpst4OUkcwW9Ee/WrVsRHR2N8PBwPPbYY/j999/RtGlT1WXHjh2LuLg49381atQo49IS0eXuZFYe1h8y/jSfdjYXuQVFASxRcB3NuID35u7B/83YgWJn+e4CsDHtnOrrO45l4akfN+Jfn66Uvb45LQPNXp2NF37dWhbFI5MFPYBp1KgRNm3ahNWrV+Pxxx/H0KFDsWPHDtVlR40ahczMTPd/aWlpZVxaIrrcdXxzPm6dtBKb0jJ0l919Ihvd3l6I7u8sCni5guWCJDgTonwHMFoOnz2v+vonF0cj/2kd7yVWFPQqpLCwMNSvXx8A0LZtW6xduxYfffQRPvvsM49lw8PDER4eXtZFJKIr0LrUs2hdI97rMvN2ngQAnMrO97rc5aKcJ2A0acVdFo3H6KKgZ2CUnE6nrJ0LEVF5ZdWMhL+cFv29WqW25q8hl6BmYEaNGoXrr78eNWvWRHZ2NqZMmYJFixZh9uzZfq8zO68QMRGhJpaSiMqj3zYcQVJsBLrUTwxaGayakXDJyS9CVKgDdruxBqxWDWC0yn2lBaCXm6BmYNLT03HvvfeiUaNG6NWrF9auXYvZs2ejT58+fq1vyurDaDF6Dr5dkWpuQckvGbkFuGXicny+ZH+wixIUFwqK8fqMHezeGQC7T2Rj5M+bMeTL1UEth5Xvf2lnc9H81dkY+s0aw5+xesCmdJn9nCtOUDMwX331lanre/HiLMuv/rkdQzvXNnXd5LvXZuzExsMZ2Hg4A49cUy/YxSlzkxbtw5fLDuLLZQeROq5/QL6j2Clw4FQO6leJvqK6gR7LvBDsIljetIsNV5fuPW34M2ZmYNKz87D9WBZ6NKwc8GOXbWAuT+WuDUxZKU3q8Nz5Aq+f/2H1Iczcetzv9V8u5u86GewiBNX+0+o9H8z0zLTN6PPBEny9PDXg31WeTFoY+KyekZuqCPAz/Ji/tmPYDxsCUtVR5Ec6RTjN+/4e7yzC/d+sx
Z+bj5m2Ts1ARWt5076ZguGKDGDe+HsHrnlnITJzC33+7LrUs2jz2lwMm7JB9f1DZ87jpd+34fEf1N+/kmRe8H37XlbK4Or4+8ajAIAJC/cF/svKiX3p2ViTGvhqOSM5gUA/wX+zPBV/bz2OXSeyTV+3P2O6mJmByS0oBgAs2n3KtHVq0QoA2QbG2q7IAOaLpQeRdvYCvl99yOfPfrr4AADgn60nVN8/e77A/e/yPuiTlRQWm/joR5aWeaH8DBoXyDNcev0IRONZf65PxeX8hm+kJsop+d3l/OcExZmcfJzJsUZP4Ms2gDl8JhefzN/rV5bFG4ON9QEABUW86Zrh2Wmb0WL0bJzMygt2UcqtK6f1iyfpU/TJrDwUFTux9Ugmft94pCy+PGCrLnJeun44fLnwGGQ0GJEuZtVeSFq/IdBVgFZTUORE29fnoe3r8ww/NAohgvaAGfSB7ALlts9W4GRWPnYcz8Kku9uqLmP3o+GYL5/JKyxGZJjD5+9QsyktA7USolCxQpgp6ysLNpjzhDptfcmN6PtVh/Cf6xqZsMaywYtj2RCi5Ml7y5EM3DR+OTrUTnBXMSXFRqBzvURM33QU2XlFuPvqWobXa+hpXmMX5+QXITo8BMVOgc1HMtC0aizCHHbN7spCCOQWFKNC+KVLclHxpZX7c63SYzQDIw10ynv8ot0GRprN0l/+SpVx4VINQtaFQlSK1h84dsiXq7HrRDaWPX8tosLKNqS4bAOYk1klKTBv9av+PNToXUekF4V8kzIwK/adxl1frkZsRAi2jO5ryjqtoNgpZFVyVsOLY9koFgJ22PDT2pJeNdL2MfvTc9CpbiU8PXUTAKBn4ypIiY/UXJevbSLUgtSV+89g8Ber8GDXOqgQ5sDHC0raJzVOjsENLaqiYlQo7ulUW/aZp6ZuwuztJzD339egVqUKAOSNbAMRwBhtxCtJBAUkA7MvPcf0dSppZmDK+Bz9ZvlBfLXsIN6/vTU61EnQ/0AZc0iOM6MZuhX7zwAAVh04g56NkwJSLi2XbRWSS15RseZ7/lwT1C4kQgg8OHkt7vx8pSxoyffy3b5YsCsdAJCVV37q/n21Yp/xrpouD327Fu3fmOf+22rVJL5eHHedyMIHc/fgfL58PweroeH4BXvxr0krcKHAnOM4UFwPDWrns91ukz1UZOhUKUvv6f424h07cycA4KtlB/HZkgPu13edyMb7c/fglenbPfbpX5uPoaDIiV83HHW/ViRJywegBknWFgQAXp+xAz3fXYTsPPk2kt7wvcU8/h6nW49mYvWBM359Vvm9Wtf0XMkxbKQK6XjmBYz9ZyeOnMv1u1xqJizcjyPnLuD2z1bqLxwE0vubr7vTFoQr9GUfwHjbCX491ah8JL/Iifm70rHqwFnsOZkte90MIQ7r76a7vlyNxXt8622w0KTeCUczLmCZD2NdmEF5czCi34dL8dH8vXhvzh73a//31w5c++4iZOV5v/EGYhiNd+fswbpD5/DT2sPmr9yLL5YcwK2TViAn31jA7jrH1S6gdptN9iSpV20ifV/ZjfpoxgW8M3uXblssadWP1n6RFiOv8NLNtWpchOGylpYyA/PlsoM4cPq8O5PlIsteaJTp4Onz6DR2Ab5adtCvsvxTimEnpOVTu96fzy/Cy39sc/9tZLM+OHkdPltyAPd9s9bvcqk5baBxrNMpsPtENpxOgWnr0jBvR9kMR7Hh8Dn0en/xpXKUIhs5Y8sxdHhjnntWdyEEpm86ir0nze1NZ/07Yyn4M3iSXtCTVyjJwEj+fTonH33eX4zPFvs+fkWowzq5h6Jip+aT2HI/sjBm6DJuAe7+ajVWKZ7yVuw/jV7vLcKqA2cwa9uJUj0FSp3Kzkf7N+Zh1nb1nmp6Nh/JcP/76+UHkXomFz+tCd5suecNZmAOnTmPn9em+RW8Sb3xz06sP3QOXy01djN0BShqWQq7TX5TkzaMVePton3Pl6sxYeF+PPK/de7X1JaWrkPreiFt9Jh29tJTfrSkDUyhrBeS12L7RWs/KQMbeQZG/TOvz9iBE1l5eG3GDr/KUpqB7KQB6vkCz6B3y5FM2d9GqpB2HM8CYH71lpGf+e6c3ej74RI8+eNGPPvLFjwkOd4C6Z4vV5vWi3b4lI1Iz87HQ9+WlH3+znQ8PXUT+nywpNTllLqiAxgjadnCYiee/HEjpqw+bOgz0qcpaRXShIX7sDc9B2Nn7vK5nIHogRAIuQVF6P7OItw/Wf2pJdhjLqxTjB1y1xersf/Uedz5+So89v163PH5KlO+5/Ml+3GmFG131G4SZlVH+sNoQHLrpJV47tcteGbaZo9qCH+cOW+sK+elKiSNDIyk/L5kYJQOXByYcLPkhqh2SEsDAIfGHUv6PccyL2V0pAGWtAqptOeOWm9MrTYOyuPPSBVSYSkjrNK08ZGW7+1Zu/H2LPk1Vnn5lA7GVx6bqU1cVPKQ+3cZD4aqfFCRZhKNUMuAunriSh/KzHSFBzD6J80/W4/jr83H8OLvWzF5+UHVWj7pCSRtcyOtQipNN7PQclKFlJ6dh42Hz2m+v2j3KRzNuFAmA1OVZ4U+nvhKavcV/XUGLsg12tjTlR7/beNR9Ptwaam/93y+Z9CWeaEQv6yXd4/2FmD5XIUkWVaZsVOjFljI2jBp7BbpzaFQdp249HqRSRmYr5cdRKv/85wjTmu/Kn+SkQxMaY++0lSBKovkCgBclD2/ZIGbyRFMZm4hPlu8H8dNnuoiUA9/TqfAutSzqtW1etlKwHi5pIvpVYf7onzcGYPk1T+3617QpNVAo//aoRr0SFchXV761FyaJ4xA9EBwWbb3tOFGmrd9uhK3TFyhWdWizDoYSQv/d/o2jPptq6HvV9qcloFbJ63A+kPaQVUwqLV9WrDrJM4ZzMqoHZFGLiaB4k8q+WhG6S/gysbMADB8ygb8uEbeJsd1U1VvxCsPcKQ3r/yiYtw6aYW70S0gX3bmthPYdlRe/aAk3TIXCooxY8sxHJdkVDSrkKSZFsl3Sh90pEFOaXr//N/Fap1X/9wue71YIyhW3pTk3Y4lZXIKd9BqJEl8+EyuZhsQ6ccLi51Ysc/4dUlv2yjLFshxYJ79ZTPGztyFwRrZXH+v5EIEJoiZtj4N//p0JYZ+7Tmhp5EHF6PXBul27vnuYi9L+uayDWDCQi79tPTsPMzadgLFTuFxEOil6aIjFD3NVY5AWQZGWoVUKO1F4N+heyo7H29JUqJFJg8YdPdXq/Hkj8amPTh0pqSufrLGbN9ncy7doNVONuVL2XmF+N/KQ/hxzWGkGxmkTrENb/9sJdYfOodbJ60AUPL0M3fHScVNoOxv/GrZtgcmr8MtE5cb+rzatvM1nWsmf+bMkfL3wqvWnkFt4sFiIVBQ5MQ3KvNBeatCmrn1BNYfOofPFl/qKaT8rduP6QQwkt/W98MlGD5lo+x9repfrTIVyTIwl44jXwIYIQR2HMvSHUhTGsxJzxPl7nZqZIIe/2E92r0+D2tTz+o+rJzOycc17yxEu9fnqb4v/fgHc/fgri9X4/Ef1ntd
p4veTVRZtkB2o164u6THaOoZc3sv/W9lKjq8OR+7TZ5SwjUVidpDoPKak3r6vMdDhSyb5eUQkC5mpCGzUZdtABMhCWCu/3ApHvt+Pb5dkepxsOsNmawciE6tnk9apypvA1P6AObzJfJ0qFk9m6Tm7Uz3afnNaRmqr5+VtFkocgrdpw3pnjBSh+5a33+nb8OQL1d5bIu7v1qNh/+3Dle/OR9d31qA1NPnZe1aDE3OZ8IVTStoMnpR868KKXBK8/S/7Wgm2r8xD1PX+N6TKdfoE7gTHtVKLhsPZ6CdpCu+9KJcoLKflNVRWl1DtxzJwKEz52X76vBZz/2rNY6RLMh2qlc1S8vqyy6YsuYwbvh4KR7/3nsAIP2tBcXawZJ0k0ivn7O3l/SO+XLpAY+tVFTsxJqDZ93Xwz06vU+k10dXVZfRqmi9S4fy2mtGDVJRsRMTF+3DW7N24ZgJ2UY9o//agVPZ+Rj12xYAJfvOjLnmqsREaL4nDeZ3nchCj3cXodd78uyJ0cRwoK5el20AIx0zxVW1MW/nSZUW9r6tV/pAdeBUzsV1SDMw8iqkqWsOo/f7i2UH+ZYjGfhtwxFDN8szOfILYCACGF/lFhZj+b7THtG4NHXua5sfXwKH/608hOX7PKuxtl5M9585X4Aj5y5gzF/bfa5eMuOJTO3G6FMZVE53vSokmw04ci4XP69LM31Yb+mN9PtVh3zqKvvMtM04nVOAF/yoJlSrQlLjFEIWPEtNXpEq26fSfaMWmng0bNWIeW8avxzd31nk94VZMwOj8W+nEHhvzm58aqAX49cX98/8XZ4PJhsOn8NTP27E8cwLsvWnnr4UfCmvibJqI5UTpNgpPKrv3pu7B7d/thIjLg4gqLehStMLSe/aoVyzGXNMTVlzGG/P2o1Ji/aj29sLJWXxa3U+e+DbtWg1Zg72nypdL6nKMdoj7UofxGZtK+lReUKRKdcb7O58QTEyLxRi+7GsUpRS22UbwKhxCuGRgRFC4EJBMTJy1Z+UlPXE0mi+58VoVFYPrsjAvPDbVuxLz5F1qb1p/HKM/Hkzdh7XTweePq8MYLw/lRZenAfGW8PG0mYZMnILMeTL1XhM8oTndAqslfTyKSzSr12W9QYwWCRfyu5P1YcZk9UVFGmvIzO3EJ8v2Y8TmdpVZmqxSmGxwLajmejz/mLNcSH6vL8Ez/2yBV8sPaD6vr+KLxaooMiJl//Yhtdm7DBW5Qffgzl/JtordgrDQ5hLgzu1m6by+qCbRfTzcNFqrFukUf15+GwuPlmwD+Nm7vK5ykRq0MQV+HPzMfzn582y9dzw8aVG18oMot7Q+yXll3+nqwu867qnt5nSs/Jw1xerMGvbcZ+DQr3TXHlObzmSgffm7MaFgmLFGDLGv1l67dbaH2rXaq1988v6Ixg7c6duGULsJbdsV3ZKOWaPrypGhWq+Jz1GtaojZeMmaayn1Zg5WOLjGGBGXWEBjPoYB21em4PW/zcX2XmF2HY0E01emYWGL8/EtqOZHsv/tE5+wIz6bQu2SLqIabWBUaPV1VQIgfEL9mLGlmMeVVx663zj750YMH4ZPjf5JqZG2h4hPTsf5yRdNQudTt2nG2910VoXBa2gxDVgUmmZMVS6twzIs79sxpv/7MJdX2h32VYrQ1GxEw//bx32pudojgvhCp4Xm9ALTBqguG4A0n0iDdS98nFzSgMerXmDlJxCyMZP8br+Iu8ZGF/bSvvbCLTI6YTTKZCenSfbrq6bxsytxzF+4T7369IRhM3IsB08fd7LjVcZwPiegfFG7eHqt41HsWL/GTz2/Qafg0K9gE75fY99vwGfLNiHL5YekO09M8baka7ixd+2aS6n9My0zfhs8QGs1On5Zte5Y+9Lz9FttyXl7VAyMi1Oacd8Kq0rKoARQnhscKe4VO2zNvUsbvxkGS4UFqOgyIlH/rdO9+T4cU2abLTGCxrjwKjRauC3/tA5vDtnD4ZP2ehRh65XheRqYDvOj/Fm1Py+8Qh6vrsIP6/zHukrb2j5RU7dC1GxlwujVsSv9fsHf77a4zXl086ek9m6DRuN3MD2n8rBI/9bh61H1C8U3r7DldZ3jSmi+nmVq0qRUyDLYJ13aauwAKDDm/Pd/3adA9JqrCFfrjZ0oTRyeTuZlefucSK9OTvswN6T2e4BELVG8Sx2CsOTpurd/JVVdXpVG/7Gu0XFAiN/3oQOb8zHXElGzfX9j/+wQfaA8O6c3e5/6/0GI7FEYbFT89qWpziX5eep5/LK9UxeftDrMaiXGTUcHF+kl7XQ+p0HT5+X7UC15YwGZt+tTPUox68bfJ8NPT3r0gNreIjn7TlEEcFIiyeEQO/3F6P/x8s0axSUir1c8KQ95dTuZcUmtcMpjSsqgFHLwEjHYHh71m7Ze9l5RT53X5V3o9bpBXCxLMcyLuDQmUs3NGlbEuXFRC8oMnPMu0W70/HvnzbjwOnzeO6XLV6XVd60leUGPG9m0qBFmebVCgI0X1e5YCo3xfRNx3RHtTSSgXn423WYs+MkBoxfpvq+txuMkW6HB06dxzPTNssuiL48dbuWFUJgw+Fzhofk1+JqAyNtC3Pk3AXc85Vn10tfHc24gI5vzkeXtxYAkKetHXY7+nywBEO+XI09J7MxWCNr5RTGR6su0Bjm/53Zu7Bod7rvw6f7GcEUOQX+2HQMwKW5zoCSbaz2VCvNwJjRI+10ToFs0kupPEWWV68NzIr9Z2TdzUf/5Tkar2w6ApMbivhaheTy+8ajskEJXeWStpvTGohQ6ZXp27H64NlSV89Lr+9qD7jeBjWVXlvSs9XbhKVn58muod6CSWnzCbXM/78+XYEe7y5y/+3LTy8qdpoyOOcVFsB4toGRRvu7lF3UbL6PgSG9keoGMKKkW3fncQvQ/Z1F7mhWerNSfr/eOmtfnMkWKElD77w4JHZBUUmreb1xLaRWHTBeLaOcH8bIGA7S2FD5O1UDEpv37IYRenWxRtrApJ7Rzp4A5qT4f1l/RLN7rR7Xsn9sOopBE1fgXxe7mfvrUgZGXgYjM4XrXdCXX8wyuNYl23aSz173wRKczlH/vpLzWrcoABRVSJJ7wYSF+3HfN2s91mNDyW9I1ciY+TsCrdaTb2GxQI5K93H5MvoNuktDOQGubDZqjd973EubLqUileEstHy36pDX8ZOW7DmlO32B0WdQ1zAbt0rOF6PVmABw7nyBTxWKH8zd47EvpcGj2jd7BDAX/9xxLEs2j5Ha5t2XnoMOb8zHwAmXhnPwFsAUOb3fyzYezpD97cu9cvqmY2j7mnqXel/4FcB8++23+Pvvv91/P/fcc4iPj0fnzp1x6NChUhcqUEoyMPIdIQ1gqihaZNttNp8bgkovkGpZCFl5nPKd7poTRXqz8ghgdNrAhEieRB//YQOu/6ikcd6U1Yfw9qzduPET9ayBGqMRcl5hMe5VDIRk5LPSJzFluw2tFLQvAYw/txZhYPV6XeJ96Sk2d8dJrNivPkeUNHNS5HQa/j2ubTdtXUkKe9eJbAwcvwy7T2TjoW/XeR1NWY0rqCvtYHr/W5m
KCZJ2HYD3qkOjp16xUxh+qr8gCQ7Uuki/pRiG3mYDvlh6QPakKeXvOENa3eKLnE7dqkJfgiZ/MgL5iuuW0dmojSp2ej5Iannlj2142EvW9N6v1+iO5WW0YX6xEB7nbkGRE6+rNFpXuwRUrBBm6HtcPpq/F1NWH8YcSQcP6XVTbT9rZWCGT9ngHqdLy4wtJRm/ncezkHY2F9e8vRCfS2ZLV5Le+4xcd33pAGFGNTfgZwDz5ptvIjIyEgCwcuVKTJgwAW+//TYSExPx73//25SCBUJJGxj5a9IgI0xR52jzIwMjPQH0go1iIWQ7/bOLB5N05yoDqAuFxZi/8yQ+mb/X0GBxLnv8mJTM6I1YbRyECwWen/XWUPeNf3bi/y6OdQDIq/ZcbLChoNh42lFtHXqMnIR6AYzRDMyp7Hw8/L91uOuL1apPttJxRQqLhdcUrbRERcUlA7sdOXdpv2w+kom+Hy7BvJ0ncctE3zIyrmPQ16oL5fH53+nb8c7s3Thy7tLvUq5ReuxvNZgtPJGZh1ydrIXLu3P2eJ35eIGi67HNBrz5j3Z7Mn9HHNbaloXFAlkXdDIwOse1NDAbv2CflyXVSavZ0rPyZLOj+1v9I23s7HQKn2526xRDIRw6c151bifZ9ylGDDbC6RSqYw99uewghk3ZYGgdvtqXnoNHvrvUm1N6z1C7joTYbbLf5trX2QaqiaXHxcRF+1THLZKSHqNGHkh9CZaNjvGkx68AJi0tDfXr1wcA/PHHH7j11lvxyCOPYOzYsVi6tPRzoASKUwiPp0hpyi47T34Q+JeBMd6IV1ml9dfmY8jMLVQMcuUZwDz47Tq8N3cPFqtUh2hdYOIiL3WXM/qL9AKwqIsNJ9UaOuplnwDPgObr5Qfxn2mbAWhH6L5kN7QCiembjmp+Rrr9SrpZem4taTu6TWkZeOjbtbLxGIwOOidtAHdOpdGddOA7X7IfhcVO3PbpCt0LlFGuunDtuXOEaqPBwmL1PjrSY0P5gOBP9dv9k9fi+V+NjzPzxt879RcySG08IiO09mdhsVO3YaQvx8J7c/foL6Rcv2QffLsyFbsljafNaL9S5PR8kDTqZFYeer63GO3fnOc1eJQeq0YfQmdtO4H35+5WfW9t6qUgyulUP96NfI/ySqk8Q6TXN7VNbbfb5GMZXVyhx4SVOld5IxMEu87F9Kw8LJRkyLUCwrSzF/Df6dtKGkfruGDwgUOPsb6HCtHR0Thz5gxq1qyJOXPmYOTIkQCAiIgIXLgQ+FEJ/aWssgHkbTWU3ZptAIp9vKDKMjC6PV6Ex00hv7jYaxuYPEl51cYS0TpsYyMuBTBGe7Mo68KVXGNvqNXnG+lJoPYU5pox2tdeSGq0gqCnp25Cgyoxqu+5Ts7U0+fR491FGNAqBZ8MbuN+z263yRr23XyxPvnA6fNY8J8eAGC4Zb50357M8mx0J12PLxf8wmIha5xYWkUXU/5rD3q2iVp14Azm7zyJLy6O+yH/nHqhpQGvMkAs9DKGjllqJETiwKmcUrcVKQ2tDExRsVN3sjtv4wwBpW8DIy2bcuh6f+MX5eBxynPfZvO+biEEbDYbjpy74K6Ccg2upia/yOmeBNdotsfoYItPTt2If7Z6fndJGxpDq3BTHgd6D70hdpvHtXH8gr2q1w8l6XERH6lf3eXaZ8rOCkVOgTCVAOiNf0oeDP63Ur8ZiXLma3/5lYHp06cPHnroITz00EPYs2cPbrjhBgDA9u3bUbt2bVMKFgglGRjtRrzKwNJm830wNJ8a8To9exxsOJThNfPx9XLPG4WMSnGVbQTO6aRfXfQyMJFhJYePstdCyWtqvZDkhVPOjiulFnycLyjCwVP60b17HV62/9509S65rt3x7cqSsv21uaTeeOPhc2j26mx8teygahWStJGn0QBGeuwpG0EDQK4iLeztqUpaJH+yGMczL2iOfFvkdGLGlmN47lfPnmh3fr5KNXgpKYf6BV26/ZSnl1l1496sOnAWPd9b7PX4cwlUcbSuK4UGustLA8MJC/ehy7gFOGzi3DvS9StnBPdnYk/l53xpA+Mux8UbnvQG763xrvTc97fMWv7eol4F6c8gmMqsv96N3aEIYIQoqRZVOnDqPF75Y5tmlio2Uj934WqDowyO1O5bvjI6UacevzIwEyZMwMsvv4y0tDT8+uuvqFSpEgBg/fr1GDx4sCkFCwQhVDIwXjMFNp8PfukxbKQKSXkhe0xn/hJpTym1Jy210hYUOT0a5mlJz8rDJwv2ocjpRHq2954FESElVUhqv1MtgJEe9HmFxZqTQrrKrOStwZnqOrzcfZQXDhfXRUgZpNzx2SoUFDvx2owdsuo4F9cv86VroPQkVuvFIa0nFjD+dOdrw9JjGRfQedwCVIuPxPIXenq8fyo7H1PX+D7iZ2GxUzXokj68yQczFKZPg+DNBkUvCjXexskoDa3sVFGxU7f61bWN8gqL8c7skiqPSYv3Y+ygFqaU7XhmHp6Zthn3XF3Lo2ozO68IBUVOj/aCepRVOsoboN6xfT6/CNHhIYYb8QcygNFi5KauvGYrH3amrPY+Z5jDZpM9GGud60/8UNJmZ2PaOcx4slvJd0veNzTTtMa6C51OzNminf0ywmibNT1+BTDx8fEYP368x+tjxowpdYECSa0btbeLhT8ZGCm9DIZTJaDyhdpJr9ZmI7+oGHk6J/7TUzcizGHH6oNnDbedCA8tuYip/U61rMy3Kw+he6PK6Nk4SdaQU42vg1mp8XYz1BobxXURUsaG0mBIrf7YtdnTDaRyXaTHntpAgbLsoOKnOJ0Cd2qMi2K0DU5RsRPPTNuMFftL2nEczbjgTtVLHcu4gK7tE3VHCVXKyC1E2lnPJ0BpY0L5+CDmdEE3U8AyMJpVSEJ3/322+ADevT0Gb0kGqzRzQsHjmXn4Zf0R/LL+iMdcOcOmbED1ipFY9rxnoKvl62UHZY2ji1WqzvW4soP+BDBmjzujRe1avjktA+GhdjRKilFtK6hXXaikzMDoZfm3Hb00B5H06/XuTYD2vS/jfCGeds1x5SezGvH6FcDMmjUL0dHR6Nq1K4CSjMwXX3yBpk2bYsKECahYsaIphTObWsZji05bgdIMGqVsPa/07YpUrw1KjXhvzm6cys7H2EEtYLPZVLs55hc5dVN20y8OquULV1uQsyoN2rQCkAcmr0PquP5IO+f9gmtGitHbvtOqLnFd7KTjP0gDjcRo7brj1QfOINSHJ1PpNtqkMsO3tIzFQp7LOJGVhzUqbVIA49Uws7afcA+m5v7OgmKPYfnP5Rbq9oxR01syLoWU1tD0RU5nqcf5MVvgMjDaVUh6DzVzdpxEy9FzZK+lnct1t9EqzcSISmpj/RzROXeV/k9R1VMsjHd7d3FVZRltAyfNhJqVgTl3vgAxEdq3zIW7PSfPdI250qtxFXx1X3uP930dydahaMTrSwAkPS6MTJSqdYwu3Vf6qUrMqkLyqw3Ms88+i6yskshu69at+M9//oMbbrgBBw8edDfoLWtqPXKU1KqQvDmVnY8P5vneit+o1QfPuq
ek94cA8MmCfZi6Ns3dU0Dt9+UX6qel/bH5SCa+W3UIw6ds9HhP7/uO6GR5zDjAvd0MtaqQXJtPeg9wjaUDlFQtad0ghk3Z6NN21vuN0vf1nljVxjTxpqjYiWV7PcefUc695aI1kJs/tAKYYmfZViEZEajqB9cAk0pFxU7ZEO5GHTh1Hu3fmIddJ7I01+0Prd9fmjYQ/rSByfExA5MfgCqkNq/Nxe8btR84f/RSzTp/VzoW7DrpkV3TG7tFSZmB8XcofyOjc2tVT/nzMKMU1G7UBw8eRNOmTQEAv/76K2688Ua8+eabmDBhAmbOnGlKwXw19Gv9Yc3VulFbmfQAc1XZqN0ANh/JwLT1vs/LYcQrf6hPWDZx0X6vnzujMbqm636Wa0LA5S0ToXUCuybmlAYE0m6BDkUvJKlip9OnjJ3ebzwvqSd2OoWsgVNpb/Q3frJMtd3NaY0A5qROeyhfaA2MVuwUsjFIyoPSVCF789Uy9YbPRcVCNoS7L86cL0C/D8tmGAut7dKiWpzuZ/0JYFxtJvTmZHNxnfuTlx/E96vMG1z1pd+NT9Co9MBk79OYGOGw22TZJX8DGCPjxmhVZZrRfsWsNjB+BTBhYWHIzS2JHOfNm4frrrsOAJCQkODOzJRHpW1zUt5InzLemb0LxzMvqN7YnvzRM0MSbFpPUhcKizHy502mjBPg7Sav9f2uul2tLPzxzDycUOkxBJQERcoRib2ZoDPImPQpxYzxUqR2nchWHXtGOTy4i5GxHYyS/hSPDEw5q0Iqq/YTLgfPnPdp33qr0gik9RrV432aJul+1peRk12W7j2NLUcysFqj2lSpoMiJU9n5GP3XDlOHFPCnp5GZbJA34vUlgJE+XBmpQtK6Vyp7pvljz0nfB1ZV41cA07VrV4wcORKvvfYa1qxZg/79+5cUas8eVK9e3ZSCBYJTCNWxU6T+90CHMipN6UkP5OX7zuDbFYdMmegt0D6ct8drhua3DUdNSTGqNSR20cvE+TMpptHGsy5agZCLMoCRtoJRjgWity41ajeR1//eic0q7XHMvG7Lex5der2oHFYhBSoDo+VUdr7hEYgBID4qNCjj2WhNrBkZqj8ruD8ZmMkrUnHT+OX6C16UX+Q0PCOzL4xO7hgoTiFk1d9GxvTKLSjCN8sPytro5GhUoUsVOp2qVYUXCkv/cGlGJw3Az0a848ePxxNPPIFffvkFkyZNQrVq1QAAM2fORL9+/UwpmC+Mdhs9cu4CntWZVdk1+JEVKNtb7D+VUybjaJTWh/P26i5j1gGuRW8wML3pAsqCRyNeSZHN2M9ag8Zt8eEG6g9p7KgctLG8Hb+lHe/CH1ozCasJddgREx6CLAM3pLJgZD4gfxrx+ir19Hl8FID2iyEOG0xqvuEXIQRmSMahMZKBafrf2R6vGWkDM3VNGn5Y5dmt24wMjFn8CmBq1qyJGTNmeLz+wQcflLpA/vh1g3ntO0Id5ty4KseEu+f1CRRlABMWYi/zJ0alt25t4dOw7lrMaqWuxduTfrHTsytxMMi7Uct7IZmRqdBqLBoe4CBeevNSTptR3nohBeN88qUheJjDjtjI0HITwDRMitZdRm0EcrO9+uf2gKw3xJ/UrImcAjjgx7QlSkYCGK3gyKz2K2bwuwK1uLgYf/zxB3buLBk+uFmzZrjpppvgcOinEM322oydsIdHmbIuszIwNROiAh7AKLsU5uQVBbWNz72daqFHoyqmrMusVupavI4Rk1eEj+frZ4kCTVaFpHhi/W1D6brfA57bICLUjrxCZ8BvLvIA5tK/Nx4+h3fnqM9FU1Z6N0nCvJ2XegYG+hxW463qU6l7o8pYuuc0gOBP4RIeYkdMhOcgj0oZuYX416cry6BE5gt2ht4phCnZaSMBjJbylIHxa2/s27cPTZo0wb333ovffvsNv/32G+6++240a9YM+/d7731S3hmZ5MqIiNDAH+jKwYgy/GyRbpbwELtp289II7PScA3epsbbCMFmq5EQ6f63MvsnnUrg0JlcWXD64xrvI3YaoTx+XCMMB7qnnjQ+kmZchk/Z6NPNOxCiwx3o1iDR/fcvOr33KhmoMvGVLxmYEb0alsm1xoiqcRGGMhTL93t237eKskjMNkuJ1XzPKYShQej0GGkDo8WMHqJm8evIf+qpp1CvXj2kpaVhw4YN2LBhAw4fPow6dergqaeeMruMZcqsA9RIY7bSUkbiu08EtwdYRKgDoXZzLqa+jlBpphNZZfc0G2q3u2+YD3atK3sv0BcK5Y3SNcFboBuCSwOx8jasQef6ibK/9QZOC0RVo9EApkOdBESGORBi0jlXWpFhIQgxUAWf6+MTfN3ECv4WyXSBzgwDJftVi9NpTvvA0mRZlXO0qfn07qvQvWFlv7/DKL+O/MWLF+Ptt99GQsKlDV2pUiWMGzcOixerj755pQkvgwBGWRcZ/KfXEDhMakOkNdBcWQh0+xup8wVF+GRwG0wcchVG9mkoey/QPTaVx0tZZWCk012UxezTRn0yuA3+dVV1nxqYBuKJXG/aDxdXtsOsrGdpRYU5DJXlJ4NjubhEBPhaqja3mZayCGCiwrR/r1OIMr0+qTGyDUIddtzQIjngZfErgAkPD0d2tudsvjk5OQgLMz+lakVlkYHxtS5SOUS82aIjQjxSyJ3qVvJrXUa6BwZKoHtASRUVC8RHheGGFlV9niCvtJQ9fuKiXAFMoNvAlPy/2Cl8vpnpCfdzG1aNi8CAVimw220+BY7+hA7eblCA8bGqQi62xzCS9SgLkSZmYKUCXUVWNS5Cd5myyCa4eLt3FAuBPB8mjA2E8wYa8YaF2MukJ6dfR8aNN96IRx55BKtXr4YQAkIIrFq1Co899hhuuukms8tousbJMZrvhYeYE3j4etJVi4/UX0jB18n16gQ4FRsd7hnAxEWGYvaIa3xel6tXhb83pNIoy0yW0bldyoI7A6OoQjK754XrBr033fMhyIjtY/rim/vbo6uiugfw/2lderH1JQPjz0W6imKCRC1qPSKl7SNCL+4XtTIMalPN53KVVkSo3bQMrFSkTsBXWskGApiyvA5Fhmk/aOYVFgc8M6snI1f/4TLUYV57SG/82isff/wx6tWrh06dOiEiIgIRERHo3Lkz6tevjw8//NDkIprPW/VOvcoVULuSZ48mXy/iET4GQonRYVjxgvEZXv3x4g1NArr+mIgQj4O2WAjdJ0412RfbwFQIcNZIjVobhDY14wPyXcpuwx/c0QpXmfRdXer7lv1yBd3K3knrXu5tWpmAS1VIvjQklN5AKoSH4NpGVVRvKv72EpEmDnxJQPnzkFklRv2GqXyIeXVAM4+RditIbm6uzIvatSkmIgS9m5jTI9AbafYiItQRkG7Ggc5mG6lCCnSTgGsbXcrwePu9/2w9Yer3BirIKNcBTHx8PKZPn449e/bgl19+wS+//II9e/bg999/R3x8vMlFNF+kl+yIzWbDe7e38njd1yc7X58aHHYbUuIj8endbVFLJYACgJtapfi0TqU2NeOx9qXepVqHNxXCQjwaNRY7hV9Bi
Kt7rT/BT2mpBTCBuogqq3FuaVMdvz3RxWuW0ChXo1yjXI1BlUFVfFQYfn28c6nL4+IKEIzMxwIAX97bDmoxhV3lAunvOE7SEVZH9Gpg+HNa3/bGLc3RKEl9H1ZWycB0qJ3gESSH2G1Q/vDQEJvk/ZL9pXajsNttmjeQ4dfW9zqrui+k3xHlY4PiV25simf7NtJdThk81K1sbiZZmcFSq8oNdAZGGtSW5TVvxpNdVTOZRjzl5TwJL6MqJMN3Fr1ZphcuXOj+9/vvv+9/icqAXjDiUDkJK4Q7fOo772vAcyyjZCj4fs2T0a95MjqPnY9jimkP6leJRkx4iOqFP8xhd98MK0aF4pxKmi/UYfepwZqvolXmZSlyClQI9/+ErOAlnRooam1gyqJNk1SCCd1zY33c166n58+WHPB4z8zeNq4xbYx0lR/ZpyF6N02SNfx1URvWvXalCqqTVOqRXmw710/Eo9fUVd0OSlrbZUjHWmieEoeBEzyHv1dri5ZQIcwjexHisHsEbmGSDJM7A6MStDlsNtVg4uq6CXimbyNMW29O2yNpmSN9zMDERYYaGpDR12y2r5QlnvJQR/z7501IO3upN6IvAcywa+thXeo5w/M2ASVTQrgEssqsesVI5BU63ZO2RoY68OGdrTHmrx34a/Mxn9YV7eW6XlYZGMN3h40bjU0IWB5GMNWj95SgdhKW3EiND2rla8SunO1X+nT53xub4kRWHh65pq7qLLbNq8Xi6LkLKMgtuRgkVAhTDWAcdptfc/wYFRPuecO8vnmy7KLrq9IEP/5SawMT4eWiEhXmML13ghkBk/SiaESIzn76d++G+MCE4dmdF9vN7T5hvA2MWrWO2mncMCna57ZhgGdVkNHzV+1y52p/onUBD1fJAAsIj+2vdh2SZgdc1zG1J12H3aYa2Hj7jD8qVghDXqETJ7LycEOLqqpZMS2VosNw9rz+NTUyTLG9AtAGRPpg2K52ApY+1xO3TFzuntzUlwb2j3avh15NcjBo4grDn5E+bARytN+ECmG4UFDsDmDCQuxIjA7HG7c09yOA0b6+hDq0M4BmMhzASDMs1uf7PDi+RsXelrfZ9LvISnf+1XUroenFxntqB/ekIW1lT3oJFcKw/5T67MGBDDAjlBcaAHe0q1Gq71SrfmpaNRY7jgduzBtfMzAVo8KQW+Df2DFDO9VSfd2Mkz9eIwOjdfzpXTif7FlfM4CpEhNueA6f9+fswYzNxw1NAeIqkVrD2shQz2PDZrNh7KAWWLr3FPaczMG+dGOz3iq396kc7YkAH+hSB18vP3jx+zzfd1VBawYwGjdDzwyMzSPzFCbJRriWV7te2WzqNxCHl8/4IykmApPv74C0s7loXi3Op88mVgjHfpv+/lGeewLAomd6oMe7izQ/M+zaenioa108+t16rEnVyYTYgEFXVcO3Kw/J2iFJH7x86dwR5rD7/AArzYwHekJTaabcFZj503ssoYK3AMauO/GlGdP2lI8RkMqJjwe3AaB+sfS1KkMr7fnWrS1wV4eaup+XXmCkFyK1i1KNhChZu4VAd5fWoqyeqle5guEnMq3RJ9XqgwNdR5ynkk2JCLVrdqX0tarGpWOdBLzUv6nqe6YEMBoZGK1jU+2J/WlJPbe3fdm+dgJ+e8JYO5mtRzNVgxe1tiGu06B+5ZI5dqRFSI7zXN4pBAZ3qImJQ9r69CSrvKF7m0JgQKuq7n9HaQRRgHZAqPY0L4Tn9g+x2zwetSJCPKuQ1Djs6jcl103DrOeY5LgIxEWG+hy8AEBCdJihQErt4aG2lx6Vm/7bB8/2bYyKFcJQJVa/x5fdZsOoG5rgvzc2xdRHrna/Lt1P0n/rnZv+BDDS32hkOgZ/2SA/Ll2/y9frTc2EKHRtoN213FsbrJtapWD/mzdgwyt9fPpO1e8p9RosSO3p86M7W7sbyap1bY2SVGVUjgnH2pd6419tq2t+h1YbGLvNphogKV+S7ntpZlnroJBmDfSqAjrU1h7p0R/jBrXA4md7eDyl+JJ5+enRTh6vRYTaVav71ANMB3565GrM/fc1+ORiIKrmjVua65ZFKwPz3u2tVAOtWJW2P0Z0qZ+omZr2JRWvJT5KvR2NVnZQ7Yb7eI96sr8fvaauxzJAyc30qpoVfSyhnLeqxs/vbYf+Lariz+Fd3a8lx3kOPSA9NErTHdrbZ+02Gz66szVqJkThwztbay+nmYHx3P5O4Vm1rXbsS2/Irh5XaqeZw2ZT7dLsOidNy8DE6ndB1lKpQpihG6fyHFFrDyUlPe6NVF/bUHK9fqBrHdRIuNSBQnofkAYknetp9+4Lsdtgt9t8Ho4jNMSOZ65riFuvqo7O9Srh7Vtbel2+NB06pPvetX18zYb8/Ggnr1lpu037+LfZSu5jZtQGXJEBjBrpxUItepZmYH55rBMqx4R7jJwqpTUOjN1mMzRUu/TE1srGSEkHv5IejBVVnsKlTxlmqFclGrUqeT4RSUu6alQvr4GF2jYPD3GodvOQbr4Nr/TBsuevxfpX+qBj3UpokBTj9aI4qI120OmiNpBbZKgDidHheEvlwuJvBsZbOfWyB9c1TdJdv1aD7QitKgyVi73yRvds30Z4rp9nzxEzMkZqF1HXRa5OYgVMGHKV7Ek/WeXmKQ08fJnYVFl+b0MO2G02DGxdDUueuxZNqmrPW+NLBkZteYfDc1A9adDmWl7tW+x2m+r3u14xq3mCWtZsysMdDX02ItRhqBzKG911zbRHeP3vjfKMppG2K1r30cNnct3/lh6b3jpCuL7P10EpQ+02DO/ZAO/d3go2mw3XNvbeBf5WxcPziN4Ge87Z5G2jXNddX4OJiFDvjXQrRoVp9tAzcxybyyaAGdhaPyK9++qaml0ypTu1WUos7utcW/a+tOGdq4rG2w7Uik4ddpvHzMJqtIIWI2lx6VgYamNO2O02jOzTEDe1SjElG6NVJuk5kRwXgWu8jGapNn6H0ylUTwLpE1hChTBUrxgly3h520T+1ru6unKqBVqxfqZ8vT0F10zwPrv6gFYp+PLedl6X0axC8iEDo3wpxGFH21JmWrSEOuw+jTejFsBIzyzlOeyN8nfWrxKNpc9dq7qs0Wu9b21ghEfGJNRuh1BcrVIk4654GzROqxeSK6jzduw1qRpruBq6gsqxlBitX23Tv2VJNZz0xtmqRrzqstLtOHZQC48Hx2sbVcYng9tg5aieeKBrHdl7pRnd+uUbmyDEbsPEIVfJtpe3AMZ1HfO1Ckl5R4iPCpXtayVlNbovGR9pL1vp9v/gDs/hQ7R462W7+NkeiAh1BKKttYfLIoBZ8mwPvKzRlkDq9ZtboLfGk6v0xmaz2TD6pmaaN3dXw1JvFzLpSSdNw9vtNjgNPBlqZWCMVC1ESbJFWnXAT/VqgI8HtzEeuXuhdaG2KcIPaVD36d1t9dfrsKleaPWerAu9ZLiUZU0yUEcupRZo+TJ7sLws2u891r0eBrZO0cxahTrUqwiktMaB0Qo41QMY7YagUt62uVGhDjt+erQTVr/Yy9DyiTGev08a3N59dS38Jaly
8kbtCVTrIm0026S1nFYbGGWbFYfKtAZVJRkY1/JqZbdr9EJyVY9qXbseuaYuZj7dzXAAoxYM61VPNakai/EXj2vpNuqjMfCedDPe2b6Gx36JCg/BgFYpsm3jIq1Cql0pChWjQj3m6FFep1xubJmCXa/1ww0tqsrK6a13n78ZGOU1JNRhx7z/dEcvjUyM8gHZ24PZrVddytbYAGgtqpZF1+ItQHOtR6+qzwyXRQCTEB3uU1ddtQ2r9rQivQBIP+Laed5aWUurFepdbIDo+oyRuWakFwFpOZQ3mfduK4maXans1jXiZU9FQzqW9HJpqpHqlvby8Rbxm0F6khlJ70eGOlQvtHof9TYZofJi79o+elxBp/TC5LoQ9jFQnaNGbbwhlwrhIfjozjbo11w9XR5i12/lr3UR3XPSs+eHw26DQ+UcUvsKtSC6UGdKBLUndaXQEDtCHXbD7SoqqrTxkZ6nNpsNLarH4aGLT+XeBgdUCza02goZbT+inYHxXG9yXITH8mo3JenIt3rtdNS+3xXAaP0G16taZb/76pqIkVwz1BqEewvwosIc+PyetpK2OJfeq5EQpRocNK1aUm2YFBuuGqx5u5lKz4H7OtfGhlf6oFmKvMGxtw44rmpV6W/y1sjWdR/ydegI1XaXYSGaI0srMzDeMvNP9arv/rer/YkaX9pFGalyKospDy6LAAbwnk5VUtuuageKtE2A9GLh2nneTtTGyTF4+18t8dMjV8suRA67scaF0puE9EalvOm56kK/uLctHuteD5/d0xZRkgtMu9oVseiZHpojqUrHWfE2sqI/lMe49KA3Uo0WGebwOKluapWim8FSjiSrJM06GZ0Iz/WV0gviyD6NsPGVPrjZz3lnjHy1VpASGqI/UJTW23d19OwF57CpP4eqXajUyqTX9fNHjXZXD3er4x4V1ki7Him1DInaYfVcv8b49oEOmDjkKvdryvmI1Bona7UVMtp+RGu8KWVg2a1BIp7t28gjYHGo9EKS3uBdY3moFcdhV5+qwTXGkebNSuXl129ujjCHHc/1a4TXBjaXVdOoBXneAuupj1wtaygrLUdsRCiWPd8TS56VV91VCHdg+5i+WPqcfKoVV5fnAS21mw/IexI5YLN5dk03MiWntJzeekG6FtPrSKEsm1YWV1mFeKkM8gyZt+9TvqcdwGiuAmte6oVHVM6RZc9fq9l8Q6vsZrpsApjSDv6j9rQTKlmn+kBa6t/pesK4vV0NdKxbSXbA2Aw24pWuWhbMaOyx6hWj8ML1jZEUGyF7IgkLsaN2YgXNp0npxGGBnrZeqkZF/ckrI0MdaFld/rT04R2tdYOfAp3te2PLS91gjT4puYJO6UVHCIGKpRgx10hVhNYxFmpXr14z8tm7OtTEy/3ljVRLqiuMXXDUvlc5JYJSy+rxaKHS1fbaRlUw48lu+PCO1qoXSF87Kqg9HISFlHSBlx7f0x671OutesVI1Yah0gt/a0n7DKMNHrVu5MqMwad3t0V8VJjHjSbUYfd42pJ+t7eu3nabTfUG4rpRqg2mB6hXpwy6qhp2vtYPT/SoD5vNJtuOap0VvGU0lOM6ydqWRIUiOjzEY3JFu82GCuEhHoHfP091w29PdPba4FX6YKpVzWJkd0rPVW+9b46cMz4eVESIHf1bVEVMRAgGtlZ/CNI6JZXb3dv9T3ofs8G/DExcZKjq+9UrRuHDO1qrfkar7GYORXbZBDARoQ7c3q66311a1SJY6cFfSeVG5e3pWLZuyQETHR6iepFVNkiUrtlbBkb1+yUnqt4NWnoxVbaXMdLluGQd6ie02oX+l8c64d3bWqGNgYagPRpVxj1Xy6t47Hb1buhSetUZ0idjo4Gv66dIt6daIFUtPhLzRnY3tE4j+1KLtwzMdU2T8M6/WmpekEIcNtSrEi17zWG3Ge61o1ZsV9bL2+ZUW394qB3JcRG4uU01vydilPL2C6SbQ9p7pqqXqtOr6yagalyEbM4e5W9sV0v9WNbKCivPSdd+Uh6LysawDZPk++y0a7A9la9x2G14rHs93NQqBTOevNQO6MLFMY70xlKSBrPKYeGl1wy1m7m3wFzZtka6rOsaq/y81vriokJ1u+4rH+b8Jb0GmzXUf0SoA+PvaoP1L/fxeeoQ5TYxmoEpGeDQ9+3gbaA7m82mWo3ny6So/rpsAhgAePtfrTDJQONQNWo3MumB+mDXOujXLFkWbWrdIJQXKOmNvHrFSNUL+U+Pandtlj5J+9oLSS+AqRgVBput5OLevnYCOtW9NMaBkQH3UuIiPC6sLmolbVc7wev4OVJP9myAEIfdPTS7i14Vkl6ELw0wG2hMuCeVHBvhDqSk21MtkxbqsKF+FfXtAWiP7+OrkpuK+nuf39sOt7WroRlMhNhtHhcku029+7ga9Ua8JQHMylG98M197VVn71bNjji83wy0Glhq8RbcJsdGYGDrFNzRroYsBe/tZ//48NVY8ty1shu+8ryf8vDVqjdHrXNVmRlx7Qrp8nZbyQz10mW/faADAOD2diXnz7BrL7VtULLbbKheMQofD24j63qeV1QSwEiHhdgoGVBM7dxR/g5p9kYta+utCkkZwOQXXao6cWUzlZutNGPWSPeL6yarPESMrF1aBF/HeNESGVZSpeUtsNKaCFetvZSrl2e1i5MCuyirybWqrr1tZ7vd5vPs9mzE6wcj3d3UtqvahU+apouLCsWn97SVtXfQCkqVJ3ymZF6iqnGRqjcKZQtw6RLSg1X6mx5SdBl0kQYwer2WHHYbto3ui62j+yLUYcdQSddTI6nycbe21FyuNKnCGgmR7oujcnvp3Wf/1ba61wab0pRqfFQo1ryk3etlcIcaWDmqJypdfBqWbs+UeM9qsLqVtYMXAOgkGQTL3wtzvcoV0CwlVnf0U631qzXwdBjIbLmXVW0DU/LZpNgIXNu4iup3a1XveKO3iT6/p60s6Pb2E2w2Gz66sw3e+pd8LB9vmSebzeaRgVCbvbh9bc9MgHS5f/duiOTYCPzvgQ4qg1aWLCdtRO0UFydzlCzr6mUzblBLLH+hp2YDb0D7vHdlYKQPZ9JqUNenpEVUnt/S/a8WwHi75igzNhmSa2OMu3ensQyMEdIHDs1BIw2ch9IymDEEPmBskkqtHk8eGRi7HR/c3gpP9WqAqY9cLRuGQfqwYoP2Q5teYqZbg8r4/sGOWDXK83qpdgaxG7UfpDv2h4c6olsDY1OFqzW4k0baapkMrScNZTrvbO6leVXCQuw+jRCq/J4xNzVDs5RYvDawGV6+Ub3ruK8t4CuEh0iejHwrm7eMUGkCGFlVjeIGo1fVERMRilkjrtF8X7p/HHab6lg5Lk6n5wV18bM9MGtEN1na99fHO+OWNtUwblALAMBfw7vi+X6NZZ+rmRCFp3tdGsPCaANipTn/7o5Qhx3hIQ6v3YQ1q5Dsds+nMrvdcAZGLWBVNuJVOyxUq5BKkdYHSgY1kzYQ9vXcAow9KcrbsXm+3yjJs5ef9Nzo1jARq17shWsaVvYIwN0BjMqK1Upmt9tkc/aoZamU63KVpfb
FByWtqVFcHzO6GVWrkDSOu3dva+UR3EgnnZUeVw93u/RwVppMpawR78Vsn0cTXh/bwJhR1QloD3YqpdbTDlDZvw4bKkWXDK5aIyFKlrmTnuuVosPwYNc6eOSauvhJ0bBe63ohPda6Nkj0aKOkpZrKA57ZgjNpTgBJj634qFD0bFwFS/eeli0jPYDH39UGh87kuidLlJJeXNUOWq0nA2WEPuiqavh8yQEMuNh41EhbA+kFRPo9NRKi8PdT3bx+tjR1vb5oVSMeHeqoj5UD+J7+lxpz06X2N8pu0aVNTUpPaL0yqjWEVBsvoW2timgraQvRonocWlSPw1uzdgEoGV9j1PWNZZML+puBkWUDvOxqrdjSbvc8dhtUiTY0PpHy+7WoBTlqq9fNwBgq0SX+HBpGfrbefGT/7tMAB0/nYIBkiHfpzVpaLuVvdi2WLpmR3lVNZITaYaS8XE0f3gUTF+3Hfy4OAhelM8u7tx4k0nfUshFaGRi1qmOt3msPdauLL5YeBFC6CWi15jOSMjL+ifRcNSuACTfQaUJtJHVALQOjnbUKsdvw2T1t8e2KVIy5qTkiQh2qI02rbeWNr/TRrMbS07xaHMbc1Ayv/rld93v8ddkFMMoRbPVuEjd66YInPcDULlpaJ5Yym1MlJgLrXurtPrGVAYxexsTXFGqPRpVht8FQQ1lPxr5r3sjumm09qsVH4mjGBa8pbm/qJlZAV0nm7PrmVTF7+0n3ODVGumB749uYQaX6Kje7rWTuD5vN+43QV956tGmtv0pMBM6ev5QVvLFlVbzUvwl+Xqs/OzQgf/q7v0tt/LX5ON69TT6Kp9o3+1OF5Ct/dpeRrI28fYrnr4uJCMU393fQLpfkO7oo5tJxHRPSffLqgGYen9Oitq2VZWyWEocJd13qRq7ViNcV0Hv7WuVYO0q+HNcPd6uLBbvScUf7GrLXZdfx0gQwOlVIj15T16OjgBpvVUj9W1TF31uP+1w2I70+41QyMDOf7uax3ZVZ/0ZJMejdpAoSo0vGzunbLBl9vUzBAHjuy77NkkrVyxIAhnaujUrRYRg+ZaP7NTOrli67AEZZV6127Bt9gvc3vR0W4v2pRJmq/+dp7xkVX+9z8VFh2D6mn183h0YabUc61EnA6ex83NgqBTe0SPbaUHX68C5Yc/CszwO8fXlvO3w0fy/ev11+MxzYOgVJsRFoUrWkbL62bu/VuArOFxTh7osXKrVqr8EdauDHNWker5vVkt51fZF+dWm7/gPex1/RCrDDQuyoUfFSHfn4ize2TvUq4YN5+t8pjc/vuboW/ntjU4/vkt6AZl48vgNRhaTkTxWSkX0sGw3bh91Wq1IUjpy7IBs8LcRhx7N9G+Gd2btly97fpQ7+3nIcQzvXdj/1+nv46T24KccRufQ5/XXrje+hFnBoDZKZHBeBhc/08FyHTpWdUaGyKiTPRryjvMx5JeUtA1OtYknPw+FTNvg0lpbWOENSV9f1zHCrte0sUlwHbDYbvhza3nBZAPm+f7hbHbxkYHR7Ny+HhFkTh6q57AIY6cay2dTnSzHK33FRtAawcnm+X2Pc+fkq999qwYC3RnRG+NvVr05iBfz8aCf34GIu1eIj8bPKjNFqEqPDcUOLqvoLKvRumqQ61YPNZpM1fjVa1eHSrnaCbDoH6cWxanzJ8fHmLS1QNS4S78/dI/usWS3pXV0XvTUG9Yfe+CtKb91a0kanYoUw/P1UV9kx3qFOAn565Go8/+sWpEomslOSljsi1KF6fEpfco0SrbbfStuI14Mfu8v3NjDGCzV/ZHcUOYXHtUQtS1EnsQLWvdzblFl69bIgA1qm4J3Zu92zqz/avS5mbD6O+7uUtD3xtkXa68yfprz8NUqKwQTJIIJGmBDbAwDCDTTiNUKvDUz9KtFe292pqVNZv+qqSkwEVo3qhY2Hz+HxHzYAUA/6zxf4N52JlGyWahMfLMzal2ouuwBGeu47bDb0aZqEJ3rUkw2INur6Jli5fxke615PZQ2X+Pt0qNc48+q6lTDn39fg9b93yhqrSQVwn+vy1q6lPDD6lD3l4Y5YuCsdD3StLXvdZrNh+Qs9kV9Y7J6I0WazoVYlzwkUzTqRXU+lRmYWV5rxZFfc+Mky1fd87dJ5R/tLXeOVQ6oDQMe6lZAcF+E1gJHSGtRLdQ4r1W7U5mZgOqo8sWrp1ywZs7afUB1AT8nfDEyIww61XaRVLaIMXowc6mqr0ut9WLNSFNa/3Ns9LP6o65vghX6N3d/v7XsbJsXgr+FdNecRU/62jwa39pqxVWNkzjcj1LpR+0NaHDN6Id3Wtjqe6mksW5McF6EYPNDzgLpQ4Dnqsq9kAYzO8Ab+rtdsl18AA+mFpqTNwXOK3iBNU2Kx87V+uo2x9AZJAkraD8zYIq//NNLIq2FSDP73gHadeQWdRnaXg/dua4X/TNuMMTc18+lzRgdc61wvEZ3rqfdCU2sh37dZMlrViEf7WhWREh+JH9cc9pj51l/1qpQ8bUnPZaMX6ebV4tCxTgJWHzzr8d5VNeNxz9W1sHjPKRw+ayzo0KN3wZFWW2llKetXicayffLG88rd9tINTXSzDUbbcS159lqsST2Lmw3MSu8y/q42SDt3AXUS9Z+EZfvNhAuyWTdowFgvJDWVFAPlyfeF93OsRXXP4Nf93YrfpjbcgJ7StHuRMtKI1wi7lwxMDZ2Z49W8o2gzpkeaaVULxPo19z3jrSTd5KEqzSDMctk04h07dix+++037Nq1C5GRkejcuTPeeustNGrUSP/DGoxeaIwEGU1TYjH1kau9jtQ5/q6rMGPL37LXzGjb4G3K9mBo5eWC5a9b21ZHv+bJPrdyrxIbgaw8zwkJSysi1IHpw7q4/35AY5wdX0x7rBM2Hc5A/4tVarJJOn04Tt69rRVemb4Nj3STZwtsNhteu7k5Ji8/iNF/7Sh1eUvW6f19acM+rafaZ/o2ghBC1itHWoWUOq6/1+9Y/GwPpJ7J1a2ucKlZKQo1VTJo3oQ47IaCF0CZgTEhgDG4ijCH3edqQl/WHwjSQOiDO1q5s5y+MOupXRpMudvA+FHPKA2olPeOwYoGyIEgfWhQNti9s30NU+4X0k3ua2Z0YOsUTFt/xPAAlmYJagCzePFiDBs2DO3bt0dRURFefPFFXHfdddixYwcqVDA+tbeUdCfYTMhOX123kv5CCmacfOUlgJn772uw8sAZQ6Py+sOfLnqf3n0VXvp9G57ube7kk4HQvnaC7Cbsb++KGglRmOyll4sZ7SZc9I7f2IhQzHiyK8JD7JqZhOjwEIwZKJ+KwpfeY7UqVTDUvbWsyDaJCZvaaPXh/x7sgKenbsT/DdSe1kO1o4K/BXN93qR7jrSxuC/MOpyl2SnlFC++kDfivfTvwR1qGJq4sbS8NdY3qyF8adrAjBnYDJ3qVUJPlXmpjI4v5Y+gBjCzZs2S/T158mRUqVIF69evxzXX+NYgSo1ZaUhfaU2U5gt/nloCoUFSjKHh9stS/Sox+Mlgg+LyRmuSTjPXWxb0RgFW42vj6/JEVk1jws8wGnBeXb
cSVr/Y2+f1G61m1VLan9g4OQZHzl3w6zgBSrKh/ZolIye/SDaqrK+k2ZbStLeSZhGkAUvHOtoPuNc2qoyFu0+5/64aF4HjmXmay3tT4GV+N7PGpSnNWDdRYSEYdJX62EWlPRa9KVdtYDIzMwEACQnqaeP8/Hzk51+agTUrK8tjGWUbmLL21dB2ml0UfRFbTjIwZC6bn1VIepqqNMj1l5nZHCkLxy+mjNkjW5+J21htVYFM2xvx91PdUOR0lmreoE/v8W9eO6l4yTgqrsyJf4MdygOhpc9di61HM9HPy9gqn9x1FZbvO436VaKxcFc67uxQE7O2nUAjPx4IayZoZyPNygCVpgrJmysigHE6nRgxYgS6dOmC5s3V06Vjx47FmDFjvK5HVoUUhARMrya+jX2ipUqMegt/sja77Pg07wBtW6sivri3nWpPKl8FKpsTyAtZoFWOCcegq6rBBhviNEZH9YWZ21gtW1vaAKa0wwc47DY47MHviFAtPhLjBrVATERoqc436fYMddhQIyFKt/FudHiIe/C4ehfnSDM6ka1Sp3qV8NatLVSz4WEmzc0kb8RrXgBz2VYhSQ0bNgzbtm3DsmXq3UUBYNSoURg5cqT776ysLNSoIW9AJd2VwcjAmOWm1imYvumYbHh6sj7lOEVm0hs48PcnOhtaT6DOm9KOoBxs79/e2rR1uWYOTowu/YPKE9fWx5Yjmbi5TQqe/3UrAMCPdr8y1t5Tcnea0H5Puj3NzsYZJR0CQaq6n+2MlOTdqM37jZd9Bmb48OGYMWMGlixZgurVtSPU8PBwhIcbP+GD2RK/tMJDHPj+oY7BLgaZzOzeLEa9OqCp4S7JgTpvrNwGxmwp8ZFY81IvU9q6xUWGuie0dAUwpc/AlLpY5ZY/P016Ew5UFauvPrunLVbuP4NBV1UzZX3yHpKsQtIlhMCTTz6J33//HYsWLUKdOqXvtpok6fJs5QwMXZ6kPePK8uj07buYgSkL3mZBL63SVgGZNQL15SLYbYrUGJnfyBfSs15vMFZfXLbdqIcNG4YpU6Zg+vTpiImJwYkTJwAAcXFxiIz0byru2IhQzBvZ3WsXT6JgCWQVkje+PDUG6rQph/eAy1ZpH3ov5111W9vq+Hj+XnRroD7IpZryGMCYLRAzbgNABRM6tWgJfAd2LyZNmoTMzEz06NEDVatWdf/3008/lWq99atE+zU6IlGgybpRl0EE4+p50bme8fGMmLm0vtKm7V3Hi3JOtMtBjYQobB/TF996GVdJqV2tkp6xZk8+Wp5Is8NmtvO5qXWKbEBLMwW9Culy8NXQdnj4f+vw1q0tg10UKufKOjhY91IfnMrJ92kuGsYv1hcTUbpL+7hBLdG06iHT2leUN74OoJkcF4GVo3q65466HGkN1ldaoQ47PhncBn9tPgbA3DZE5aIRr9X1apKEPa9fXyYjMpK1lXU3/7ioUJ+7/TIDY13jBrXAmtSz7qkr/FWxQpglRrouS1Xj/GvWYBXSs95hYiPeQLJGKS2AwQsZEaxeSL4IVLF+eKgjEqPD8LkJA5SRujs71MT7t7fm9Yh85u88bcHEDAxRGQpWI15fBKqbaJf6iVj7Uu9y0w2ViC6RDWQXwADYzHZEDNOJypD0waY0w6wHUiAfvhi8EJVP0lMzEIP1jbmpGRonx2DkdQ1NWyczMERlyGazYdi19ZCRW4g6ieVntmWp8lq1RUSBIx28LhC9z4Z2ro2hnWubuk4GMERl7Nm+jYNdBK/a107A7xuPBrsYRFSGHHYbfn28E/KLnLJJMMszBjBEJHNH+xoIcdjQjvNwEV1R2l4c78YqGMAQkYzDbsPt7WroL0hEFERsxEtERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5QQ1gFmyZAkGDBiAlJQU2Gw2/PHHH8EsDhEREVlEUAOY8+fPo1WrVpgwYUIwi0FEREQWExLML7/++utx/fXXB7MIREREZEFBDWB8lZ+fj/z8fPffWVlZQSwNERERBYulGvGOHTsWcXFx7v9q1KgR7CIRERFREFgqgBk1ahQyMzPd/6WlpQW7SERERBQElqpCCg8PR3h4eLCLQUREREFmqQwMERERERDkDExOTg727dvn/vvgwYPYtGkTEhISULNmzSCWjIiIiMqzoAYw69atw7XXXuv+e+TIkQCAoUOHYvLkyUEqFREREZV3QQ1gevToASFEMItAREREFsQ2MERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwRERFZDgMYIiIishwGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCynXAQwEyZMQO3atREREYGOHTtizZo1wS4SERERlWNBD2B++uknjBw5Eq+++io2bNiAVq1aoW/fvkhPTw920YiIiKicCnoA8/777+Phhx/G/fffj6ZNm+LTTz9FVFQUvv7662AXjYiIiMqpoAYwBQUFWL9+PXr37u1+zW63o3fv3li5cmUQS0ZERETlWUgwv/z06dMoLi5GUlKS7PWkpCTs2rXLY/n8/Hzk5+e7/87MzAQAZGVlBbagREREZBrXfVsI4fc6g
hrA+Grs2LEYM2aMx+s1atQIQmmIiIioNLKzsxEXF+fXZ4MawCQmJsLhcODkyZOy10+ePInk5GSP5UeNGoWRI0e6/87IyECtWrVw+PBhvzcAlU5WVhZq1KiBtLQ0xMbGBrs4VyTug+DjPgg+7oPg82UfCCGQnZ2NlJQUv78vqAFMWFgY2rZti/nz5+Pmm28GADidTsyfPx/Dhw/3WD48PBzh4eEer8fFxfGADbLY2FjugyDjPgg+7oPg4z4IPqP7oLSJh6BXIY0cORJDhw5Fu3bt0KFDB3z44Yc4f/487r///mAXjYiIiMqpoAcwd9xxB06dOoX//ve/OHHiBFq3bo1Zs2Z5NOwlIiIicgl6AAMAw4cPV60y0hMeHo5XX31VtVqJygb3QfBxHwQf90HwcR8EX1nvA5soTR8mIiIioiAI+ki8RERERL5iAENERESWwwCGiIiILIcBDBEREVmOpQOYCRMmoHbt2oiIiEDHjh2xZs2aYBfpsjB27Fi0b98eMTExqFKlCm6++Wbs3r1btkxeXh6GDRuGSpUqITo6GrfeeqvHiMqHDx9G//79ERUVhSpVquDZZ59FUVFRWf6Uy8K4ceNgs9kwYsQI92vc/mXj6NGjuPvuu1GpUiVERkaiRYsWWLdunft9IQT++9//omrVqoiMjETv3r2xd+9e2TrOnj2LIUOGIDY2FvHx8XjwwQeRk5NT1j/FcoqLi/HKK6+gTp06iIyMRL169fDaa6/J5s7h9jffkiVLMGDAAKSkpMBms+GPP/6QvW/WNt+yZQu6deuGiIgI1KhRA2+//bbvhRUWNXXqVBEWFia+/vprsX37dvHwww+L+Ph4cfLkyWAXzfL69u0rvvnmG7Ft2zaxadMmccMNN4iaNWuKnJwc9zKPPfaYqFGjhpg/f75Yt26duPrqq0Xnzp3d7xcVFYnmzZuL3r17i40bN4p//vlHJCYmilGjRgXjJ1nWmjVrRO3atUXLli3F008/7X6d2z/wzp49K2rVqiXuu+8+sXr1anHgwAExe/ZssW/fPvcy48aNE3FxceKPP/4QmzdvFjfddJOoU6eOuHDhgnuZfv36iVatWolVq1aJpUuXivr164vBgwcH4ydZyhtvvCEqVaokZsyYIQ4ePCimTZsmoqOjxUcffeRehtvffP/884946aWXxG+//SYAiN9//132vhnbPDMzUyQlJYkhQ4aIbdu2iR9//FFERkaKzz77zKeyWjaA6dChgxg2bJj77+LiYpGSkiLGjh0bxFJdntLT0wUAsXjxYiGEEBkZGSI0NFRMmzbNvczOnTsFALFy5UohRMlJYLfbxYkTJ9zLTJo0ScTGxor8/Pyy/QEWlZ2dLRo0aCDmzp0runfv7g5guP3LxvPPPy+6du2q+b7T6RTJycninXfecb+WkZEhwsPDxY8//iiEEGLHjh0CgFi7dq17mZkzZwqbzSaOHj0auMJfBvr37y8eeOAB2WuDBg0SQ4YMEUJw+5cFZQBj1jafOHGiqFixouxa9Pzzz4tGjRr5VD5LViEVFBRg/fr16N27t/s1u92O3r17Y+XKlUEs2eUpMzMTAJCQkAAAWL9+PQoLC2Xbv3HjxqhZs6Z7+69cuRItWrSQjajct29fZGVlYfv27WVYeusaNmwY+vfvL9vOALd/Wfnzzz/Rrl073HbbbahSpQratGmDL774wv3+wYMHceLECdl+iIuLQ8eOHWX7IT4+Hu3atXMv07t3b9jtdqxevbrsfowFde7cGfPnz8eePXsAAJs3b8ayZctw/fXXA+D2DwaztvnKlStxzTXXICwszL1M3759sXv3bpw7d85wecrFSLy+On36NIqLiz2mG0hKSsKuXbuCVKrLk9PpxIgRI9ClSxc0b94cAHDixAmEhYUhPj5etmxSUhJOnDjhXkZt/7jeI++mTp2KDRs2YO3atR7vcfuXjQMHDmDSpEkYOXIkXnzxRaxduxZPPfUUwsLCMHToUPd2VNvO0v1QpUoV2fshISFISEjgftDxwgsvICsrC40bN4bD4UBxcTHeeOMNDBkyBAC4/YPArG1+4sQJ1KlTx2MdrvcqVqxoqDyWDGCo7AwbNgzbtm3DsmXLgl2UK0ZaWhqefvppzJ07FxEREcEuzhXL6XSiXbt2ePPNNwEAbdq0wbZt2/Dpp59i6NChQS7d5e/nn3/GDz/8gClTpqBZs2bYtGkTRowYgZSUFG5/AmDRXkiJiYlwOBwevS5OnjyJ5OTkIJXq8jN8+HDMmDEDCxcuRPXq1d2vJycno6CgABkZGbLlpds/OTlZdf+43iNt69evR3p6Oq666iqEhIQgJCQEixcvxscff4yQkBAkJSVx+5eBqlWromnTprLXmjRpgsOHDwO4tB29XYeSk5ORnp4ue7+oqAhnz57lftDx7LPP4oUXXsCdd96JFi1a4J577sG///1vjB07FgC3fzCYtc3Nuj5ZMoAJCwtD27ZtMX/+fPdrTqcT8+fPR6dOnYJYssuDEALDhw/H77//jgULFnik+tq2bYvQ0FDZ9t+9ezcOHz7s3v6dOnXC1q1bZQfy3LlzERsb63FTILlevXph69at2LRpk/u/du3aYciQIe5/c/sHXpcuXTyGD9izZw9q1aoFAKhTpw6Sk5Nl+yErKwurV6+W7YeMjAysX7/evcyCBQvgdDrRsWPHMvgV1pWbmwu7XX6LcjgccDqdALj9g8Gsbd6pUycsWbIEhYWF7mXmzp2LRo0aGa4+AmDtbtTh4eFi8uTJYseOHeKRRx4R8fHxsl4X5J/HH39cxMXFiUWLFonjx4+7/8vNzXUv89hjj4maNWuKBQsWiHXr1olOnTqJTp06ud93deO97rrrxKZNm8SsWbNE5cqV2Y3XT9JeSEJw+5eFNWvWiJCQEPHGG2+IvXv3ih9++EFERUWJ77//3r3MuHHjRHx8vJg+fbrYsmWLGDhwoGqX0jZt2ojVq1eLZcuWiQYNGrAbrwFDhw4V1apVc3ej/u2330RiYqJ47rnn3Mtw+5svOztbbNy4UWzcuFEAEO+//77YuHGjOHTokBDCnG2ekZEhkpKSxD333CO2bdsmpk6dKqKioq6cbtRCCPHJJ5+ImjVrirCwMNGhQwexatWqYBfpsgBA9b9vvvnGvcyFCxfEE088ISpWrCiioqLELbfcIo4fPy5bT2pqqrj++utFZGSkSExMFP/5z39EYWFhGf+ay4MygOH2Lxt//fWXaN68uQgPDxeNGzcWn3/+uex9p9MpXnnlFZGUlCTCw8NFr169xO7du2XLnDlzRgwePFhER0eL2NhYcf/994vs7Oyy/BmWlJWVJZ5++mlRs2ZNERERIerWrSteeuklWddbbn/zLVy4UPX6P3ToUCGEedt88+bNomvXriI8PFxUq1ZNjBs3zuey2oSQDGtIREREZAGWbANDREREVzYGMERERGQ5DGCIiIjIchjAEBERkeUwgCEiIiLLYQBDRERElsMAhoiIiCyHAQwR+aVHjx4YMWJEmX7n6NGj0bp161Kvp3bt2vjwww9LvR4iCh4GMERXuGAE
Iv565plnZPOwENGVKyTYBSAiMio6OhrR0dHBLgYRlQPMwBBdwe677z4sXrwYH330EWw2G2w2G1JTUwEAixcvRocOHRAeHo6qVavihRdeQFFRkea6/v77b8TFxeGHH34AAKSlpeH2229HfHw8EhISMHDgQPe6Xd998803491330XVqlVRqVIlDBs2TDZDrZKyCsnIOtLT0zFgwABERkaiTp067vJJZWRk4KGHHkLlypURGxuLnj17YvPmzQCAU6dOITk5GW+++aZ7+RUrViAsLIzZIKIgYgBDdAX76KOP0KlTJzz88MM4fvw4jh8/jho1auDo0aO44YYb0L59e2zevBmTJk3CV199hddff111PVOmTMHgwYPxww8/YMiQISgsLETfvn0RExODpUuXYvny5YiOjka/fv1QUFDg/tzChQuxf/9+LFy4EN9++y0mT56MyZMn+/Qb9NZx3333IS0tDQsXLsQvv/yCiRMnIj09XbaO2267Denp6Zg5cybWr1+Pq666Cr169cLZs2dRuXJlfP311xg9ejTWrVuH7Oxs3HPPPRg+fDh69erlU1mJyER+TlhJRJcJ5UzXQgjx4osvikaNGgmn0+l+bcKECSI6OloUFxfLPjd+/HgRFxcnFi1a5F72u+++8/h8fn6+iIyMFLNnzxZCCDF06FBRq1YtUVRU5F7mtttuE3fccYdmWV999VXRqlUr999669i9e7cAINasWeN+f+fOnQKA+OCDD4QQQixdulTExsaKvLw82XfVq1dPfPbZZ+6/n3jiCdGwYUNx1113iRYtWngsT0Rli21giMjDzp070alTJ9hsNvdrXbp0QU5ODo4cOYKaNWsCAH755Rekp6dj+fLlaN++vXvZzZs3Y9++fYiJiZGtNy8vD/v373f/3axZMzgcDvffVatWxdatW30qq7d17Ny5EyEhIWjbtq37/caNGyM+Pl5W1pycHFSqVEm23gsXLsjK+u6776J58+aYNm0a1q9fj/DwcJ/KSUTmYgBDRH5r06YNNmzYgK+//hrt2rVzBzw5OTlo27atanuTypUru/8dGhoqe89ms8HpdPpUhtKuIycnB1WrVsWiRYs83pMGOvv378exY8fgdDqRmpqKFi1a+FROIjIXAxiiK1xYWBiKi4tlrzVp0gS//vorhBDuoGT58uWIiYlB9erV3cvVq1cP7733Hnr06AGHw4Hx48cDAK666ir89NNPqFKlCmJjY8vuxyg0btwYRUVFWL9+vTtDtHv3bmRkZLiXueqqq3DixAmEhISgdu3aquspKCjA3XffjTvuuAONGjXCQw89hK1bt6JKlSpl8CuISA0b8RJd4WrXro3Vq1cjNTUVp0+fhtPpxBNPPIG0tDQ8+eST2LVrF6ZPn45XX30VI0eOhN0uv2w0bNgQCxcuxK+//uoeT2bIkCFITEzEwIEDsXTpUhw8eBCLFi3CU089hSNHjpTZb2vUqBH69euHRx99FKtXr8b69evx0EMPITIy0r1M79690alTJ9x8882YM2cOUlNTsWLFCrz00ktYt24dAOCll15CZmYmPv74Yzz//PNo2LAhHnjggTL7HUTkiQEM0RXumWeegcPhQNOmTVG5cmUcPnwY1apVwz///IM1a9agVatWeOyxx/Dggw/i5ZdfVl1Ho0aNsGDBAvz444/4z3/+g6ioKCxZsgQ1a9bEoEGD0KRJEzz44IPIy8sr84zMN998g5SUFHTv3h2DBg3CI488Isuc2Gw2/PPPP7jmmmtw//33o2HDhrjzzjtx6NAhJCUlYdGiRfjwww/x3XffITY2Fna7Hd999x2WLl2KSZMmlelvIaJLbEIIEexCEBEREfmCGRgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5TCAISIiIsthAENERESWwwCGiIiILIcBDBEREVkOAxgiIiKyHAYwREREZDkMYIiIiMhyGMAQERGR5fw/UsrGXuidSMwAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib import pyplot as plt\n", + "loss_per_token = model.loss_per_token(x,y)\n", + "loss = model.loss(x,y)\n", + "loss_per_token.mean().item(), loss.item()\n", + "# print(loss_per_token.shape)\n", + "\n", + "plt.plot(loss_per_token.mean(dim=0).detach().cpu().numpy())\n", + "plt.xlim(0, 1024)\n", + "plt.ylim(0, 5)\n", + "plt.xlabel('token index')\n", + "plt.ylabel('loss')\n", + "plt.title('loss per token')" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4.174387269895637" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import math\n", + "-math.log(1/vocab_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "class Head(nn.Module):\n", + " \"\"\" one head of self-attention \"\"\"\n", + "\n", + " def __init__(self, d_model, d_head):\n", + " super().__init__()\n", + " self.key = nn.Linear(d_model, d_head, bias=False)\n", + " self.query = nn.Linear(d_model, d_head, bias=False)\n", + " self.value = nn.Linear(d_model, d_head, bias=False)\n", + " self.register_buffer('tril', torch.tril(torch.ones(sequence_length, sequence_length)))\n", + "\n", + " # self.dropout = nn.Dropout(dropout)\n", + "\n", + " def forward(self, x):\n", + " B,T,C = x.shape\n", + " k = self.key(x) # (B,T,C)\n", + " q = self.query(x) # (B,T,C)\n", + " # compute attention scores (\"affinities\")\n", + " wei = q @ k.transpose(-2,-1) * C**-0.5 # (B, T, C) @ (B, C, T) -> (B, T, T)\n", + " wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf')) # (B, T, T)\n", + " wei = F.softmax(wei, dim=-1) # (B, T, T)\n", + " # wei = self.dropout(wei)\n", + " # perform the weighted aggregation of the values\n", + " v = self.value(x) # (B,T,C)\n", + " out = wei @ v # (B, T, T) @ (B, T, C) -> (B, T, C)\n", + " return out\n", + "\n", + "class MultiHeadAttention(nn.Module):\n", + " \"\"\" multiple heads of self-attention in parallel \"\"\"\n", + "\n", + " def __init__(self, num_heads, d_model, d_head):\n", + " super().__init__()\n", + " self.heads = nn.ModuleList([Head(d_model, d_head) for _ in range(num_heads)])\n", + " self.proj = nn.Linear(d_model, d_model)\n", + " # self.dropout = nn.Dropout(dropout)\n", + "\n", + " def forward(self, x):\n", + " out = torch.cat([h(x) for h in self.heads], dim=-1)\n", + " # out = self.dropout(self.proj(out))\n", + " out = self.proj(out)\n", + " return out\n", + "\n", + "class FeedFoward(nn.Module):\n", + " \"\"\" a simple linear layer followed by a non-linearity \"\"\"\n", + "\n", + " def __init__(self, d_model):\n", + " super().__init__()\n", + " self.net = nn.Sequential(\n", + " nn.Linear(d_model, 4 * d_model),\n", + " nn.ReLU(),\n", + " nn.Linear(4 * d_model, d_model),\n", + " # nn.Dropout(dropout),\n", + " )\n", + "\n", + " def forward(self, x):\n", + " return self.net(x)\n", + "\n", + "class Block(nn.Module):\n", + " \"\"\" Transformer block: communication followed by computation \"\"\"\n", + "\n", + " def __init__(self, d_model, d_head, num_heads):\n", + " # d_model: embedding dimension, num_heads: the number of heads we'd like\n", + " super().__init__()\n", + " d_head = d_model // num_heads\n", + " self.sa = MultiHeadAttention(num_heads, d_model, d_head)\n", + " self.ffwd = FeedFoward(d_model)\n", + " self.ln1 = nn.LayerNorm(d_model)\n", + " self.ln2 = nn.LayerNorm(d_model)\n", + "\n", + " def 
forward(self, x):\n", + " x = x + self.sa(self.ln1(x))\n", + " x = x + self.ffwd(self.ln2(x))\n", + " return x\n", + "\n", + "# super simple bigram model\n", + "class AttentionLM(nn.Module):\n", + "\n", + " def __init__(self, vocab_size, sequence_length, d_model, d_head, num_heads, n_layer):\n", + " super().__init__()\n", + " # each token directly reads off the logits for the next token from a lookup table\n", + " self.token_embedding_table = nn.Embedding(vocab_size, d_model)\n", + " self.position_embedding_table = nn.Embedding(sequence_length, d_model)\n", + " self.blocks = nn.Sequential(*[Block(d_model, d_head=d_head, num_heads=num_heads) for _ in range(n_layer)])\n", + " self.ln_f = nn.LayerNorm(d_model) # final layer norm\n", + " self.lm_head = nn.Linear(d_model, vocab_size)\n", + " print('number of parameters:', sum(p.numel() for p in self.parameters()))\n", + " \n", + " def forward(self, idx):\n", + " B, T = idx.shape\n", + "\n", + " # idx and targets are both (B,T) tensor of integers\n", + " tok_emb = self.token_embedding_table(idx) # (B,T,C)\n", + " pos_emb = self.position_embedding_table(torch.arange(T, device=tok_emb.device)) # (T,C)\n", + " x = tok_emb + pos_emb # (B,T,C)\n", + " x = self.blocks(x) # (B,T,C)\n", + " x = self.ln_f(x) # (B,T,C)\n", + " logits = self.lm_head(x) # (B,T,vocab_size)\n", + " return logits\n", + "\n", + " def loss(self, token_indexes, targets):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " # targets: (batch_size, sequence_length)\n", + " batch_size, sequence_length, vocab_size = logits.shape\n", + " loss = F.cross_entropy(\n", + " logits.view(batch_size*sequence_length, vocab_size),\n", + " targets.view(batch_size*sequence_length)\n", + " )\n", + " # loss: scalar\n", + " return loss\n", + " \n", + " def generate(self, token_indexes, max_new_tokens):\n", + " # token_indexes: (batch_size, sequence_length)\n", + " batch_size, sequence_length = token_indexes.shape\n", + " for _ in range(max_new_tokens):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " next_token_logits = logits[:, -1, :]\n", + " # next_token_logits: (batch_size, vocab_size)\n", + " next_token_probs = F.softmax(next_token_logits, dim=-1)\n", + " # next_token_probs: (batch_size, vocab_size)\n", + " next_token = torch.multinomial(next_token_probs, num_samples=1)\n", + " # next_token: (batch_size, 1)\n", + " token_indexes = torch.cat([token_indexes, next_token], dim=1)\n", + " # token_indexes: (batch_size, sequence_length+1)\n", + " return token_indexes\n", + " \n", + " def loss_per_token(self, token_indexes, targets):\n", + " logits = self(token_indexes)\n", + " # logits: (batch_size, sequence_length, vocab_size)\n", + " # targets: (batch_size, sequence_length)\n", + " batch_size, sequence_length, vocab_size = logits.shape\n", + " loss = F.cross_entropy(\n", + " logits.view(batch_size*sequence_length, vocab_size),\n", + " targets.view(batch_size*sequence_length),\n", + " reduction='none'\n", + " )\n", + " # loss: (batch_size*sequence_length)\n", + " return loss.view(batch_size, sequence_length)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overfit one batch\n", + "> Overfit a single batch of only a few examples (e.g. as little as two). To do so we increase the capacity of our model (e.g. add layers or filters) and verify that we can reach the lowest achievable loss (e.g. zero). 
I also like to visualize in the same plot both the label and the prediction and ensure that they end up aligning perfectly once we reach the minimum loss. If they do not, there is a bug somewhere and we cannot continue to the next stage.\n", + "\n", + "> The approach I like to take to finding a good model has two stages: first get a model large enough that it can overfit (i.e. focus on training loss) and then regularize it appropriately (give up some training loss to improve the validation loss). The reason I like these two stages is that if we are not able to reach a low error rate with any model at all that may again indicate some issues, bugs, or misconfiguration.\n", + "\n", + "https://karpathy.github.io/2019/04/25/recipe/" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "number of parameters: 15059009\n", + "steps: 0 loss: 4.373307228088379\n", + "steps: 100 loss: 1.3534984588623047\n", + "steps: 200 loss: 0.14664000272750854\n", + "steps: 300 loss: 0.007650444284081459\n", + "steps: 400 loss: 0.003188813803717494\n", + "steps: 500 loss: 0.0019986487459391356\n", + "steps: 600 loss: 0.0014852412277832627\n", + "steps: 700 loss: 0.0012111642863601446\n", + "steps: 800 loss: 0.0010455630254000425\n", + "steps: 900 loss: 0.0009370149346068501\n", + "steps: 999 loss: 0.0008622257155366242\n", + "validation loss: 11.710304260253906\n" + ] + } + ], + "source": [ + "from data_char import enc, get_batch\n", + "vocab_size = enc.n_vocab\n", + "sequence_length = 1024\n", + "d_model = 768\n", + "d_head = 64\n", + "n_layer = 2\n", + "num_heads = 12\n", + "\n", + "model = AttentionLM(vocab_size, sequence_length, d_model, d_head, num_heads, n_layer).cuda()\n", + "optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4)\n", + "batch_size = 32\n", + "context_length = 1024\n", + "iterations = 1000\n", + "x, y = get_batch(batch_size, context_length, 'train')\n", + "for steps in range(iterations):\n", + " # print(x[0], y[0])\n", + " x, y = x.cuda(), y.cuda()\n", + " loss = model.loss(x, y)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " if steps % 100 == 0:\n", + " print('steps:', steps, 'loss:', loss.item())\n", + "print('steps:', steps, 'loss:', loss.item())\n", + "\n", + "with torch.no_grad():\n", + " val_x, val_y = get_batch(1, context_length, 'val')\n", + " val_x, val_y = val_x.cuda(), val_y.cuda()\n", + " loss = model.loss(val_x, val_y)\n", + " print('validation loss:', loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0000499835818495" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.exp(4.998233271180652e-05)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'loss per token')" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjAAAAHHCAYAAAChjmJTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAtUklEQVR4nO3de1xVVf7/8fcB5IABBxEESVDTxBRNU2sIyyYpb5ldvl3MGjW7Y+bY1WpGnabwW01jjZdqJqtHWTrqpE1ppoZaecfLaJmX8kJ+VTSDA6KosH5/zM8TRzARkc2C1/PxOA89e6+992evzeXN2nuf7TLGGAEAAFgkwOkCAAAAzhQBBgAAWIcAAwAArEOAAQAA1iHAAAAA6xBgAACAdQgwAADAOgQYAABgHQIMAACwDgEGsNQ777wjl8ulHTt2OF1KrTN69Gi5XC4dOHDA6VIAnAIBBkCtUlhYqNGjR2vRokVOlwLgHCLAAKhVCgsLNWbMGAIMUMsRYABY58iRIyopKXG6DAAOIsAAtczEiRPVtm1bud1uxcfHKz09Xbm5uX5ttm7dqptvvllxcXEKCQlRkyZNdPvttysvL8/XZv78+eratasiIyMVFhampKQkPf3006fdvsvl0tChQzVlyhQlJSUpJCREnTp10pIlS8q03b17t+6++27FxsbK7Xarbdu2mjx5sl+bRYsWyeVyaerUqXr22Wd1/vnnq379+vJ6vWXWt2PHDsXExEiSxowZI5fLJZfLpdGjR/vafPHFF7riiit03nnnKTIyUv369dOmTZtOu187d+5Uy5YtlZycrH379kmScnNzNXz4cCUkJMjtdqtly5b63//9X79wtWPHDrlcLr388st688031aJFC7ndbnXp0kWrVq067XYBlC/I6QIAVJ3Ro0drzJgxSktL04MPPqjNmzdr0qRJWrVqlb7++mvVq1dPR48eVY8ePVRUVKSHH35YcXFx2r17tz755BPl5ubK4/Hom2++0XXXXaf27dvrT3/6k9xut7Zt26avv/66QnUsXrxY06ZN07Bhw+R2uzVx4kT17NlTK1euVHJysiRp3759+s1vfuMLPDExMZo7d66GDBkir9er4cOH+63zueeeU3BwsB577DEVFRUpODi4zHZjYmI0adIkPfjgg7rxxht10003SZLat28vSVqwYIF69eqlCy64QKNHj9bhw4f1t7/9TampqVqzZo2aNWtW7v58//33uvrqqxUVFaX58+crOjpahYWF6tatm3bv3q37779fiYmJWrp0qUaOHKk9e/Zo3Lhxfuv44IMPlJ+fr/vvv18ul0svvviibrrpJv3www+qV69ehfoVQCkGgJXefvttI8ls377dGGNMTk6OCQ4ONtdee60pLi72tRs/fryRZCZPnmyMMWbt2rVGkpk+ffop1/3Xv/7VSDL79+8/47okGUlm9erVvmk7d+40ISEh5sYbb/RNGzJkiGncuLE5cOCA3/K333678Xg8prCw0BhjTGZmppFkLrjgAt+0X7N//34jyYwaNarMvA4dOphGjRqZn376yTdt/fr1JiAgwPzud7/zTRs1apRv/zdt2mTi4+NNly5dzMGDB31tnnvuOXPeeeeZLVu2+G3jqaeeMoGBgWbXrl3GGGO2b99uJJmGDRv6LT979mwjyfz73/8+7T4BKItTSEAtsWDBAh09elTDhw9XQMAv39r33nuvIiIi9Omnn0qSPB6PJGnevHkqLCwsd12RkZGSpNmzZ1fqWpOUlBR16tTJ9z4xMVH9+vXTvHnzVFxcLGOMZs6cqb59+8oYowMHDvhePXr0UF5entasWeO3zoEDByo0NPSMazlhz549WrdunQYNGqSoqCjf9Pbt2+uaa67RnDlzyiyzceNGdevWTc2aNdOCBQvUoEED37zp06friiuuUIMGDfzqT0tLU3FxcZlTZrfddpvf8ldccYUk6Ycffqj0PgF1GQEGqCV27twpSUpKSvKbHhwcrAsuuMA3v3nz5hoxYoT+8Y9/KDo6Wj169NCECRP8rn+57bbblJqaqnvuuUexsbG6/fbb9c9//rPCYebCCy8sM61Vq1YqLCzU/v37tX//fuXm5urNN99UTEyM32vw4MGSpJycHL/lmzdvXvHOKMep+keSLrroIh04cECHDh3ym963b1+Fh4dr3rx5ioiI8Ju3detWffbZZ2XqT0tLK7f+xMREv/cnwszPP/98VvsF1FVcAwPUQX/5y180aNAgzZ49W59//rmGDRumjIwMLV++XE2aNFFoaKiWLFmizMxMffrpp/rss880bdo0XX311fr8888VGBh4Vts/EYTuvPNODRw4sNw2J65bOeFsRl8q6+abb9a7776rKVOm6P777/ebV1JSomuuuUZPPPFEucu2atXK7/2p+swYUzXFAnUMAQaoJZo2bSpJ2rx5sy644ALf9KNHj2r79u2+kYET2rVrp3bt2unZZ5/V0qVLlZqaqtdff11//vOfJUkBAQHq3r27unfvrldeeUUvvPCCnnnmGWVmZpZZ18m2bt1aZtqWLVtUv359311C4eHhKi4uPu26zpTL5Sp3eun+Odl3332n6OhonXfeeX7TX3rpJQUFBemhhx5SeHi47rjjDt+8Fi1aqKCgoMrrB1AxnEICaom0tDQFBwfrtdde8/ur/q233lJeXp769OkjSfJ6vTp+/Ljfsu3atVNAQICKiookSQcPHiyz/g4dOkiSr82vWbZsmd81LNnZ2Zo9e7auvfZaBQYGKjAwUDfffLNmzpypjRs3lll+//79p9/hU6hfv74klbl1vHHjxurQoYPeffddv3kbN27U559/rt69e5dZl8vl0ptvvqn/+Z//0cCBA/Xxxx/75t16661atmyZ5s2bV2a53NzcMn0MoGoxAgPUEjExMRo5cqTGjBmjnj176vrrr9fmzZs1ceJEdenSRXfeeaek/34OytChQ3XLLbeoVatWOn78uN577z1fqJCkP/3pT1qyZIn69Omjpk2bKicnRxMnTlSTJk3UtWvX09aSnJysHj16+N1GLf33s1lOGDt2rDIzM3XZZZfp3nvvVZs2bXTw4EGtWbNGCxYsKDdEVURoaKjatGmjadOmqVWrVoqKilJycrKSk5P10ksvqVevXkpJSdGQIUN8t1F7PB6/z4opLSAgQO+//75uuOEG3XrrrZozZ46uvvpqPf744/r444913XXXadCgQerUqZMOHTqkDRs2aMaMGdqxY4eio6MrtQ8AKsDZm6AAVNbJt1GfMH78eNO6dWtTr149Exsbax588EHz888/++b/8MMP5u677zYtWrQwISEhJioqyvz2t781CxYs8LVZuHCh6devn4mPjzfBwcEmPj7e9O/fv8wtw+WRZNLT0837779vLrzwQuN2u03Hjh1NZmZmmbb79u0z6enpJiEhwdSrV8/ExcWZ7t27mzfffNPX5sRt1L922/fJli5dajp16mSCg4PL3FK9YMECk5qaakJDQ01ERITp27ev+fbbb/2WL30b9QmFhYWmW7duJiwszCxfvtwYY0x+fr4ZOX
KkadmypQkODjbR0dHm8ssvNy+//LI5evSoMeaX26hfeumlcvuqvNu9AZyeyxiuIANQdVwul9LT0zV+/HinSwFQi3ENDAAAsA4BBgAAWIcAAwAArONogBk9erTvabEnXq1bt3ayJABnyRjD9S8AzjnHb6Nu27atFixY4HsfFOR4SQAAoIZzPC0EBQUpLi7O6TIAAIBFHA8wW7duVXx8vEJCQpSSkqKMjIwyDz07oaioyO9TQEtKSnTw4EE1bNjwlB8fDgAAahZjjPLz8xUfH6+AgMpdzeLo58DMnTtXBQUFSkpK0p49ezRmzBjt3r1bGzduVHh4eJn2o0eP9vskTwAAYK/s7Gw1adKkUsvWqA+yy83NVdOmTfXKK69oyJAhZeafPAKTl5enxMREZWdnl3nUPQAAqJm8Xq8SEhKUm5srj8dTqXU4fgqptMjISLVq1Urbtm0rd77b7Zbb7S4zPSIiggADAIBlzubyjxr1OTAFBQX6/vvv1bhxY6dLAQAANZijAeaxxx7T4sWLtWPHDi1dulQ33nijAgMD1b9/fyfLAgAANZyjp5B+/PFH9e/fXz/99JNiYmLUtWtXLV++XDExMU6WBQAAajhHA8zUqVOd3DwAALBUjboGBgAAoCIIMAAAwDoEGAAAYB0CDAAAsA4BBgAAWIcAAwAArEOAAQAA1iHAAAAA6xBgAACAdQgwAADAOgQYAABgHQIMAACwDgEGAABYhwADAACsQ4ABAADWIcAAAADrEGAAAIB1CDAAAMA6BBgAAGAdAgwAALAOAQYAAFiHAAMAAKxDgAEAANYhwAAAAOsQYAAAgHUIMAAAwDoEGAAAYB0CDAAAsA4BBgAAWIcAAwAArEOAAQAA1iHAAAAA6xBgAACAdQgwAADAOgQYAABgHQIMAACwDgEGAABYhwADAACsQ4ABAADWIcAAAADrEGAAAIB1CDAAAMA6BBgAAGAdAgwAALAOAQYAAFiHAAMAAKxDgAEAANYhwAAAAOsQYAAAgHUIMAAAwDoEGAAAYB0CDAAAsA4BBgAAWIcAAwAArEOAAQAA1iHAAAAA6xBgAACAdQgwAADAOgQYAABgHQIMAACwDgEGAABYp8YEmLFjx8rlcmn48OFOlwIAAGq4GhFgVq1apTfeeEPt27d3uhQAAGABxwNMQUGBBgwYoL///e9q0KCB0+UAAAALOB5g0tPT1adPH6WlpTldCgAAsESQkxufOnWq1qxZo1WrVlWofVFRkYqKinzvvV7vuSoNAADUYI6NwGRnZ+uRRx7RlClTFBISUqFlMjIy5PF4fK+EhIRzXCUAAKiJXMYY48SGZ82apRtvvFGBgYG+acXFxXK5XAoICFBRUZHfPKn8EZiEhATl5eUpIiKi2moHAACV5/V65fF4zur3t2OnkLp3764NGzb4TRs8eLBat26tJ598skx4kSS32y23211dJQIAgBrKsQATHh6u5ORkv2nnnXeeGjZsWGY6AABAaY7fhQQAAHCmHL0L6WSLFi1yugQAAGABRmAAAIB1CDAAAMA6BBgAAGAdAgwAALAOAQYAAFiHAAMAAKxDgAEAANYhwAAAAOsQYAAAgHUIMAAAwDoEGAAAYB0CDAAAsA4BBgAAWIcAAwAArEOAAQAA1iHAAAAA6xBgAACAdQgwAADAOgQYAABgHQIMAACwDgEGAABYhwADAACsQ4ABAADWIcAAAADrEGAAAIB1CDAAAMA6BBgAAGAdAgwAALAOAQYAAFiHAAMAAKxDgAEAANYhwAAAAOsQYAAAgHUIMAAAwDoEGAAAYB0CDAAAsA4BBgAAWIcAAwAArEOAAQAA1iHAAAAA6xBgAACAdQgwAADAOgQYAABgHQIMAACwDgEGAABYhwADAACsQ4ABAADWIcAAAADrEGAAAIB1CDAAAMA6BBgAAGAdAgwAALAOAQYAAFiHAAMAAKxDgAEAANYhwAAAAOsQYAAAgHUIMAAAwDoEGAAAYB0CDAAAsA4BBgAAWIcAAwAArONogJk0aZLat2+viIgIRUREKCUlRXPnznWyJAAAYAFHA0yTJk00duxYZWVlafXq1br66qvVr18/ffPNN06WBQAAajiXMcY4XURpUVFReumllzRkyJDTtvV6vfJ4PMrLy1NEREQ1VAcAAM5WVfz+DqrimiqtuLhY06dP16FDh5SSklJum6KiIhUVFfnee73e6ioPAADUII5fxLthwwaFhYXJ7XbrgQce0EcffaQ2bdqU2zYjI0Mej8f3SkhIqOZqAQBATeD4KaSjR49q165dysvL04wZM/SPf/xDixcvLjfElDcCk5CQwCkkAAAsUhWnkBwPMCdLS0tTixYt9MYbb5y2LdfAAABgn6r4/e34KaSTlZSU+I2yAAAAnMzRi3hHjhypXr16KTExUfn5+frggw+0aNEizZs3z8myAABADedogMnJydHvfvc77dmzRx6PR+3bt9e8efN0zTXXOFkWAACo4RwNMG+99ZaTmwcAAJaqcdfAAAAAnA4BBgAAWIcAAwAArEOAAQAA1iHAAAAA6xBgAACAdQgwAADAOgQYAABgHQIMAACwDgEGAABYhwADAACsQ4ABAADWIcAAAADrEGAAAIB1CDAAAMA6BBgAAGAdAgwAALAOAQYAAFiHAAMAAKxDgAEAANYhwAAAAOsQYAAAgHUIMAAAwDqVCjDvvvuuPv30U9/7J554QpGRkbr88su1c+fOKisOAACgPJUKMC+88IJCQ0MlScuWLdOECRP04osvKjo6Wr///e+rtEAAAICTBVVmoezsbLVs2VKSNGvWLN1888267777lJqaqquuuqoq6wMAACijUiMwYWFh+umnnyRJn3/+ua655hpJUkhIiA4fPlx11QEAAJSjUiMw11xzje655x517NhRW7ZsUe/evSVJ33zzjZo1a1aV9QEAAJRRqRGYCRMmKCUlRfv379fMmTPVsGFDSVJWVpb69+9fpQUCAACczGWMMU4XUVler1cej0d5eXmKiIhwuhwAAFABVfH7u1IjMJ999pm++uor3/sJEyaoQ4cOuuOOO/Tzzz9XqhAAAICKqlSAefzxx+X1eiVJGzZs0KOPPqrevXtr+/btGjFiRJUWCAAAcLJKXcS7fft2tWnTRpI0c+ZMXXfddXrhhRe0Zs0a3wW9AAAA50qlRmCCg4NVWFgoSVqwYIGuvfZaSVJUVJRvZAYAAOBcqdQITNeuXTVixAilpqZq5cqVmjZtmiRpy5YtatKkSZUWCAAAcLJKjcCMHz9eQUFBmjFjhiZNmqTzzz9fkjR37lz17NmzSgsEAAA4GbdRAwCAalUVv78rdQpJkoqLizVr1ixt2rRJktS2bVtdf/31CgwMrOwqAQAAKqRSAWbbtm3q3bu3du/eraSkJElSRkaGEhIS9Omnn6pFixZVWiQAAEBplboGZtiwYWrRooWys7O1Zs0arVmzRrt27VLz5s01bNiwqq4RAADAT6VGYBYvXqzly5crKirKN61hw4YaO3asUlNTq6w4AACA8lRqBMbtdis/P7/M9IKCAgUHB591UQAAAL+mU
gHmuuuu03333acVK1bIGCNjjJYvX64HHnhA119/fVXXCAAA4KdSAea1115TixYtlJKSopCQEIWEhOjyyy9Xy5YtNW7cuCouEQAAwF+lroGJjIzU7NmztW3bNt9t1BdddJFatmxZpcUBAACUp8IB5nRPmc7MzPT9/5VXXql8RQAAAKdR4QCzdu3aCrVzuVyVLgYAAKAiKhxgSo+wAAAAOKlSF/ECAAA4iQADAACsQ4ABAADWIcAAAADrEGAAAIB1CDAAAMA6BBgAAGAdAgwAALAOAQYAAFiHAAMAAKxDgAEAANYhwAAAAOsQYAAAgHUcDTAZGRnq0qWLwsPD1ahRI91www3avHmzkyUBAAALOBpgFi9erPT0dC1fvlzz58/XsWPHdO211+rQoUNOlgUAAGo4lzHGOF3ECfv371ejRo20ePFiXXnlladt7/V65fF4lJeXp4iIiGqoEAAAnK2q+P1do66BycvLkyRFRUU5XAkAAKjJgpwu4ISSkhINHz5cqampSk5OLrdNUVGRioqKfO+9Xm91lQcAAGqQGjMCk56ero0bN2rq1KmnbJORkSGPx+N7JSQkVGOFAACgpqgR18AMHTpUs2fP1pIlS9S8efNTtitvBCYhIYFrYAAAsEhVXAPj6CkkY4wefvhhffTRR1q0aNGvhhdJcrvdcrvd1VQdAACoqRwNMOnp6frggw80e/ZshYeHa+/evZIkj8ej0NBQJ0sDAAA1mKOnkFwuV7nT3377bQ0aNOi0y3MbNQAA9qkVp5AAAADOVI25CwkAAKCiCDAAAMA6BBgAAGAdAgwAALAOAQYAAFiHAAMAAKxDgAEAANYhwAAAAOsQYAAAgHUIMAAAwDoEGAAAYB0CDAAAsA4BBgAAWIcAAwAArEOAAQAA1iHAAAAA6xBgAACAdQgwAADAOgQYAABgHQIMAACwDgEGAABYhwADAACsQ4ABAADWIcAAAADrEGAAAIB1CDAAAMA6BBgAAGAdAgwAALAOAQYAAFiHAAMAAKxDgAEAANYhwAAAAOsQYAAAgHUIMAAAwDoEGAAAYB0CDAAAsA4BBgAAWIcAAwAArEOAAQAA1iHAAAAA6xBgAACAdQgwAADAOgQYAABgHQIMAACwDgEGAABYhwADAACsQ4ABAADWIcAAAADrEGAAAIB1CDAAAMA6BBgAAGAdAgwAALAOAQYAAFiHAAMAAKxDgAEAANYhwAAAAOsQYAAAgHUIMAAAwDoEGAAAYB0CDAAAsA4BBgAAWIcAAwAArONogFmyZIn69u2r+Ph4uVwuzZo1y8lyAACAJRwNMIcOHdLFF1+sCRMmOFkGAACwTJCTG+/Vq5d69erlZAkAAMBCjgaYM1VUVKSioiLfe6/X62A1AADAKVZdxJuRkSGPx+N7JSQkOF0SAABwgFUBZuTIkcrLy/O9srOznS4JAAA4wKpTSG63W2632+kyAACAw6wagQEAAJAcHoEpKCjQtm3bfO+3b9+udevWKSoqSomJiQ5WBgAAajJHA8zq1av129/+1vd+xIgRkqSBAwfqnXfecagqAABQ0zkaYK666ioZY5wsAQAAWIhrYAAAgHUIMAAAwDoEGAAAYB0CDAAAsA4BBgAAWIcAAwAArEOAAQAA1iHAAAAA6xBgAACAdQgwAADAOgQYAABgHQIMAACwDgEGAABYhwADAACsQ4ABAADWIcAAAADrEGAAAIB1CDAAAMA6BBgAAGAdAgwAALAOAQYAAFiHAAMAAKxDgAEAANYhwAAAAOsQYAAAgHUIMAAAwDoEGAAAYB0CDAAAsA4BBgAAWKdWBJjdPxc6XQIAAKhGtSLA5B0+5nQJAACgGtWKAHO8xDhdAgAAqEa1IsAUl5Q4XQIAAKhGtSLAHC92ugIAAFCdakWAKeYUEgAAdUqtCDDHDQEGAIC6pHYEmGKugQEAoC6pHQGGU0gAANQptSLAlBBgAACoU2pFgDnGKSQAAOqUWhFguAsJAIC6pVYEGK6BAQCgbiHAAAAA69SKAFPCowQAAKhTakWAYQQGAIC6pXYEmGICDAAAdUntCDCMwAAAUKfUigDDbdQAANQttSLAcAoJAIC6pVYEGEZgAACoW2pFgDluCDAAANQltSPA8CwkAADqlFoRYDiFBABA3UKAAQAA1qkVAeY4jxIAAKBOqR0BhtuoAQCoU2pFgOEUEgAAdUutCDA8SgAAgLqlVgSYYq6BAQCgTqkVAabwWLHTJQAAgGpUKwJMbuFxp0sAAADVqEYEmAkTJqhZs2YKCQnRZZddppUrV57R8rmFR89RZQAAoCZyPMBMmzZNI0aM0KhRo7RmzRpdfPHF6tGjh3Jyciq8jtzDBBgAAOoSxwPMK6+8onvvvVeDBw9WmzZt9Prrr6t+/fqaPHlyhddRcKRYx3geEgAAdUaQkxs/evSosrKyNHLkSN+0gIAApaWladmyZWXaFxUVqaioyPfe6/X6/v/EjP/IHRQgl8sll0tySf//X9c53YfSjCp2O/epHp59uqVP7NN////LfpXebk16MLerCru+JuzXmexPVdT7a9ur7v4o7+tO+uVr73T1nE25p+qGU/WPMb9sr3Rdv+xD5Y5lVXb5uT5+pffPdYrppeuoAd9ePqWPz8n11YSfA5VRlT8Lz6X/9m/pTnb5fd9UyTb+/79FhQVnvS5HA8yBAwdUXFys2NhYv+mxsbH67rvvyrTPyMjQmDFjyl3XR2t3n5MaAQBA1SopKjzrdTgaYM7UyJEjNWLECN97r9erhIQEDeveUqHnhcsYI2OkEvPfvwxrclo/VSL/tRGj0vtkdOZ/mZ7p9iriVKNO56Lva8pfMcZUrJaz6duKjOZV1+hieaMspb/+TjU6c67q+NU25peR19J/OZb+C76iI6Wlndi3mvI1+GvK+947eZ9P/hquzpHq0yn99VZ6FL2qRwKqSw3+NeTHGCnA5T/6dbbfNycrfSyPHCrQk+PObn2OBpjo6GgFBgZq3759ftP37dunuLi4Mu3dbrfcbneZ6fdd2UIRERHnrE4AAFB1vF6vnjzLdTh6EW9wcLA6deqkhQsX+qaVlJRo4cKFSklJcbAyAABQkzl+CmnEiBEaOHCgOnfurEsvvVTjxo3ToUOHNHjwYKdLAwAANZTjAea2227T/v379cc//lF79+5Vhw4d9Nlnn5W5sBcAAOAElzE1+VLXX+f1euXxeJSXl8c1MAAAWKIqfn87PgJzNk5kr9KfBwMAAGq2E7+3z2YMxeoA89NPP0mSEhISHK4EAACcqfz8fHk8nkota3WAiYqKkiTt2rWr0h2As3Pis3iys7M5jecQjoHzOAbO4xg470yOgTFG+fn5io+Pr/T2rA4wAQH/vQvc4/HwBeuwiIgIjoHDOAbO4xg4j2PgvIoeg7MdeHD8YY4AAABnigADAACsY3WAcbvdGjVqVLmPF0D14Bg4j2PgPI6B8zgGzqvuY2D158AAAIC6yeoRGAAAUDcRYAAAgHUIMAAAwDoEGAAAYB2rA8yECRPUrFkzhYSE6LLLLtPKlSudLqlWyMjIUJcuXRQeHq5GjRrp
hhtu0ObNm/3aHDlyROnp6WrYsKHCwsJ08803a9++fX5tdu3apT59+qh+/fpq1KiRHn/8cR0/frw6d6VWGDt2rFwul4YPH+6bRv9Xj927d+vOO+9Uw4YNFRoaqnbt2mn16tW++cYY/fGPf1Tjxo0VGhqqtLQ0bd261W8dBw8e1IABAxQREaHIyEgNGTJEBQUF1b0r1ikuLtYf/vAHNW/eXKGhoWrRooWee+45v2fn0P9Vb8mSJerbt6/i4+Plcrk0a9Ysv/lV1ef/+c9/dMUVVygkJEQJCQl68cUXz7xYY6mpU6ea4OBgM3nyZPPNN9+Ye++910RGRpp9+/Y5XZr1evToYd5++22zceNGs27dOtO7d2+TmJhoCgoKfG0eeOABk5CQYBYuXGhWr15tfvOb35jLL7/cN//48eMmOTnZpKWlmbVr15o5c+aY6OhoM3LkSCd2yVorV640zZo1M+3btzePPPKIbzr9f+4dPHjQNG3a1AwaNMisWLHC/PDDD2bevHlm27ZtvjZjx441Ho/HzJo1y6xfv95cf/31pnnz5ubw4cO+Nj179jQXX3yxWb58ufnyyy9Ny5YtTf/+/Z3YJas8//zzpmHDhuaTTz4x27dvN9OnTzdhYWHm1Vdf9bWh/6venDlzzDPPPGP+9a9/GUnmo48+8ptfFX2el5dnYmNjzYABA8zGjRvNhx9+aEJDQ80bb7xxRrVaG2AuvfRSk56e7ntfXFxs4uPjTUZGhoNV1U45OTlGklm8eLExxpjc3FxTr149M336dF+bTZs2GUlm2bJlxpj/fhMEBASYvXv3+tpMmjTJREREmKKiourdAUvl5+ebCy+80MyfP99069bNF2Do/+rx5JNPmq5du55yfklJiYmLizMvvfSSb1pubq5xu93mww8/NMYY8+233xpJZtWqVb42c+fONS6Xy+zevfvcFV8L9OnTx9x9991+02666SYzYMAAYwz9Xx1ODjBV1ecTJ040DRo08PtZ9OSTT5qkpKQzqs/KU0hHjx5VVlaW0tLSfNMCAgKUlpamZcuWOVhZ7ZSXlyfpl4dnZmVl6dixY37937p1ayUmJvr6f9myZWrXrp1iY2N9bXr06CGv16tvvvmmGqu3V3p6uvr06ePXzxL9X10+/vhjde7cWbfccosaNWqkjh076u9//7tv/vbt27V3716/4+DxeHTZZZf5HYfIyEh17tzZ1yYtLU0BAQFasWJF9e2MhS6//HItXLhQW7ZskSStX79eX331lXr16iWJ/ndCVfX5smXLdOWVVyo4ONjXpkePHtq8ebN+/vnnCtdj5cMcDxw4oOLiYr8fzpIUGxur7777zqGqaqeSkhINHz5cqampSk5OliTt3btXwcHBioyM9GsbGxurvXv3+tqUd3xOzMOvmzp1qtasWaNVq1aVmUf/V48ffvhBkyZN0ogRI/T0009r1apVGjZsmIKDgzVw4EBfP5bXz6WPQ6NGjfzmBwUFKSoqiuNwGk899ZS8Xq9at26twMBAFRcX6/nnn9eAAQMkif53QFX1+d69e9W8efMy6zgxr0GDBhWqx8oAg+qTnp6ujRs36quvvnK6lDojOztbjzzyiObPn6+QkBCny6mzSkpK1LlzZ73wwguSpI4dO2rjxo16/fXXNXDgQIerq/3++c9/asqUKfrggw/Utm1brVu3TsOHD1d8fDz9D0mW3oUUHR2twMDAMndd7Nu3T3FxcQ5VVfsMHTpUn3zyiTIzM9WkSRPf9Li4OB09elS5ubl+7Uv3f1xcXLnH58Q8nFpWVpZycnJ0ySWXKCgoSEFBQVq8eLFee+01BQUFKTY2lv6vBo0bN1abNm38pl100UXatWuXpF/68dd+DsXFxSknJ8dv/vHjx3Xw4EGOw2k8/vjjeuqpp3T77berXbt2uuuuu/T73/9eGRkZkuh/J1RVn1fVzycrA0xwcLA6deqkhQsX+qaVlJRo4cKFSklJcbCy2sEYo6FDh+qjjz7SF198UWaor1OnTqpXr55f/2/evFm7du3y9X9KSoo2bNjg94U8f/58RURElPmlAH/du3fXhg0btG7dOt+rc+fOGjBggO//9P+5l5qaWubjA7Zs2aKmTZtKkpo3b664uDi/4+D1erVixQq/45Cbm6usrCxfmy+++EIlJSW67LLLqmEv7FVYWKiAAP9fUYGBgSopKZFE/zuhqvo8JSVFS5Ys0bFjx3xt5s+fr6SkpAqfPpJk923UbrfbvPPOO+bbb7819913n4mMjPS76wKV8+CDDxqPx2MWLVpk9uzZ43sVFhb62jzwwAMmMTHRfPHFF2b16tUmJSXFpKSk+OafuI332muvNevWrTOfffaZiYmJ4TbeSip9F5Ix9H91WLlypQkKCjLPP/+82bp1q5kyZYqpX7++ef/9931txo4dayIjI83s2bPNf/7zH9OvX79ybynt2LGjWbFihfnqq6/MhRdeyG28FTBw4EBz/vnn+26j/te//mWio6PNE0884WtD/1e9/Px8s3btWrN27Vojybzyyitm7dq1ZufOncaYqunz3NxcExsba+666y6zceNGM3XqVFO/fv26cxu1Mcb87W9/M4mJiSY4ONhceumlZvny5U6XVCtIKvf19ttv+9ocPnzYPPTQQ6ZBgwamfv365sYbbzR79uzxW8+OHTtMr169TGhoqImOjjaPPvqoOXbsWDXvTe1wcoCh/6vHv//9b5OcnGzcbrdp3bq1efPNN/3ml5SUmD/84Q8mNjbWuN1u0717d7N582a/Nj/99JPp37+/CQsLMxEREWbw4MEmPz+/OnfDSl6v1zzyyCMmMTHRhISEmAsuuMA888wzfrfe0v9VLzMzs9yf/wMHDjTGVF2fr1+/3nTt2tW43W5z/vnnm7Fjx55xrS5jSn2sIQAAgAWsvAYGAADUbQQYAABgHQIMAACwDgEGAABYhwADAACsQ4ABAADWIcAAAADrEGAAVMpVV12l4cOHV+s2R48erQ4dOpz1epo1a6Zx48ad9XoAOIcAA9RxTgSRynrsscf8nsMCoO4KcroAAKiosLAwhYWFOV0GgBqAERigDhs0aJAWL16sV199VS6XSy6XSzt27JAkLV68WJdeeqncbrcaN26sp556SsePHz/luj799FN5PB5NmTJFkpSdna1bb71VkZGRioqKUr9+/XzrPrHtG264QS+//LIaN26shg0bKj093e8JtSc7+RRSRdaRk5Ojvn37KjQ0VM2bN/fVV1pubq7uuecexcTEKCIiQldffbXWr18vSdq/f7/i4uL0wgsv+NovXbpUwcHBjAYBDiLAAHXYq6++qpSUFN17773as2eP9uzZo4SEBO3evVu9e/dWly5dtH79ek2aNElvvfWW/vznP5e7ng8++ED9+/fXlClTNGDAAB07dkw9evRQeHi4vvzyS3399dcKCwtTz549dfToUd9ymZmZ+v7775WZmal3331X77zzjt55550z2ofTrWPQoEHKzs5WZmamZsyYoYkTJyonJ8dvHbfccotycnI0d+5cZWVl6ZJLLlH37t118OBBxcT
EaPLkyRo9erRWr16t/Px83XXXXRo6dKi6d+9+RrUCqEKVfGAlgFri5CddG2PM008/bZKSkkxJSYlv2oQJE0xYWJgpLi72W278+PHG4/GYRYsW+dq+9957ZZYvKioyoaGhZt68ecYYYwYOHGiaNm1qjh8/7mtzyy23mNtuu+2UtY4aNcpcfPHFvvenW8fmzZuNJLNy5Urf/E2bNhlJ5q9//asxxpgvv/zSREREmCNHjvhtq0WLFuaNN97wvX/ooYdMq1atzB133GHatWtXpj2A6sU1MADK2LRpk1JSUuRyuXzTUlNTVVBQoB9//FGJiYmSpBkzZignJ0dff/21unTp4mu7fv16bdu2TeHh4X7rPXLkiL7//nvf+7Zt2yowMND3vnHjxtqwYcMZ1fpr69i0aZOCgoLUqVMn3/zWrVsrMjLSr9aCggI1bNjQb72HDx/2q/Xll19WcnKypk+frqysLLnd7jOqE0DVIsAAqLSOHTtqzZo1mjx5sjp37uwLPAUFBerUqVO515vExMT4/l+vXj2/eS6XSyUlJWdUw9muo6CgQI0bN9aiRYvKzCsddL7//nv93//9n0pKSrRjxw61a9fujOoEULUIMEAdFxwcrOLiYr9pF110kWbOnCljjC+UfP311woPD1eTJk187Vq0aKG//OUvuuqqqxQYGKjx48dLki655BJNmzZNjRo1UkRERPXtzElat26t48ePKysryzdCtHnzZuXm5vraXHLJJdq7d6+CgoLUrFmzctdz9OhR3XnnnbrtttuUlJSke+65Rxs2bFCjRo2qYS8AlIeLeIE6rlmzZlqxYoV27NihAwcOqKSkRA899JCys7P18MMP67vvvtPs2bM1atQojRgxQgEB/j82WrVqpczMTM2cOdP3eTIDBgxQdHS0+vXrpy+//FLbt2/XokWLNGzYMP3444/Vtm9JSUnq2bOn7r//fq1YsUJZWVm65557FBoa6muTlpamlJQU3XDDDfr888+1Y8cOLV26VM8884xWr14tSXrmmWeUl5en1157TU8++aRatWqlu+++u9r2A0BZBBigjnvssccUGBioNm3aKCYmRrt27dL555+vOXPmaOXKlbr44ov1wAMPaMiQIXr22WfLXUdSUpK++OILffjhh3r00UdVv359LVmyRImJibrpppt00UUXaciQITpy5Ei1j8i8/fbbio+PV7du3XTTTTfpvvvu8xs5cblcmjNnjq688koNHjxYrVq10u23366dO3cqNjZWixYt0rhx4/Tee+8pIiJCAQEBeu+99/Tll19q0qRJ1bovAH7hMsYYp4sAAAA4E4zAAAAA6xBgAACAdQgwAADAOgQYAABgHQIMAACwDgEGAABYhwADAACsQ4ABAADWIcAAAADrEGAAAIB1CDAAAMA6BBgAAGCd/wemLfX1IHEClwAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib import pyplot as plt\n", + "loss_per_token = model.loss_per_token(x,y)\n", + "loss = model.loss(x,y)\n", + "loss_per_token.mean().item(), loss.item()\n", + "# print(loss_per_token.shape)\n", + "\n", + "plt.plot(loss_per_token.mean(dim=0).detach().cpu().numpy())\n", + "plt.xlim(0, 1024)\n", + "plt.ylim(-0.1, 5)\n", + "plt.xlabel('token index')\n", + "plt.ylabel('loss')\n", + "plt.title('loss per token')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "- https://www.youtube.com/watch?v=kCc8FmEb1nY \n", + "- https://github.com/karpathy/ng-video-lecture \n", + "- https://colab.research.google.com/drive/1JMLa53HDuA-i7ZBmqV7ZnA3c_fvtXnx-?usp=sharing " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/playground/data.py b/playground/data.py new file mode 100644 index 0000000..d6b43ab --- /dev/null +++ b/playground/data.py @@ -0,0 +1,21 @@ +import torch +import tiktoken + +with open('input.txt', 'r', encoding='utf-8') as f: + text = f.read() + +enc = tiktoken.get_encoding("gpt2") +seed = 1337 +torch.manual_seed(seed) +data = torch.tensor(enc.encode(text), dtype=torch.long) +n = int(0.9*len(data)) # first 90% will be train, rest val +train_data = data[:n] +val_data = data[n:] + + +def get_batch(batch_size, context_length, split='train'): + data = train_data if split == 'train' else val_data + index = torch.randint(len(data) - context_length, (batch_size,)) + x = torch.stack([data[i:i+context_length] for i in index]) + y = torch.stack([data[i+1:i+1+context_length] for i in index]) + return x, y \ No newline at end of file diff --git a/playground/data_char.py b/playground/data_char.py new file mode 100644 index 0000000..2baccfd --- /dev/null +++ b/playground/data_char.py @@ -0,0 +1,33 @@ +import torch + +with open('input.txt', 'r', encoding='utf-8') as f: + text = f.read() + +class CharTokenizer: + def __init__(self, text): + self.vocab = sorted(list(set(text))) + self.n_vocab = len(self.vocab) + self.encoder = {k: v for v, k in enumerate(self.vocab)} + self.decoder = {v: k for k, v in self.encoder.items()} + + def encode(self, text): + return [self.encoder[c] for c in text] + + def decode(self, tokens): + return [self.decoder[t] for t in tokens] + +seed = 1337 +torch.manual_seed(seed) +enc = CharTokenizer(text) +data = torch.tensor(enc.encode(text), dtype=torch.long) +n = int(0.9*len(data)) # first 90% will be train, rest val +train_data = data[:n] +val_data = data[n:] + + +def get_batch(batch_size, context_length, split='train'): + data = train_data if split == 'train' else val_data + index = torch.randint(len(data) - context_length, (batch_size,)) + x = torch.stack([data[i:i+context_length] for i in index]) + y = torch.stack([data[i+1:i+1+context_length] for i in index]) + return x, y \ No newline at end of file diff --git a/playground/model.py b/playground/model.py new file mode 100644 index 0000000..3848bb1 --- /dev/null +++ b/playground/model.py @@ -0,0 +1,5 @@ +import torch +import torch.nn as nn 
+from torch.nn import functional as F + +torch.manual_seed(1337) \ No newline at end of file diff --git a/playground/ngram.py b/playground/ngram.py new file mode 100644 index 0000000..0bb9df3 --- /dev/null +++ b/playground/ngram.py @@ -0,0 +1,149 @@ +from collections import defaultdict +import torch +from torch.nn import functional as F +import math +import copy + + +class Ngram: + def __init__(self, n, vocab, laplace=1): + self.n = n + self.vocab = vocab + self.laplace = laplace + self.ngram = defaultdict(lambda: laplace) + self.context_count = defaultdict(lambda: laplace * len(self.vocab)) + + def train(self, token_list): + assert isinstance(token_list, list) + for i in range(len(token_list) - self.n + 1): + ngram_list = copy.deepcopy(token_list[i:i+self.n]) + ngram_list = [str(i) for i in ngram_list] + context = ngram_list[:-1] + ngram_key = '-'.join(ngram_list) + context_key = '-'.join(context) + self.ngram[ngram_key] += 1 + self.context_count[context_key] += 1 + # print(ngram_key, context_key) + + + def train_batch(self, token_list): + for tokens in token_list: + self.train(tokens) + + def get_prob(self, ngram): + if self.n == 1: + return self.ngram[ngram] / len(self.vocab) + else: + context = ngram.split('-')[:-1] + context = '-'.join(context) + # if self.context_count[context] == 0: + # return 1 / len(self.vocab) + # else: + # if self.ngram[ngram] == 0: + # return 1e-20 + # return self.ngram[ngram] / self.context_count[context] + return self.ngram[ngram] / self.context_count[context] + + def get_prob_distribution(self, n_minus_1_gram): + distribution = [] + distribution_dict = {} + for word in self.vocab: + ngram_list = n_minus_1_gram + [word] + ngram = '-'.join([str(i) for i in ngram_list]) + # print('hi', ngram) + distribution.append(self.get_prob(ngram)) + distribution_dict[word] = self.get_prob(ngram) + return distribution, distribution_dict + + def forward(self, token_indexes): + # token_index: (batch_size, sequence_length) + if isinstance(token_indexes, torch.Tensor) or isinstance(token_indexes, torch.LongTensor): + token_indexes = token_indexes.tolist() + assert isinstance(token_indexes, list) + batch_size = len(token_indexes) + sequence_length = len(token_indexes[0]) + distributions = torch.ones(batch_size, sequence_length, len(self.vocab)) + distributions /= len(self.vocab) + for i in range(sequence_length): + for batch in range(batch_size): + if self.n == 2: + context = [token_indexes[batch][i]] + else: + if i < self.n - 1: + if i == 0: + context = [token_indexes[batch][i]] + else: + context = token_indexes[batch][:i+1] + else: + context = token_indexes[batch][i-self.n+2:i+1] + distribution, _ = self.get_prob_distribution(context) + distributions[batch, i] = torch.tensor(distribution) + # distributions: (batch_size, sequence_length, vocab_size) + return distributions + + def loss(self, token_indexes, targets): + # token_indexes: (batch_size, sequence_length) + # targets: (batch_size, sequence_length) + distributions = self.forward(token_indexes) + distributions = distributions.to(targets.device) + log_distributions = torch.log(distributions) + # print(log_distributions) + # targets: (batch_size, sequence_length) + batch_size, sequence_length, vocab_size = log_distributions.shape + loss = F.nll_loss( + log_distributions.view(batch_size*sequence_length, vocab_size), + targets.view(batch_size*sequence_length) + ) + # loss: scalar + return loss + + + +if __name__ == "__main__": + vocab_str = ["I", "am", "an", "NLPer", "a", "student", "in", "Tokyo", "University"] + tokenizer = 
{ + "I": 0, + "am": 1, + "an": 2, + "NLPer": 3, + "a": 4, + "student": 5, + "in": 6, + "Tokyo": 7, + "University": 8 + } + decoder = {v: k for k, v in tokenizer.items()} + text = "I am an NLPer" + words = text.split() + words_token = [tokenizer[word] for word in words] + ngram = Ngram(2, tokenizer.values()) + # distribution, distribution_dict = ngram.get_prob_distribution((tokenizer["I"],)) + # print(distribution) + # print(distribution_dict) + x, y = words[:-1], words[1:] + x = [[tokenizer[word] for word in x]] + y = torch.tensor([[tokenizer[word] for word in y]]) + print(x, y) + loss = ngram.loss(x, y) + print(loss) + + ngram.train(words_token) + distribution, distribution_dict = ngram.get_prob_distribution((tokenizer["I"],)) + print(distribution) + print(distribution_dict) + loss = ngram.loss(x, y) + print(loss) + + ngram = Ngram(2, tokenizer.values(), 1e-5) + ngram.train(words_token) + loss = ngram.loss(x, y) + print(loss) + + # ngram = Ngram(2, tokenizer.values()) + # for epoch in range(100): + # ngram.train(words_token) + # loss = ngram.loss(x, y) + # print('Epoch: {}, Loss: {}'.format(epoch, loss)) + # distribution, distribution_dict = ngram.get_prob_distribution((tokenizer["I"],)) + # print(distribution) + # print(distribution_dict) \ No newline at end of file
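As a complement to the toy "I am an NLPer" check in the __main__ block above, here is a hedged sketch of how ngram.Ngram could be pointed at the GPT-2-tokenized Shakespeare batches from data.py to get a unigram validation-loss baseline. The file name, the tiny batch and context sizes, and the existence of such a script at all are illustrative assumptions rather than part of the original change.

diff --git a/playground/ngram_eval.py b/playground/ngram_eval.py
new file mode 100644
--- /dev/null
+++ b/playground/ngram_eval.py
@@ -0,0 +1,22 @@
+# Sketch (hypothetical file, not in the original change): unigram baseline loss
+# on the validation split, using the Ngram class from ngram.py and the GPT-2
+# tokenized batches from data.py. Batch and context sizes are kept tiny on purpose.
+import torch
+
+from data import enc, get_batch, train_data
+from ngram import Ngram
+
+vocab = list(range(enc.n_vocab))
+
+# Fit Laplace-smoothed unigram counts on the training tokens.
+unigram = Ngram(1, vocab)
+unigram.train(train_data.tolist())
+
+# Ngram.forward builds a full vocab-sized distribution per position in pure
+# Python, so keep batch_size and context_length small.
+x, y = get_batch(2, 8, 'val')
+print('unigram validation loss:', unigram.loss(x.tolist(), y).item())
+
+# Reference point: a uniform guess over the vocabulary (and any unseen token)
+# scores -log(1/50257).
+print('uniform baseline:', torch.log(torch.tensor(float(enc.n_vocab))).item())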