LenaFleischer · ellen-moore · Sep 15, 2022 · Sep 15, 2022 · Sep 15, 2022 · Sep 15, 2022
diff --git a/.ipynb_checkpoints/version1-checkpoint.ipynb b/.ipynb_checkpoints/version1-checkpoint.ipynb
@@ -0,0 +1,253 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "id": "075dc9c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mido import MidiFile, MidiTrack\n",
+    "from mido import Message, MidiFile, MidiTrack\n",
+    "import os\n",
+    "from mido import MetaMessage\n",
+    "import random\n",
+    "\n",
+    "# directory name should be midis, at the same level as this file. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "6a6e0dd2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# for i, track in enumerate(mid.tracks):\n",
+    "#     print('Track {}: {}'.format(i, track.name))\n",
+    "#     for msg in track:\n",
+    "#         if msg.type == 'key_signature':\n",
+    "#             print(msg)\n",
+    "            "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "id": "a7298cde-f301-4d3a-b463-0110cd0e0575",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This function will take in a folder and convert each MIDI file into a list of tokens where each token is a note value of the piece\n",
+    "# It will return a 2D list where each row is a piece and each column represents a token/note of the piece\n",
+    "def tokenize_pieces(midi_folder):\n",
+    "    midi_pieces = []\n",
+    "    for midi_file_name in os.listdir(midi_folder):\n",
+    "        midsource = MidiFile(midi_folder + \"/\" + midi_file_name)\n",
+    "        note_token_sequence = []\n",
+    "        for i, track in enumerate(midsource.tracks):\n",
+    "            if track.name == 'Piano right':\n",
+    "                for msg in track:\n",
+    "                    if msg.type == 'note_on':\n",
+    "                         note_token_sequence.append(msg.note)\n",
+    "        midi_pieces.append(note_token_sequence)\n",
+    "    return midi_pieces"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "id": "3acde28c-f50b-454a-b792-88ac6cc1df4c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This function will generate n grams given a list of token sequences\n",
+    "def create_n_grams(n, token_sequences):\n",
+    "    \n",
+    "    paddings = n - 1\n",
+    "    start_padding = \"<start>\"\n",
+    "    end_padding = \"<end>\"\n",
+    "    n_grams_count = {}\n",
+    "\n",
+    "    # Add the start padding (<start>) n-1 number of times\n",
+    "    if paddings > 0:\n",
+    "        for i in range(0, len(token_sequences)):\n",
+    "            for j in range(0, paddings):\n",
+    "                token_sequences[i].insert(0, start_padding)\n",
+    "\n",
+    "    # Add the end padding (<end>) once to the end of each sequence\n",
+    "    for i in range(0, len(token_sequences)):\n",
+    "        token_sequences[i].append(end_padding)\n",
+    "\n",
+    "    # Iterate through each word in each sequence and using slicing to get the n gram, then add to dictionary/update count\n",
+    "    for sequence in token_sequences:\n",
+    "        for i in range(len(sequence)-n+1): # Source: https://stackoverflow.com/questions/13423919/computing-n-grams-using-python\n",
+    "            gram = sequence[i:i+n] # Source: https://stackoverflow.com/questions/13423919/computing-n-grams-using-python\n",
+    "            gram = tuple(gram)\n",
+    "            if gram in n_grams_count:\n",
+    "                n_grams_count[gram] += 1\n",
+    "            else:\n",
+    "                n_grams_count[gram] = 1\n",
+    "\n",
+    "    return n_grams_count\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "id": "8b32f8db-d4c4-4301-93b2-f96968e15f74",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_song(n_gram_frequencies, k):\n",
+    "    \n",
+    "    # We need to first determine what the \"n\" is from the input frequency dictionary\n",
+    "    n = len(tuple(n_gram_frequencies.keys())[0])\n",
+    "\n",
+    "    # Start with an empty tweet\n",
+    "    song_sequence = []\n",
+    "\n",
+    "    # For bigrams and up...\n",
+    "    if n - 1 > 0:\n",
+    "        for i in range(0, n-1):\n",
+    "            song_sequence.insert(0, \"<start>\")\n",
+    "    \n",
+    "        while song_sequence[len(song_sequence)-1] != \"<end>\":\n",
+    "            \n",
+    "            # Slice the song so we can get the previous n-1 gram that comes before the predicted token\n",
+    "            previous_token_sequence = song_sequence[len(song_sequence)-(n-1):len(song_sequence)]\n",
+    "\n",
+    "            # Using dictionary comprehension, create a dictionary containing all n-grams that contain the previous token sequence\n",
+    "            matched_dictionary = {k:v for k,v in n_gram_frequencies.items() if k[0:n - 1] == tuple(previous_token_sequence)}\n",
+    "\n",
+    "            # Prepare a list of possible choices along with their corresponding weights\n",
+    "            choices = []\n",
+    "            choices_weights = []\n",
+    "\n",
+    "            # Populate the choices and weights by iterating through the keys in the matched dictionary \n",
+    "            matched_dictionary_keylist = list(matched_dictionary.keys())\n",
+    "            for j in range(0, len(matched_dictionary_keylist)):\n",
+    "                # insert the last word of the tuple key insert it as a choice\n",
+    "                choices.append(matched_dictionary_keylist[j][len(matched_dictionary_keylist[j])-1])\n",
+    "\n",
+    "                # Weights are calculated by taking the actual count of the key and dividing it by all that is found\n",
+    "                choices_weights.append(matched_dictionary[matched_dictionary_keylist[j]] / len(matched_dictionary) + k)\n",
+    "            \n",
+    "            chosen = random.choices(choices, weights=choices_weights, k=1)\n",
+    "            song_sequence.append(chosen[0])\n",
+    "    else:\n",
+    "        # Special case for unigrams only\n",
+    "        choices = []\n",
+    "        choices_weights = []\n",
+    "        n_gram_frequencies_keylist = list(n_gram_frequencies.keys())\n",
+    "        for j in range(0, len(n_gram_frequencies_keylist)):\n",
+    "            # insert the last word of the tuple key as a choice\n",
+    "            choices.append(n_gram_frequencies_keylist[j][0])\n",
+    "            choices_weights.append(n_gram_frequencies[n_gram_frequencies_keylist[j]] / len(n_gram_frequencies) + k)\n",
+    "        \n",
+    "        # Adding the first item so tweet isn't empty\n",
+    "        song_sequence.append(random.choices(choices, weights=choices_weights, k=1)[0])\n",
+    "        while song_sequence[len(song_sequence)-1] != \"<end>\":\n",
+    "            chosen = random.choices(choices, weights=choices_weights, k=1)\n",
+    "            song_sequence.append(chosen[0])\n",
+    "    \n",
+    "    # Removing the paddings and converting the grams of the tweet into a string\n",
+    "    # return ' '.join([word for word in tweet if word != \"<start>\" and word != \"<end>\"])\n",
+    "    for x in range(n-1):\n",
+    "        song_sequence.pop(0)\n",
+    "    song_sequence.pop(-1)\n",
+    "    return song_sequence"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "id": "f44055c2-6ef5-423c-bc5e-ce6eeb99f6ce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Our bigram of notes!\n",
+    "song_token_sequences = create_n_grams(4, tokenize_pieces(\"./midis\"))\n",
+    "song = generate_song(song_token_sequences, 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "id": "d74ed28e-8533-4d0a-908d-5eee695b8b19",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_midi(song):\n",
+    "    mid = MidiFile()\n",
+    "    track = MidiTrack()\n",
+    "    mid.tracks.append(track)\n",
+    "    for note in song: \n",
+    "        track.append(mido.Message('note_on', note = note, time = 0))\n",
+    "        track.append(mido.Message('note_off', note = note, time = 256))\n",
+    "\n",
+    "    mid.save('new_song2.mid')\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "id": "89cea2d8-711e-419c-b40c-6a4818aefc23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "create_midi(song)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5419f373",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def convert_rhythm_dictionary_to_note_sequences(right_hand_time_dict, left_hand_time_dict):\n",
+    "    \n",
+    "    right_hand = []\n",
+    "    left_hand = []\n",
+    "    \n",
+    "    for time in right_hand_time_dict.keys():\n",
+    "        right_hand.append(right_hand_time_dict[time])\n",
+    "        left_hand.append(left_hand_time_dict[time])\n",
+    "    \n",
+    "    return right_hand\n",
+    "        \n",
+    "        \n",
+    "    \n",
+    "    "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/FULL_PIANO_SONG.mid b/FULL_PIANO_SONG.mid
diff --git a/README.md b/README.md
@@ -1,2 +1,3 @@
-# NLP_study_buddy
-NLP Block 1 Final Project
+# Study Buddy Classical Music Generator
+Generating classical music using NLP techniques like n-grams and HMM for melody, harmony, and rhythm generation. \
+Utilized a MIDI dataset of 100+ classical pieces from various composers. Final project for Natural Language Processing course at Colorado College.
diff --git a/final_paper/CP341_Final.pdf b/final_paper/CP341_Final.pdf
diff --git a/final_paper/images/both-garageband.png b/final_paper/images/both-garageband.png
diff --git a/final_paper/images/chord.png b/final_paper/images/chord.png
diff --git a/final_paper/images/message.png b/final_paper/images/message.png
diff --git a/final_paper/images/message2.png b/final_paper/images/message2.png
diff --git a/final_paper/images/musical_complexity_fig.jpeg b/final_paper/images/musical_complexity_fig.jpeg
diff --git a/final_paper/images/notes.png b/final_paper/images/notes.png
diff --git a/final_paper/images/otherChord.png b/final_paper/images/otherChord.png
diff --git a/final_paper/images/right-garage2.png b/final_paper/images/right-garage2.png
diff --git a/final_paper/images/right-garageband.png b/final_paper/images/right-garageband.png
diff --git a/final_paper/images/rouge.png b/final_paper/images/rouge.png