diff --git a/moodys_challenge.ipynb b/moodys_challenge.ipynb index 7ee0e87..8dce1b1 100644 --- a/moodys_challenge.ipynb +++ b/moodys_challenge.ipynb @@ -161,15 +161,59 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "4ec880cd", + "execution_count": 1, + "id": "9fb55a85", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", - "import numpy as np\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4ec880cd", + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'sp500_full.csv'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[2], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Port the market data to the notebook\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m time_series \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mlog(\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msp500_full.csv\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheader\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m[\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m.\u001b[39mto_numpy()\u001b[38;5;241m.\u001b[39msqueeze())\n", + "File \u001b[1;32mc:\\Users\\JackOberman\\anaconda3\\envs\\moodys\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, 
infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[0;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 1014\u001b[0m dialect,\n\u001b[0;32m 1015\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[0;32m 1023\u001b[0m )\n\u001b[0;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\JackOberman\\anaconda3\\envs\\moodys\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[0;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[0;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", + "File 
\u001b[1;32mc:\\Users\\JackOberman\\anaconda3\\envs\\moodys\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\JackOberman\\anaconda3\\envs\\moodys\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[0;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 
1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1884\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1885\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1887\u001b[0m \u001b[43m 
\u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1890\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n", + "File \u001b[1;32mc:\\Users\\JackOberman\\anaconda3\\envs\\moodys\\Lib\\site-packages\\pandas\\io\\common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m 869\u001b[0m \u001b[38;5;66;03m# Check 
whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[0;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[1;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[0;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 878\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 879\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[0;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n", + "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'sp500_full.csv'" + ] + } + ], + "source": [ 
def embed_series(time_series: np.ndarray, dimension: int, time_delay: int) -> np.ndarray:
    """Build the time-delay (Takens) embedding of a series.

    Embedded point ``z`` is
    ``(x[z], x[z + time_delay], ..., x[z + (dimension - 1) * time_delay])``.

    Args:
        time_series: Observations, indexed along the first axis.
        dimension: Number of coordinates of every embedded point.
        time_delay: Index gap between consecutive coordinates of a point.

    Returns:
        For a 1-D series, an array of shape ``(1, num_points, dimension)``
        with the dtype of the input, where
        ``num_points = len(time_series) - (dimension - 1) * time_delay``
        clamped at zero (a series that is too short yields an empty
        ``(1, 0, dimension)`` array).
    """
    series = np.asarray(time_series)
    num_points = max(len(series) - (dimension - 1) * time_delay, 0)

    # Row z of the index grid is z + m * time_delay for m = 0..dimension-1,
    # so one fancy-indexing lookup replaces the nested Python loops.
    starts = np.arange(num_points)[:, np.newaxis]
    offsets = time_delay * np.arange(dimension)[np.newaxis, :]
    embedded = series[starts + offsets]

    # Leading axis of length one, as produced by the original implementation.
    return embedded[np.newaxis, ...]


# Keyword arguments on purpose: the signature takes ``dimension`` before
# ``time_delay``, and passing the two positionally in the opposite order
# silently embeds with the two hyperparameters exchanged.
takens_embedding = embed_series(time_series, dimension=dimension, time_delay=time_delay)
def get_boundary_operator(simplex_tree, k):
    """Return the matrix of the k-th boundary operator.

    Args:
        simplex_tree: Simplices grouped by dimension; ``simplex_tree[d]`` is a
            list of vertex tuples of length ``d + 1``.
        k: Dimension of the simplices whose boundary is taken.

    Returns:
        Float array of shape ``(len(simplex_tree[k - 1]), len(simplex_tree[k]))``.
        Column ``c`` holds the boundary of the ``c``-th k-simplex: the face
        obtained by dropping the vertex at sorted position ``i`` gets the
        coefficient ``(-1) ** i``. Levels that are empty or absent give a
        matrix with zero columns (or rows); ``k <= 0`` gives a
        ``(0, number_of_vertices)`` matrix.
    """
    n_levels = len(simplex_tree)
    if k <= 0:
        # Vertices have no faces; do not wrap around to simplex_tree[-1].
        n_vertices = len(simplex_tree[0]) if n_levels else 0
        return np.zeros((0, n_vertices))

    faces = simplex_tree[k - 1] if k - 1 < n_levels else []
    simplices = simplex_tree[k] if k < n_levels else []

    # Row lookup keyed by the sorted vertex tuple, so the result does not
    # depend on the order in which the faces are listed.
    row_of = {tuple(sorted(face)): row for row, face in enumerate(faces)}

    operator = np.zeros((len(faces), len(simplices)))
    for col, simplex in enumerate(simplices):
        vertices = sorted(simplex)
        for position in range(len(vertices)):
            face = tuple(vertices[:position] + vertices[position + 1:])
            row = row_of.get(face)
            if row is not None:  # faces missing from the tree are skipped
                operator[row, col] = (-1) ** position
    return operator


def get_boundary_operators(simplex_tree, window):
    """Return the boundary operators for ``k = window - 1`` down to ``k = 1``."""
    return [get_boundary_operator(simplex_tree, k) for k in range(window - 1, 0, -1)]


def _group_by_dimension(simplices, window):
    """Bucket vertex sequences into ``window`` lists; bucket ``d`` holds those with ``d + 1`` vertices."""
    levels = [[] for _ in range(window)]
    for vertices in simplices:
        levels[len(vertices) - 1].append(tuple(vertices))
    return levels


def create_filtered_simplex_tree(simplex_tree, window):
    """Group the simplices yielded by ``simplex_tree.get_filtration()`` by dimension.

    Each item of the filtration is unpacked as ``(vertices, filtration_value)``;
    the filtration value is not used. Returns a list of ``window`` lists of
    vertex tuples.
    """
    return _group_by_dimension(
        (vertices for vertices, _ in simplex_tree.get_filtration()), window
    )


def create_fake_filtered_simplex_tree(simplex_tree, window):
    """Group a plain iterable of vertex lists (the toy example) by dimension.

    Returns a list of ``window`` lists of vertex tuples.
    """
    return _group_by_dimension(simplex_tree, window)
def get_laplacian(filtered_tree, k) -> np.ndarray:
    """Return the k-th combinatorial Laplacian of a dimension-grouped tree.

    Computed as ``B_k^H @ B_k + B_{k+1} @ B_{k+1}^H`` where ``B_k`` is the
    matrix returned by ``get_boundary_operator(filtered_tree, k)``.
    """
    boundary_k = get_boundary_operator(filtered_tree, k)
    boundary_k_plus_1 = get_boundary_operator(filtered_tree, k + 1)

    down_part = boundary_k.conj().T @ boundary_k
    up_part = boundary_k_plus_1 @ boundary_k_plus_1.conj().T
    return down_part + up_part


def get_padded_laplacian(filtered_tree, k) -> tuple[np.ndarray, int, float]:
    """Pad the k-th Laplacian to a power-of-two size.

    Returns:
        ``(padded_laplacian, q, max_eigen)`` where ``padded_laplacian`` is
        ``2**q x 2**q``, ``q`` is the smallest integer with
        ``2**q >= number of rows``, and ``max_eigen`` is the Gershgorin-style
        bound ``max_i (L[i, i] + sum_{j != i} |L[i, j]|)``. The added diagonal
        block is ``max_eigen / 2`` times the identity, so padding contributes
        no zero eigenvalues.

    Raises:
        ValueError: If the Laplacian has no rows.
    """
    laplacian = get_laplacian(filtered_tree, k)
    s_k = laplacian.shape[0]
    if s_k == 0:
        raise ValueError(f"the {k}-th Laplacian is empty: no {k}-simplices in the tree")

    # Exact integer ceil(log2(s_k)); avoids floating-point log2.
    q = (s_k - 1).bit_length()

    diagonal = np.diag(laplacian)
    off_diagonal_mass = np.abs(laplacian).sum(axis=1) - np.abs(diagonal)
    max_eigen = float(np.max(diagonal + off_diagonal_mass))
    if max_eigen == 0:
        # All-zero Laplacian: any positive number bounds its spectrum. Using 1
        # keeps the padding block non-zero and the later rescaling finite.
        max_eigen = 1.0

    new_dim = 2 ** q
    extra = new_dim - s_k
    padded_laplacian = np.pad(laplacian, ((0, extra), (0, extra)))
    padded_laplacian[s_k:, s_k:] = (max_eigen / 2) * np.eye(extra)

    return padded_laplacian, q, max_eigen


def get_h_mat(filtered_tree, k, precision=0.999) -> tuple[np.ndarray, int]:
    """Return the rescaled padded Laplacian and its qubit count.

    The padded Laplacian is multiplied by ``precision * 2 * pi / max_eigen``.

    Returns:
        ``(H, q)`` with ``H`` of size ``2**q x 2**q``.
    """
    delta = precision * 2 * np.pi
    padded_laplacian, q, max_eigen = get_padded_laplacian(filtered_tree, k)
    return (delta / max_eigen) * padded_laplacian, q
def get_betti(H_mat, q, *, shots=2048):
    """Estimate a Betti number with quantum phase estimation on ``exp(i * H_mat)``.

    Args:
        H_mat: Square matrix of size ``2**q`` (as produced by ``get_h_mat``).
        q: Number of qubits of the register the unitary acts on.
        shots: Number of simulator shots.

    Returns:
        ``(prob_zero, betti_num, gt_betti)``: the measured frequency of the
        all-zero outcome on the evaluation register, the rounded estimate
        ``round(2**q * prob_zero)``, and the classical reference
        ``H_mat.shape[0] - rank(H_mat)``.

    The number of evaluation qubits is read from the module-level
    ``precision_qubits``.
    """
    # NOTE: the arguments are used as passed; they are no longer overwritten
    # with the globals H_mat_real / q_real, which made every call analyse the
    # same matrix.
    n_eval = precision_qubits
    unitary_gate = UnitaryGate(expm(1j * H_mat))

    circuit = QuantumCircuit(n_eval + 2 * q, n_eval)

    # Hadamard on each system qubit followed by a CNOT onto a partner qubit
    # q positions further; presumably this prepares the system register in a
    # maximally mixed state once the partner qubits are ignored.
    system_qubits = range(n_eval, n_eval + q)
    circuit.h(system_qubits)
    for qubit in system_qubits:
        circuit.cx(qubit, qubit + q)

    phase_estimation = PhaseEstimation(num_evaluation_qubits=n_eval, unitary=unitary_gate)
    circuit = circuit.compose(phase_estimation)
    circuit.measure(range(n_eval), range(n_eval))

    backend = Aer.get_backend('aer_simulator')
    transpiled = transpile(circuit, backend)
    counts = backend.run(transpiled, shots=shots).result().get_counts()

    prob_zero = counts.get('0' * n_eval, 0) / shots
    betti_num = round(2**q * prob_zero)

    # Classical cross-check: dimension of the kernel of H_mat.
    gt_betti = H_mat.shape[0] - np.linalg.matrix_rank(H_mat)

    return prob_zero, betti_num, gt_betti
def lp_norm(x, p=2):
    """Return ``(sum_i |x_i| ** p) ** (1 / p)`` for the array-like ``x``."""
    magnitudes = np.abs(x)
    total = np.sum(magnitudes ** p)
    return total ** (1 / p)


def pairwise_distances(betti_curves, p=2):
    """Return the L^p distance between each curve and the one before it.

    Entry ``i`` of the result is ``lp_norm(betti_curves[i + 1] - betti_curves[i], p)``,
    so the output has one element fewer than ``betti_curves``.
    """
    return np.array([
        lp_norm(np.subtract(betti_curves[idx], betti_curves[idx - 1]), p)
        for idx in range(1, len(betti_curves))
    ])
"execution_count": 193, + "id": "b86935a5", + "metadata": {}, + "outputs": [], + "source": [ + "# We now compute the pairwise succesive distances in the betti curves and filter only those that are within a certain threshold\n", + "distances = pairwise_distances(betti_curves, p=4)\n", + "\n", + "crash_indices = np.where(distances > 0.99)[0] \n", + "crash_vals = distances[crash_indices]" + ] + }, + { + "cell_type": "code", + "execution_count": 198, + "id": "8612aa08", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 198, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
def verma_embed_series(time_series: np.ndarray, dimension: int):
    """Cut a series into overlapping windows of ``dimension`` consecutive values.

    Window ``i`` is ``time_series[i:i + dimension]`` for
    ``i = 0 .. len(time_series) - dimension - 1``; the result is a plain list
    and is empty when the series is not longer than ``dimension``.
    """
    window_count = len(time_series) - dimension
    embedded = []
    for start in range(window_count):
        embedded.append(time_series[start:start + dimension])
    return embedded
def get_sector_complex(timeslice):
    """Assemble the point cloud for one time slice across all sector ETFs.

    For every ticker in the module-level ``sector_etfs`` the entry
    ``sector_data[ticker][timeslice]`` is collected; the entries are stacked
    (one row per ticker) and the stack is transposed before being returned.
    """
    per_sector = [sector_data[ticker][timeslice] for ticker in sector_etfs]
    return np.stack(per_sector).T
"date_index = pd.date_range(start_date, end_date)\n", + "date_index = [date.strftime(\"%Y-%m-%d\") for date in date_index]\n", + "date_index = date_index[:-dimension]\n", + "date_index = [date_index[i] for i in range(0, len(date_index), 100)]\n", + "\n", + "plt.xticks(np.linspace(0, len(spy_data), len(date_index)), date_index)\n", + "plt.xticks(rotation=70)\n", + "plt.xlabel(\"Time\")\n", + "plt.ylabel(\"S&P Price\")\n", + "plt.title(\"Sector Level Betti Analysis\")\n", + "plt.legend()" ] }, { @@ -638,7 +1260,7 @@ ], "metadata": { "kernelspec": { - "display_name": "tda", + "display_name": "moodys", "language": "python", "name": "python3" }, @@ -652,7 +1274,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.9" } }, "nbformat": 4,