From 776728e4bdf1c51812fc24daabbf5d3c1ef78bc9 Mon Sep 17 00:00:00 2001 From: Carrie Holt Date: Tue, 2 Jul 2024 10:43:20 -0700 Subject: [PATCH 1/6] Add Github Actions configuration --- .github/workflows/main.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 000000000..21ef9d023 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,34 @@ +name: CI + +# We can specify which Github events will trigger a CI build +on: push + +# now define a single job 'build' (but could define more) +jobs: + + build: + + # we can also specify the OS to run tests on + runs-on: ubuntu-latest + + # a job is a seq of steps + steps: + + # Next we need to checkout out repository, and set up Python + # A 'name' is just an optional label shown in the log - helpful to clarify progress - and can be anything + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Test with PyTest + run: | + python -m pytest --cov=lcanalyzer.models tests/test_models.py \ No newline at end of file From 9ab7e6c643970904b01d0f4831e70973fe73bb82 Mon Sep 17 00:00:00 2001 From: Carrie Holt Date: Tue, 2 Jul 2024 11:00:34 -0700 Subject: [PATCH 2/6] Add GA build matrix for os and Python version --- .github/workflows/main.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 21ef9d023..8746be8f3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -8,8 +8,12 @@ jobs: build: - # we can also specify the OS to run tests on - runs-on: ubuntu-latest + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ["3.10", "3.11"] + + runs-on: ${{ matrix.os }} # a job is a seq of steps steps: @@ -19,10 +23,10 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 - - name: Set up Python + - name: Set up Python 3.11 uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: ${{ matrix.python-version }} - name: Install Python dependencies run: | From 3b61a259bb91ecb4d877c24e64604ece74486013 Mon Sep 17 00:00:00 2001 From: Carrie Holt Date: Wed, 3 Jul 2024 08:06:23 -0700 Subject: [PATCH 3/6] added more tests after looking at debugging strategies --- lcanalyzer/models.py | 27 ++++++++++ light-curve-analysis.ipynb | 27 ++++++++++ test-development.ipynb | 100 ++++++++++++++++++++++++------------- tests/test_models.py | 68 ++++++++++++++++++++++++- 4 files changed, 187 insertions(+), 35 deletions(-) diff --git a/lcanalyzer/models.py b/lcanalyzer/models.py index bc15671bc..0045a537d 100644 --- a/lcanalyzer/models.py +++ b/lcanalyzer/models.py @@ -42,3 +42,30 @@ def min_mag(data,mag_col): :returns: The min value of the column. """ return data[mag_col].min() + +def calc_stats(lc, bands, mag_col): + """Calculate max, mean and min values for all bands of a lightcurve. + :param lc: pd.DataFrame with observed magnitudes for a single source. + :param bands: a list of all magnitude bands. + :param mag_col: a string with the name of the column for calculating the min value. + :returns: The max, mean, and min values of the column for all bands. + """ + stats = {} + for b in bands: + stat = {} + stat["max"] = max_mag(lc[b], mag_col) + stat["mean"] = mean_mag(lc[b], mag_col) + stat["min"] = min_mag(lc[b], mag_col) + stats[b] = stat + return pd.DataFrame.from_records(stats) + +def normalize_lc(df,mag_col): + """ Normalize a single lightcurve. + :param df: pd.DataFrame with observed magnitudes for a single source. + :param mag_col: a string with the name of the column for normalizing. + :returns: Normalized lightcurve. + """ + min = min_mag(df,mag_col) + max = max_mag((df-min),mag_col) + lc = (df[mag_col]-min)/max + return lc \ No newline at end of file diff --git a/light-curve-analysis.ipynb b/light-curve-analysis.ipynb index c86f3c8af..7420e3daa 100644 --- a/light-curve-analysis.ipynb +++ b/light-curve-analysis.ipynb @@ -55,6 +55,33 @@ "plot_filter_symbols = {\"u\": \"o\", \"g\": \"^\", \"r\": \"v\", \"i\": \"s\", \"z\": \"*\", \"y\": \"p\"}" ] }, + { + "cell_type": "markdown", + "id": "9f6b7870-2b08-491b-a95a-02b3d7091bc2", + "metadata": {}, + "source": [ + "## Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0140e69c-4a9b-45ee-a1a5-9d48d9ab3bc6", + "metadata": {}, + "outputs": [], + "source": [ + "def calc_stats(lc, bands, mag_col):\n", + " # Calculate max, mean and min values for all bands of a light curve\n", + " stats = {}\n", + " for b in bands:\n", + " stat = {}\n", + " stat[\"max\"] = models.max_mag(lc[b], mag_col)\n", + " stat[\"mean\"] = models.max_mag(lc[b], mag_col)\n", + " stat[\"min\"] = models.mean_mag(lc[b], mag_col)\n", + " stats[b] = stat\n", + " return pd.DataFrame.from_records(stats)" + ] + }, { "cell_type": "markdown", "id": "01f08b64-6d72-402b-b586-8bd0b148bd6d", diff --git a/test-development.ipynb b/test-development.ipynb index 48c22ea9e..3f5775b43 100644 --- a/test-development.ipynb +++ b/test-development.ipynb @@ -16,6 +16,7 @@ "outputs": [], "source": [ "import pandas as pd\n", + "import pandas.testing as pdt\n", "import lcanalyzer.models as models" ] }, @@ -49,19 +50,36 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "id": "4db3c851-f2bf-4a30-932a-b5267728e82d", "metadata": {}, "outputs": [], "source": [ - "### Get maximum values for all bands\n", - "def calc_stat(lc, bands, mag_col):\n", - " # Define an empty dictionary where we will store the results\n", - " stat = {}\n", - " # For each band get the maximum value and store it in the dictionary\n", + "def calc_stats(lc, bands, mag_col):\n", + " # Calculate max, mean and min values for all bands of a light curve\n", + " stats = {}\n", " for b in bands:\n", - " stat[b + \"_max\"] = models.max_mag(lc[b], mag_col)\n", - " return stat" + " stat = {}\n", + " stat[\"max\"] = models.max_mag(lc[b], mag_col)\n", + " stat[\"mean\"] = models.mean_mag(lc[b], mag_col)\n", + " stat[\"min\"] = models.min_mag(lc[b], mag_col)\n", + " stats[b] = stat\n", + " return pd.DataFrame.from_records(stats)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "fdea13d3-cb23-4357-a878-c7767232b444", + "metadata": {}, + "outputs": [], + "source": [ + "def normalize_lc(df,mag_col):\n", + " # Normalize a single light curve\n", + " min = min_mag(df,mag_col)\n", + " max = max_mag((df-min),mag_col)\n", + " lc = (df[mag_col]-min)/max\n", + " return lc" ] }, { @@ -343,21 +361,46 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, + "id": "104e0600-286f-4c77-ad41-2e2c51e62f54", + "metadata": {}, + "outputs": [], + "source": [ + "test_cols = list(\"abc\")\n", + "test_dict = {}\n", + "test_dict[\"df0\"] = pd.DataFrame(\n", + " data=[[8, 8, 0], \n", + " [0, 1, 1], \n", + " [2, 3, 1], \n", + " [7, 9, 7]], columns=test_cols\n", + ")\n", + "test_dict[\"df1\"] = pd.DataFrame(\n", + " data=[[3, 8, 2], \n", + " [3, 8, 0], \n", + " [3, 9, 8], \n", + " [8, 2, 5]], columns=test_cols\n", + ")\n", + "test_dict[\"df2\"] = pd.DataFrame(\n", + " data=[[8, 4, 3], \n", + " [7, 6, 3], \n", + " [4, 2, 9], \n", + " [6, 4, 0]], columns=test_cols\n", + ")\n", + "\n", + "test_output = pd.DataFrame(data=[[9,9,6],[5.25,6.75,4.],[1,2,2]],columns=['df0','df1','df2'],index=['max','mean','min'])\n", + "\n", + "pdt.assert_frame_equal(calc_stats(test_dict, test_dict.keys(), 'b'),\n", + " test_output,\n", + " check_exact=False,\n", + " atol=0.01)" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "a11542ef-d432-47c9-b90e-b0f9cdc11af9", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "test_input = pd.DataFrame(data=[[1, 5, 3], [7, 8, 9], [3, 4, 1]], columns=list(\"abc\"))\n", "test_output = 7\n", @@ -366,21 +409,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "b3eff457-9f61-4c47-9276-90c893925944", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1 = pd.DataFrame(data=[[1, 5, 3], [7, 8, 9], [3, 4, 1]], columns=list(\"abc\"))\n", "df2 = pd.DataFrame(data=[[7, 3, 2], [8, 4, 2], [5, 6, 4]], columns=list(\"abc\"))\n", diff --git a/tests/test_models.py b/tests/test_models.py index 5921b027d..4a9b6359b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,6 +1,7 @@ """Tests for statistics functions within the Model layer.""" import pandas as pd +import pandas.testing as pdt import pytest @pytest.mark.parametrize( @@ -81,4 +82,69 @@ def test_max_mag_strings(): test_input_colname = "b" with pytest.raises(TypeError): - error_expected = max_mag('string', test_input_colname) \ No newline at end of file + error_expected = max_mag('string', test_input_colname) + +def test_calc_stats(): + """ Test calc_stats function works for multiple bands. """ + from lcanalyzer.models import calc_stats + + test_cols = list("abc") + test_dict = {} + test_dict["df0"] = pd.DataFrame( + data=[[8, 8, 0], + [0, 1, 1], + [2, 3, 1], + [7, 9, 7]], columns=test_cols + ) + test_dict["df1"] = pd.DataFrame( + data=[[3, 8, 2], + [3, 8, 0], + [3, 9, 8], + [8, 2, 5]], columns=test_cols + ) + test_dict["df2"] = pd.DataFrame( + data=[[8, 4, 3], + [7, 6, 3], + [4, 2, 9], + [6, 4, 0]], columns=test_cols + ) + + test_output = pd.DataFrame(data=[[9,9,6],[5.25,6.75,4.],[1,2,2]],columns=['df0','df1','df2'],index=['max','mean','min']) + + pdt.assert_frame_equal(calc_stats(test_dict, test_dict.keys(), 'b'), + test_output, + check_exact=False, + atol=0.01) + + +# Parametrization for normalize_lc function testing +@pytest.mark.parametrize( + "test_input_df, test_input_colname, expected", + [ + (pd.DataFrame(data=[[8, 9, 1], + [1, 4, 1], + [1, 2, 4], + [1, 4, 1]], + columns=list("abc")), + "b", + pd.Series(data=[1,0.285,0,0.285])), + (pd.DataFrame(data=[[1, 1, 1], + [1, 1, 1], + [1, 1, 1], + [1, 1, 1]], + columns=list("abc")), + "b", + pd.Series(data=[0.,0.,0.,0.])), + (pd.DataFrame(data=[[0, 0, 0], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0]], + columns=list("abc")), + "b", + pd.Series(data=[0.,0.,0.,0.])), + ]) +def test_normalize_lc(test_input_df, test_input_colname, expected): + """Test how normalize_lc function works for arrays of positive integers.""" + from lcanalyzer.models import normalize_lc + import pandas.testing as pdt + pdt.assert_series_equal(normalize_lc(test_input_df,test_input_colname),expected,check_exact=False,atol=0.01,check_names=False) \ No newline at end of file From 4958309b856bc6c5fd23dac2b9d576eec8469ff7 Mon Sep 17 00:00:00 2001 From: Carrie Holt Date: Wed, 3 Jul 2024 08:20:42 -0700 Subject: [PATCH 4/6] added code to replace NaNs with zeros --- lcanalyzer/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lcanalyzer/models.py b/lcanalyzer/models.py index 0045a537d..20faa9c93 100644 --- a/lcanalyzer/models.py +++ b/lcanalyzer/models.py @@ -68,4 +68,5 @@ def normalize_lc(df,mag_col): min = min_mag(df,mag_col) max = max_mag((df-min),mag_col) lc = (df[mag_col]-min)/max + lc = lc.fillna(0) return lc \ No newline at end of file From 22e2bfd3b74e4feef99b81073f82f7e8cbe63edf Mon Sep 17 00:00:00 2001 From: Miranda Gorsuch Date: Thu, 4 Jul 2024 11:06:14 -0500 Subject: [PATCH 5/6] Added negative test to mean_mag function --- tests/test_models.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_models.py b/tests/test_models.py index 4a9b6359b..1a37d421f 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -41,6 +41,12 @@ def test_max_mag(test_df, test_colname, expected): columns=list("abc")), "b", 0), + (pd.DataFrame(data=[[-7, -7, -3], + [-4, -3, -1], + [-1, -5, -3]], + columns=list("abc")), + "a", + -4), ]) def test_mean_mag(test_df, test_colname, expected): """Test mean function works for array of zeroes and positive integers.""" From 2f569c5b8e9cc1658d8c2939de4bc7e7783c0e62 Mon Sep 17 00:00:00 2001 From: Meet Vyas <87984759+Meet-Vyas-Dev@users.noreply.github.com> Date: Thu, 4 Jul 2024 22:19:06 +0530 Subject: [PATCH 6/6] Added a comment for the description for the function --- test-development.ipynb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test-development.ipynb b/test-development.ipynb index 3f5775b43..8ffaff181 100644 --- a/test-development.ipynb +++ b/test-development.ipynb @@ -414,6 +414,8 @@ "metadata": {}, "outputs": [], "source": [ + "# Here we create a function that checks the maximum magnitude for each array in the dataset\n", + "\n", "df1 = pd.DataFrame(data=[[1, 5, 3], [7, 8, 9], [3, 4, 1]], columns=list(\"abc\"))\n", "df2 = pd.DataFrame(data=[[7, 3, 2], [8, 4, 2], [5, 6, 4]], columns=list(\"abc\"))\n", "df3 = pd.DataFrame(data=[[2, 6, 3], [1, 3, 6], [8, 9, 1]], columns=list(\"abc\"))\n",