From 8c9e1b3415c73beccbc98f666b13d1c54f3d46f8 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 14:20:42 +0800 Subject: [PATCH 01/24] Update model.py --- desidlas/training/model.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/desidlas/training/model.py b/desidlas/training/model.py index 5aea498..e3c5fea 100644 --- a/desidlas/training/model.py +++ b/desidlas/training/model.py @@ -173,7 +173,10 @@ def build_model(hyperparameters,INPUT_SIZE,matrix_size): #tf.compat.v1.placeholder:claim a tensor that needs to be filled (the data type, shape and name) #x: the empty tensor need to be filled with the input data - x = tf.compat.v1.placeholder(tf.float32, shape=[None,matrix_size, INPUT_SIZE], name='x') + if matrix_size == 1: + x = tf.compat.v1.placeholder(tf.float32, shape=[None,INPUT_SIZE], name='x') + if matrix_size == 4: + x = tf.compat.v1.placeholder(tf.float32, shape=[None,matrix_size, INPUT_SIZE], name='x') #claim the tensor for three labels @@ -189,7 +192,7 @@ def build_model(hyperparameters,INPUT_SIZE,matrix_size): # Stride (4,1) # number of filters = 4 (features?) # Neuron activation = ReLU (rectified linear unit) - W_conv1 = weight_variable([conv1_kernel, 1, 4, conv1_filters]) + W_conv1 = weight_variable([conv1_kernel, 1, matrix_size, conv1_filters]) b_conv1 = bias_variable([conv1_filters]) # https://www.tensorflow.org/versions/r0.10/api_docs/python/nn.html#convolution From 795d5d926aac92fc90ca5c4b71ec74bbf63c5f81 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 14:28:41 +0800 Subject: [PATCH 02/24] Update parameterset.py --- desidlas/training/parameterset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/desidlas/training/parameterset.py b/desidlas/training/parameterset.py index 4badb15..caf0aab 100644 --- a/desidlas/training/parameterset.py +++ b/desidlas/training/parameterset.py @@ -19,7 +19,7 @@ # batch_size [400,700, 400, 500, 600, 700, 850, 1000], # l2_regularization_penalty - [0.005, 0.01, 0.008, 0.005, 0.003], + [0.005,0.005, 0.01, 0.008, 0.005, 0.003], # dropout_keep_prob [0.9,0.98, 0.75, 0.9, 0.95, 0.98, 1], # fc1_n_neurons @@ -29,7 +29,7 @@ # fc2_2_n_neurons [500,350, 200, 350, 500, 700, 900, 1500], # fc2_3_n_neurons - [150, 200, 350, 500, 700, 900, 1500], + [150,150, 200, 350, 500, 700, 900, 1500], # conv1_kernel [40,32, 20, 22, 24, 26, 28, 32, 40, 48, 54], # conv2_kernel @@ -37,7 +37,7 @@ # conv3_kernel [20,16, 10, 14, 16, 20, 24, 28, 32, 34], # conv1_filters - [100, 64, 80, 90, 100, 110, 120, 140, 160, 200], + [100,100, 64, 80, 90, 100, 110, 120, 140, 160, 200], # conv2_filters [256,96, 80, 96, 128, 192, 256], # conv3_filters @@ -49,11 +49,11 @@ # conv3_stride [1,1, 1, 2, 3, 4, 5, 6], # pool1_kernel - [7, 3, 4, 5, 6, 7, 8, 9], + [7,7, 3, 4, 5, 6, 7, 8, 9], # pool2_kernel [4,6, 4, 5, 6, 7, 8, 9, 10], # pool3_kernel - [6, 4, 5, 6, 7, 8, 9, 10], + [6,6, 4, 5, 6, 7, 8, 9, 10], # pool1_stride [1,4, 1, 2, 4, 5, 6], # pool2_stride From 7d451e3482ce3a8fe298008db956337109c34aa5 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 14:33:21 +0800 Subject: [PATCH 03/24] Update get_partprediction.py --- desidlas/prediction/get_partprediction.py | 26 ++++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/desidlas/prediction/get_partprediction.py b/desidlas/prediction/get_partprediction.py index afc71b0..e7770af 100644 --- 
a/desidlas/prediction/get_partprediction.py +++ b/desidlas/prediction/get_partprediction.py @@ -40,12 +40,14 @@ def t(tensor_name): -def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): +def predictions_ann(hyperparameters, INPUT_SIZE,matrix_size,flux, checkpoint_filename, TF_DEVICE=''): ''' Perform training Parameters ---------- hyperparameters:hyperparameters for the CNN model structure + INPUT_SIZE: pixels numbers for each window , 400 for high SNR and 600 for low SNR + matrix_size: 1 if without smoothing, 4 if smoothing for low SNR flux:list (400 or 600 length), flux from sightline checkpoint_filename: CNN model file used to detect DLAs TF_DEVICE: use which gpu to train, default is '/gpu:1' @@ -69,7 +71,7 @@ def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): with tf.Graph().as_default(): - build_model(hyperparameters) # build the CNN model according to hyperparameters + build_model(hyperparameters,INPUT_SIZE,matrix_size) # build the CNN model according to hyperparameters with tf.device(TF_DEVICE), tf.compat.v1.Session() as sess: tf.compat.v1.train.Saver().restore(sess, checkpoint_filename+".ckpt") #load model files @@ -96,12 +98,17 @@ def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): parser = argparse.ArgumentParser() parser.add_argument('-p', '--preddataset', help='Datasets to detect DLAs , npy format', required=True, default=False) parser.add_argument('-o', '--output_file', help='output files to save the prediction result, npy format', required=False, default=False) - parser.add_argument('-m', '--modelfiles', help='CNN models for prediction, high snr model or mid snr model', required=False, default=False) + parser.add_argument('-model', '--modelfiles', help='CNN models for prediction, high snr model or mid snr model', required=False, default=False) + parser.add_argument('-t', '--INPUT_SIZE', help='set the input data size', required=False, default=400) + parser.add_argument('-m', '--matrix_size', help='set the matrix size when using smooth', required=False, default=1) + args = vars(parser.parse_args()) - RUN_SINGLE_ITERATION = not args['hyperparamsearch'] - checkpoint_filename = args['checkpoint_file'] if RUN_SINGLE_ITERATION else None + batch_results_file = args['output_file'] + INPUT_SIZE = args['INPUT_SIZE'] + matrix_size = args['matrix_size'] + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG) @@ -111,8 +118,7 @@ def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): from desidlas.training.parameterset import parameter_names from desidlas.training.parameterset import parameters hyperparameters = {} - for k in range(0,len(parameter_names)): - hyperparameters[parameter_names[k]] = parameters[k][0] + pred_dataset=args['preddataset'] @@ -123,8 +129,12 @@ def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): modelfile=args['modelfiles'] if modelfile == 'high': checkpoint_filename='desidlas/prediction/model/train_highsnr/current_99999' + for k in range(0,len(parameter_names)): + hyperparameters[parameter_names[k]] = parameters[k][1] if modelfile == 'mid': checkpoint_filename='desidlas/prediction/model/train_midsnr/current_99999' + for k in range(0,len(parameter_names)): + hyperparameters[parameter_names[k]] = parameters[k][1] dataset={} @@ -140,7 +150,7 @@ def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): flux=np.array(r[sight_id]['FLUX']) - (pred, conf, offset, coldensity)=predictions_ann(hyperparameters, flux, 
checkpoint_filename, TF_DEVICE='') + (pred, conf, offset, coldensity)=predictions_ann(hyperparameters, INPUT_SIZE,matrix_size,flux, checkpoint_filename, TF_DEVICE='') dataset[sight_id]={'pred':pred,'conf':conf,'offset': offset, 'coldensity':coldensity } From d0bde2c447f498416447a64f732b4911f00b2300 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 14:34:03 +0800 Subject: [PATCH 04/24] Update training.py --- desidlas/training/training.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/desidlas/training/training.py b/desidlas/training/training.py index 99aac8e..3923ac5 100644 --- a/desidlas/training/training.py +++ b/desidlas/training/training.py @@ -314,8 +314,8 @@ def calc_normalized_score(best_accuracy, best_offset_rmse, best_coldensity_rmse) parser.add_argument('-c', '--checkpoint_file', help='Name of the checkpoint file to save (without file extension)', required=False, default=savemodel_path) #../models/training/current parser.add_argument('-r', '--train_dataset_filename', help='File name of the training dataset without extension', required=False, default=traindata_path) parser.add_argument('-e', '--test_dataset_filename', help='File name of the testing dataset without extension', required=False, default=testdata_path) - parser.add_argument('-t', '--INPUT_SIZE', help='set the input data size', required=False, default=600) - parser.add_argument('-m', '--matrix_size', help='set the matrix size when using smooth', required=False, default=4) + parser.add_argument('-t', '--INPUT_SIZE', help='set the input data size', required=False, default=400) + parser.add_argument('-m', '--matrix_size', help='set the matrix size when using smooth', required=False, default=1) args = vars(parser.parse_args()) RUN_SINGLE_ITERATION = not args['hyperparamsearch'] From 0f8056e0b5d88c5d8b9f7728ca36a42f31b656cb Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 14:42:44 +0800 Subject: [PATCH 05/24] Delete training_prediction.ipynb --- docs/notebook/training_prediction.ipynb | 151 ------------------------ 1 file changed, 151 deletions(-) delete mode 100644 docs/notebook/training_prediction.ipynb diff --git a/docs/notebook/training_prediction.ipynb b/docs/notebook/training_prediction.ipynb deleted file mode 100644 index ebe28fe..0000000 --- a/docs/notebook/training_prediction.ipynb +++ /dev/null @@ -1,151 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#This notebook is about how to do the training and prediction" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# training" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#the codes used for training is in desidlas/training/training.py\n", - "#for training the model, all you need is to run:\n", - "python training.py -i 1000000 -r 'traingset.npy' -e 'testset.npy' -c 'trainingmode/current' \n", - "\n", - "#1000000:training iterations\n", - "# -r : path to the training dataset\n", - "# -e : path to the testing dataset\n", - "# -c : path to save the model file , paht+'/current'\n", - "#result : print training accuracy every 200 steps , print test accuracy every 5000 steps (classification accuracy)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#for low SNR training, we used smoothing 
method, so the command is\n",
-    "python training.py -i 1000000 -r 'traingset.npy' -e 'testset.npy' -c 'trainingmode/current' -t 600 -m 4\n",
-    "# -t : pixel numbers of each window\n",
-    "#"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Prediction"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#the codes used for detect DLAs is in desidlas/prediction/get_partprediction.py\n",
-    "#to get the prediction for every part(400 or 600 pixels) , all you need is to run:\n",
-    "python get_partprediction.py -p 'pre_dataset.npy' -o 'partpre.npy' -m high\n",
-    "\n",
-    "\n",
-    "# -p : path to the dataset used to detect DLAs\n",
-    "# -o : path to the output file\n",
-    "# -m : high or mid , which CNN model is used\n",
-    "#we recommand to use high model for snr>6 spectra and mid model for 2.5<snr<6 spectra"

Date: Mon, 1 Nov 2021 14:43:24 +0800
Subject: [PATCH 06/24] Add files via upload

---
 docs/notebook/training_prediction.ipynb | 151 ++++++++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100644 docs/notebook/training_prediction.ipynb

diff --git a/docs/notebook/training_prediction.ipynb b/docs/notebook/training_prediction.ipynb
new file mode 100644
index 0000000..8dfb403
--- /dev/null
+++ b/docs/notebook/training_prediction.ipynb
@@ -0,0 +1,151 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#This notebook is about how to do the training and prediction"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#the code used for training is in desidlas/training/training.py\n",
+    "#for training the model, all you need is to run:\n",
+    "python training.py -i 1000000 -r 'traingset.npy' -e 'testset.npy' -c 'trainingmode/current' \n",
+    "\n",
+    "#1000000:training iterations\n",
+    "# -r : path to the training dataset\n",
+    "# -e : path to the testing dataset\n",
+    "# -c : path to save the model file , path+'/current'\n",
+    "#result : print training accuracy every 200 steps , print test accuracy every 5000 steps (classification accuracy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#for low SNR training, we used smoothing method, so the command is\n",
+    "python training.py -i 1000000 -r 'traingset.npy' -e 'testset.npy' -c 'trainingmode/current' -t 600 -m 4\n",
+    "# -t : pixel numbers of each window\n",
+    "#"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#the code used to detect DLAs is in desidlas/prediction/get_partprediction.py\n",
+    "#to get the prediction for every part(400 or 600 pixels) , all you need is to run:\n",
+    "python get_partprediction.py -p 'pre_dataset.npy' -o 'partpre.npy' -model high\n",
+    "\n",
+    "\n",
+    "# -p : path to the dataset used to detect DLAs\n",
+    "# -o : path to the output file\n",
+    "# -model : high or mid , which CNN model is used\n",
+    "#we recommend to use high model for snr>6 spectra and mid model for 2.5<snr<6 spectra"
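The part-prediction output named above is a per-sightline dictionary saved with np.save: each sightline id maps to 'pred', 'conf', 'offset' and 'coldensity' arrays, one value per window. A minimal sketch of reading it back — an editor's illustration, not code from the patches; 'partpre.npy' is just the example output name used in the cell above:

    import numpy as np

    # np.save pickles the dictionary, so allow_pickle is needed to recover it
    results = np.load('partpre.npy', allow_pickle=True).item()
    for sight_id, res in results.items():
        pred = res['pred']              # DLA / no-DLA classification per window
        conf = res['conf']              # classifier confidence
        offset = res['offset']          # pixel offset to the DLA centre
        coldensity = res['coldensity']  # log10 NHI estimate

Date: Mon, 1 Nov 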
2021 14:46:06 +0800 Subject: [PATCH 07/24] Update get_partprediction.py --- desidlas/prediction/get_partprediction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desidlas/prediction/get_partprediction.py b/desidlas/prediction/get_partprediction.py index e7770af..5df7b88 100644 --- a/desidlas/prediction/get_partprediction.py +++ b/desidlas/prediction/get_partprediction.py @@ -134,7 +134,7 @@ def predictions_ann(hyperparameters, INPUT_SIZE,matrix_size,flux, checkpoint_fil if modelfile == 'mid': checkpoint_filename='desidlas/prediction/model/train_midsnr/current_99999' for k in range(0,len(parameter_names)): - hyperparameters[parameter_names[k]] = parameters[k][1] + hyperparameters[parameter_names[k]] = parameters[k][0] dataset={} From bf5e74501897c62d7987cf2ab3d1ad1e403edf6a Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 20:49:34 +0800 Subject: [PATCH 08/24] Update Data.py --- desidlas/data_model/Data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/desidlas/data_model/Data.py b/desidlas/data_model/Data.py index d3cc8f5..50640f7 100644 --- a/desidlas/data_model/Data.py +++ b/desidlas/data_model/Data.py @@ -2,8 +2,8 @@ from abc import ABCMeta -from dla_cnn.data_model import Id -from dla_cnn.data_model import Sightline +from desidlas.data_model import Id +from desidlas.data_model import Sightline class Data(object): From f80a11809064c50b5eb203b0f906dea8ff547d93 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 20:55:48 +0800 Subject: [PATCH 09/24] Update get_dataset.py --- desidlas/datasets/get_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/desidlas/datasets/get_dataset.py b/desidlas/datasets/get_dataset.py index d4b8f55..a0bf580 100644 --- a/desidlas/datasets/get_dataset.py +++ b/desidlas/datasets/get_dataset.py @@ -96,7 +96,7 @@ def make_smoothdatasets(sightlines,validate=True,kernel=smooth_kernel, REST_RANG col_density=np.hstack([data_split[3]]) lam=np.vstack([data_split[4]]) flux_matrix=smooth_flux(flux) - dataset[sightline.id]={'FLUXMATRIX':flux_matrix,'lam':lam,'labels_classifier': labels_classifier, 'labels_offset':labels_offset , 'col_density': col_density,'wavelength_dlas':wavelength_dlas,'coldensity_dlas':coldensity_dlas} + dataset[sightline.id]={'FLUX':flux_matrix,'lam':lam,'labels_classifier': labels_classifier, 'labels_offset':labels_offset , 'col_density': col_density,'wavelength_dlas':wavelength_dlas,'coldensity_dlas':coldensity_dlas} else: sample_masks=select_samples_50p_pos_neg(sightline,kernel=kernel) if sample_masks !=[]: @@ -105,7 +105,7 @@ def make_smoothdatasets(sightlines,validate=True,kernel=smooth_kernel, REST_RANG labels_offset=np.hstack([data_split[2][m] for m in sample_masks]) col_density=np.hstack([data_split[3][m] for m in sample_masks]) flux_matrix=smooth_flux(flux) - dataset[sightline.id]={'FLUXMATRIX':flux_matrix,'labels_classifier':labels_classifier,'labels_offset':labels_offset,'col_density': col_density} + dataset[sightline.id]={'FLUX':flux_matrix,'labels_classifier':labels_classifier,'labels_offset':labels_offset,'col_density': col_density} np.save(output,dataset) return dataset From 80b49452382667e7e18267e3408de84cc737da02 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Thu, 4 Nov 2021 22:37:49 +0800 Subject: [PATCH 10/24] Update installing.rst --- docs/installing.rst | 14 ++++++-------- 1 file changed, 6 
insertions(+), 8 deletions(-) diff --git a/docs/installing.rst b/docs/installing.rst index 877fd76..8821f15 100644 --- a/docs/installing.rst +++ b/docs/installing.rst @@ -83,20 +83,18 @@ Do these for docs:: Get The Model File ============== - The model files are too large to upload to github, you can find the model files for high S/N spectra here: + The model files are too large to upload to github, you can find the model files here: - https://drive.google.com/drive/folders/1DYOE_k9S_F0JmnAdFbTmHkVqyxFlc4t-?usp=sharing - - The model files are too large to upload to github, you can find the model files for low S/N spectra here : - - https://drive.google.com/drive/folders/1s5km1NAg5j0Y-tWI1q58Y09hjj0Jjc8C?usp=sharing + https://drive.google.com/drive/folders/1Cl07CuRBE9ljtvIoTWexEVNSd8Zzwyvg?usp=sharing + + The folders are different models for different S/N spectra. (high: >6. mid:3-6. low:<3) Test CNN ============== - When you finish the installing and want to test the CNN model (training and prediction), you can firstly download all the model files here: + When you finish the installing and want to test the CNN model (training and prediction), you can firstly download all the model files here(same link as above): - https://drive.google.com/drive/folders/1Cl07CuRBE9ljtvIoTWexEVNSd8Zzwyvg?usp=sharing + https://drive.google.com/drive/folders/1Cl07CuRBE9ljtvIoTWexEVNSd8Zzwyvg?usp=sharing And then add the environmental variable CNN_MODEL as the path to the model files like this: From b82d8c3a64275a977ef6ada956ed78cb36e5c8a7 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Sun, 7 Nov 2021 20:52:18 +0800 Subject: [PATCH 11/24] Update training.py --- desidlas/training/training.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/desidlas/training/training.py b/desidlas/training/training.py index 3923ac5..a06ee8e 100644 --- a/desidlas/training/training.py +++ b/desidlas/training/training.py @@ -140,7 +140,7 @@ def train_ann_test_batch(sess, ixs, data, summary_writer=None): -def train_ann(hyperparameters, train_dataset, test_dataset, save_filename=None, load_filename=None, tblogs = "../tmp/tblogs", TF_DEVICE='/gpu:1'): +def train_ann(hyperparameters, train_dataset, test_dataset, INPUT_SIZE,matrix_size,,save_filename=None, load_filename=None, tblogs = "../tmp/tblogs", TF_DEVICE='/gpu:1'): """ Perform training @@ -352,7 +352,7 @@ def calc_normalized_score(best_accuracy, best_offset_rmse, best_coldensity_rmse) #start the training (best_accuracy, last_accuracy, last_objective, best_offset_rmse, last_offset_rmse, best_coldensity_rmse, - last_coldensity_rmse) = train_ann(hyperparameters, train_dataset, test_dataset, + last_coldensity_rmse) = train_ann(hyperparameters, train_dataset, test_dataset,INPUT_SIZE,matrix_size, save_filename=checkpoint_filename, load_filename=args['loadmodel']) From 67306329551bbf36bcfe96b8613298eb828c31b5 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Sun, 7 Nov 2021 21:03:27 +0800 Subject: [PATCH 12/24] Update training.py --- desidlas/training/training.py | 1 + 1 file changed, 1 insertion(+) diff --git a/desidlas/training/training.py b/desidlas/training/training.py index a06ee8e..84dec3b 100644 --- a/desidlas/training/training.py +++ b/desidlas/training/training.py @@ -11,6 +11,7 @@ import tensorflow as tf import os from pathlib import Path +from pkg_resources import resource_filename from tensorflow.python.framework import ops ops.reset_default_graph() From 
ab3638a01037206f64740aec57f8f6426a38beee Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 8 Nov 2021 14:35:48 +0800 Subject: [PATCH 13/24] Update training.py --- desidlas/training/training.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/desidlas/training/training.py b/desidlas/training/training.py index 84dec3b..3db3bf8 100644 --- a/desidlas/training/training.py +++ b/desidlas/training/training.py @@ -297,6 +297,8 @@ def calc_normalized_score(best_accuracy, best_offset_rmse, best_coldensity_rmse) # Execute batch mode # from desidlas.data_model.Dataset import Dataset + from desidlas.training.parameterset import parameter_names + from desidlas.training.parameterset import parameters datafile_path = os.path.join(resource_filename('desidlas', 'tests'), 'datafile') traindata_path=os.path.join(datafile_path, 'sightlines-16-1375.npy') @@ -339,10 +341,6 @@ def calc_normalized_score(best_accuracy, best_offset_rmse, best_coldensity_rmse) os.remove(batch_results_file) if os.path.exists(batch_results_file) else None with open(batch_results_file, "a") as csvoutput: csvoutput.write("iteration_num,normalized_score,best_accuracy,last_accuracy,last_objective,best_offset_rmse,last_offset_rmse,best_coldensity_rmse,last_coldensity_rmse," + ",".join(parameter_names) + "\n") - - - from desidlas.training.parameterset import parameter_names - from desidlas.training.parameterset import parameters #hyperparameter search From 146649c00159c93127df698f529de0df45790385 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 8 Nov 2021 20:36:00 +0800 Subject: [PATCH 14/24] Update get_partprediction.py --- desidlas/prediction/get_partprediction.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/desidlas/prediction/get_partprediction.py b/desidlas/prediction/get_partprediction.py index 5df7b88..37b7c15 100644 --- a/desidlas/prediction/get_partprediction.py +++ b/desidlas/prediction/get_partprediction.py @@ -133,8 +133,14 @@ def predictions_ann(hyperparameters, INPUT_SIZE,matrix_size,flux, checkpoint_fil hyperparameters[parameter_names[k]] = parameters[k][1] if modelfile == 'mid': checkpoint_filename='desidlas/prediction/model/train_midsnr/current_99999' + for k in range(0,len(parameter_names)): + hyperparameters[parameter_names[k]] = parameters[k][1] + if modelfile == 'low': + checkpoint_filename='desidlas/prediction/model/train_lowsnr/current_99999' for k in range(0,len(parameter_names)): hyperparameters[parameter_names[k]] = parameters[k][0] + INPUT_SIZE = 600 + matrix_size = 4 dataset={} From cdfe4d3ffd76704b1e1aaa22f93e3a77c3f528c7 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Thu, 11 Nov 2021 19:59:49 +0800 Subject: [PATCH 15/24] Update get_partprediction.py --- desidlas/prediction/get_partprediction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desidlas/prediction/get_partprediction.py b/desidlas/prediction/get_partprediction.py index 37b7c15..be3e3d2 100644 --- a/desidlas/prediction/get_partprediction.py +++ b/desidlas/prediction/get_partprediction.py @@ -134,7 +134,7 @@ def predictions_ann(hyperparameters, INPUT_SIZE,matrix_size,flux, checkpoint_fil if modelfile == 'mid': checkpoint_filename='desidlas/prediction/model/train_midsnr/current_99999' for k in range(0,len(parameter_names)): - hyperparameters[parameter_names[k]] = parameters[k][1] + hyperparameters[parameter_names[k]] = parameters[k][0] if modelfile == 
'low':
         checkpoint_filename='desidlas/prediction/model/train_lowsnr/current_99999'
         for k in range(0,len(parameter_names)):

From da9bd57f681bb1099442a46b49ef997f7d83a714 Mon Sep 17 00:00:00 2001
From: Jiaqi Zou
Date: Fri, 12 Nov 2021 16:46:14 +0800
Subject: [PATCH 16/24] Update Sightline.py

---
 desidlas/data_model/Sightline.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/desidlas/data_model/Sightline.py b/desidlas/data_model/Sightline.py
index 47813b0..fc69380 100644
--- a/desidlas/data_model/Sightline.py
+++ b/desidlas/data_model/Sightline.py
@@ -1,5 +1,6 @@
 import numpy as np
 from desidlas.dla_cnn.spectra_utils import get_lam_data
+from desidlas.datasets.datasetting import split_sightline_into_samples
 
 class Sightline(object):
@@ -77,17 +78,16 @@ def is_lyb(self, peakix):
         """
         assert self.prediction is not None and peakix in self.prediction.peaks_ixs
 
-        lam, lam_rest, ix_dla_range = get_lam_data(self.loglam, self.z_qso)
-        kernelrangepx = 200
-        cut=((np.nonzero(ix_dla_range)[0])>=kernelrangepx)&((np.nonzero(ix_dla_range)[0])<=(len(lam)-kernelrangepx-1))
-        lam_analyse=lam[ix_dla_range][cut]
+        data_split=split_sightline_into_samples(self)
+        lam_analyse=data_split[5]
+
         lambda_higher = (lam_analyse[peakix]) / (1025.722/1215.67)#find the DLA that this peak corresponds to
 
         # An array of how close each peak is to being the ly-b of peakix in spectrum reference frame
         peak_difference_spectrum = np.abs(lam_analyse[self.prediction.peaks_ixs] - lambda_higher)
-        nearest_peak_ix = np.argmin(peak_difference_spectrum)#find the peak nearest to this DLA
+        nearest_peak_ix = np.argmin(peak_difference_spectrum)
 
-        # get the column density of the identified nearest peak (compute the NHI of these two)
+        # get the column density of the identified nearest peak
         _, potential_lya_nhi, _, _ = \
             self.prediction.get_coldensity_for_peak(self.prediction.peaks_ixs[nearest_peak_ix])
         _, potential_lyb_nhi, _, _ = \
             self.prediction.get_coldensity_for_peak(peakix)
 
         # Validations: check that the nearest peak is close enough to match
         # sanity check that the LyB is at least 0.3 less than the DLA
-        is_nearest_peak_within_range = peak_difference_spectrum[nearest_peak_ix] <= 15#the two are separated by less than 15
-        is_nearest_peak_larger_coldensity = potential_lyb_nhi < potential_lya_nhi - 0.3#NHI difference of at least 0.3?
+        is_nearest_peak_within_range = peak_difference_spectrum[nearest_peak_ix] <= 15
+        is_nearest_peak_larger_coldensity = potential_lyb_nhi < potential_lya_nhi - 0.3
 
-        return is_nearest_peak_within_range and is_nearest_peak_larger_coldensity#true means lyb, false means lya
+        return is_nearest_peak_within_range and is_nearest_peak_larger_coldensity#true lyb,false lya
 
 
     def get_lyb_index(self, peakix):
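For orientation, the wavelength test that is_lyb applies above can be written as a small standalone check: a peak at wavelength lam is flagged as the Ly-beta line of a stronger absorber whose Ly-alpha falls at lam/(1025.722/1215.67). A sketch under assumed inputs — function and argument names are illustrative, not from the patch; wavelengths in Angstroms, column densities in log10 cm^-2:

    import numpy as np

    LYB_OVER_LYA = 1025.722 / 1215.67  # ratio of the two rest wavelengths

    def looks_like_lyb(lam_peak, peak_lams, peak_nhis, nhi_peak):
        # where the matching Ly-alpha absorption would sit if lam_peak is Ly-beta
        lambda_higher = lam_peak / LYB_OVER_LYA
        # nearest detected peak to that predicted position
        diffs = np.abs(np.asarray(peak_lams) - lambda_higher)
        ix = int(np.argmin(diffs))
        # same two criteria as the hunk above: within 15 Angstroms, and the
        # candidate's NHI at least 0.3 dex below the matched peak's
        return (diffs[ix] <= 15) and (nhi_peak < peak_nhis[ix] - 0.3)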
From 7e7ca053f9de5164d7385c206fe9ec45a7d69811 Mon Sep 17 00:00:00 2001
From: benwang <56830442+samwang141224@users.noreply.github.com>
Date: Mon, 22 Nov 2021 22:05:14 +0800
Subject: [PATCH 17/24] Update training_prediction.ipynb

---
 desidlas/notebook/training_prediction.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/desidlas/notebook/training_prediction.ipynb b/desidlas/notebook/training_prediction.ipynb
index ebe28fe..8dfb403 100644
--- a/desidlas/notebook/training_prediction.ipynb
+++ b/desidlas/notebook/training_prediction.ipynb
@@ -67,7 +67,7 @@ "source": [
     "#the codes used for detect DLAs is in desidlas/prediction/get_partprediction.py\n",
     "#to get the prediction for every part(400 or 600 pixels) , all you need is to run:\n",
-    "python get_partprediction.py -p 'pre_dataset.npy' -o 'partpre.npy' -m high\n",
+    "python get_partprediction.py -p 'pre_dataset.npy' -o 'partpre.npy' -model high\n",
     "\n",
     "\n",
     "# -p : path to the dataset used to detect DLAs\n",

From adf75f43d7ff2cf80ebb92dbec3856a43c3b1790 Mon Sep 17 00:00:00 2001
From: benwang <56830442+samwang141224@users.noreply.github.com>
Date: Tue, 14 Dec 2021 16:07:19 +0800
Subject: [PATCH 18/24] Create _init_.py

---
 desidlas/prediction/model/train_lowsnr/_init_.py | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 desidlas/prediction/model/train_lowsnr/_init_.py

diff --git a/desidlas/prediction/model/train_lowsnr/_init_.py b/desidlas/prediction/model/train_lowsnr/_init_.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/desidlas/prediction/model/train_lowsnr/_init_.py
@@ -0,0 +1 @@
+

From 1b4ba17af26849cc4cba559c7992c00a869f44fe Mon Sep 17 00:00:00 2001
From: benwang <56830442+samwang141224@users.noreply.github.com>
Date: Tue, 14 Dec 2021 16:21:00 +0800
Subject: [PATCH 19/24] Create lowsnrmodel

---
 desidlas/prediction/model/train_lowsnr/lowsnrmodel | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 desidlas/prediction/model/train_lowsnr/lowsnrmodel

diff --git a/desidlas/prediction/model/train_lowsnr/lowsnrmodel b/desidlas/prediction/model/train_lowsnr/lowsnrmodel
new file mode 100644
index 0000000..bd745fe
--- /dev/null
+++ b/desidlas/prediction/model/train_lowsnr/lowsnrmodel
@@ -0,0 +1,2 @@
+The model files are too large to upload to github, you can find the model files here :
+https://drive.google.com/drive/folders/15iX-R0o2HmUeLGBKPHjT94xuqaI2tJHr?usp=sharing

From 7c84b8cc1754b45e57300e9862654a476951e1ea Mon Sep 17 00:00:00 2001
From: Jiaqi Zou
Date: Tue, 14 Dec 2021 21:20:08 +0800
Subject: [PATCH 20/24] Update datasetting.py

---
 desidlas/datasets/datasetting.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/desidlas/datasets/datasetting.py b/desidlas/datasets/datasetting.py
index 42cbe96..e2247b5 100644
--- a/desidlas/datasets/datasetting.py
+++ b/desidlas/datasets/datasetting.py
@@ -16,7 +16,7 @@
 from desidlas.dla_cnn.spectra_utils import get_lam_data
 from desidlas.dla_cnn.defs import REST_RANGE,kernel,best_v
 
-def pad_sightline(sightline, lam, lam_rest, ix_dla_range,kernelrangepx,v=best_v['b']):
+def pad_sightline(sightline, lam, lam_rest, 
ix_dla_range,kernelrangepx,v=best_v['all']): """ padding the left and right sides of the spectra @@ -60,7 +60,7 @@ def pad_sightline(sightline, lam, lam_rest, ix_dla_range,kernelrangepx,v=best_v[ lam_padded = np.hstack((pad_lam_left,lam,pad_lam_right)) return flux_padded,lam_padded,pixel_num_left -def split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel): +def split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel,v=best_v['all']): """ Split the sightline into a series of snippets, each with length kernel @@ -78,7 +78,7 @@ def split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel kernelrangepx = int(kernel/2) #200 #padding the sightline: - flux_padded,lam_padded,pixel_num_left=pad_sightline(sightline,lam,lam_rest,ix_dla_range,kernelrangepx,v=best_v['b']) + flux_padded,lam_padded,pixel_num_left=pad_sightline(sightline,lam,lam_rest,ix_dla_range,kernelrangepx,v=v) From 4f481db1c8aa3696279ad35fb05b999a57d1fda1 Mon Sep 17 00:00:00 2001 From: Jiaqi Zou Date: Tue, 14 Dec 2021 21:24:44 +0800 Subject: [PATCH 21/24] Update get_dataset.py add v --- desidlas/datasets/get_dataset.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/desidlas/datasets/get_dataset.py b/desidlas/datasets/get_dataset.py index a0bf580..f4e0824 100644 --- a/desidlas/datasets/get_dataset.py +++ b/desidlas/datasets/get_dataset.py @@ -7,8 +7,9 @@ REST_RANGE = defs.REST_RANGE kernel = defs.kernel smooth_kernel= defs.smooth_kernel +best_v = defs.best_v -def make_datasets(sightlines,validate=True,kernel=kernel, REST_RANGE=REST_RANGE, output=None): +def make_datasets(sightlines, kernel=kernel, REST_RANGE=REST_RANGE, v=best_v['all'],output=None, validate=True): """ Generate training set or validation set for DESI. @@ -28,7 +29,7 @@ def make_datasets(sightlines,validate=True,kernel=kernel, REST_RANGE=REST_RANGE, wavelength_dlas=[dla.central_wavelength for dla in sightline.dlas] coldensity_dlas=[dla.col_density for dla in sightline.dlas] label_sightline(sightline, kernel=kernel, REST_RANGE=REST_RANGE) - data_split=split_sightline_into_samples(sightline,REST_RANGE=REST_RANGE, kernel=kernel) + data_split=split_sightline_into_samples(sightline,REST_RANGE=REST_RANGE, kernel=kernel,v=v) if validate: flux=np.vstack([data_split[0]]) labels_classifier=np.hstack([data_split[1]]) @@ -38,7 +39,7 @@ def make_datasets(sightlines,validate=True,kernel=kernel, REST_RANGE=REST_RANGE, dataset[sightline.id]={'FLUX':flux,'lam':lam,'labels_classifier': labels_classifier, 'labels_offset':labels_offset , 'col_density': col_density,'wavelength_dlas':wavelength_dlas,'coldensity_dlas':coldensity_dlas} else: sample_masks=select_samples_50p_pos_neg(sightline, kernel=kernel) - if len(sample_masks) >0: + if sample_masks !=[]: flux=np.vstack([data_split[0][m] for m in sample_masks]) labels_classifier=np.hstack([data_split[1][m] for m in sample_masks]) labels_offset=np.hstack([data_split[2][m] for m in sample_masks]) @@ -69,7 +70,7 @@ def smooth_flux(flux): return flux_matrix #smooth flux for low S/N sightlines -def make_smoothdatasets(sightlines,validate=True,kernel=smooth_kernel, REST_RANGE=REST_RANGE, output=None): +def make_smoothdatasets(sightlines,kernel=smooth_kernel, REST_RANGE=REST_RANGE, v=best_v['all'], output=None, validate=True): """ Generate smoothed training set or validation set for DESI. 
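After patches 20 and 21 the rebinning velocity is a real parameter: make_datasets and make_smoothdatasets accept v and pass it through split_sightline_into_samples to pad_sightline, defaulting everywhere to best_v['all'] rather than the blue-arm value best_v['b']. A usage sketch — an editor's illustration, assuming sightlines comes from get_sightlines and with placeholder output names:

    from desidlas.dla_cnn.defs import best_v
    from desidlas.datasets.get_dataset import make_datasets, make_smoothdatasets

    v = best_v['all']
    # plain 400-pixel windows for high/mid S/N spectra
    make_datasets(sightlines, v=v, output='trainingset.npy', validate=False)
    # smoothed windows for low S/N; flux is stored as a matrix per window
    make_smoothdatasets(sightlines, v=v, output='trainingset_smooth.npy', validate=False)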
@@ -88,7 +89,7 @@ def make_smoothdatasets(sightlines,validate=True,kernel=smooth_kernel, REST_RANG wavelength_dlas=[dla.central_wavelength for dla in sightline.dlas] coldensity_dlas=[dla.col_density for dla in sightline.dlas] label_sightline(sightline, kernel=kernel, REST_RANGE=REST_RANGE) - data_split=split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel) + data_split=split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel,v=v) if validate: flux=np.vstack([data_split[0]]) labels_classifier=np.hstack([data_split[1]]) From 7be6ef3cd48b9d51367fcf6b59a403c87e9d1431 Mon Sep 17 00:00:00 2001 From: Jiaqi Zou Date: Tue, 14 Dec 2021 21:32:11 +0800 Subject: [PATCH 22/24] Update preprocess.py --- desidlas/datasets/preprocess.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/desidlas/datasets/preprocess.py b/desidlas/datasets/preprocess.py index d8fe688..d18e2f6 100644 --- a/desidlas/datasets/preprocess.py +++ b/desidlas/datasets/preprocess.py @@ -112,7 +112,6 @@ def rebin(sightline, v): ------- :class:`dla_cnn.data_model.Sightline.Sightline`: """ - # TODO -- Add inline comments c = 2.9979246e8 # Set a constant dispersion @@ -209,8 +208,9 @@ def normalize(sightline, full_wavelength, full_flux): assert blue_limit <= red_limit,"No Lymann-alpha forest, Please check this spectra: %i"%sightline.id#when no lymann alpha forest exists, assert error. #use the slice we chose above to normalize this spectra, normalize both flux and error array using the same factor to maintain the s/n. good_pix = (rest_wavelength>=blue_limit)&(rest_wavelength<=red_limit) - sightline.flux = sightline.flux/np.median(full_flux[good_pix]) - sightline.error = sightline.error/np.median(full_flux[good_pix]) + normalizer=np.abs(np.nanmedian(full_flux[good_pix])) + sightline.flux = sightline.flux/normalizer + sightline.error = sightline.error/normalizer def estimate_s2n(sightline): """ @@ -237,9 +237,9 @@ def estimate_s2n(sightline): #for dla in sightline.dlas: #test = test&((wavelength>dla.central_wavelength+delta)|(wavelength0, "this sightline doesn't contain lymann forest, sightline id: %i"%sightline.id - s2n = sightline.flux/sightline.error + s2n = np.abs(sightline.flux/sightline.error) #return s/n - return np.median(s2n[test]) + return np.nanmedian(s2n[test]) def generate_summary_table(sightlines, output_dir, mode = "w"): """ From 16fc65fb0d8a6fc77cc923de2c905ab782be3189 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Fri, 17 Dec 2021 23:48:17 +0800 Subject: [PATCH 23/24] Update training.py --- desidlas/training/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desidlas/training/training.py b/desidlas/training/training.py index 3db3bf8..e62cbd7 100644 --- a/desidlas/training/training.py +++ b/desidlas/training/training.py @@ -141,7 +141,7 @@ def train_ann_test_batch(sess, ixs, data, summary_writer=None): -def train_ann(hyperparameters, train_dataset, test_dataset, INPUT_SIZE,matrix_size,,save_filename=None, load_filename=None, tblogs = "../tmp/tblogs", TF_DEVICE='/gpu:1'): +def train_ann(hyperparameters, train_dataset, test_dataset, INPUT_SIZE,matrix_size,save_filename=None,load_filename=None,tblogs = "../tmp/tblogs",TF_DEVICE='/gpu:1'): """ Perform training From 3988f25cb83f3fa57f02ce1cfa7e1a0fb9aa6d19 Mon Sep 17 00:00:00 2001 From: Jiaqi Zou Date: Tue, 21 Dec 2021 19:03:55 +0800 Subject: [PATCH 24/24] Update get_sightlines.py --- desidlas/datasets/get_sightlines.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/desidlas/datasets/get_sightlines.py b/desidlas/datasets/get_sightlines.py index 5c09dc6..64bd794 100644 --- a/desidlas/datasets/get_sightlines.py +++ b/desidlas/datasets/get_sightlines.py @@ -35,7 +35,7 @@ def get_sightlines(spectra,truth,zbest,outpath): sightline.flux = sightline.flux[0:sightline.split_point_br] sightline.error = sightline.error[0:sightline.split_point_br] sightline.loglam = sightline.loglam[0:sightline.split_point_br] - rebin(sightline, best_v['b']) + rebin(sightline, best_v['all']) sightlines.append(sightline) np.save(outpath,sightlines)
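Taken together with patch 24, the end-to-end flow now rebins every sightline with best_v['all'] and feeds the same windows to training and prediction. A condensed sketch of a high-S/N prediction run — an editor's illustration only, with placeholder file names; the hyperparameter columns follow patches 02, 03 and 07 (column 1 of each list for the high-S/N model, column 0 for mid/low, with INPUT_SIZE=600 and matrix_size=4 for low):

    import numpy as np
    from desidlas.datasets.get_sightlines import get_sightlines
    from desidlas.datasets.get_dataset import make_datasets
    from desidlas.training.parameterset import parameter_names, parameters
    from desidlas.prediction.get_partprediction import predictions_ann

    # read DESI spectra, cut at the b/r split point, rebin with best_v['all']
    sightlines = get_sightlines('spectra.fits', 'truth.fits', 'zbest.fits', 'sightlines.npy')

    # 400-pixel windows, all of them kept for prediction
    make_datasets(sightlines, output='pre_dataset.npy')

    # high-S/N hyperparameter column
    hyperparameters = {name: parameters[k][1] for k, name in enumerate(parameter_names)}

    r = np.load('pre_dataset.npy', allow_pickle=True).item()
    predictions = {}
    for sight_id in r.keys():
        flux = np.array(r[sight_id]['FLUX'])
        pred, conf, offset, coldensity = predictions_ann(
            hyperparameters, 400, 1, flux,
            'desidlas/prediction/model/train_highsnr/current_99999')
        predictions[sight_id] = {'pred': pred, 'conf': conf,
                                 'offset': offset, 'coldensity': coldensity}
    np.save('partpre.npy', predictions)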