From 8c9e1b3415c73beccbc98f666b13d1c54f3d46f8 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 14:20:42 +0800 Subject: [PATCH 01/24] Update model.py --- desidlas/training/model.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/desidlas/training/model.py b/desidlas/training/model.py index 5aea498..e3c5fea 100644 --- a/desidlas/training/model.py +++ b/desidlas/training/model.py @@ -173,7 +173,10 @@ def build_model(hyperparameters,INPUT_SIZE,matrix_size): #tf.compat.v1.placeholder:claim a tensor that needs to be filled (the data type, shape and name) #x: the empty tensor need to be filled with the input data - x = tf.compat.v1.placeholder(tf.float32, shape=[None,matrix_size, INPUT_SIZE], name='x') + if matrix_size == 1: + x = tf.compat.v1.placeholder(tf.float32, shape=[None,INPUT_SIZE], name='x') + if matrix_size == 4: + x = tf.compat.v1.placeholder(tf.float32, shape=[None,matrix_size, INPUT_SIZE], name='x') #claim the tensor for three labels @@ -189,7 +192,7 @@ def build_model(hyperparameters,INPUT_SIZE,matrix_size): # Stride (4,1) # number of filters = 4 (features?) # Neuron activation = ReLU (rectified linear unit) - W_conv1 = weight_variable([conv1_kernel, 1, 4, conv1_filters]) + W_conv1 = weight_variable([conv1_kernel, 1, matrix_size, conv1_filters]) b_conv1 = bias_variable([conv1_filters]) # https://www.tensorflow.org/versions/r0.10/api_docs/python/nn.html#convolution From 795d5d926aac92fc90ca5c4b71ec74bbf63c5f81 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 14:28:41 +0800 Subject: [PATCH 02/24] Update parameterset.py --- desidlas/training/parameterset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/desidlas/training/parameterset.py b/desidlas/training/parameterset.py index 4badb15..caf0aab 100644 --- a/desidlas/training/parameterset.py +++ b/desidlas/training/parameterset.py @@ -19,7 +19,7 @@ # batch_size [400,700, 400, 500, 600, 700, 850, 1000], # l2_regularization_penalty - [0.005, 0.01, 0.008, 0.005, 0.003], + [0.005,0.005, 0.01, 0.008, 0.005, 0.003], # dropout_keep_prob [0.9,0.98, 0.75, 0.9, 0.95, 0.98, 1], # fc1_n_neurons @@ -29,7 +29,7 @@ # fc2_2_n_neurons [500,350, 200, 350, 500, 700, 900, 1500], # fc2_3_n_neurons - [150, 200, 350, 500, 700, 900, 1500], + [150,150, 200, 350, 500, 700, 900, 1500], # conv1_kernel [40,32, 20, 22, 24, 26, 28, 32, 40, 48, 54], # conv2_kernel @@ -37,7 +37,7 @@ # conv3_kernel [20,16, 10, 14, 16, 20, 24, 28, 32, 34], # conv1_filters - [100, 64, 80, 90, 100, 110, 120, 140, 160, 200], + [100,100, 64, 80, 90, 100, 110, 120, 140, 160, 200], # conv2_filters [256,96, 80, 96, 128, 192, 256], # conv3_filters @@ -49,11 +49,11 @@ # conv3_stride [1,1, 1, 2, 3, 4, 5, 6], # pool1_kernel - [7, 3, 4, 5, 6, 7, 8, 9], + [7,7, 3, 4, 5, 6, 7, 8, 9], # pool2_kernel [4,6, 4, 5, 6, 7, 8, 9, 10], # pool3_kernel - [6, 4, 5, 6, 7, 8, 9, 10], + [6,6, 4, 5, 6, 7, 8, 9, 10], # pool1_stride [1,4, 1, 2, 4, 5, 6], # pool2_stride From 7d451e3482ce3a8fe298008db956337109c34aa5 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 14:33:21 +0800 Subject: [PATCH 03/24] Update get_partprediction.py --- desidlas/prediction/get_partprediction.py | 26 ++++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/desidlas/prediction/get_partprediction.py b/desidlas/prediction/get_partprediction.py index afc71b0..e7770af 100644 --- 
a/desidlas/prediction/get_partprediction.py +++ b/desidlas/prediction/get_partprediction.py @@ -40,12 +40,14 @@ def t(tensor_name): -def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): +def predictions_ann(hyperparameters, INPUT_SIZE,matrix_size,flux, checkpoint_filename, TF_DEVICE=''): ''' Perform training Parameters ---------- hyperparameters:hyperparameters for the CNN model structure + INPUT_SIZE: pixels numbers for each window , 400 for high SNR and 600 for low SNR + matrix_size: 1 if without smoothing, 4 if smoothing for low SNR flux:list (400 or 600 length), flux from sightline checkpoint_filename: CNN model file used to detect DLAs TF_DEVICE: use which gpu to train, default is '/gpu:1' @@ -69,7 +71,7 @@ def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): with tf.Graph().as_default(): - build_model(hyperparameters) # build the CNN model according to hyperparameters + build_model(hyperparameters,INPUT_SIZE,matrix_size) # build the CNN model according to hyperparameters with tf.device(TF_DEVICE), tf.compat.v1.Session() as sess: tf.compat.v1.train.Saver().restore(sess, checkpoint_filename+".ckpt") #load model files @@ -96,12 +98,17 @@ def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): parser = argparse.ArgumentParser() parser.add_argument('-p', '--preddataset', help='Datasets to detect DLAs , npy format', required=True, default=False) parser.add_argument('-o', '--output_file', help='output files to save the prediction result, npy format', required=False, default=False) - parser.add_argument('-m', '--modelfiles', help='CNN models for prediction, high snr model or mid snr model', required=False, default=False) + parser.add_argument('-model', '--modelfiles', help='CNN models for prediction, high snr model or mid snr model', required=False, default=False) + parser.add_argument('-t', '--INPUT_SIZE', help='set the input data size', required=False, default=400) + parser.add_argument('-m', '--matrix_size', help='set the matrix size when using smooth', required=False, default=1) + args = vars(parser.parse_args()) - RUN_SINGLE_ITERATION = not args['hyperparamsearch'] - checkpoint_filename = args['checkpoint_file'] if RUN_SINGLE_ITERATION else None + batch_results_file = args['output_file'] + INPUT_SIZE = args['INPUT_SIZE'] + matrix_size = args['matrix_size'] + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG) @@ -111,8 +118,7 @@ def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): from desidlas.training.parameterset import parameter_names from desidlas.training.parameterset import parameters hyperparameters = {} - for k in range(0,len(parameter_names)): - hyperparameters[parameter_names[k]] = parameters[k][0] + pred_dataset=args['preddataset'] @@ -123,8 +129,12 @@ def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): modelfile=args['modelfiles'] if modelfile == 'high': checkpoint_filename='desidlas/prediction/model/train_highsnr/current_99999' + for k in range(0,len(parameter_names)): + hyperparameters[parameter_names[k]] = parameters[k][1] if modelfile == 'mid': checkpoint_filename='desidlas/prediction/model/train_midsnr/current_99999' + for k in range(0,len(parameter_names)): + hyperparameters[parameter_names[k]] = parameters[k][1] dataset={} @@ -140,7 +150,7 @@ def predictions_ann(hyperparameters, flux, checkpoint_filename, TF_DEVICE=''): flux=np.array(r[sight_id]['FLUX']) - (pred, conf, offset, coldensity)=predictions_ann(hyperparameters, flux, 
checkpoint_filename, TF_DEVICE='') + (pred, conf, offset, coldensity)=predictions_ann(hyperparameters, INPUT_SIZE,matrix_size,flux, checkpoint_filename, TF_DEVICE='') dataset[sight_id]={'pred':pred,'conf':conf,'offset': offset, 'coldensity':coldensity } From d0bde2c447f498416447a64f732b4911f00b2300 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 14:34:03 +0800 Subject: [PATCH 04/24] Update training.py --- desidlas/training/training.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/desidlas/training/training.py b/desidlas/training/training.py index 99aac8e..3923ac5 100644 --- a/desidlas/training/training.py +++ b/desidlas/training/training.py @@ -314,8 +314,8 @@ def calc_normalized_score(best_accuracy, best_offset_rmse, best_coldensity_rmse) parser.add_argument('-c', '--checkpoint_file', help='Name of the checkpoint file to save (without file extension)', required=False, default=savemodel_path) #../models/training/current parser.add_argument('-r', '--train_dataset_filename', help='File name of the training dataset without extension', required=False, default=traindata_path) parser.add_argument('-e', '--test_dataset_filename', help='File name of the testing dataset without extension', required=False, default=testdata_path) - parser.add_argument('-t', '--INPUT_SIZE', help='set the input data size', required=False, default=600) - parser.add_argument('-m', '--matrix_size', help='set the matrix size when using smooth', required=False, default=4) + parser.add_argument('-t', '--INPUT_SIZE', help='set the input data size', required=False, default=400) + parser.add_argument('-m', '--matrix_size', help='set the matrix size when using smooth', required=False, default=1) args = vars(parser.parse_args()) RUN_SINGLE_ITERATION = not args['hyperparamsearch'] From 0f8056e0b5d88c5d8b9f7728ca36a42f31b656cb Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 14:42:44 +0800 Subject: [PATCH 05/24] Delete training_prediction.ipynb --- docs/notebook/training_prediction.ipynb | 151 ------------------------ 1 file changed, 151 deletions(-) delete mode 100644 docs/notebook/training_prediction.ipynb diff --git a/docs/notebook/training_prediction.ipynb b/docs/notebook/training_prediction.ipynb deleted file mode 100644 index ebe28fe..0000000 --- a/docs/notebook/training_prediction.ipynb +++ /dev/null @@ -1,151 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#This notebook is about how to do the training and prediction" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# training" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#the codes used for training is in desidlas/training/training.py\n", - "#for training the model, all you need is to run:\n", - "python training.py -i 1000000 -r 'traingset.npy' -e 'testset.npy' -c 'trainingmode/current' \n", - "\n", - "#1000000:training iterations\n", - "# -r : path to the training dataset\n", - "# -e : path to the testing dataset\n", - "# -c : path to save the model file , paht+'/current'\n", - "#result : print training accuracy every 200 steps , print test accuracy every 5000 steps (classification accuracy)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#for low SNR training, we used smoothing 
method, so the command is\n",
-    "python training.py -i 1000000 -r 'traingset.npy' -e 'testset.npy' -c 'trainingmode/current' -t 600 -m 4\n",
-    "# -t : pixel numbers of each window\n",
-    "#"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Prediction"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#the codes used for detect DLAs is in desidlas/prediction/get_partprediction.py\n",
-    "#to get the prediction for every part(400 or 600 pixels) , all you need is to run:\n",
-    "python get_partprediction.py -p 'pre_dataset.npy' -o 'partpre.npy' -m high\n",
-    "\n",
-    "\n",
-    "# -p : path to the dataset used to detect DLAs\n",
-    "# -o : path to the output file\n",
-    "# -m : high or mid , which CNN model is used\n",
-    "#we recommand to use high model for snr>6 spectra and mid model for 2.5<snr<6 spectra"

Date: Mon, 1 Nov 2021 14:43:24 +0800
Subject: [PATCH 06/24] Add files via upload

---
 docs/notebook/training_prediction.ipynb | 151 ++++++++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100644 docs/notebook/training_prediction.ipynb

diff --git a/docs/notebook/training_prediction.ipynb b/docs/notebook/training_prediction.ipynb
new file mode 100644
index 0000000..8dfb403
--- /dev/null
+++ b/docs/notebook/training_prediction.ipynb
@@ -0,0 +1,151 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#This notebook is about how to do the training and prediction"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#the code used for training is in desidlas/training/training.py\n",
+    "#for training the model, all you need is to run:\n",
+    "python training.py -i 1000000 -r 'traingset.npy' -e 'testset.npy' -c 'trainingmode/current' \n",
+    "\n",
+    "#1000000:training iterations\n",
+    "# -r : path to the training dataset\n",
+    "# -e : path to the testing dataset\n",
+    "# -c : path to save the model file , path+'/current'\n",
+    "#result : print training accuracy every 200 steps , print test accuracy every 5000 steps (classification accuracy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#for low SNR training, we used smoothing method, so the command is\n",
+    "python training.py -i 1000000 -r 'traingset.npy' -e 'testset.npy' -c 'trainingmode/current' -t 600 -m 4\n",
+    "# -t : pixel numbers of each window\n",
+    "#"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#the code used to detect DLAs is in desidlas/prediction/get_partprediction.py\n",
+    "#to get the prediction for every part(400 or 600 pixels) , all you need is to run:\n",
+    "python get_partprediction.py -p 'pre_dataset.npy' -o 'partpre.npy' -model high\n",
+    "\n",
+    "\n",
+    "# -p : path to the dataset used to detect DLAs\n",
+    "# -o : path to the output file\n",
+    "# -model : high or mid , which CNN model is used\n",
+    "#we recommend to use high model for snr>6 spectra and mid model for 2.5<snr<6 spectra"
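The part-prediction output named above is a per-sightline dictionary saved with np.save: each sightline id maps to 'pred', 'conf', 'offset' and 'coldensity' arrays, one value per window. A minimal sketch of reading it back — an editor's illustration, not code from the patches; 'partpre.npy' is just the example output name used in the cell above:

    import numpy as np

    # np.save pickles the dictionary, so allow_pickle is needed to recover it
    results = np.load('partpre.npy', allow_pickle=True).item()
    for sight_id, res in results.items():
        pred = res['pred']              # DLA / no-DLA classification per window
        conf = res['conf']              # classifier confidence
        offset = res['offset']          # pixel offset to the DLA centre
        coldensity = res['coldensity']  # log10 NHI estimate

Date: Mon, 1 Nov 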
2021 14:46:06 +0800 Subject: [PATCH 07/24] Update get_partprediction.py --- desidlas/prediction/get_partprediction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desidlas/prediction/get_partprediction.py b/desidlas/prediction/get_partprediction.py index e7770af..5df7b88 100644 --- a/desidlas/prediction/get_partprediction.py +++ b/desidlas/prediction/get_partprediction.py @@ -134,7 +134,7 @@ def predictions_ann(hyperparameters, INPUT_SIZE,matrix_size,flux, checkpoint_fil if modelfile == 'mid': checkpoint_filename='desidlas/prediction/model/train_midsnr/current_99999' for k in range(0,len(parameter_names)): - hyperparameters[parameter_names[k]] = parameters[k][1] + hyperparameters[parameter_names[k]] = parameters[k][0] dataset={} From bf5e74501897c62d7987cf2ab3d1ad1e403edf6a Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 20:49:34 +0800 Subject: [PATCH 08/24] Update Data.py --- desidlas/data_model/Data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/desidlas/data_model/Data.py b/desidlas/data_model/Data.py index d3cc8f5..50640f7 100644 --- a/desidlas/data_model/Data.py +++ b/desidlas/data_model/Data.py @@ -2,8 +2,8 @@ from abc import ABCMeta -from dla_cnn.data_model import Id -from dla_cnn.data_model import Sightline +from desidlas.data_model import Id +from desidlas.data_model import Sightline class Data(object): From f80a11809064c50b5eb203b0f906dea8ff547d93 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 1 Nov 2021 20:55:48 +0800 Subject: [PATCH 09/24] Update get_dataset.py --- desidlas/datasets/get_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/desidlas/datasets/get_dataset.py b/desidlas/datasets/get_dataset.py index d4b8f55..a0bf580 100644 --- a/desidlas/datasets/get_dataset.py +++ b/desidlas/datasets/get_dataset.py @@ -96,7 +96,7 @@ def make_smoothdatasets(sightlines,validate=True,kernel=smooth_kernel, REST_RANG col_density=np.hstack([data_split[3]]) lam=np.vstack([data_split[4]]) flux_matrix=smooth_flux(flux) - dataset[sightline.id]={'FLUXMATRIX':flux_matrix,'lam':lam,'labels_classifier': labels_classifier, 'labels_offset':labels_offset , 'col_density': col_density,'wavelength_dlas':wavelength_dlas,'coldensity_dlas':coldensity_dlas} + dataset[sightline.id]={'FLUX':flux_matrix,'lam':lam,'labels_classifier': labels_classifier, 'labels_offset':labels_offset , 'col_density': col_density,'wavelength_dlas':wavelength_dlas,'coldensity_dlas':coldensity_dlas} else: sample_masks=select_samples_50p_pos_neg(sightline,kernel=kernel) if sample_masks !=[]: @@ -105,7 +105,7 @@ def make_smoothdatasets(sightlines,validate=True,kernel=smooth_kernel, REST_RANG labels_offset=np.hstack([data_split[2][m] for m in sample_masks]) col_density=np.hstack([data_split[3][m] for m in sample_masks]) flux_matrix=smooth_flux(flux) - dataset[sightline.id]={'FLUXMATRIX':flux_matrix,'labels_classifier':labels_classifier,'labels_offset':labels_offset,'col_density': col_density} + dataset[sightline.id]={'FLUX':flux_matrix,'labels_classifier':labels_classifier,'labels_offset':labels_offset,'col_density': col_density} np.save(output,dataset) return dataset From 80b49452382667e7e18267e3408de84cc737da02 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Thu, 4 Nov 2021 22:37:49 +0800 Subject: [PATCH 10/24] Update installing.rst --- docs/installing.rst | 14 ++++++-------- 1 file changed, 6 
insertions(+), 8 deletions(-) diff --git a/docs/installing.rst b/docs/installing.rst index 877fd76..8821f15 100644 --- a/docs/installing.rst +++ b/docs/installing.rst @@ -83,20 +83,18 @@ Do these for docs:: Get The Model File ============== - The model files are too large to upload to github, you can find the model files for high S/N spectra here: + The model files are too large to upload to github, you can find the model files here: - https://drive.google.com/drive/folders/1DYOE_k9S_F0JmnAdFbTmHkVqyxFlc4t-?usp=sharing - - The model files are too large to upload to github, you can find the model files for low S/N spectra here : - - https://drive.google.com/drive/folders/1s5km1NAg5j0Y-tWI1q58Y09hjj0Jjc8C?usp=sharing + https://drive.google.com/drive/folders/1Cl07CuRBE9ljtvIoTWexEVNSd8Zzwyvg?usp=sharing + + The folders are different models for different S/N spectra. (high: >6. mid:3-6. low:<3) Test CNN ============== - When you finish the installing and want to test the CNN model (training and prediction), you can firstly download all the model files here: + When you finish the installing and want to test the CNN model (training and prediction), you can firstly download all the model files here(same link as above): - https://drive.google.com/drive/folders/1Cl07CuRBE9ljtvIoTWexEVNSd8Zzwyvg?usp=sharing + https://drive.google.com/drive/folders/1Cl07CuRBE9ljtvIoTWexEVNSd8Zzwyvg?usp=sharing And then add the environmental variable CNN_MODEL as the path to the model files like this: From b82d8c3a64275a977ef6ada956ed78cb36e5c8a7 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Sun, 7 Nov 2021 20:52:18 +0800 Subject: [PATCH 11/24] Update training.py --- desidlas/training/training.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/desidlas/training/training.py b/desidlas/training/training.py index 3923ac5..a06ee8e 100644 --- a/desidlas/training/training.py +++ b/desidlas/training/training.py @@ -140,7 +140,7 @@ def train_ann_test_batch(sess, ixs, data, summary_writer=None): -def train_ann(hyperparameters, train_dataset, test_dataset, save_filename=None, load_filename=None, tblogs = "../tmp/tblogs", TF_DEVICE='/gpu:1'): +def train_ann(hyperparameters, train_dataset, test_dataset, INPUT_SIZE,matrix_size,,save_filename=None, load_filename=None, tblogs = "../tmp/tblogs", TF_DEVICE='/gpu:1'): """ Perform training @@ -352,7 +352,7 @@ def calc_normalized_score(best_accuracy, best_offset_rmse, best_coldensity_rmse) #start the training (best_accuracy, last_accuracy, last_objective, best_offset_rmse, last_offset_rmse, best_coldensity_rmse, - last_coldensity_rmse) = train_ann(hyperparameters, train_dataset, test_dataset, + last_coldensity_rmse) = train_ann(hyperparameters, train_dataset, test_dataset,INPUT_SIZE,matrix_size, save_filename=checkpoint_filename, load_filename=args['loadmodel']) From 67306329551bbf36bcfe96b8613298eb828c31b5 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Sun, 7 Nov 2021 21:03:27 +0800 Subject: [PATCH 12/24] Update training.py --- desidlas/training/training.py | 1 + 1 file changed, 1 insertion(+) diff --git a/desidlas/training/training.py b/desidlas/training/training.py index a06ee8e..84dec3b 100644 --- a/desidlas/training/training.py +++ b/desidlas/training/training.py @@ -11,6 +11,7 @@ import tensorflow as tf import os from pathlib import Path +from pkg_resources import resource_filename from tensorflow.python.framework import ops ops.reset_default_graph() From 
ab3638a01037206f64740aec57f8f6426a38beee Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 8 Nov 2021 14:35:48 +0800 Subject: [PATCH 13/24] Update training.py --- desidlas/training/training.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/desidlas/training/training.py b/desidlas/training/training.py index 84dec3b..3db3bf8 100644 --- a/desidlas/training/training.py +++ b/desidlas/training/training.py @@ -297,6 +297,8 @@ def calc_normalized_score(best_accuracy, best_offset_rmse, best_coldensity_rmse) # Execute batch mode # from desidlas.data_model.Dataset import Dataset + from desidlas.training.parameterset import parameter_names + from desidlas.training.parameterset import parameters datafile_path = os.path.join(resource_filename('desidlas', 'tests'), 'datafile') traindata_path=os.path.join(datafile_path, 'sightlines-16-1375.npy') @@ -339,10 +341,6 @@ def calc_normalized_score(best_accuracy, best_offset_rmse, best_coldensity_rmse) os.remove(batch_results_file) if os.path.exists(batch_results_file) else None with open(batch_results_file, "a") as csvoutput: csvoutput.write("iteration_num,normalized_score,best_accuracy,last_accuracy,last_objective,best_offset_rmse,last_offset_rmse,best_coldensity_rmse,last_coldensity_rmse," + ",".join(parameter_names) + "\n") - - - from desidlas.training.parameterset import parameter_names - from desidlas.training.parameterset import parameters #hyperparameter search From 146649c00159c93127df698f529de0df45790385 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Mon, 8 Nov 2021 20:36:00 +0800 Subject: [PATCH 14/24] Update get_partprediction.py --- desidlas/prediction/get_partprediction.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/desidlas/prediction/get_partprediction.py b/desidlas/prediction/get_partprediction.py index 5df7b88..37b7c15 100644 --- a/desidlas/prediction/get_partprediction.py +++ b/desidlas/prediction/get_partprediction.py @@ -133,8 +133,14 @@ def predictions_ann(hyperparameters, INPUT_SIZE,matrix_size,flux, checkpoint_fil hyperparameters[parameter_names[k]] = parameters[k][1] if modelfile == 'mid': checkpoint_filename='desidlas/prediction/model/train_midsnr/current_99999' + for k in range(0,len(parameter_names)): + hyperparameters[parameter_names[k]] = parameters[k][1] + if modelfile == 'low': + checkpoint_filename='desidlas/prediction/model/train_lowsnr/current_99999' for k in range(0,len(parameter_names)): hyperparameters[parameter_names[k]] = parameters[k][0] + INPUT_SIZE = 600 + matrix_size = 4 dataset={} From cdfe4d3ffd76704b1e1aaa22f93e3a77c3f528c7 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Thu, 11 Nov 2021 19:59:49 +0800 Subject: [PATCH 15/24] Update get_partprediction.py --- desidlas/prediction/get_partprediction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desidlas/prediction/get_partprediction.py b/desidlas/prediction/get_partprediction.py index 37b7c15..be3e3d2 100644 --- a/desidlas/prediction/get_partprediction.py +++ b/desidlas/prediction/get_partprediction.py @@ -134,7 +134,7 @@ def predictions_ann(hyperparameters, INPUT_SIZE,matrix_size,flux, checkpoint_fil if modelfile == 'mid': checkpoint_filename='desidlas/prediction/model/train_midsnr/current_99999' for k in range(0,len(parameter_names)): - hyperparameters[parameter_names[k]] = parameters[k][1] + hyperparameters[parameter_names[k]] = parameters[k][0] if modelfile == 
'low':
         checkpoint_filename='desidlas/prediction/model/train_lowsnr/current_99999'
         for k in range(0,len(parameter_names)):

From da9bd57f681bb1099442a46b49ef997f7d83a714 Mon Sep 17 00:00:00 2001
From: Jiaqi Zou
Date: Fri, 12 Nov 2021 16:46:14 +0800
Subject: [PATCH 16/24] Update Sightline.py

---
 desidlas/data_model/Sightline.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/desidlas/data_model/Sightline.py b/desidlas/data_model/Sightline.py
index 47813b0..fc69380 100644
--- a/desidlas/data_model/Sightline.py
+++ b/desidlas/data_model/Sightline.py
@@ -1,5 +1,6 @@
 import numpy as np
 from desidlas.dla_cnn.spectra_utils import get_lam_data
+from desidlas.datasets.datasetting import split_sightline_into_samples
 
 class Sightline(object):
@@ -77,17 +78,16 @@ def is_lyb(self, peakix):
         """
         assert self.prediction is not None and peakix in self.prediction.peaks_ixs
 
-        lam, lam_rest, ix_dla_range = get_lam_data(self.loglam, self.z_qso)
-        kernelrangepx = 200
-        cut=((np.nonzero(ix_dla_range)[0])>=kernelrangepx)&((np.nonzero(ix_dla_range)[0])<=(len(lam)-kernelrangepx-1))
-        lam_analyse=lam[ix_dla_range][cut]
+        data_split=split_sightline_into_samples(self)
+        lam_analyse=data_split[5]
+
         lambda_higher = (lam_analyse[peakix]) / (1025.722/1215.67)#find the DLA that this peak corresponds to
 
         # An array of how close each peak is to being the ly-b of peakix in spectrum reference frame
         peak_difference_spectrum = np.abs(lam_analyse[self.prediction.peaks_ixs] - lambda_higher)
-        nearest_peak_ix = np.argmin(peak_difference_spectrum)#find the peak nearest to this DLA
+        nearest_peak_ix = np.argmin(peak_difference_spectrum)
 
-        # get the column density of the identified nearest peak (compute the NHI of these two)
+        # get the column density of the identified nearest peak
         _, potential_lya_nhi, _, _ = \
             self.prediction.get_coldensity_for_peak(self.prediction.peaks_ixs[nearest_peak_ix])
         _, potential_lyb_nhi, _, _ = \
             self.prediction.get_coldensity_for_peak(peakix)
 
         # Validations: check that the nearest peak is close enough to match
         # sanity check that the LyB is at least 0.3 less than the DLA
-        is_nearest_peak_within_range = peak_difference_spectrum[nearest_peak_ix] <= 15#the two are separated by less than 15
-        is_nearest_peak_larger_coldensity = potential_lyb_nhi < potential_lya_nhi - 0.3#NHI difference of at least 0.3?
+        is_nearest_peak_within_range = peak_difference_spectrum[nearest_peak_ix] <= 15
+        is_nearest_peak_larger_coldensity = potential_lyb_nhi < potential_lya_nhi - 0.3
 
-        return is_nearest_peak_within_range and is_nearest_peak_larger_coldensity#true means lyb, false means lya
+        return is_nearest_peak_within_range and is_nearest_peak_larger_coldensity#true lyb,false lya
 
 
     def get_lyb_index(self, peakix):
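For orientation, the wavelength test that is_lyb applies above can be written as a small standalone check: a peak at wavelength lam is flagged as the Ly-beta line of a stronger absorber whose Ly-alpha falls at lam/(1025.722/1215.67). A sketch under assumed inputs — function and argument names are illustrative, not from the patch; wavelengths in Angstroms, column densities in log10 cm^-2:

    import numpy as np

    LYB_OVER_LYA = 1025.722 / 1215.67  # ratio of the two rest wavelengths

    def looks_like_lyb(lam_peak, peak_lams, peak_nhis, nhi_peak):
        # where the matching Ly-alpha absorption would sit if lam_peak is Ly-beta
        lambda_higher = lam_peak / LYB_OVER_LYA
        # nearest detected peak to that predicted position
        diffs = np.abs(np.asarray(peak_lams) - lambda_higher)
        ix = int(np.argmin(diffs))
        # same two criteria as the hunk above: within 15 Angstroms, and the
        # candidate's NHI at least 0.3 dex below the matched peak's
        return (diffs[ix] <= 15) and (nhi_peak < peak_nhis[ix] - 0.3)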
From 7e7ca053f9de5164d7385c206fe9ec45a7d69811 Mon Sep 17 00:00:00 2001
From: benwang <56830442+samwang141224@users.noreply.github.com>
Date: Mon, 22 Nov 2021 22:05:14 +0800
Subject: [PATCH 17/24] Update training_prediction.ipynb

---
 desidlas/notebook/training_prediction.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/desidlas/notebook/training_prediction.ipynb b/desidlas/notebook/training_prediction.ipynb
index ebe28fe..8dfb403 100644
--- a/desidlas/notebook/training_prediction.ipynb
+++ b/desidlas/notebook/training_prediction.ipynb
@@ -67,7 +67,7 @@ "source": [
     "#the codes used for detect DLAs is in desidlas/prediction/get_partprediction.py\n",
     "#to get the prediction for every part(400 or 600 pixels) , all you need is to run:\n",
-    "python get_partprediction.py -p 'pre_dataset.npy' -o 'partpre.npy' -m high\n",
+    "python get_partprediction.py -p 'pre_dataset.npy' -o 'partpre.npy' -model high\n",
     "\n",
     "\n",
     "# -p : path to the dataset used to detect DLAs\n",

From adf75f43d7ff2cf80ebb92dbec3856a43c3b1790 Mon Sep 17 00:00:00 2001
From: benwang <56830442+samwang141224@users.noreply.github.com>
Date: Tue, 14 Dec 2021 16:07:19 +0800
Subject: [PATCH 18/24] Create _init_.py

---
 desidlas/prediction/model/train_lowsnr/_init_.py | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 desidlas/prediction/model/train_lowsnr/_init_.py

diff --git a/desidlas/prediction/model/train_lowsnr/_init_.py b/desidlas/prediction/model/train_lowsnr/_init_.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/desidlas/prediction/model/train_lowsnr/_init_.py
@@ -0,0 +1 @@
+

From 1b4ba17af26849cc4cba559c7992c00a869f44fe Mon Sep 17 00:00:00 2001
From: benwang <56830442+samwang141224@users.noreply.github.com>
Date: Tue, 14 Dec 2021 16:21:00 +0800
Subject: [PATCH 19/24] Create lowsnrmodel

---
 desidlas/prediction/model/train_lowsnr/lowsnrmodel | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 desidlas/prediction/model/train_lowsnr/lowsnrmodel

diff --git a/desidlas/prediction/model/train_lowsnr/lowsnrmodel b/desidlas/prediction/model/train_lowsnr/lowsnrmodel
new file mode 100644
index 0000000..bd745fe
--- /dev/null
+++ b/desidlas/prediction/model/train_lowsnr/lowsnrmodel
@@ -0,0 +1,2 @@
+The model files are too large to upload to github, you can find the model files here :
+https://drive.google.com/drive/folders/15iX-R0o2HmUeLGBKPHjT94xuqaI2tJHr?usp=sharing

From 7c84b8cc1754b45e57300e9862654a476951e1ea Mon Sep 17 00:00:00 2001
From: Jiaqi Zou
Date: Tue, 14 Dec 2021 21:20:08 +0800
Subject: [PATCH 20/24] Update datasetting.py

---
 desidlas/datasets/datasetting.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/desidlas/datasets/datasetting.py b/desidlas/datasets/datasetting.py
index 42cbe96..e2247b5 100644
--- a/desidlas/datasets/datasetting.py
+++ b/desidlas/datasets/datasetting.py
@@ -16,7 +16,7 @@
 from desidlas.dla_cnn.spectra_utils import get_lam_data
 from desidlas.dla_cnn.defs import REST_RANGE,kernel,best_v
 
-def pad_sightline(sightline, lam, lam_rest, ix_dla_range,kernelrangepx,v=best_v['b']):
+def pad_sightline(sightline, lam, lam_rest, 
ix_dla_range,kernelrangepx,v=best_v['all']): """ padding the left and right sides of the spectra @@ -60,7 +60,7 @@ def pad_sightline(sightline, lam, lam_rest, ix_dla_range,kernelrangepx,v=best_v[ lam_padded = np.hstack((pad_lam_left,lam,pad_lam_right)) return flux_padded,lam_padded,pixel_num_left -def split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel): +def split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel,v=best_v['all']): """ Split the sightline into a series of snippets, each with length kernel @@ -78,7 +78,7 @@ def split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel kernelrangepx = int(kernel/2) #200 #padding the sightline: - flux_padded,lam_padded,pixel_num_left=pad_sightline(sightline,lam,lam_rest,ix_dla_range,kernelrangepx,v=best_v['b']) + flux_padded,lam_padded,pixel_num_left=pad_sightline(sightline,lam,lam_rest,ix_dla_range,kernelrangepx,v=v) From 4f481db1c8aa3696279ad35fb05b999a57d1fda1 Mon Sep 17 00:00:00 2001 From: Jiaqi Zou Date: Tue, 14 Dec 2021 21:24:44 +0800 Subject: [PATCH 21/24] Update get_dataset.py add v --- desidlas/datasets/get_dataset.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/desidlas/datasets/get_dataset.py b/desidlas/datasets/get_dataset.py index a0bf580..f4e0824 100644 --- a/desidlas/datasets/get_dataset.py +++ b/desidlas/datasets/get_dataset.py @@ -7,8 +7,9 @@ REST_RANGE = defs.REST_RANGE kernel = defs.kernel smooth_kernel= defs.smooth_kernel +best_v = defs.best_v -def make_datasets(sightlines,validate=True,kernel=kernel, REST_RANGE=REST_RANGE, output=None): +def make_datasets(sightlines, kernel=kernel, REST_RANGE=REST_RANGE, v=best_v['all'],output=None, validate=True): """ Generate training set or validation set for DESI. @@ -28,7 +29,7 @@ def make_datasets(sightlines,validate=True,kernel=kernel, REST_RANGE=REST_RANGE, wavelength_dlas=[dla.central_wavelength for dla in sightline.dlas] coldensity_dlas=[dla.col_density for dla in sightline.dlas] label_sightline(sightline, kernel=kernel, REST_RANGE=REST_RANGE) - data_split=split_sightline_into_samples(sightline,REST_RANGE=REST_RANGE, kernel=kernel) + data_split=split_sightline_into_samples(sightline,REST_RANGE=REST_RANGE, kernel=kernel,v=v) if validate: flux=np.vstack([data_split[0]]) labels_classifier=np.hstack([data_split[1]]) @@ -38,7 +39,7 @@ def make_datasets(sightlines,validate=True,kernel=kernel, REST_RANGE=REST_RANGE, dataset[sightline.id]={'FLUX':flux,'lam':lam,'labels_classifier': labels_classifier, 'labels_offset':labels_offset , 'col_density': col_density,'wavelength_dlas':wavelength_dlas,'coldensity_dlas':coldensity_dlas} else: sample_masks=select_samples_50p_pos_neg(sightline, kernel=kernel) - if len(sample_masks) >0: + if sample_masks !=[]: flux=np.vstack([data_split[0][m] for m in sample_masks]) labels_classifier=np.hstack([data_split[1][m] for m in sample_masks]) labels_offset=np.hstack([data_split[2][m] for m in sample_masks]) @@ -69,7 +70,7 @@ def smooth_flux(flux): return flux_matrix #smooth flux for low S/N sightlines -def make_smoothdatasets(sightlines,validate=True,kernel=smooth_kernel, REST_RANGE=REST_RANGE, output=None): +def make_smoothdatasets(sightlines,kernel=smooth_kernel, REST_RANGE=REST_RANGE, v=best_v['all'], output=None, validate=True): """ Generate smoothed training set or validation set for DESI. 
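After patches 20 and 21 the rebinning velocity is a real parameter: make_datasets and make_smoothdatasets accept v and pass it through split_sightline_into_samples to pad_sightline, defaulting everywhere to best_v['all'] rather than the blue-arm value best_v['b']. A usage sketch — an editor's illustration, assuming sightlines comes from get_sightlines and with placeholder output names:

    from desidlas.dla_cnn.defs import best_v
    from desidlas.datasets.get_dataset import make_datasets, make_smoothdatasets

    v = best_v['all']
    # plain 400-pixel windows for high/mid S/N spectra
    make_datasets(sightlines, v=v, output='trainingset.npy', validate=False)
    # smoothed windows for low S/N; flux is stored as a matrix per window
    make_smoothdatasets(sightlines, v=v, output='trainingset_smooth.npy', validate=False)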
@@ -88,7 +89,7 @@ def make_smoothdatasets(sightlines,validate=True,kernel=smooth_kernel, REST_RANG wavelength_dlas=[dla.central_wavelength for dla in sightline.dlas] coldensity_dlas=[dla.col_density for dla in sightline.dlas] label_sightline(sightline, kernel=kernel, REST_RANGE=REST_RANGE) - data_split=split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel) + data_split=split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel,v=v) if validate: flux=np.vstack([data_split[0]]) labels_classifier=np.hstack([data_split[1]]) From 7be6ef3cd48b9d51367fcf6b59a403c87e9d1431 Mon Sep 17 00:00:00 2001 From: Jiaqi Zou Date: Tue, 14 Dec 2021 21:32:11 +0800 Subject: [PATCH 22/24] Update preprocess.py --- desidlas/datasets/preprocess.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/desidlas/datasets/preprocess.py b/desidlas/datasets/preprocess.py index d8fe688..d18e2f6 100644 --- a/desidlas/datasets/preprocess.py +++ b/desidlas/datasets/preprocess.py @@ -112,7 +112,6 @@ def rebin(sightline, v): ------- :class:`dla_cnn.data_model.Sightline.Sightline`: """ - # TODO -- Add inline comments c = 2.9979246e8 # Set a constant dispersion @@ -209,8 +208,9 @@ def normalize(sightline, full_wavelength, full_flux): assert blue_limit <= red_limit,"No Lymann-alpha forest, Please check this spectra: %i"%sightline.id#when no lymann alpha forest exists, assert error. #use the slice we chose above to normalize this spectra, normalize both flux and error array using the same factor to maintain the s/n. good_pix = (rest_wavelength>=blue_limit)&(rest_wavelength<=red_limit) - sightline.flux = sightline.flux/np.median(full_flux[good_pix]) - sightline.error = sightline.error/np.median(full_flux[good_pix]) + normalizer=np.abs(np.nanmedian(full_flux[good_pix])) + sightline.flux = sightline.flux/normalizer + sightline.error = sightline.error/normalizer def estimate_s2n(sightline): """ @@ -237,9 +237,9 @@ def estimate_s2n(sightline): #for dla in sightline.dlas: #test = test&((wavelength>dla.central_wavelength+delta)|(wavelength0, "this sightline doesn't contain lymann forest, sightline id: %i"%sightline.id - s2n = sightline.flux/sightline.error + s2n = np.abs(sightline.flux/sightline.error) #return s/n - return np.median(s2n[test]) + return np.nanmedian(s2n[test]) def generate_summary_table(sightlines, output_dir, mode = "w"): """ From 16fc65fb0d8a6fc77cc923de2c905ab782be3189 Mon Sep 17 00:00:00 2001 From: benwang <56830442+samwang141224@users.noreply.github.com> Date: Fri, 17 Dec 2021 23:48:17 +0800 Subject: [PATCH 23/24] Update training.py --- desidlas/training/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desidlas/training/training.py b/desidlas/training/training.py index 3db3bf8..e62cbd7 100644 --- a/desidlas/training/training.py +++ b/desidlas/training/training.py @@ -141,7 +141,7 @@ def train_ann_test_batch(sess, ixs, data, summary_writer=None): -def train_ann(hyperparameters, train_dataset, test_dataset, INPUT_SIZE,matrix_size,,save_filename=None, load_filename=None, tblogs = "../tmp/tblogs", TF_DEVICE='/gpu:1'): +def train_ann(hyperparameters, train_dataset, test_dataset, INPUT_SIZE,matrix_size,save_filename=None,load_filename=None,tblogs = "../tmp/tblogs",TF_DEVICE='/gpu:1'): """ Perform training From 3988f25cb83f3fa57f02ce1cfa7e1a0fb9aa6d19 Mon Sep 17 00:00:00 2001 From: Jiaqi Zou Date: Tue, 21 Dec 2021 19:03:55 +0800 Subject: [PATCH 24/24] Update get_sightlines.py --- desidlas/datasets/get_sightlines.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/desidlas/datasets/get_sightlines.py b/desidlas/datasets/get_sightlines.py index 5c09dc6..64bd794 100644 --- a/desidlas/datasets/get_sightlines.py +++ b/desidlas/datasets/get_sightlines.py @@ -35,7 +35,7 @@ def get_sightlines(spectra,truth,zbest,outpath): sightline.flux = sightline.flux[0:sightline.split_point_br] sightline.error = sightline.error[0:sightline.split_point_br] sightline.loglam = sightline.loglam[0:sightline.split_point_br] - rebin(sightline, best_v['b']) + rebin(sightline, best_v['all']) sightlines.append(sightline) np.save(outpath,sightlines)
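Taken together with patch 24, the end-to-end flow now rebins every sightline with best_v['all'] and feeds the same windows to training and prediction. A condensed sketch of a high-S/N prediction run — an editor's illustration only, with placeholder file names; the hyperparameter columns follow patches 02, 03 and 07 (column 1 of each list for the high-S/N model, column 0 for mid/low, with INPUT_SIZE=600 and matrix_size=4 for low):

    import numpy as np
    from desidlas.datasets.get_sightlines import get_sightlines
    from desidlas.datasets.get_dataset import make_datasets
    from desidlas.training.parameterset import parameter_names, parameters
    from desidlas.prediction.get_partprediction import predictions_ann

    # read DESI spectra, cut at the b/r split point, rebin with best_v['all']
    sightlines = get_sightlines('spectra.fits', 'truth.fits', 'zbest.fits', 'sightlines.npy')

    # 400-pixel windows, all of them kept for prediction
    make_datasets(sightlines, output='pre_dataset.npy')

    # high-S/N hyperparameter column
    hyperparameters = {name: parameters[k][1] for k, name in enumerate(parameter_names)}

    r = np.load('pre_dataset.npy', allow_pickle=True).item()
    predictions = {}
    for sight_id in r.keys():
        flux = np.array(r[sight_id]['FLUX'])
        pred, conf, offset, coldensity = predictions_ann(
            hyperparameters, 400, 1, flux,
            'desidlas/prediction/model/train_highsnr/current_99999')
        predictions[sight_id] = {'pred': pred, 'conf': conf,
                                 'offset': offset, 'coldensity': coldensity}
    np.save('partpre.npy', predictions)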