-
Notifications
You must be signed in to change notification settings - Fork 0
/
spectra_processing.py
293 lines (240 loc) · 9.11 KB
/
spectra_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
#! /usr/bin/env python3
'''
Written by Oleksandra Rebrysh.
'''
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import glob
from astropy.modeling.fitting import LevMarLSQFitter, LinearLSQFitter
from astropy.modeling import models
from astropy import units as u
import lineid_plot
def LoadData(filename):
df = pd.read_csv(filename, skiprows = 14, sep = '\t', header = None)
#print(df)
df.columns = ['wavelength', 'intensity']
# Remove commas and convert to numeric values for each relevant column
df['wavelength'] = pd.to_numeric(df['wavelength'].str.replace(',', '.'))
df['intensity'] = pd.to_numeric(df['intensity'].str.replace(',', '.'), downcast='float')
return df
def LoadAllSpectra(path, common_part):
# Use glob to find all files matching the pattern
pattern = path + common_part + '*.txt'
files = glob.glob(pattern)
# Load each file into a DataFrame and store in a list
spectra = []
for filename in files:
df = LoadData(filename)
spectra.append(df)
return spectra
def AverageSpectra(spectra):
'''
Please have your spectra in the following form:
spectra = [spectrum1,
spectrum2]
'''
combined_spectra = pd.concat(spectra)
average_spectra = combined_spectra.groupby('wavelength', as_index=False)['intensity'].mean()
return average_spectra
def ProcessDataEach(rawdata, background, dark, subtract_background = False, subtract_dark = False):
#required_columns = ['wavelength', 'intensity']
## Check if input data frames are valid
#for df in [background, dark, *rawdata]:
#if not isinstance(df, pd.DataFrame) or set(df.columns) != set(required_columns):
#raise ValueError("Invalid input data frame format or columns.")
## Check if input data frames have the same length
#background_length = len(background)
#dark_length = len(dark)
#if any(len(data) != background_length or len(data) != dark_length for data in rawdata):
#raise ValueError("Input data frames must have the same length.")
processed_data = []
# Subtract background and dark from each raw data spectrum
for data in rawdata:
processed_df = data.copy()
if subtract_background:
processed_df['intensity'] -= background['intensity'].values
if subtract_dark:
processed_df['intensity'] -= dark['intensity'].values
#processed_df['intensity'] = processed_df['intensity'].clip(lower=0) # Ensure non-negative intensities
processed_data.append(processed_df)
return processed_data
def ProcessData(rawdata, background, dark):
# Ensure the dataframes have the expected columns
required_columns = ['wavelength', 'intensity']
for df in [rawdata, background, dark]:
if df.shape[1] != 2:
print(f'ERROR EXCEPTION: Data should have exactly two columns')
return None
df.columns = required_columns
# Ensure the lengths of the dataframes match
if not (rawdata.shape[0] == background.shape[0] == dark.shape[0]):
print(f'ERROR EXCEPTION: Data should have the same length')
return None
# Process the data
processed_data = rawdata.copy()
processed_data['intensity'] = rawdata['intensity'] - background['intensity'] - dark['intensity']
return processed_data
def AddSpectra(spectra):
'''
Please have your spectra in the following form:
spectra = [spectrum1,
spectrum2]
'''
combined_spectra = pd.concat(spectra)
summed_spectra = combined_spectra.groupby('wavelength', as_index=False)['intensity'].sum()
#print(summed_spectra)
return summed_spectra
def PlotData(ax, data, name, labelname):
#print(data)
wavelength = data.loc[:, 'wavelength']
intensity = data.loc[:, 'intensity']
ax.plot(wavelength, intensity, label = labelname)
ax.minorticks_on()
ax.xaxis.set_minor_locator(plt.MultipleLocator(10))
ax.grid(True, 'both')
ax.set_xlabel(f'Wavelength, nm')
ax.set_ylabel(f'Intensity, counts')
ax.set_title(name)
ax.legend()
return
def DefSpectrumForLines(spectr, minwavelength, maxwavelength):
wavelength = sum_spectra.loc[:, 'wavelength'].values
intensity = sum_spectra.loc[:, 'intensity'].values
mask = (wavelength >= minwavelength) & (wavelength <= maxwavelength)
wavelength_truncated = wavelength[mask]
intensity_truncated = intensity[mask]
# spectrum = Spectrum1D(flux=intensity_truncated*u.adu, spectral_axis=wavelength_truncated*u.nm)
return wavelength_truncated, intensity_truncated
def FitContinuum(degree, wavelength, intensity, image = False):
chebyshev_model = models.Chebyshev1D(degree=degree)
fitter = LinearLSQFitter()
g1_fit = fitter(chebyshev_model, wavelength, intensity)
spec_norm = (intensity - g1_fit(wavelength))/np.linalg.norm(intensity - g1_fit(wavelength))
if image == True:
fig, ax = plt.subplots(2, 1, figsize=(10, 8))
ax[0].plot(wavelength, intensity, label='Original Spectrum')
ax[0].plot(wavelength, g1_fit(wavelength), label='Fitted Continuum')
ax[0].set_title('Continuum Fitting')
ax[0].legend()
ax[0].grid(True)
# ax[1].plot(wavelength_truncated, spec_norm.flux)
ax[1].plot(wavelength, spec_norm)
ax[1].set_title('Continuum Normalized Spectrum')
ax[1].grid(True)
return spec_norm
def DetectAndPlotLines(spectra, wavelength, spectral_lines, telluric_lines, threshold, title = None, save = False, path = None, name = None):
detected_lines = {}
telluric_lines_d = {}
# Step 4: Check for each line if its depth/strength exceeds the threshold
for line_name, line_wavelength in spectral_lines.items():
index = np.argmin(np.abs(wavelength_truncated - line_wavelength))
flux = spectra[index]
depth = - flux
if depth >= threshold:
if flux < spectra[index - 10] and flux < spectra[index + 10]:
detected_lines[line_name] = line_wavelength
for line_wavelength in telluric_lines["TL"]:
index = np.argmin(np.abs(wavelength_truncated - line_wavelength))
flux = spectra[index]
depth = - flux
if depth >= 0.02:
if flux < spectra[index - 1] and flux < spectra[index + 1]:
telluric_lines_d[line_wavelength] = line_wavelength
fig, ax = lineid_plot.plot_line_ids(
wavelength_truncated,
spectra,
list(detected_lines.values()) + list(telluric_lines_d.values()), # Flux values of detected lines
list(detected_lines.keys()) + ["TL"]*len(telluric_lines_d), # Names of detected lines
max_iter = 100)
fig.set_size_inches(12, 6)
num_detected_lines = len(detected_lines) # Count of detected lines
num_telluric_lines = len(telluric_lines_d) # Count of telluric lines
for index in range(num_detected_lines+1, num_detected_lines + num_telluric_lines+1):
line = ax.lines[index]
line.set_color("red")
line.set_linestyle("--")
for text in ax.texts:
if text.get_text() == "TL": # Target labels with "TL"
text.set_visible(False)
if text.get_text() != "TL": # Ignore telluric lines
text.set_y(text.get_position()[1] + 0.005)
ax.set_xlabel('Wavelength, nm')
ax.set_ylabel('Normalised flux')
tl = mpl.lines.Line2D([], [], color='r', linestyle = '--')
ax.legend([tl], ['Telluric line'])
ax.set_title(title, y = 1.2, fontweight="bold")
if save:
plt.savefig(path+name, dpi = 600)
return
#if __name__ == '__main__':
#path = ''
#savefold = ''
#spectral_lines = {
#"Hα": 656.3,
#"Hβ": 486.1,
#"Hγ": 434.0,
#"Hδ": 410.2,
#"He II": 420.0,
#"He II": 454.1,
#"He I": 447.1,
#"He I": 402.6,
#"He I": 667.8,
#"Fe I": 495.8,
#"Fe I": 466.8,
#"Fe I": 438.4,
#"Ca I": 420.8,
#"Fe I": 527.0,
#"Fe II": 516.90,
#"Mg I": 518.0,
#"Na I D1": 589.00,
#"Na I D2": 589.60,
#"Ca II H": 396.85,
#"Ca II K": 393.37,
#"Ca II IR 1": 849.80,
#"Ca II IR 2": 854.20,
#"Ca II IR 3": 866.20,
#"[O I] 1": 630.0,
#"[O I] 2": 636.4,
#"C II": 426.7,
#"Si II": 412.8,
#"Si II": 634.7,
#"Si II": 637.1,
#"Mg II": 448.1,
#"O I": 898.8,
#"O I": 822.7,
#"O I": 759.4,
#"O I": 686.7,
#"O I": 627.7,
#"O I": 777.1,
#"O I": 777.4,
#"O I": 777.5,
#"He I": 587.6,
#"Ti II": 336.1,
#"Ni I": 299.4,
#"TiO": 476.1,
#"TiO": 495.4,
#"TiO": 516.7
#}
#telluric_lines = {"TL":
#[687.8,
#718.5,
#719.4,
#725.0,
#759.3, # O2 (760.0 nm region)
#760.5,
#761.0, # O2 (760.5 nm region)
#762.0, # O2 (762.0 nm region)
#764.0, # H2O (763.0-764.0 nm region)
#820.5, # H2O (820.0-821.0 nm region)
#822.2, # H2O (822.0 nm region)
#935.0, # H2O (934.0-935.0 nm region)
#940.0, # H2O (940.0 nm region)
#942.0, # H2O (942.0 nm region)
#940.5, # H2O (940.5 nm region)
#943.0, # H2O (943.0 nm region)
#946.0, # H2O (946.0 nm region)
#953.0, # H2O (953.0 nm region)
#960.0, # H2O (960.0 nm region)
#]}