Skip to content

Commit

Permalink
Merge branch 'main' of github.com:LCOGT/banzai into actions-e2e
Browse files Browse the repository at this point in the history
  • Loading branch information
mgdaily committed Jul 24, 2024
2 parents ee8dae2 + 8c20311 commit 433ecbf
Show file tree
Hide file tree
Showing 15 changed files with 93 additions and 38 deletions.
22 changes: 22 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
1.17.0 (2024-04-24)
-------------------
- We now omit sources in the photometry stage that have an area larger than 1000 pixels as they lead to long
processing times and are almost invariably spurious.

1.16.1 (2024-04-23)
-------------------
- Correction to aperture photometry. We were incorrectly using the radius instead of the diameter

1.16.0 (2024-04-18)
-------------------
- Calibration frames are now associated with output data products rather than frames
so that we have more than one calibration data product produced per frame.

1.15.2 (2024-04-12)
-------------------
- Fix to fpacking data when the image data array is None

1.15.1 (2024-02-29)
-------------------
- Minor fixes in photometry when there are bad pixels near the image edges

1.15.0 (2024-02-14)
-------------------
- Migrated photometry extraction to be done by astropy's photutils instead of SEP.
Expand Down
4 changes: 2 additions & 2 deletions banzai/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ def to_fits(self, context) -> Union[fits.HDUList, list]:


class HeaderOnly(Data):
def __init__(self, meta: Union[dict, fits.Header]):
super().__init__(data=np.zeros(0), meta=meta, memmap=False)
def __init__(self, meta: Union[dict, fits.Header], name):
super().__init__(data=np.zeros(0), meta=meta, memmap=False, name=name)

def to_fits(self, context):
return fits.HDUList([fits.ImageHDU(data=None, header=self.meta)])
Expand Down
10 changes: 5 additions & 5 deletions banzai/lco.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def write(self, runtime_context):
if runtime_context.post_to_archive:
archived_image_info = file_utils.post_to_ingester(data_product.file_buffer, self,
data_product.filename, meta=data_product.meta)
self.frame_id = archived_image_info.get('frameid')
data_product.frame_id = archived_image_info.get('frameid')

if not runtime_context.no_file_cache:
os.makedirs(self.get_output_directory(runtime_context), exist_ok=True)
Expand Down Expand Up @@ -170,7 +170,7 @@ def to_db_record(self, output_product):
'instrument_id': self.instrument.id,
'is_master': self.is_master,
'is_bad': self.is_bad,
'frameid': self.frame_id,
'frameid': output_product.frame_id,
'attributes': {}}
for attribute in self.grouping_criteria:
record_attributes['attributes'][attribute] = str(getattr(self, attribute))
Expand Down Expand Up @@ -412,7 +412,7 @@ def open(self, file_info, runtime_context) -> Optional[ObservationFrame]:
for hdu in fits_hdu_list if hdu.data is not None):
for hdu in fits_hdu_list:
if hdu.data is None or hdu.data.size == 0:
hdu_list.append(HeaderOnly(meta=hdu.header))
hdu_list.append(HeaderOnly(meta=hdu.header, name=hdu.header.get('EXTNAME')))
else:
hdu_list.append(self.data_class(data=hdu.data, meta=hdu.header, name=hdu.header.get('EXTNAME')))
else:
Expand All @@ -424,7 +424,7 @@ def open(self, file_info, runtime_context) -> Optional[ObservationFrame]:
continue
# Otherwise parse the fits file into a frame object and the corresponding data objects
if hdu.data is None or hdu.data.size == 0:
hdu_list.append(HeaderOnly(meta=hdu.header))
hdu_list.append(HeaderOnly(meta=hdu.header, name=hdu.header.get('EXTNAME')))
primary_hdu = hdu
elif isinstance(hdu, fits.BinTableHDU):
hdu_list.append(DataTable(data=Table(hdu.data), meta=hdu.header, name=hdu.header.get('EXTNAME')))
Expand Down Expand Up @@ -601,7 +601,7 @@ def _munge_data_cube(hdu):
:return: List CCDData objects
"""
# The first extension gets to be a header only object
hdu_list = [HeaderOnly(meta=hdu.header)]
hdu_list = [HeaderOnly(meta=hdu.header, name=hdu.header.get('EXTNAME'))]

# We need to properly set the datasec and detsec keywords in case we didn't read out the
# middle row (the "Missing Row Problem").
Expand Down
34 changes: 24 additions & 10 deletions banzai/photometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ def flag_edge_sources(image, sources, flag_value=8):
minor_xmax = sources['x'] + sources['b'] * sources['kronrad'] * np.sin(np.deg2rad(sources['theta']))
minor_ymin = sources['y'] - sources['b'] * sources['kronrad'] * np.cos(np.deg2rad(sources['theta']))
minor_ymax = sources['y'] + sources['b'] * sources['kronrad'] * np.cos(np.deg2rad(sources['theta']))
major_ymin = sources['y'] - sources['a'] * sources['kronrad'] * np.sin(np.deg2rad(sources['theta']))
major_ymax = sources['y'] + sources['a'] * sources['kronrad'] * np.sin(np.deg2rad(sources['theta']))
major_xmin = sources['x'] - sources['a'] * sources['kronrad'] * np.cos(np.deg2rad(sources['theta']))
major_xmax = sources['x'] + sources['a'] * sources['kronrad'] * np.cos(np.deg2rad(sources['theta']))
major_ymin = sources['y'] - sources['a'] * sources['kronrad'] * np.cos(np.deg2rad(sources['theta']))
major_ymax = sources['y'] + sources['a'] * sources['kronrad'] * np.cos(np.deg2rad(sources['theta']))
major_xmin = sources['x'] - sources['a'] * sources['kronrad'] * np.sin(np.deg2rad(sources['theta']))
major_xmax = sources['x'] + sources['a'] * sources['kronrad'] * np.sin(np.deg2rad(sources['theta']))

# Note we are already 1 indexed here
sources_off = np.logical_or(minor_xmin < 1, major_xmin < 1)
Expand All @@ -64,7 +64,7 @@ def flag_edge_sources(image, sources, flag_value=8):
sources_off = np.logical_or(sources_off, major_xmax > nx)
sources_off = np.logical_or(sources_off, minor_ymax > ny)
sources_off = np.logical_or(sources_off, major_ymax > ny)
sources[sources_off]['flag'] |= flag_value
sources['flag'][sources_off] |= flag_value


class SourceDetector(Stage):
Expand Down Expand Up @@ -102,6 +102,10 @@ def do_stage(self, image):
# Do an initial source detection
segmentation_map = detect_sources(convolved_data, self.threshold, npixels=self.min_area)

# We now remove any sources with an area > 1000 pixels because they are almost invariably spurious
segmentation_map.remove_labels(segmentation_map.labels[segmentation_map.areas > 1000])
segmentation_map.relabel_consecutive(1)

logger.info('Deblending sources', image=image)
# Note that nlevels here is DEBLEND_NTHRESH in source extractor which is 32 by default
deblended_seg_map = deblend_sources(convolved_data, segmentation_map,
Expand All @@ -125,9 +129,9 @@ def do_stage(self, image):
'xy': catalog.covar_sigxy.value,
'background': catalog.background_mean})

for r in range(1, 7):
radius_arcsec = r / image.pixel_scale
sources[f'fluxaper{r}'], sources[f'fluxerr{r}'] = catalog.circular_photometry(radius_arcsec)
for d in range(1, 7):
radius_arcsec = d / image.pixel_scale / 2.0
sources[f'fluxaper{d}'], sources[f'fluxerr{d}'] = catalog.circular_photometry(radius_arcsec)

for r in [0.25, 0.5, 0.75]:
sources['fluxrad' + f'{r:.2f}'.lstrip("0.")] = catalog.fluxfrac_radius(r)
Expand All @@ -145,10 +149,20 @@ def do_stage(self, image):
# Flag = 16 if source has cosmic ray pixels
flag_sources(sources, catalog.labels, deblended_seg_map, image.mask, flag=16, mask_value=8)

sources = array_utils.prune_nans_from_table(sources)
rows_with_nans = array_utils.find_nans_in_table(sources)
catalog = catalog[~rows_with_nans]
sources = sources[~rows_with_nans]

# Cut individual bright pixels. Often cosmic rays
sources = sources[sources['fluxrad50'] > 0.5]
not_individual_bright_pixels = sources['fluxrad50'] > 0.5
catalog = catalog[not_individual_bright_pixels]
sources = sources[not_individual_bright_pixels]

# Cut sources that are less than 2 pixels wide
thin_sources = np.logical_or((catalog.bbox_ymax - catalog.bbox_ymin) < 1.5,
(catalog.bbox_xmax - catalog.bbox_xmin) < 1.5)
catalog = catalog[~thin_sources]
sources = sources[~thin_sources]

# Calculate the FWHMs of the stars:
sources['fwhm'] = np.nan
Expand Down
3 changes: 2 additions & 1 deletion banzai/tests/test_array_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

pytestmark = pytest.mark.array_utils


def test_pruning_nans():
a = np.arange(100, dtype=float)
b = np.arange(100, dtype=float) + 100
Expand All @@ -16,7 +17,7 @@ def test_pruning_nans():
c[78] = np.nan

t = Table([a, b, c], names=('a', 'b', 'c'))
t = array_utils.prune_nans_from_table(t)
t = t[~array_utils.find_nans_in_table(t)]
assert len(t) == 97
assert 51 not in t['a']
assert 32 not in t['a']
Expand Down
3 changes: 1 addition & 2 deletions banzai/tests/test_frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,8 @@ def test_frame_to_db_record():
'CONFMODE': 'full_frame'}, name='SCI')]
test_frame = LCOCalibrationFrame(hdu_list=hdu_list, file_path='/foo/bar')
test_frame.is_bad = False
test_frame.frame_id = 1234
test_frame.instrument = MagicMock(id=7)
mock_data_product = MagicMock(filename='test.fits.fz', filepath='/path/to/test/test.fits.fz')
mock_data_product = MagicMock(filename='test.fits.fz', filepath='/path/to/test/test.fits.fz', frame_id=1234)
db_record = test_frame.to_db_record(mock_data_product)

assert type(db_record) == CalibrationImage
Expand Down
6 changes: 3 additions & 3 deletions banzai/tests/test_munge.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def test_when_has_partial_coefficients():
for i in range(2):
for j in range(2):
header['CRSTLK{i}{j}'.format(i=i+1, j=j+1)] = 1.0
hdu_list = [HeaderOnly(meta=header)] + [FakeCCDData() for i in range(4)]
hdu_list = [HeaderOnly(meta=header, name='')] + [FakeCCDData() for i in range(4)]
with pytest.raises(MissingCrosstalkCoefficients):
LCOFrameFactory._init_crosstalk(FakeLCOObservationFrame(hdu_list=hdu_list))

Expand All @@ -34,7 +34,7 @@ def test_when_has_coefficients():
for i in range(4):
for j in range(4):
header['CRSTLK{i}{j}'.format(i=i+1, j=j+1)] = 1.0
hdu_list = [HeaderOnly(meta=header)] + [FakeCCDData() for i in range(4)]
hdu_list = [HeaderOnly(meta=header, name='')] + [FakeCCDData() for i in range(4)]
LCOFrameFactory._init_crosstalk(FakeLCOObservationFrame(hdu_list=hdu_list))


Expand All @@ -44,7 +44,7 @@ def test_defaults_do_not_override_header():
for j in range(4):
header['CRSTLK{i}{j}'.format(i=i+1, j=j+1)] = 1.0

hdu_list = [HeaderOnly(meta=header)] + [FakeCCDData() for i in range(4)]
hdu_list = [HeaderOnly(meta=header, name='')] + [FakeCCDData() for i in range(4)]
fake_image = FakeLCOObservationFrame(hdu_list=hdu_list)
LCOFrameFactory._init_crosstalk(fake_image)

Expand Down
4 changes: 2 additions & 2 deletions banzai/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ def handles_inhomogeneous_set(stagetype, context, keyword, value, calibration_ma
stage = stagetype(context)
kwargs = {keyword: value}
if calibration_maker:
images = [LCOCalibrationFrame(hdu_list=[HeaderOnly(meta=kwargs)])]
images += [LCOCalibrationFrame(hdu_list=[HeaderOnly()]) for x in range(6)]
images = [LCOCalibrationFrame(hdu_list=[HeaderOnly(meta=kwargs, name='')])]
images += [LCOCalibrationFrame(hdu_list=[HeaderOnly(meta={}, name=''),]) for x in range(6)]
images = stage.do_stage(images)
assert len(images) == 0
else:
Expand Down
4 changes: 2 additions & 2 deletions banzai/utils/array_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ def array_indices_to_slices(a):
return tuple(slice(0, x, 1) for x in a.shape)


def prune_nans_from_table(table):
def find_nans_in_table(table):
nan_in_row = np.zeros(len(table), dtype=bool)
for col in table.colnames:
nan_in_row |= np.isnan(table[col])
return table[~nan_in_row]
return nan_in_row
25 changes: 18 additions & 7 deletions banzai/utils/fits_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,12 +199,15 @@ def unpack(compressed_hdulist: fits.HDUList) -> fits.HDUList:
starting_extension = 1
for hdu in compressed_hdulist[starting_extension:]:
if isinstance(hdu, fits.CompImageHDU):
data_type = str(hdu.data.dtype)
if 'int' == data_type[:3]:
data_type = getattr(np, 'u' + data_type)
data = np.array(hdu.data, data_type)
if hdu.data is None:
data = hdu.data
else:
data = np.array(hdu.data, hdu.data.dtype)
data_type = str(hdu.data.dtype)
if 'int' == data_type[:3]:
data_type = getattr(np, 'u' + data_type)
data = np.array(hdu.data, data_type)
else:
data = np.array(hdu.data, hdu.data.dtype)
hdulist.append(fits.ImageHDU(data=data, header=hdu.header))
elif isinstance(hdu, fits.BinTableHDU):
hdulist.append(fits.BinTableHDU(data=hdu.data, header=hdu.header))
Expand All @@ -223,7 +226,11 @@ def pack(uncompressed_hdulist: fits.HDUList, lossless_extensions: Iterable) -> f
quantize_level = 1e9
else:
quantize_level = 64
compressed_hdu = fits.CompImageHDU(data=np.ascontiguousarray(uncompressed_hdulist[0].data),
if uncompressed_hdulist[0].data is None:
data = None
else:
data = np.ascontiguousarray(uncompressed_hdulist[0].data)
compressed_hdu = fits.CompImageHDU(data=data,
header=uncompressed_hdulist[0].header, quantize_level=quantize_level,
quantize_method=1)
hdulist = [primary_hdu, compressed_hdu]
Expand All @@ -234,7 +241,11 @@ def pack(uncompressed_hdulist: fits.HDUList, lossless_extensions: Iterable) -> f
quantize_level = 1e9
else:
quantize_level = 64
compressed_hdu = fits.CompImageHDU(data=np.ascontiguousarray(hdu.data), header=hdu.header,
if hdu.data is None:
data = None
else:
data = np.ascontiguousarray(hdu.data)
compressed_hdu = fits.CompImageHDU(data=data, header=hdu.header,
quantize_level=quantize_level, quantize_method=1)
hdulist.append(compressed_hdu)
else:
Expand Down
2 changes: 1 addition & 1 deletion banzai/utils/realtime_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def need_to_process_image(file_info, context):
if 'frameid' in file_info:
try:
factory = import_utils.import_attribute(context.FRAME_FACTORY)()
test_image = factory.observation_frame_class(hdu_list=[HeaderOnly(file_info)],
test_image = factory.observation_frame_class(hdu_list=[HeaderOnly(file_info, name='')],
file_path=file_info['filename'])
test_image.instrument = factory.get_instrument_from_header(file_info, db_address=context.db_address)
if image_utils.get_reduction_level(test_image.meta) != '00':
Expand Down
2 changes: 2 additions & 0 deletions helm-chart/banzai/templates/listener.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ spec:
matchLabels:
app.kubernetes.io/name: {{ include "banzai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: listener
template:
metadata:
labels:
app.kubernetes.io/name: {{ include "banzai.name" . }}
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/component: listener
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
Expand Down
3 changes: 3 additions & 0 deletions helm-chart/banzai/templates/workers-large.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ spec:
matchLabels:
app.kubernetes.io/name: {{ include "banzai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: large-worker
template:
metadata:
labels:
app.kubernetes.io/name: {{ include "banzai.name" . }}
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/component: large-worker
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
Expand Down Expand Up @@ -47,6 +49,7 @@ spec:
requests:
cpu: "0.5"
memory: "10Gi"
ephemeral-storage: "128Mi"
limits:
cpu: "2"
memory: "10Gi"
Expand Down
5 changes: 4 additions & 1 deletion helm-chart/banzai/templates/workers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ spec:
matchLabels:
app.kubernetes.io/name: {{ include "banzai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: worker
template:
metadata:
labels:
app.kubernetes.io/name: {{ include "banzai.name" . }}
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/component: worker
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
Expand Down Expand Up @@ -47,9 +49,10 @@ spec:
requests:
cpu: "0.5"
memory: "4Gi"
ephemeral-storage: "128Mi"
limits:
cpu: "2"
memory: "4Gi"
memory: "8Gi"
volumeMounts:
- name: tmp
mountPath: /tmp
Expand Down
4 changes: 2 additions & 2 deletions helm-chart/banzai/values-prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ banzai:
calibrateProposalId: calibrate
banzaiWorkerLogLevel: info
rawDataApiRoot: http://archiveapi-internal.prod/
fitsBroker: rabbitmq-ha.prod.svc.cluster.local.
fitsBroker: "amqp://science-archive:science-archive@rabbitmq.prod-rmq-shared.svc:5672/science-archive"
fitsExchange: archived_fits
queueName: banzai_pipeline
celeryTaskQueueName: banzai_imaging
Expand All @@ -58,7 +58,7 @@ postgresql:
postgresqlDatabase: banzai

rabbitmq:
hostname: rabbitmq-ha.prod.svc.cluster.local.
hostname: rabbitmq.prod-rmq-shared.svc
rabbitmq:
username: banzai
vhost: banzai
Expand Down

0 comments on commit 433ecbf

Please sign in to comment.