Merge pull request #17 from LCOGT/feature/source-catalog-data
source-catalog analysis function, improved scale points util
LTDakin authored Jun 11, 2024
2 parents f20189c + c354f18 commit e7171af
Showing 6 changed files with 86 additions and 35 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -161,3 +161,7 @@ cython_debug/
 
 # vscode
 .vscode
+
+# temp files
+tmp/
+tmp/**/*
17 changes: 7 additions & 10 deletions datalab/datalab_session/analysis/line_profile.py
@@ -1,20 +1,17 @@
 from skimage.measure import profile_line
 
-from datalab.datalab_session.util import scale_flip_points
+from datalab.datalab_session.util import scale_points
+from datalab.datalab_session.util import get_hdu
 
 # For creating an array of brightness along a user drawn line
-def line_profile(input: dict, sci_hdu: object):
+def line_profile(input: dict):
     """
     Creates an array of luminosity values and the length of the line in arcseconds
     """
-    points = scale_flip_points(input['width'], input['height'], sci_hdu.data, [(input["x1"], input["y1"]), (input["x2"], input["y2"])])
-    line_profile = profile_line(sci_hdu.data, points[0], points[1], mode="constant", cval=-1)
+    sci_hdu = get_hdu(input['basename'], 'SCI')
+
+    x_points, y_points = scale_points(input["height"], input["width"], sci_hdu.data.shape[0], sci_hdu.data.shape[1], x_points=[input["x1"], input["x2"]], y_points=[input["y1"], input["y2"]])
+    line_profile = profile_line(sci_hdu.data, (x_points[0], y_points[0]), (x_points[1], y_points[1]), mode="constant", cval=-1)
     arcsec = len(line_profile) * sci_hdu.header["PIXSCALE"]
 
     return {"line_profile": line_profile, "arcsec": arcsec}
-
-def debug_point_on_sci_data(x, y, sci_hdu: object):
-    """
-    Debugging function to check a point (x,y) on the sci data has the same value as the point cross checked in DS9
-    """
-    print(f"data: {sci_hdu.data[y, x]}")
39 changes: 39 additions & 0 deletions datalab/datalab_session/analysis/source_catalog.py
@@ -0,0 +1,39 @@
+import numpy as np
+
+from datalab.datalab_session.util import get_hdu, scale_points
+
+
+def source_catalog(input: dict):
+    """
+    Returns a dict representing the source catalog data with x,y coordinates and flux values
+    """
+    cat_hdu = get_hdu(input['basename'], 'CAT')
+    sci_hdu = get_hdu(input['basename'], 'SCI')
+
+    # The number of sources to send back to the frontend, default 50
+    SOURCE_CATALOG_COUNT = min(50, len(cat_hdu.data["x"]))
+
+    # get x,y and flux values for the first SOURCE_CATALOG_COUNT sources
+    x_points = cat_hdu.data["x"][:SOURCE_CATALOG_COUNT]
+    y_points = cat_hdu.data["y"][:SOURCE_CATALOG_COUNT]
+    flux = cat_hdu.data["flux"][:SOURCE_CATALOG_COUNT]
+    ra = cat_hdu.data["ra"][:SOURCE_CATALOG_COUNT]
+    dec = cat_hdu.data["dec"][:SOURCE_CATALOG_COUNT]
+
+    # scale the x_points and y_points from the fits pixel coords to the jpg coords
+    fits_height, fits_width = np.shape(sci_hdu.data)
+    x_points, y_points = scale_points(fits_height, fits_width, input['width'], input['height'], x_points=x_points, y_points=y_points)
+
+    # we will be giving a list of dicts representing each source back to the frontend
+    source_catalog_data = []
+    for i in range(SOURCE_CATALOG_COUNT):
+        source_catalog_data.append({
+            "x": x_points[i],
+            "y": y_points[i],
+            "flux": flux[i].astype(int),
+            # truncate the ra and dec to 4 decimal places for readability
+            "ra": '%.4f'%(ra[i]),
+            "dec": '%.4f'%(dec[i])
+        })
+
+    return source_catalog_data
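Usage sketch for the new source_catalog analysis (not part of the diff). The basename is hypothetical; width and height are the jpg dimensions that the catalog pixel coordinates get scaled to.

from datalab.datalab_session.analysis.source_catalog import source_catalog

sources = source_catalog({
    "basename": "example-frame-basename",  # hypothetical archive frame
    "width": 500,                          # jpg width the x values are scaled to
    "height": 500,                         # jpg height the y values are scaled to
})

# Returns a list of at most 50 entries, each shaped like:
# {"x": 231, "y": 87, "flux": 10452, "ra": "150.1234", "dec": "2.2345"}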
52 changes: 31 additions & 21 deletions datalab/datalab_session/util.py
@@ -1,5 +1,7 @@
 import requests
 import logging
+import os
+import urllib.request
 
 import boto3
 from astropy.io import fits
@@ -103,21 +105,27 @@ def get_archive_from_basename(basename: str) -> dict:
 
 def get_hdu(basename: str, extension: str = 'SCI') -> list[fits.HDUList]:
     """
-    Returns a list of Sci HDUs for the given basenames
+    Returns a HDU for the given basename
+    Will download the file to a tmp directory so future calls can open it directly
+    Warning: this function returns an opened file that must be closed after use
     """
 
     # use the basename to fetch and create a list of hdu objects
     basename = basename.replace('-large', '').replace('-small', '')
+    basename_file_path = os.path.join(settings.TEMP_FITS_DIR, basename)
 
-    archive_record = get_archive_from_basename(basename)
+    if not os.path.isfile(basename_file_path):
 
-    try:
+        # create the tmp directory if it doesn't exist
+        if not os.path.exists(settings.TEMP_FITS_DIR):
+            os.makedirs(settings.TEMP_FITS_DIR)
+
+        archive_record = get_archive_from_basename(basename)
         fits_url = archive_record[0].get('url', 'No URL found')
-        hdu = fits.open(fits_url)
-        return hdu[extension]
-    except Exception as e:
-        raise FileNotFoundError(f"No image found with specified basename: {basename} Error: {e}")
+        urllib.request.urlretrieve(fits_url, basename_file_path)
+
+    hdu = fits.open(basename_file_path)
+    return hdu[extension]
 
 def create_fits(key: str, image_arr: np.ndarray) -> fits.HDUList:
 
@@ -141,23 +149,25 @@ def stack_arrays(array_list: list):
 
     return stacked
 
-def scale_flip_points(small_img_width: int, small_img_height: int, img_array: list, points: list[tuple[int, int]]):
+def scale_points(height_1: int, width_1: int, height_2: int, width_2: int, x_points=[], y_points=[], flip_y = False, flip_x = False):
     """
-    Scale the coordinates from a smaller image to the full sized fits so we know the positions of the coords on the 2dnumpy array
-    Returns the list of tuple points with coords scaled for the numpy array
+    Scales x_points and y_points from img_1 height and width to img_2 height and width
+    Optionally flips the points on the x or y axis
    """
-    large_height, large_width = np.shape(img_array)
+    if any([dim == 0 for dim in [height_1, width_1, height_2, width_2]]):
+        raise ValueError("height and width must be non-zero")
 
-    # If the aspect ratios don't match we can't be certain where the point was
-    if small_img_width / small_img_height != large_width / large_height:
-        raise ValueError("Aspect ratios of the two images must match")
+    # normalize the points to be lists in case tuples or other are passed
+    x_points = np.array(x_points)
+    y_points = np.array(y_points)
+
+    x_points = (x_points / width_1 * width_2).astype(int)
+    y_points = (y_points / height_1 * height_2).astype(int)
 
-    width_scale = large_width / small_img_width
-    height_scale = large_height / small_img_height
+    if flip_y:
+        y_points = height_2 - y_points
 
-    points_array = np.array(points)
-    scaled_points = np.int_(points_array * [width_scale, height_scale])
-    # html origin is top left, numpy origin is bottom left, so we need to flip the y axis
-    scaled_points[:, 1] = large_height - scaled_points[:, 1]
+    if flip_x:
+        x_points = width_2 - x_points
 
-    return scaled_points
+    return x_points, y_points
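A worked example (not from the commit) of the generalized scale_points helper, mapping catalog pixel coordinates from an assumed 2048x2048 FITS frame onto a 512x512 jpg and flipping y for a top-left origin:

from datalab.datalab_session.util import scale_points

# FITS frame is 2048x2048 (height_1, width_1); jpg is 512x512 (height_2, width_2).
x, y = scale_points(2048, 2048, 512, 512,
                    x_points=[1024, 512],
                    y_points=[1024, 256],
                    flip_y=True)

print(x)  # [256 128]  -> x scaled by 512/2048
print(y)  # [256 448]  -> y scaled by 512/2048, then flipped: 512 - y

Note that get_hdu now caches downloads under settings.TEMP_FITS_DIR, and per the new docstring warning the returned HDU is an open file that the caller is responsible for closing.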
8 changes: 4 additions & 4 deletions datalab/datalab_session/views.py
@@ -4,7 +4,7 @@
 
 from datalab.datalab_session.data_operations.utils import available_operations
 from datalab.datalab_session.analysis.line_profile import line_profile
-from datalab.datalab_session.util import get_hdu
+from datalab.datalab_session.analysis.source_catalog import source_catalog
 
 
 class OperationOptionsApiView(RetrieveAPIView):
@@ -26,11 +26,11 @@ class AnalysisView(RetrieveAPIView):
     def post(self, request, action):
         input = request.data
 
-        sci_hdu = get_hdu(input['basename'])
-
         match action:
             case 'line-profile':
-                output = line_profile(input, sci_hdu)
+                output = line_profile(input)
+            case 'source-catalog':
+                output = source_catalog(input)
             case _:
                 raise Exception(f'Analysis action {action} not found')
 
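For reference, a sketch of exercising the new action through AnalysisView with DRF's test client. The URL pattern is not part of this diff, so the path below is an assumption, and the payload values are hypothetical.

from rest_framework.test import APIClient

client = APIClient()

# Hypothetical route for AnalysisView; the actual urlconf is not shown in this diff.
response = client.post(
    "/api/analysis/source-catalog/",
    {"basename": "example-frame-basename", "width": 500, "height": 500},
    format="json",
)
sources = response.json()  # list of {"x", "y", "flux", "ra", "dec"} dicts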
1 change: 1 addition & 0 deletions datalab/settings.py
@@ -37,6 +37,7 @@ def get_list_from_env(variable, default=None):
 
 # Build paths inside the project like this: BASE_DIR / 'subdir'.
 BASE_DIR = Path(__file__).resolve().parent.parent
+TEMP_FITS_DIR = os.getenv('TEMP_FITS_DIR', os.path.join(BASE_DIR, 'tmp/fits/'))
 
 # Quick-start development settings - unsuitable for production
 # See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/
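Minor note: the new setting can be overridden through the TEMP_FITS_DIR environment variable. A minimal sketch of how the default resolves, using a hypothetical BASE_DIR:

import os
from pathlib import Path

BASE_DIR = Path("/srv/datalab")  # hypothetical project root
TEMP_FITS_DIR = os.getenv('TEMP_FITS_DIR', os.path.join(BASE_DIR, 'tmp/fits/'))
print(TEMP_FITS_DIR)  # "/srv/datalab/tmp/fits/" unless TEMP_FITS_DIR is set in the environment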
