Source code for alma_search.utils

"""Shared utility helpers for ALMA archive search workflows.

These helpers are intentionally small and reusable. They normalize missing
values, parse coordinate strings, and combine repeated metadata values into
stable CSV-friendly text.
"""

from __future__ import annotations

import logging
from typing import Any, Iterable, Sequence

import pandas as pd


def configure_logging(verbose: bool) -> None:
    """Set up the logging level and message layout used by the package.

    Parameters
    ----------
    verbose : bool
        Selects ``logging.DEBUG`` when true and ``logging.INFO`` otherwise.
    """
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO

    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=threshold,
    )
def safe_get(record: dict[str, Any], key: str, default: Any = "") -> Any:
    """Look up ``key`` in ``record``, replacing null-like values by a default.

    Parameters
    ----------
    record : dict[str, Any]
        Mapping to query (anything exposing ``.get``).
    key : str
        Key to look up.
    default : Any, optional
        Value returned when the key is absent, maps to ``None``, or maps to
        something ``pandas.isna`` reports as missing.

    Returns
    -------
    Any
        The stored value, or ``default`` for missing/null-like entries.
    """
    candidate = record.get(key, default)
    if candidate is not None:
        try:
            missing = bool(pd.isna(candidate))
        except Exception:
            # Best effort: pd.isna may return a non-scalar result whose truth
            # value cannot be evaluated; such values are kept as they are.
            missing = False
        if not missing:
            return candidate
    return default
def is_blank(value: Any) -> bool:
    """Tell whether ``value`` counts as missing text/data.

    Parameters
    ----------
    value : Any
        Value to inspect.

    Returns
    -------
    bool
        ``True`` for ``None``, values ``pandas.isna`` reports as missing, and
        strings that are empty or whitespace-only; ``False`` otherwise.
    """
    if value is None:
        return True

    try:
        missing = bool(pd.isna(value))
    except Exception:
        # Best effort: a result whose truth value cannot be evaluated is
        # treated as "not missing" instead of propagating the error.
        missing = False

    return missing or (isinstance(value, str) and not value.strip())
def normalize_whitespace(value: Any) -> str:
    """Render ``value`` as text with every whitespace run reduced to one space.

    Parameters
    ----------
    value : Any
        Scalar to convert and clean up.

    Returns
    -------
    str
        ``str(value)`` with leading/trailing whitespace removed and interior
        whitespace runs collapsed to single spaces; ``""`` for blank input.
    """
    if not is_blank(value):
        tokens = str(value).split()
        return " ".join(tokens)
    return ""
def unique_preserve_order(items: Iterable[str]) -> list[str]:
    """Drop falsy entries and duplicates, keeping first-seen order.

    Parameters
    ----------
    items : iterable[str]
        Candidate string values.

    Returns
    -------
    list[str]
        Each distinct truthy item once, in the order it first appeared.
    """
    # dict keys are unique and remember insertion order, so the first
    # occurrence of every item determines its position.
    first_seen = dict.fromkeys(entry for entry in items if entry)
    return list(first_seen)
def stable_sort_numeric_strings(values: Iterable[str]) -> list[str]:
    """Sort string values numerically when possible, otherwise lexically.

    Parameters
    ----------
    values : iterable[str]
        Values to sort. Each one is converted with ``str``; entries whose
        text is empty or whitespace-only are dropped.

    Returns
    -------
    list[str]
        Unique values with numeric strings first (ascending by value),
        followed by non-numeric strings in lexical order. Values with equal
        sort keys (for example ``"1"`` and ``"1.0"``) keep first-seen order.
    """
    import math

    unique_values = unique_preserve_order(
        str(v) for v in values if str(v).strip()
    )

    def sort_key(item: str) -> tuple[int, float | str]:
        try:
            number = float(item)
        except ValueError:
            return (1, item)
        # float() accepts "nan", but NaN compares false against everything
        # and would leave the sorted result in an arbitrary order. Treat such
        # text as non-numeric so the ordering stays well defined.
        if math.isnan(number):
            return (1, item)
        return (0, number)

    return sorted(unique_values, key=sort_key)
def parse_ra_dec_to_degrees(ra_value: Any, dec_value: Any) -> tuple[float, float]:
    """Parse RA and Dec values into decimal degrees.

    Each coordinate is interpreted on its own: text that parses as a number
    is taken as decimal degrees, anything else is handed to astropy as
    sexagesimal text (hour angle for RA, degrees for Dec). A decimal RA
    paired with a sexagesimal Dec (or the reverse) is therefore handled
    correctly instead of the decimal RA being read as hours.

    Parameters
    ----------
    ra_value : Any
        Right ascension value in decimal degrees or sexagesimal text.
    dec_value : Any
        Declination value in decimal degrees or sexagesimal text.

    Returns
    -------
    tuple[float, float]
        Parsed ``(ra_deg, dec_deg)`` pair.

    Raises
    ------
    ValueError
        If either coordinate is blank, is a non-finite number such as
        ``"nan"`` or ``"inf"``, or cannot be parsed.
    """
    if is_blank(ra_value) or is_blank(dec_value):
        raise ValueError("RA/Dec values must not be blank")

    import math

    ra_text = str(ra_value).strip()
    dec_text = str(dec_value).strip()

    def as_decimal_degrees(text: str) -> float | None:
        """Return ``text`` as a finite float, or ``None`` if not numeric."""
        try:
            number = float(text)
        except ValueError:
            return None
        if not math.isfinite(number):
            raise ValueError(f"RA/Dec values must be finite, got {text!r}")
        return number

    ra_deg = as_decimal_degrees(ra_text)
    dec_deg = as_decimal_degrees(dec_text)
    if ra_deg is not None and dec_deg is not None:
        return ra_deg, dec_deg

    # astropy is imported lazily so purely numeric input never needs it.
    import astropy.units as u
    from astropy.coordinates import Latitude, Longitude

    if ra_deg is None:
        ra_deg = float(Longitude(ra_text, unit=u.hourangle).deg)
    if dec_deg is None:
        dec_deg = float(Latitude(dec_text, unit=u.deg).deg)
    return ra_deg, dec_deg
def format_ra_dec_strings(ra_deg: float, dec_deg: float) -> tuple[str, str]:
    """Render decimal-degree coordinates as colon-separated sexagesimal text.

    Parameters
    ----------
    ra_deg : float
        Right ascension in decimal degrees.
    dec_deg : float
        Declination in decimal degrees.

    Returns
    -------
    tuple[str, str]
        ``(ra_text, dec_text)``: RA expressed in hours and Dec in degrees,
        both zero-padded with two decimals; Dec always carries a sign.
    """
    import astropy.units as u
    from astropy.coordinates import SkyCoord

    position = SkyCoord(ra_deg * u.deg, dec_deg * u.deg, frame="icrs")

    # Formatting options common to both axes.
    shared = {"sep": ":", "precision": 2, "pad": True}
    ra_text = position.ra.to_string(unit=u.hour, **shared)
    dec_text = position.dec.to_string(unit=u.deg, alwayssign=True, **shared)
    return str(ra_text), str(dec_text)
def to_optional_float(value: Any, scale: float = 1.0, digits: int = 3) -> float | pd.NA:
    """Convert a scalar to a rounded float when possible.

    Parameters
    ----------
    value : Any
        Input value to convert.
    scale : float, optional
        Multiplicative scale factor applied before rounding.
    digits : int, optional
        Number of decimal places to keep.

    Returns
    -------
    float | pandas.NA
        Rounded float result, or ``pandas.NA`` when the input is blank, the
        conversion fails (including numeric overflow), or the result is NaN.
    """
    if is_blank(value):
        return pd.NA
    try:
        result = round(float(value) * scale, digits)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers integers too large for a float.
        return pd.NA
    # Text such as "nan" converts without error; report it as missing so it
    # matches what a float NaN input already produces via the blank check.
    if pd.isna(result):
        return pd.NA
    return result
def format_float_text(value: Any, digits: int = 3) -> str:
    """Format a scalar value as compact text for merged CSV fields.

    Parameters
    ----------
    value : Any
        Input scalar value.
    digits : int, optional
        Number of decimal places used when rounding numeric values. Zero and
        negative values are allowed; the text then has no fractional part.

    Returns
    -------
    str
        Blank string for missing input, a whitespace-normalized text value
        for non-numeric input, or a numeric string without trailing
        fractional zeros.
    """
    if is_blank(value):
        return ""
    try:
        number = round(float(value), digits)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers integers too large for a float.
        return normalize_whitespace(value)

    if number == 0:
        # Collapse negative zero (e.g. from rounding -0.0001) so the result
        # reads "0" instead of "-0".
        number = 0.0

    # The format spec rejects a negative precision, so clamp it; rounding
    # above already applied the requested (possibly negative) digits.
    text = f"{number:.{max(digits, 0)}f}"

    # Only strip zeros that belong to a fractional part. Without this guard
    # an integer-looking string such as "100" would be truncated to "1".
    if "." in text:
        text = text.rstrip("0").rstrip(".")
    return text
def combine_scalar_values(values: Sequence[Any], digits: int = 3) -> str | pd.NA:
    """Combine repeated scalar values into a unique CSV-friendly string.

    Parameters
    ----------
    values : sequence[Any]
        Scalar values collected across rows.
    digits : int, optional
        Number of decimal places for numeric formatting.

    Returns
    -------
    str | pandas.NA
        Comma-separated unique formatted values in first-seen order, or
        ``pandas.NA`` when nothing usable is available.
    """
    # Format each value exactly once. Blank inputs format to "", so dropping
    # empty text also removes them without a separate blank check.
    texts = (format_float_text(value, digits=digits) for value in values)
    formatted = unique_preserve_order(text for text in texts if text)
    if not formatted:
        return pd.NA
    return ",".join(formatted)