#!/usr/bin/env python
"""A collection of reader functions for instruments operated on HALO and model output files
*author*: Johannes Röttenbacher
"""
import datetime
import logging
import os
import re
import warnings
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from pylim import cirrus_hl
from pylim import helpers as h
from pylim import smart
log = logging.getLogger(__name__)
[docs]def read_bacardi_raw(filename: str, path: str) -> xr.Dataset:
"""
Read raw BACARDI data as provided by DLR
Args:
filename: name of file
path: path to file
Returns: Dataset with BACARDI data and time as dimension
"""
filepath = os.path.join(path, filename)
date = re.search(r"\d{8}", filename)[0]
ds = xr.open_dataset(filepath)
ds = ds.rename({"TIME": "time"})
ds = ds.swap_dims({"tid": "time"}) # make time the dimension
# overwrite time to make a datetime index
ds = ds.assign({"time": pd.to_datetime(ds.time, unit='ms', origin=pd.to_datetime(date, format="%Y%m%d"))})
return ds
[docs]def read_bahamas(bahamas_path: str) -> xr.Dataset:
"""
Reader function for netcdf BAHAMAS data as provided by DLR.
Args:
bahamas_path: full path of netcdf file
Returns: xr.DataSet with BAHAMAS data and time as dimension
"""
ds = xr.open_dataset(bahamas_path)
ds = ds.swap_dims({"tid": "TIME"})
ds = ds.rename({"TIME": "time"})
return ds
[docs]def read_lamp_file(campaign: str = "cirrus-hl", filename: str = None, plot: bool = True, save_fig: bool = True,
save_file: bool = True) -> pd.DataFrame:
"""
Read in the 1000W lamp specification file interpolated to 1nm steps. Converts W/cm^2 to W/m^2.
Args:
campaign: for which campaign should the lamp file be read in?
filename: name of lamp file to be read in
plot: plot lamp file?
save_fig: save figure to standard plot path defined in config.toml?
save_file: save lamp file to standard calib path deined in config.toml?
Returns: A data frame with the irradiance in W/m² and the corresponding wavelength in nm
"""
# set paths
calib_path = h.get_path("calib", campaign=campaign)
plot_path = h.get_path("plot", campaign=campaign)
lamp_path = h.get_path("lamp", campaign=campaign) # get path to lamp defined in config.toml
# read in lamp file (name for cirrus-hl and halo-ac3)
lamp_file = "F1587i01_19.std" if filename is None else filename
names = ["Irradiance"] # column name
lamp = pd.read_csv(os.path.join(lamp_path, lamp_file), skiprows=1, header=None, names=names)
# TODO: make the wavelength definition more flexible,
# it should adjust itself according to information given in the lamp file
lamp["Wavelength"] = np.arange(250, 2501)
# convert from W/cm^2 to W/m^2; cm = m * 10^-2 => cm^2 = (m * 10^-2)^2 = m^2 * 10^-4 => W*10^4/m^2
lamp["Irradiance"] = lamp["Irradiance"] * 1e4
if plot:
# plot lamp calibration
lamp.plot(x="Wavelength", y="Irradiance", ylabel="Irradiance (W$\\,$m$^{-2}$)", xlabel="Wavelenght (nm)",
legend=False, title="1000W Lamp F-1587 interpolated on 1nm steps")
plt.grid()
plt.tight_layout()
if save_fig:
plt.savefig(f"{plot_path}/1000W_Lamp_F1587_1nm_19.png", dpi=100)
plt.show()
plt.close()
if save_file:
# save lamp file in calib folder
lamp.to_csv(f"{calib_path}/1000W_lamp_F1587_1nm_19.dat", index=False)
return lamp
[docs]def read_smart_raw(path: str, filename: str) -> pd.DataFrame:
"""
Read raw SMART data files
Args:
path: Path where to find file
filename: Name of file
Returns: pandas DataFrame with column names and datetime index
"""
file = os.path.join(path, filename)
date_str, channel, _ = smart.get_info_from_filename(filename)
if channel == "SWIR":
pixels = list(range(1, 257)) # 256 pixels
elif channel == "VNIR":
pixels = list(range(1, 1025)) # 1024 pixels
else:
raise ValueError("channel has to be 'SWIR' or 'VNIR'!")
# first three columns: Time (hh mm ss.ss), integration time (ms), shutter flag
header = ["time", "t_int", "shutter"]
header.extend(pixels)
df = pd.read_csv(file, sep="\t", header=None, names=header)
datetime_str = date_str + " " + df["time"]
df = df.set_index(pd.to_datetime(datetime_str, format="%Y_%m_%d %H %M %S.%f")).drop("time", axis=1)
return df
[docs]def read_smart_cor(path: str, filename: str) -> pd.DataFrame:
"""
Read dark current corrected SMART data files
Args:
path: Path where to find file
filename: Name of file
Returns: pandas DataFrame with column names and datetime index
"""
file = os.path.join(path, filename)
df = pd.read_csv(file, sep="\t", index_col="time", parse_dates=True)
df.columns = pd.to_numeric(df.columns) # make columns numeric
return df
[docs]def read_pixel_to_wavelength(path: str, spectrometer: str) -> pd.DataFrame:
"""
Read file which maps each pixel to a certain wavelength for a specified spectrometer.
Args:
path: Path where to find file
spectrometer: For which spectrometer the pixel to wavelength mapping should be read in, refer to lookup table for possible
spectrometers (e.g. ASP06_J3)
Returns: pandas DataFrame relating pixel number to wavelength
"""
filename = f"pixel_wl_{cirrus_hl.smart_lookup[spectrometer]}.dat"
file = os.path.join(path, filename)
df = pd.read_csv(file, sep="\s+", skiprows=7, header=None, names=["pixel", "wavelength"])
# sort df by the wavelength column and reset the index, necessary for the SWIR spectrometers
# df = df.sort_values(by="wavelength").reset_index(drop=True)
return df
[docs]def read_stabbi_data(stabbi_path: str) -> pd.DataFrame:
"""
Read in stabilization platform data from SMART.
Args:
stabbi_path: full path to dat file
Returns: pandas DataFrame with headers and a DateTimeIndex
"""
df = pd.read_csv(stabbi_path, skipinitialspace=True, sep="\t")
df["PCTIME"] = pd.to_datetime(df["DATE"] + " " + df["PCTIME"], format='%Y/%m/%d %H:%M:%S.%f')
df.set_index("PCTIME", inplace=True)
df.index.name = "time"
return df
[docs]def read_nav_data(nav_path: str) -> pd.DataFrame:
"""
Reader function for Navigation data file from the INS used by the stabilization of SMART
Args:
nav_path: path to IMS file including filename
Returns: pandas DataFrame with headers and a DateTimeIndex
"""
# read out the start time information given in the file
with open(nav_path, encoding="cp1252") as f:
time_info = f.readlines()[1]
start_time = pd.to_datetime(time_info[11:31], format="%m/%d/%Y %H:%M:%S")
# define the start date of the measurement
start_date = pd.Timestamp(year=start_time.year, month=start_time.month, day=start_time.day)
header = ["marker", "seconds", "roll", "pitch", "yaw", "AccS_X", "AccS_Y", "AccS_Z", "OmgS_X", "OmgS_Y", "OmgS_Z"]
nav = pd.read_csv(nav_path, sep="\s+", skiprows=13, header=None, names=header, encoding="cp1252")
nav["time"] = pd.to_datetime(nav["seconds"], origin=start_date, unit="s")
nav = nav.set_index("time")
return nav
[docs]def read_ins_gps_pos(filepath: str) -> pd.DataFrame:
"""
Read in a GPS position file as returned by the HALO-SMART INS system.
Args:
filepath: complete path to Nav_GPSPosxxxx.Asc file
Returns: time series with the GPS position data
"""
with open(filepath, encoding="cp1252") as f:
time_info = f.readlines()[1]
start_time = pd.to_datetime(time_info[11:31], format="%m/%d/%Y %H:%M:%S")
# define the start date of the measurement
start_date = pd.Timestamp(year=start_time.year, month=start_time.month, day=start_time.day)
header = ["marker", "seconds", "lon", "lat", "alt", "lon_std", "lat_std", "alt_std"]
df = pd.read_csv(filepath, sep="\s+", skiprows=10, header=None, names=header, encoding="cp1252")
df["time"] = pd.to_datetime(df["seconds"], origin=start_date, unit="s")
df = df.set_index("time")
return df
[docs]def read_ins_gps_vel(filepath: str) -> pd.DataFrame:
"""
Read in a GPS velocity file as returned by the HALO-SMART INS system.
Args:
filepath: complete path to Nav_GPSVelxxxx.Asc file
Returns: time series with the GPS velocity data
"""
with open(filepath, encoding="cp1252") as f:
time_info = f.readlines()[1]
start_time = pd.to_datetime(time_info[11:31], format="%m/%d/%Y %H:%M:%S")
# define the start date of the measurement
start_date = pd.Timestamp(year=start_time.year, month=start_time.month, day=start_time.day)
header = ["marker", "seconds", "v_east", "v_north", "v_up", "v_east_std", "v_north_std", "v_up_std"]
df = pd.read_csv(filepath, sep="\s+", skiprows=10, header=None, names=header, encoding="cp1252")
df["time"] = pd.to_datetime(df["seconds"], origin=start_date, unit="s")
df = df.set_index("time")
return df
[docs]def read_libradtran(flight: str, filename: str) -> pd.DataFrame:
"""
Read an old libRadtran simulation file generated by ``01_dirdiff_BBR_Cirrus_HL_Server_jr.pro`` and add a DateTime Index.
Args:
flight: which flight does the simulation belong to (e.g. Flight_20210629a)
filename: filename
Returns: DataFrame with libRadtran output data
"""
file_path = f"{h.get_path('libradtran', flight)}/{filename}"
bbr_sim = pd.read_csv(file_path, sep="\s+", skiprows=34)
date_dt = datetime.datetime.strptime(flight[7:15], "%Y%m%d")
date_ts = pd.Timestamp(year=date_dt.year, month=date_dt.month, day=date_dt.day)
bbr_sim["time"] = pd.to_datetime(bbr_sim["sod"], origin=date_ts, unit="s") # add a datetime column
bbr_sim = bbr_sim.set_index("time") # set it as index
return bbr_sim
[docs]def read_ozone_sonde(filepath: str) -> pd.DataFrame():
"""
Reader function for ames formatted ozone sonde data from http://www.ndaccdemo.org/
Args:
filepath: complete path to file
Returns: pandas DataFrame with ozone volume mixing ratio
"""
header_ny = ["ElapTime", "Press", "GeopHgt", "Temp", "RH", "PO3", "DD", "FF", "GPSHgt", "Lon", "Lat", "PmpT", "Ozi",
"Vpmp", "Ipmp"]
header_sc = ["ElapTime", "Press", "GeopHgt", "Temp", "RH", "T_styro", "PO3", "HorWindDir", "HorWindSpeed"]
header_ho = ["Press", "ElapTime", "GeopHgt", "Temp", "RH", "PO3", "HorWindDir", "HorWindSpeed"]
if "sc" in filepath:
df = pd.read_csv(filepath, skiprows=134, sep="\s+", names=header_sc, na_values=[99.99])
df["o3_vmr"] = df.PO3 * 1e-3 / (df.Press * 1e2) # mPa and hPa
elif "ny" in filepath:
df = pd.read_csv(filepath, skiprows=170, sep="\s+", names=header_ny, na_values=[99.99])
df["o3_vmr"] = df.PO3 * 1e-3 / (df.Press * 1e2) # mPa and hPa
else:
assert "ho" in filepath, "No header information for given file! Adjust reader function!"
df = pd.read_csv(filepath, skiprows=68, sep="\s+", names=header_ho, na_values=[999.9])
return df
[docs]def read_ecrad_output(filepath: str) -> xr.Dataset:
"""
Read in and preprocess a merged ecRad output file from :py:module:ecrad_merge_files.py
- Assign coordinates to dimensions
- Calculate pressure height in m
Args:
filepath: Full path to file
Returns: xarray DataSet with added coordinates to all dimensions of the file
"""
warnings.warn("This is done in its own script now. The function is obsolete and should not be used.",
DeprecationWarning, stacklevel=2)
ds = xr.open_dataset(filepath)
# assign coordinates to band_sw, band_lw and half_level
ds = ds.assign_coords({"band_sw": range(1, 15), "band_lw": range(1, 17), "half_level": range(138)})
# calculate pressure height
q_air = 1.292 # dry air density at 0°C in kg/m3
g_geo = 9.81 # earth acceleration in m/s^2
pressure_hl = ds["pressure_hl"]
ds["press_height"] = -(pressure_hl[:, :, 137]) * np.log(pressure_hl[:, :, :] / pressure_hl[:, :, 137]) / (q_air * g_geo)
# replace TOA height (calculated as infinity) with nan
ds["press_height"] = ds["press_height"].where(ds["press_height"] != np.inf, np.nan)
return ds
[docs]def read_cams_file(filepath: str) -> xr.Dataset:
"""
Read a CAMS atmosphere file
Args:
filepath: full path to nc file
Returns: DataSet with
"""