Mercurial > public > eldamwl_plotting / file revision

from typing import List, Any

import xarray as xr
from config import *
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import netCDF4 as nc
from datetime import datetime
import numpy as np
import sys
from os.path import exists


def read_variable_in_group(nc_file_path, group_name, variable_name):
    try:
        with nc.Dataset(nc_file_path, 'r') as dataset:
            # Find the group
            group = dataset.groups[group_name]

            # Access the variable
            try:
                variable = group.variables[variable_name]
                dimensions = variable.dimensions
                # print("Dimensions:", dimensions)
                # variable_metadata = { variable.name, variable.long_name, variable.units, variable.ndim}

                # variable_metadata.add = variable.long_name
                variable_metadata = \
                    {'name': variable.name.capitalize(),
                     'long_name': variable.long_name.capitalize(),
                     'units': variable.units,
                     'ndim': variable.ndim}

                # Create an index array for slicing the variable
                index_array = [slice(None)] * variable.ndim
                # Access the variable data using the index array
                variable_data = variable[index_array]

                # Fetch the dimension values
                dimension_values = {}
                for dim_name in dimensions:
                    dim_values = group.variables[dim_name][:]
                    dimension_values[dim_name] = dim_values
                    # Adjust the index array for slicing
                    index_array[variable.dimensions.index(dim_name)] = 0

                # Now you have the variable data and its associated dimensions in a dictionary
                result = {'data': variable_data, 'dimensions': dimension_values, 'metadata': variable_metadata}

                # Print or use the result as needed
                # print(result)
                return result

            except KeyError:
                print("The specified variable does not exist in the group.")

    except FileNotFoundError:
        print(f"The file '{nc_file_path}' does not exist.")
    except Exception as e:
        print(f"The group {e} does not exist.")


def read_variable_in_group2(nc_file_path, group_name, variable_name):
    try:
        with nc.Dataset(nc_file_path, 'r') as dataset:
            # Find the group
            group = dataset.groups[group_name]

            # Access the variable
            try:
                variable = group.variables[variable_name]
                dimensions = variable.dimensions
                # print("Dimensions:", dimensions)
                # variable_metadata = { variable.name, variable.long_name, variable.units, variable.ndim}

                # variable_metadata.add = variable.long_name
                variable_metadata = \
                    {'name': variable.name.capitalize(),
                     'long_name': variable.long_name.capitalize(),
                     'units': variable.units,
                     'ndim': variable.ndim}

                # Create an index array for slicing the variable
                index_array = [slice(None)] * variable.ndim
                # Access the variable data using the index array
                variable_data = variable[index_array]

                # Fetch the dimension values
                dimension_values = {}
                for dim_name in dimensions:
                    dim_values = group.variables[dim_name][:]
                    dimension_values[dim_name] = dim_values
                    # Adjust the index array for slicing
                    index_array[variable.dimensions.index(dim_name)] = 0

                # Now you have the variable data and its associated dimensions in a dictionary
                # result = {'data': variable_data, 'dimensions': dimension_values, 'metadata': variable_metadata}

                # Print or use the result as needed
                # print(result)
                return variable_data, dimension_values, variable_metadata

            except KeyError:
                print(f"The variable {variable_name} does not exist in the {group_name} group.")
    except FileNotFoundError:
        print(f"The file {nc_file_path} does not exist.")
    except Exception as e:
        print(f"The group {e} does not exist.")


def plot_vertical_profiles_plotly(variables, data, dimensions, y_array, positive_error_array, negative_error_array,
                                  xaxis_label, yaxis_label, title, timeinrows):
    """
    ToDo Complete
    Plot vertical profiles using plotly

    Parameters:
    -

    Returns:
    - fig: html
    """

    # Set number of columns, number of rows, and subtitles if needed
    ncols = len(variables)
    subtitles = None
    if timeinrows is True:
        nrows = len(dimensions['time'])
        if nrows > 1:
            for t in range(nrows):
                for v in range(ncols):
                    if subtitles is None:
                        subtitles = [str(datetime.fromtimestamp(dimensions['time'][t]))]
                    else:
                        subtitles += [str(datetime.fromtimestamp(dimensions['time'][t]))]
    else:
        nrows = 1
        subtitles = ''

    # Create figure
    fig = make_subplots(rows=nrows, cols=ncols,
                        subplot_titles=subtitles,
                        shared_yaxes=True)

    # Define colours
    clrs = [['rgb(0,35,255)', 'rgb(0,20,150)', 'rgb(0,170,250)'],  # UV
            ['rgb(90,210,50)', 'rgb(40,90,25)', 'rgb(120,250,80)'],  # VIS
            ['rgb(250,30,30)', 'rgb(200,25,30)', 'rgb(250,125,0)']]  # IR
    clrs = np.array(clrs)

    # Plot
    for v in range(len(variables)):
        for t in range(len(dimensions['time'])):
            for w in range(len(dimensions['wavelength'])):
                tn = str(datetime.fromtimestamp(dimensions['time'][t]))
                wn = str(dimensions['wavelength'][w])
                if timeinrows == True:
                    lgnd = wn + ' nm'
                    rown = t + 1
                    colorint = 1
                    colorind = 0
                    subtitle = str(datetime.fromtimestamp(dimensions['time'][t]))
                else:
                    lgnd = wn + ' nm - ' + tn
                    rown = 1
                    colorind = t
                    colorint = 1 - (0.4 * t)

                # colour definition
                if abs(dimensions['wavelength'][w] - 1064) < 1:
                    clr = clrs[2, colorind]
                    # clr = 'rgb(235,70,20)'  # 'rgba(215,27,96,0.4)'
                elif abs(dimensions['wavelength'][w] - 532) < 1:
                    clr = clrs[1, colorind]
                elif abs(dimensions['wavelength'][w] - 355) < 1:
                    clr = clrs[0, colorind]
                else:
                    clr = 'rgb(0,0,0)'

                data_values = data[variables[v]][w, t, :]
                error_values = positive_error_array[variables[v]][w, t, :]
                vertical_levels = y_array["data"][t, :]

                # Add trace
                fig.add_trace(go.Scatter(
                    x=data_values,
                    y=vertical_levels,
                    error_x=dict(array=error_values, width=0, thickness=0.1),
                    # ToDo Include distinction between positive and negative errors
                    mode='lines',
                    line=dict(width=2, color=clr),
                    name=lgnd),
                    row=rown, col=v + 1,
                )

                # ToDo Check formatting options (showlegend, mapbox)
                fig.update_xaxes({"title": xaxis_label[v]}, row=rown, col=v + 1)
                fig.update_xaxes({"mirror": True, "ticks": 'outside', "showline": True, "color": "black"}, row=rown,
                                 col=v + 1)

                fig.update_xaxes(ticks="outside")
                fig.update_yaxes(ticks="outside", row=rown, col=v + 1)  # ToDo check tickformat (i.e. tickformat=":.2e")
                if v == 0:
                    fig.update_yaxes({"title": yaxis_label}, row=rown, col=v + 1)

    # Set axis labels and title
    fig.update_layout(
        yaxis_title=yaxis_label,
        title=title,
        template="seaborn",  # You can change the template as per your preference
        # ['ggplot2', 'seaborn', 'simple_white', 'plotly', 'plotly_white', 'plotly_dark', 'presentation',
        #  'xgridoff', 'ygridoff', 'gridon', 'none']
        height=max(600, 450 * rown), width=min(max(400 * len(variables), 750), 1400),
    )

    return fig


# Files for testing
# netcdf_file = 'hpb_012_0000598_201810172100_201810172300_20181017oh00_ELDAmwl_v0.0.1.nc'      # Backscatter and extinction, no error
# netcdf_file = 'hpb_012_0000627_202308240200_202308240400_20230824hpb0200_ELDAmwl_v0.0.1.nc'     # Extinction with error


try:
    netcdf_file = sys.argv[1]
except:
    print('Syntax is incorrect, you need to pass the filename of the file to be plotted.\n'
          'Example: ./eldamwl_plot.py hpb_012_0000598_201810172100_201810172300_20181017oh00_ELDAmwl_v0.0.1.nc')
    exit()

netcdf_file_path = netcdf_path + netcdf_file

file_exists = exists(netcdf_file_path)
if not file_exists:
    print('The file that you provided does not exist.')
    exit()

# Groups of data in the NetCDF file (low and high resolution)
groups = ['lowres_products', 'highres_products']
groups_names = ['Low resolution', 'High resolution']

# Variables that we want to plot
variable = ['backscatter', 'extinction', 'lidarratio', 'volumedepolarization', 'particledepolarization']    # ToDo Test particledepolarization when available
# Errors associated to the variables
variable_error = ['error_backscatter', 'error_extinction', 'error_lidarratio',
                  'positive_systematic_error_volumedepolarization', 'error_particledepolarization']
variable_negative_error_volumedepol = 'negative_systematic_error_volumedepolarization'
# Other variables:
#   cloud_mask (time, level)


for g in range(len(groups)):
    # Read the altitude variable
    altitude = read_variable_in_group(netcdf_file_path, groups[g], 'altitude')

    if altitude['metadata']['units'] == 'm':
        altitude['data'] = altitude['data'] / 1000.0
        altitude['metadata']['units'] = 'km'

    # Initialize variables
    data = None
    variables_with_data = None
    variables_axis = None

    # Read the data
    for v in range(len(variable)):
        # Initialize var
        var = None

        try:
            var, dim, met = read_variable_in_group2(netcdf_file_path, groups[g], variable[v])
        except TypeError:
            print(f"The variable {variable[v]} is not present. Cannot unpack non-iterable NoneType object")

        if var is not None:  # If the variable that we're trying to read exists
            if var.max() != var.min():  # If there is data in the array
                if variables_with_data is None:
                    variables_with_data = [variable[v]]
                    variables_axis = [met['long_name'] + ' [ ' + met['units'] + ' ]']
                else:
                    variables_with_data += [variable[v]]
                    variables_axis += [met['long_name'] + ' [ ' + met['units'] + ' ]']

                var_error, dim_error, met_error = read_variable_in_group2(netcdf_file_path, groups[g],
                                                                          variable_error[v])
                if variable[v] == 'volumedepolarization':
                    var_error_negative, dim, met = read_variable_in_group2(netcdf_file_path, groups[g],
                                                                           variable_negative_error_volumedepol)
                else:
                    var_error_negative = var_error

                # Add read data to the data variable to have it all grouped
                if data is None:
                    # Start populating data and data_error
                    data = {variable[v]: var}
                    data_error_positive = {variable[v]: var_error}
                    data_error_negative = {variable[v]: var_error_negative}
                else:
                    # Add more data to data and data_error
                    data[variable[v]] = var
                    data_error_positive[variable[v]] = var_error
                    data_error_negative[variable[v]] = var_error_negative
            else:
                print(f'There is no data for {variable[v]} in the {groups_names[g]} group.')

    dimensions = dim

    # Ask user if he/she wants to plot different temporal profiles in different rows
    if g == 0:
        # Initialize
        timeinplots = False
        timeinrows = False
        # Ask user how he/she wants the plot
        if len(dimensions['time']) > 1:
            answer = input(
                f"There are {len(dimensions['time'])} temporal profiles, do you want to plot them on different plots [y/n]? ")
            if answer[0:1].lower() == 'y':
                timeinplots = True
            if timeinplots == False:
                answer = input(
                    f"There are {len(dimensions['time'])} temporal profiles, do you want to plot them on different rows [y/n]? ")
                if answer[0:1].lower() == 'y':
                    timeinrows = True

    if variables_with_data is not None:
        timeiter = dimensions['time']
        if timeinplots == True:
            dimensions_t = dimensions
            for t in range(len(dimensions['time'])):
                dimensions_t['time'] = [timeiter[t]]
                for vr in range(len(variables_with_data)):
                    if vr == 0:
                        data_t = {variables_with_data[vr]: data[variables_with_data[vr]][:, t:t + 1, :]}
                    else:
                        data_t[variables_with_data[vr]] = data[variables_with_data[vr]][:, t:t + 1, :]

                fig = plot_vertical_profiles_plotly(
                    variables=variables_with_data,
                    data=data_t,
                    dimensions=dimensions_t,
                    y_array=altitude,
                    positive_error_array=data_error_positive,
                    negative_error_array=data_error_negative,
                    xaxis_label=variables_axis,
                    yaxis_label=altitude['metadata']['long_name'] + ' [' + altitude['metadata']['units'] + ']',
                    title=netcdf_file[0:3].upper() + " - " + str(
                        datetime.fromtimestamp(dimensions_t['time'][0]).strftime(
                            '%d/%m/%Y %H:%M:%S')) + " - ELDA MWL " +
                          groups_names[g],
                    timeinrows=True
                )

                # Show the plot (in Jupyter Notebook or Jupyter Lab)
                fig.show()

                # Save the plot
                filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp(
                    dimensions['time'][0]).strftime('%Y%m%d-%H%M%S') + "_" + groups[g] + ".html"
                fig.write_html(html_path + filename)

        else:
            # Plotting in one plot (even if there are different time profiles)
            fig = plot_vertical_profiles_plotly(
                variables_with_data,
                data,
                dimensions,
                altitude,
                data_error_positive,
                data_error_negative,
                xaxis_label=variables_axis,
                yaxis_label=altitude['metadata']['long_name'] + ' [' + altitude['metadata']['units'] + ']',
                title=netcdf_file[0:3].upper() + " - " + str(
                    datetime.fromtimestamp(dimensions['time'][0]).strftime('%d/%m/%Y')) + " - ELDA MWL " + groups_names[
                          g],
                timeinrows=timeinrows
            )

            # Show the plot (in Jupyter Notebook or Jupyter Lab)
            fig.show()

            # Save the plot
            if timeinrows == True:
                filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp(
                    dimensions['time'][0]).strftime('%Y%m%d%H%M%S') + '-' + datetime.fromtimestamp(
                    dimensions['time'][-1]).strftime('%Y%m%d%H%M%S') + "_" + groups[g] + "_timeinrows.html"
            else:
                filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp(
                    dimensions['time'][0]).strftime('%Y%m%d%H%M%S') + '-' + datetime.fromtimestamp(
                    dimensions['time'][-1]).strftime('%Y%m%d%H%M%S') + "_" + groups[g] + ".html"

            fig.write_html(html_path + filename)

    else:
        print(f'There is no data to be plotted in the {groups_names[g]} group.')