eldamwl_plot.py

changeset 0
fce4cae19357
child 2
763a7ac22031
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eldamwl_plot.py	Mon Jan 29 11:07:31 2024 +0000
@@ -0,0 +1,394 @@
+from typing import List, Any
+
+import xarray as xr
+from config import *
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import netCDF4 as nc
+from datetime import datetime
+import numpy as np
+import sys
+from os.path import exists
+
+
+def read_variable_in_group(nc_file_path, group_name, variable_name):
+    try:
+        with nc.Dataset(nc_file_path, 'r') as dataset:
+            # Find the group
+            group = dataset.groups[group_name]
+
+            # Access the variable
+            try:
+                variable = group.variables[variable_name]
+                dimensions = variable.dimensions
+                # print("Dimensions:", dimensions)
+                # variable_metadata = { variable.name, variable.long_name, variable.units, variable.ndim}
+
+                # variable_metadata.add = variable.long_name
+                variable_metadata = \
+                    {'name': variable.name.capitalize(),
+                     'long_name': variable.long_name.capitalize(),
+                     'units': variable.units,
+                     'ndim': variable.ndim}
+
+                # Create an index array for slicing the variable
+                index_array = [slice(None)] * variable.ndim
+                # Access the variable data using the index array
+                variable_data = variable[index_array]
+
+                # Fetch the dimension values
+                dimension_values = {}
+                for dim_name in dimensions:
+                    dim_values = group.variables[dim_name][:]
+                    dimension_values[dim_name] = dim_values
+                    # Adjust the index array for slicing
+                    index_array[variable.dimensions.index(dim_name)] = 0
+
+                # Now you have the variable data and its associated dimensions in a dictionary
+                result = {'data': variable_data, 'dimensions': dimension_values, 'metadata': variable_metadata}
+
+                # Print or use the result as needed
+                # print(result)
+                return result
+
+            except KeyError:
+                print("The specified variable does not exist in the group.")
+
+    except FileNotFoundError:
+        print(f"The file '{nc_file_path}' does not exist.")
+    except Exception as e:
+        print(f"The group {e} does not exist.")
+
+
+def read_variable_in_group2(nc_file_path, group_name, variable_name):
+    try:
+        with nc.Dataset(nc_file_path, 'r') as dataset:
+            # Find the group
+            group = dataset.groups[group_name]
+
+            # Access the variable
+            try:
+                variable = group.variables[variable_name]
+                dimensions = variable.dimensions
+                # print("Dimensions:", dimensions)
+                # variable_metadata = { variable.name, variable.long_name, variable.units, variable.ndim}
+
+                # variable_metadata.add = variable.long_name
+                variable_metadata = \
+                    {'name': variable.name.capitalize(),
+                     'long_name': variable.long_name.capitalize(),
+                     'units': variable.units,
+                     'ndim': variable.ndim}
+
+                # Create an index array for slicing the variable
+                index_array = [slice(None)] * variable.ndim
+                # Access the variable data using the index array
+                variable_data = variable[index_array]
+
+                # Fetch the dimension values
+                dimension_values = {}
+                for dim_name in dimensions:
+                    dim_values = group.variables[dim_name][:]
+                    dimension_values[dim_name] = dim_values
+                    # Adjust the index array for slicing
+                    index_array[variable.dimensions.index(dim_name)] = 0
+
+                # Now you have the variable data and its associated dimensions in a dictionary
+                # result = {'data': variable_data, 'dimensions': dimension_values, 'metadata': variable_metadata}
+
+                # Print or use the result as needed
+                # print(result)
+                return variable_data, dimension_values, variable_metadata
+
+            except KeyError:
+                print(f"The variable {variable_name} does not exist in the {group_name} group.")
+    except FileNotFoundError:
+        print(f"The file {nc_file_path} does not exist.")
+    except Exception as e:
+        print(f"The group {e} does not exist.")
+
+
+def plot_vertical_profiles_plotly(variables, data, dimensions, y_array, positive_error_array, negative_error_array,
+                                  xaxis_label, yaxis_label, title, timeinrows):
+    """
+    ToDo Complete
+    Plot vertical profiles using plotly
+
+    Parameters:
+    -
+
+    Returns:
+    - fig: html
+    """
+
+    # Set number of columns, number of rows, and subtitles if needed
+    ncols = len(variables)
+    subtitles = None
+    if timeinrows is True:
+        nrows = len(dimensions['time'])
+        if nrows > 1:
+            for t in range(nrows):
+                for v in range(ncols):
+                    if subtitles is None:
+                        subtitles = [str(datetime.fromtimestamp(dimensions['time'][t]))]
+                    else:
+                        subtitles += [str(datetime.fromtimestamp(dimensions['time'][t]))]
+    else:
+        nrows = 1
+        subtitles = ''
+
+    # Create figure
+    fig = make_subplots(rows=nrows, cols=ncols,
+                        subplot_titles=subtitles,
+                        shared_yaxes=True)
+
+    # Define colours
+    clrs = [['rgb(0,35,255)', 'rgb(0,20,150)', 'rgb(0,170,250)'],  # UV
+            ['rgb(90,210,50)', 'rgb(40,90,25)', 'rgb(120,250,80)'],  # VIS
+            ['rgb(250,30,30)', 'rgb(200,25,30)', 'rgb(250,125,0)']]  # IR
+    clrs = np.array(clrs)
+
+    # Plot
+    for v in range(len(variables)):
+        for t in range(len(dimensions['time'])):
+            for w in range(len(dimensions['wavelength'])):
+                tn = str(datetime.fromtimestamp(dimensions['time'][t]))
+                wn = str(dimensions['wavelength'][w])
+                if timeinrows == True:
+                    lgnd = wn + ' nm'
+                    rown = t + 1
+                    colorint = 1
+                    colorind = 0
+                    subtitle = str(datetime.fromtimestamp(dimensions['time'][t]))
+                else:
+                    lgnd = wn + ' nm - ' + tn
+                    rown = 1
+                    colorind = t
+                    colorint = 1 - (0.4 * t)
+
+                # colour definition
+                if abs(dimensions['wavelength'][w] - 1064) < 1:
+                    clr = clrs[2, colorind]
+                    # clr = 'rgb(235,70,20)'  # 'rgba(215,27,96,0.4)'
+                elif abs(dimensions['wavelength'][w] - 532) < 1:
+                    clr = clrs[1, colorind]
+                elif abs(dimensions['wavelength'][w] - 355) < 1:
+                    clr = clrs[0, colorind]
+                else:
+                    clr = 'rgb(0,0,0)'
+
+                data_values = data[variables[v]][w, t, :]
+                error_values = positive_error_array[variables[v]][w, t, :]
+                vertical_levels = y_array["data"][t, :]
+
+                # Add trace
+                fig.add_trace(go.Scatter(
+                    x=data_values,
+                    y=vertical_levels,
+                    error_x=dict(array=error_values, width=0, thickness=0.1),
+                    # ToDo Include distinction between positive and negative errors
+                    mode='lines',
+                    line=dict(width=2, color=clr),
+                    name=lgnd),
+                    row=rown, col=v + 1,
+                )
+
+                # ToDo Check formatting options (showlegend, mapbox)
+                fig.update_xaxes({"title": xaxis_label[v]}, row=rown, col=v + 1)
+                fig.update_xaxes({"mirror": True, "ticks": 'outside', "showline": True, "color": "black"}, row=rown,
+                                 col=v + 1)
+
+                fig.update_xaxes(ticks="outside")
+                fig.update_yaxes(ticks="outside", row=rown, col=v + 1)  # ToDo check tickformat (i.e. tickformat=":.2e")
+                if v == 0:
+                    fig.update_yaxes({"title": yaxis_label}, row=rown, col=v + 1)
+
+    # Set axis labels and title
+    fig.update_layout(
+        yaxis_title=yaxis_label,
+        title=title,
+        template="seaborn",  # You can change the template as per your preference
+        # ['ggplot2', 'seaborn', 'simple_white', 'plotly', 'plotly_white', 'plotly_dark', 'presentation',
+        #  'xgridoff', 'ygridoff', 'gridon', 'none']
+        height=max(600, 450 * rown), width=min(max(400 * len(variables), 750), 1400),
+    )
+
+    return fig
+
+
+# Files for testing
+# netcdf_file = 'hpb_012_0000598_201810172100_201810172300_20181017oh00_ELDAmwl_v0.0.1.nc'      # Backscatter and extinction, no error
+# netcdf_file = 'hpb_012_0000627_202308240200_202308240400_20230824hpb0200_ELDAmwl_v0.0.1.nc'     # Extinction with error
+
+
+try:
+    netcdf_file = sys.argv[1]
+except:
+    print('Syntax is incorrect, you need to pass the filename of the file to be plotted.\n'
+          'Example: ./eldamwl_plot.py hpb_012_0000598_201810172100_201810172300_20181017oh00_ELDAmwl_v0.0.1.nc')
+    exit()
+
+netcdf_file_path = netcdf_path + netcdf_file
+
+file_exists = exists(netcdf_file_path)
+if not file_exists:
+    print('The file that you provided does not exist.')
+    exit()
+
+# Groups of data in the NetCDF file (low and high resolution)
+groups = ['lowres_products', 'highres_products']
+groups_names = ['Low resolution', 'High resolution']
+
+# Variables that we want to plot
+variable = ['backscatter', 'extinction', 'lidarratio', 'volumedepolarization', 'particledepolarization']    # ToDo Test particledepolarization when available
+# Errors associated to the variables
+variable_error = ['error_backscatter', 'error_extinction', 'error_lidarratio',
+                  'positive_systematic_error_volumedepolarization', 'error_particledepolarization']
+variable_negative_error_volumedepol = 'negative_systematic_error_volumedepolarization'
+# Other variables:
+#   cloud_mask (time, level)
+
+
+for g in range(len(groups)):
+    # Read the altitude variable
+    altitude = read_variable_in_group(netcdf_file_path, groups[g], 'altitude')
+
+    if altitude['metadata']['units'] == 'm':
+        altitude['data'] = altitude['data'] / 1000.0
+        altitude['metadata']['units'] = 'km'
+
+    # Initialize variables
+    data = None
+    variables_with_data = None
+    variables_axis = None
+
+    # Read the data
+    for v in range(len(variable)):
+        # Initialize var
+        var = None
+
+        try:
+            var, dim, met = read_variable_in_group2(netcdf_file_path, groups[g], variable[v])
+        except TypeError:
+            print(f"The variable {variable[v]} is not present. Cannot unpack non-iterable NoneType object")
+
+        if var is not None:  # If the variable that we're trying to read exists
+            if var.max() != var.min():  # If there is data in the array
+                if variables_with_data is None:
+                    variables_with_data = [variable[v]]
+                    variables_axis = [met['long_name'] + ' [ ' + met['units'] + ' ]']
+                else:
+                    variables_with_data += [variable[v]]
+                    variables_axis += [met['long_name'] + ' [ ' + met['units'] + ' ]']
+
+                var_error, dim_error, met_error = read_variable_in_group2(netcdf_file_path, groups[g],
+                                                                          variable_error[v])
+                if variable[v] == 'volumedepolarization':
+                    var_error_negative, dim, met = read_variable_in_group2(netcdf_file_path, groups[g],
+                                                                           variable_negative_error_volumedepol)
+                else:
+                    var_error_negative = var_error
+
+                # Add read data to the data variable to have it all grouped
+                if data is None:
+                    # Start populating data and data_error
+                    data = {variable[v]: var}
+                    data_error_positive = {variable[v]: var_error}
+                    data_error_negative = {variable[v]: var_error_negative}
+                else:
+                    # Add more data to data and data_error
+                    data[variable[v]] = var
+                    data_error_positive[variable[v]] = var_error
+                    data_error_negative[variable[v]] = var_error_negative
+            else:
+                print(f'There is no data for {variable[v]} in the {groups_names[g]} group.')
+
+    dimensions = dim
+
+    # Ask user if he/she wants to plot different temporal profiles in different rows
+    if g == 0:
+        # Initialize
+        timeinplots = False
+        timeinrows = False
+        # Ask user how he/she wants the plot
+        if len(dimensions['time']) > 1:
+            answer = input(
+                f"There are {len(dimensions['time'])} temporal profiles, do you want to plot them on different plots [y/n]? ")
+            if answer[0:1].lower() == 'y':
+                timeinplots = True
+            if timeinplots == False:
+                answer = input(
+                    f"There are {len(dimensions['time'])} temporal profiles, do you want to plot them on different rows [y/n]? ")
+                if answer[0:1].lower() == 'y':
+                    timeinrows = True
+
+    if variables_with_data is not None:
+        timeiter = dimensions['time']
+        if timeinplots == True:
+            dimensions_t = dimensions
+            for t in range(len(dimensions['time'])):
+                dimensions_t['time'] = [timeiter[t]]
+                for vr in range(len(variables_with_data)):
+                    if vr == 0:
+                        data_t = {variables_with_data[vr]: data[variables_with_data[vr]][:, t:t + 1, :]}
+                    else:
+                        data_t[variables_with_data[vr]] = data[variables_with_data[vr]][:, t:t + 1, :]
+
+                fig = plot_vertical_profiles_plotly(
+                    variables=variables_with_data,
+                    data=data_t,
+                    dimensions=dimensions_t,
+                    y_array=altitude,
+                    positive_error_array=data_error_positive,
+                    negative_error_array=data_error_negative,
+                    xaxis_label=variables_axis,
+                    yaxis_label=altitude['metadata']['long_name'] + ' [' + altitude['metadata']['units'] + ']',
+                    title=netcdf_file[0:3].upper() + " - " + str(
+                        datetime.fromtimestamp(dimensions_t['time'][0]).strftime(
+                            '%d/%m/%Y %H:%M:%S')) + " - ELDA MWL " +
+                          groups_names[g],
+                    timeinrows=True
+                )
+
+                # Show the plot (in Jupyter Notebook or Jupyter Lab)
+                fig.show()
+
+                # Save the plot
+                filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp(
+                    dimensions['time'][0]).strftime('%Y%m%d-%H%M%S') + "_" + groups[g] + ".html"
+                fig.write_html(html_path + filename)
+
+        else:
+            # Plotting in one plot (even if there are different time profiles)
+            fig = plot_vertical_profiles_plotly(
+                variables_with_data,
+                data,
+                dimensions,
+                altitude,
+                data_error_positive,
+                data_error_negative,
+                xaxis_label=variables_axis,
+                yaxis_label=altitude['metadata']['long_name'] + ' [' + altitude['metadata']['units'] + ']',
+                title=netcdf_file[0:3].upper() + " - " + str(
+                    datetime.fromtimestamp(dimensions['time'][0]).strftime('%d/%m/%Y')) + " - ELDA MWL " + groups_names[
+                          g],
+                timeinrows=timeinrows
+            )
+
+            # Show the plot (in Jupyter Notebook or Jupyter Lab)
+            fig.show()
+
+            # Save the plot
+            if timeinrows == True:
+                filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp(
+                    dimensions['time'][0]).strftime('%Y%m%d%H%M%S') + '-' + datetime.fromtimestamp(
+                    dimensions['time'][-1]).strftime('%Y%m%d%H%M%S') + "_" + groups[g] + "_timeinrows.html"
+            else:
+                filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp(
+                    dimensions['time'][0]).strftime('%Y%m%d%H%M%S') + '-' + datetime.fromtimestamp(
+                    dimensions['time'][-1]).strftime('%Y%m%d%H%M%S') + "_" + groups[g] + ".html"
+
+            fig.write_html(html_path + filename)
+
+    else:
+        print(f'There is no data to be plotted in the {groups_names[g]} group.')

mercurial