ELDAmwl Plotting: eldamwl_plot.py@fce4cae19357 (annotated)

eldamwl_plot.py@fce4cae19357 (annotated)

eldamwl_plot.py

Mon, 29 Jan 2024 11:07:31 +0000

author: pilar <pilar.gumaclaramunt@cnr.it>
date: Mon, 29 Jan 2024 11:07:31 +0000
changeset 0: fce4cae19357
child 2: 763a7ac22031
permissions: -rwxr-xr-x

First version

 from typing import List, Any
 import xarray as xr
 from config import *
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 import netCDF4 as nc
 from datetime import datetime
 import numpy as np
 import sys
 from os.path import exists
 def read_variable_in_group(nc_file_path, group_name, variable_name):
     try:
         with nc.Dataset(nc_file_path, 'r') as dataset:
             # Find the group
             group = dataset.groups[group_name]
             # Access the variable
             try:
                 variable = group.variables[variable_name]
                 dimensions = variable.dimensions
                 # print("Dimensions:", dimensions)
                 # variable_metadata = { variable.name, variable.long_name, variable.units, variable.ndim}
                 # variable_metadata.add = variable.long_name
                 variable_metadata = \
                     {'name': variable.name.capitalize(),
                      'long_name': variable.long_name.capitalize(),
                      'units': variable.units,
                      'ndim': variable.ndim}
                 # Create an index array for slicing the variable
                 index_array = [slice(None)] * variable.ndim
                 # Access the variable data using the index array
                 variable_data = variable[index_array]
                 # Fetch the dimension values
                 dimension_values = {}
                 for dim_name in dimensions:
                     dim_values = group.variables[dim_name][:]
                     dimension_values[dim_name] = dim_values
                     # Adjust the index array for slicing
                     index_array[variable.dimensions.index(dim_name)] = 0
                 # Now you have the variable data and its associated dimensions in a dictionary
                 result = {'data': variable_data, 'dimensions': dimension_values, 'metadata': variable_metadata}
                 # Print or use the result as needed
                 # print(result)
                 return result
             except KeyError:
                 print("The specified variable does not exist in the group.")
     except FileNotFoundError:
         print(f"The file '{nc_file_path}' does not exist.")
     except Exception as e:
         print(f"The group {e} does not exist.")
 def read_variable_in_group2(nc_file_path, group_name, variable_name):
     try:
         with nc.Dataset(nc_file_path, 'r') as dataset:
             # Find the group
             group = dataset.groups[group_name]
             # Access the variable
             try:
                 variable = group.variables[variable_name]
                 dimensions = variable.dimensions
                 # print("Dimensions:", dimensions)
                 # variable_metadata = { variable.name, variable.long_name, variable.units, variable.ndim}
                 # variable_metadata.add = variable.long_name
                 variable_metadata = \
                     {'name': variable.name.capitalize(),
                      'long_name': variable.long_name.capitalize(),
                      'units': variable.units,
                      'ndim': variable.ndim}
                 # Create an index array for slicing the variable
                 index_array = [slice(None)] * variable.ndim
                 # Access the variable data using the index array
                 variable_data = variable[index_array]
                 # Fetch the dimension values
                 dimension_values = {}
                 for dim_name in dimensions:
                     dim_values = group.variables[dim_name][:]
                     dimension_values[dim_name] = dim_values
                     # Adjust the index array for slicing
                     index_array[variable.dimensions.index(dim_name)] = 0
                 # Now you have the variable data and its associated dimensions in a dictionary
                 # result = {'data': variable_data, 'dimensions': dimension_values, 'metadata': variable_metadata}
                 # Print or use the result as needed
                 # print(result)
                 return variable_data, dimension_values, variable_metadata
             except KeyError:
                 print(f"The variable {variable_name} does not exist in the {group_name} group.")
     except FileNotFoundError:
         print(f"The file {nc_file_path} does not exist.")
     except Exception as e:
         print(f"The group {e} does not exist.")
 def plot_vertical_profiles_plotly(variables, data, dimensions, y_array, positive_error_array, negative_error_array,
                                   xaxis_label, yaxis_label, title, timeinrows):
     """
     ToDo Complete
     Plot vertical profiles using plotly
     Parameters:
     -
     Returns:
     - fig: html
     """
     # Set number of columns, number of rows, and subtitles if needed
     ncols = len(variables)
     subtitles = None
     if timeinrows is True:
         nrows = len(dimensions['time'])
         if nrows > 1:
             for t in range(nrows):
                 for v in range(ncols):
                     if subtitles is None:
                         subtitles = [str(datetime.fromtimestamp(dimensions['time'][t]))]
                     else:
                         subtitles += [str(datetime.fromtimestamp(dimensions['time'][t]))]
     else:
         nrows = 1
         subtitles = ''
     # Create figure
     fig = make_subplots(rows=nrows, cols=ncols,
                         subplot_titles=subtitles,
                         shared_yaxes=True)
     # Define colours
     clrs = [['rgb(0,35,255)', 'rgb(0,20,150)', 'rgb(0,170,250)'],  # UV
             ['rgb(90,210,50)', 'rgb(40,90,25)', 'rgb(120,250,80)'],  # VIS
             ['rgb(250,30,30)', 'rgb(200,25,30)', 'rgb(250,125,0)']]  # IR
     clrs = np.array(clrs)
     # Plot
     for v in range(len(variables)):
         for t in range(len(dimensions['time'])):
             for w in range(len(dimensions['wavelength'])):
                 tn = str(datetime.fromtimestamp(dimensions['time'][t]))
                 wn = str(dimensions['wavelength'][w])
                 if timeinrows == True:
                     lgnd = wn + ' nm'
                     rown = t + 1
                     colorint = 1
                     colorind = 0
                     subtitle = str(datetime.fromtimestamp(dimensions['time'][t]))
                 else:
                     lgnd = wn + ' nm - ' + tn
                     rown = 1
                     colorind = t
                     colorint = 1 - (0.4 * t)
                 # colour definition
                 if abs(dimensions['wavelength'][w] - 1064) < 1:
                     clr = clrs[2, colorind]
                     # clr = 'rgb(235,70,20)'  # 'rgba(215,27,96,0.4)'
                 elif abs(dimensions['wavelength'][w] - 532) < 1:
                     clr = clrs[1, colorind]
                 elif abs(dimensions['wavelength'][w] - 355) < 1:
                     clr = clrs[0, colorind]
                 else:
                     clr = 'rgb(0,0,0)'
                 data_values = data[variables[v]][w, t, :]
                 error_values = positive_error_array[variables[v]][w, t, :]
                 vertical_levels = y_array["data"][t, :]
                 # Add trace
                 fig.add_trace(go.Scatter(
                     x=data_values,
                     y=vertical_levels,
                     error_x=dict(array=error_values, width=0, thickness=0.1),
                     # ToDo Include distinction between positive and negative errors
                     mode='lines',
                     line=dict(width=2, color=clr),
                     name=lgnd),
                     row=rown, col=v + 1,
                 )
                 # ToDo Check formatting options (showlegend, mapbox)
                 fig.update_xaxes({"title": xaxis_label[v]}, row=rown, col=v + 1)
                 fig.update_xaxes({"mirror": True, "ticks": 'outside', "showline": True, "color": "black"}, row=rown,
                                  col=v + 1)
                 fig.update_xaxes(ticks="outside")
                 fig.update_yaxes(ticks="outside", row=rown, col=v + 1)  # ToDo check tickformat (i.e. tickformat=":.2e")
                 if v == 0:
                     fig.update_yaxes({"title": yaxis_label}, row=rown, col=v + 1)
     # Set axis labels and title
     fig.update_layout(
         yaxis_title=yaxis_label,
         title=title,
         template="seaborn",  # You can change the template as per your preference
         # ['ggplot2', 'seaborn', 'simple_white', 'plotly', 'plotly_white', 'plotly_dark', 'presentation',
         #  'xgridoff', 'ygridoff', 'gridon', 'none']
         height=max(600, 450 * rown), width=min(max(400 * len(variables), 750), 1400),
     )
     return fig
 # Files for testing
 # netcdf_file = 'hpb_012_0000598_201810172100_201810172300_20181017oh00_ELDAmwl_v0.0.1.nc'      # Backscatter and extinction, no error
 # netcdf_file = 'hpb_012_0000627_202308240200_202308240400_20230824hpb0200_ELDAmwl_v0.0.1.nc'     # Extinction with error
 try:
     netcdf_file = sys.argv[1]
 except:
     print('Syntax is incorrect, you need to pass the filename of the file to be plotted.\n'
           'Example: ./eldamwl_plot.py hpb_012_0000598_201810172100_201810172300_20181017oh00_ELDAmwl_v0.0.1.nc')
     exit()
 netcdf_file_path = netcdf_path + netcdf_file
 file_exists = exists(netcdf_file_path)
 if not file_exists:
     print('The file that you provided does not exist.')
     exit()
 # Groups of data in the NetCDF file (low and high resolution)
 groups = ['lowres_products', 'highres_products']
 groups_names = ['Low resolution', 'High resolution']
 # Variables that we want to plot
 variable = ['backscatter', 'extinction', 'lidarratio', 'volumedepolarization', 'particledepolarization']    # ToDo Test particledepolarization when available
 # Errors associated to the variables
 variable_error = ['error_backscatter', 'error_extinction', 'error_lidarratio',
                   'positive_systematic_error_volumedepolarization', 'error_particledepolarization']
 variable_negative_error_volumedepol = 'negative_systematic_error_volumedepolarization'
 # Other variables:
 #   cloud_mask (time, level)
 for g in range(len(groups)):
     # Read the altitude variable
     altitude = read_variable_in_group(netcdf_file_path, groups[g], 'altitude')
     if altitude['metadata']['units'] == 'm':
         altitude['data'] = altitude['data'] / 1000.0
         altitude['metadata']['units'] = 'km'
     # Initialize variables
     data = None
     variables_with_data = None
     variables_axis = None
     # Read the data
     for v in range(len(variable)):
         # Initialize var
         var = None
         try:
             var, dim, met = read_variable_in_group2(netcdf_file_path, groups[g], variable[v])
         except TypeError:
             print(f"The variable {variable[v]} is not present. Cannot unpack non-iterable NoneType object")
         if var is not None:  # If the variable that we're trying to read exists
             if var.max() != var.min():  # If there is data in the array
                 if variables_with_data is None:
                     variables_with_data = [variable[v]]
                     variables_axis = [met['long_name'] + ' [ ' + met['units'] + ' ]']
                 else:
                     variables_with_data += [variable[v]]
                     variables_axis += [met['long_name'] + ' [ ' + met['units'] + ' ]']
                 var_error, dim_error, met_error = read_variable_in_group2(netcdf_file_path, groups[g],
                                                                           variable_error[v])
                 if variable[v] == 'volumedepolarization':
                     var_error_negative, dim, met = read_variable_in_group2(netcdf_file_path, groups[g],
                                                                            variable_negative_error_volumedepol)
                 else:
                     var_error_negative = var_error
                 # Add read data to the data variable to have it all grouped
                 if data is None:
                     # Start populating data and data_error
                     data = {variable[v]: var}
                     data_error_positive = {variable[v]: var_error}
                     data_error_negative = {variable[v]: var_error_negative}
                 else:
                     # Add more data to data and data_error
                     data[variable[v]] = var
                     data_error_positive[variable[v]] = var_error
                     data_error_negative[variable[v]] = var_error_negative
             else:
                 print(f'There is no data for {variable[v]} in the {groups_names[g]} group.')
     dimensions = dim
     # Ask user if he/she wants to plot different temporal profiles in different rows
     if g == 0:
         # Initialize
         timeinplots = False
         timeinrows = False
         # Ask user how he/she wants the plot
         if len(dimensions['time']) > 1:
             answer = input(
                 f"There are {len(dimensions['time'])} temporal profiles, do you want to plot them on different plots [y/n]? ")
             if answer[0:1].lower() == 'y':
                 timeinplots = True
             if timeinplots == False:
                 answer = input(
                     f"There are {len(dimensions['time'])} temporal profiles, do you want to plot them on different rows [y/n]? ")
                 if answer[0:1].lower() == 'y':
                     timeinrows = True
     if variables_with_data is not None:
         timeiter = dimensions['time']
         if timeinplots == True:
             dimensions_t = dimensions
             for t in range(len(dimensions['time'])):
                 dimensions_t['time'] = [timeiter[t]]
                 for vr in range(len(variables_with_data)):
                     if vr == 0:
                         data_t = {variables_with_data[vr]: data[variables_with_data[vr]][:, t:t + 1, :]}
                     else:
                         data_t[variables_with_data[vr]] = data[variables_with_data[vr]][:, t:t + 1, :]
                 fig = plot_vertical_profiles_plotly(
                     variables=variables_with_data,
                     data=data_t,
                     dimensions=dimensions_t,
                     y_array=altitude,
                     positive_error_array=data_error_positive,
                     negative_error_array=data_error_negative,
                     xaxis_label=variables_axis,
                     yaxis_label=altitude['metadata']['long_name'] + ' [' + altitude['metadata']['units'] + ']',
                     title=netcdf_file[0:3].upper() + " - " + str(
                         datetime.fromtimestamp(dimensions_t['time'][0]).strftime(
                             '%d/%m/%Y %H:%M:%S')) + " - ELDA MWL " +
                           groups_names[g],
                     timeinrows=True
                 )
                 # Show the plot (in Jupyter Notebook or Jupyter Lab)
                 fig.show()
                 # Save the plot
                 filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp(
                     dimensions['time'][0]).strftime('%Y%m%d-%H%M%S') + "_" + groups[g] + ".html"
                 fig.write_html(html_path + filename)
         else:
             # Plotting in one plot (even if there are different time profiles)
             fig = plot_vertical_profiles_plotly(
                 variables_with_data,
                 data,
                 dimensions,
                 altitude,
                 data_error_positive,
                 data_error_negative,
                 xaxis_label=variables_axis,
                 yaxis_label=altitude['metadata']['long_name'] + ' [' + altitude['metadata']['units'] + ']',
                 title=netcdf_file[0:3].upper() + " - " + str(
                     datetime.fromtimestamp(dimensions['time'][0]).strftime('%d/%m/%Y')) + " - ELDA MWL " + groups_names[
                           g],
                 timeinrows=timeinrows
             )
             # Show the plot (in Jupyter Notebook or Jupyter Lab)
             fig.show()
             # Save the plot
             if timeinrows == True:
                 filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp(
                     dimensions['time'][0]).strftime('%Y%m%d%H%M%S') + '-' + datetime.fromtimestamp(
                     dimensions['time'][-1]).strftime('%Y%m%d%H%M%S') + "_" + groups[g] + "_timeinrows.html"
             else:
                 filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp(
                     dimensions['time'][0]).strftime('%Y%m%d%H%M%S') + '-' + datetime.fromtimestamp(
                     dimensions['time'][-1]).strftime('%Y%m%d%H%M%S') + "_" + groups[g] + ".html"
             fig.write_html(html_path + filename)
     else:
         print(f'There is no data to be plotted in the {groups_names[g]} group.')

Mercurial > public > eldamwl_plotting / annotate

eldamwl_plot.py@fce4cae19357 (annotated)

eldamwl_plot.py