Mon, 29 Jan 2024 11:07:31 +0000
First version
from typing import List, Any import xarray as xr from config import * import plotly.graph_objects as go from plotly.subplots import make_subplots import netCDF4 as nc from datetime import datetime import numpy as np import sys from os.path import exists def read_variable_in_group(nc_file_path, group_name, variable_name): try: with nc.Dataset(nc_file_path, 'r') as dataset: # Find the group group = dataset.groups[group_name] # Access the variable try: variable = group.variables[variable_name] dimensions = variable.dimensions # print("Dimensions:", dimensions) # variable_metadata = { variable.name, variable.long_name, variable.units, variable.ndim} # variable_metadata.add = variable.long_name variable_metadata = \ {'name': variable.name.capitalize(), 'long_name': variable.long_name.capitalize(), 'units': variable.units, 'ndim': variable.ndim} # Create an index array for slicing the variable index_array = [slice(None)] * variable.ndim # Access the variable data using the index array variable_data = variable[index_array] # Fetch the dimension values dimension_values = {} for dim_name in dimensions: dim_values = group.variables[dim_name][:] dimension_values[dim_name] = dim_values # Adjust the index array for slicing index_array[variable.dimensions.index(dim_name)] = 0 # Now you have the variable data and its associated dimensions in a dictionary result = {'data': variable_data, 'dimensions': dimension_values, 'metadata': variable_metadata} # Print or use the result as needed # print(result) return result except KeyError: print("The specified variable does not exist in the group.") except FileNotFoundError: print(f"The file '{nc_file_path}' does not exist.") except Exception as e: print(f"The group {e} does not exist.") def read_variable_in_group2(nc_file_path, group_name, variable_name): try: with nc.Dataset(nc_file_path, 'r') as dataset: # Find the group group = dataset.groups[group_name] # Access the variable try: variable = group.variables[variable_name] dimensions = variable.dimensions # print("Dimensions:", dimensions) # variable_metadata = { variable.name, variable.long_name, variable.units, variable.ndim} # variable_metadata.add = variable.long_name variable_metadata = \ {'name': variable.name.capitalize(), 'long_name': variable.long_name.capitalize(), 'units': variable.units, 'ndim': variable.ndim} # Create an index array for slicing the variable index_array = [slice(None)] * variable.ndim # Access the variable data using the index array variable_data = variable[index_array] # Fetch the dimension values dimension_values = {} for dim_name in dimensions: dim_values = group.variables[dim_name][:] dimension_values[dim_name] = dim_values # Adjust the index array for slicing index_array[variable.dimensions.index(dim_name)] = 0 # Now you have the variable data and its associated dimensions in a dictionary # result = {'data': variable_data, 'dimensions': dimension_values, 'metadata': variable_metadata} # Print or use the result as needed # print(result) return variable_data, dimension_values, variable_metadata except KeyError: print(f"The variable {variable_name} does not exist in the {group_name} group.") except FileNotFoundError: print(f"The file {nc_file_path} does not exist.") except Exception as e: print(f"The group {e} does not exist.") def plot_vertical_profiles_plotly(variables, data, dimensions, y_array, positive_error_array, negative_error_array, xaxis_label, yaxis_label, title, timeinrows): """ ToDo Complete Plot vertical profiles using plotly Parameters: - Returns: - fig: html """ # Set number of columns, number of rows, and subtitles if needed ncols = len(variables) subtitles = None if timeinrows is True: nrows = len(dimensions['time']) if nrows > 1: for t in range(nrows): for v in range(ncols): if subtitles is None: subtitles = [str(datetime.fromtimestamp(dimensions['time'][t]))] else: subtitles += [str(datetime.fromtimestamp(dimensions['time'][t]))] else: nrows = 1 subtitles = '' # Create figure fig = make_subplots(rows=nrows, cols=ncols, subplot_titles=subtitles, shared_yaxes=True) # Define colours clrs = [['rgb(0,35,255)', 'rgb(0,20,150)', 'rgb(0,170,250)'], # UV ['rgb(90,210,50)', 'rgb(40,90,25)', 'rgb(120,250,80)'], # VIS ['rgb(250,30,30)', 'rgb(200,25,30)', 'rgb(250,125,0)']] # IR clrs = np.array(clrs) # Plot for v in range(len(variables)): for t in range(len(dimensions['time'])): for w in range(len(dimensions['wavelength'])): tn = str(datetime.fromtimestamp(dimensions['time'][t])) wn = str(dimensions['wavelength'][w]) if timeinrows == True: lgnd = wn + ' nm' rown = t + 1 colorint = 1 colorind = 0 subtitle = str(datetime.fromtimestamp(dimensions['time'][t])) else: lgnd = wn + ' nm - ' + tn rown = 1 colorind = t colorint = 1 - (0.4 * t) # colour definition if abs(dimensions['wavelength'][w] - 1064) < 1: clr = clrs[2, colorind] # clr = 'rgb(235,70,20)' # 'rgba(215,27,96,0.4)' elif abs(dimensions['wavelength'][w] - 532) < 1: clr = clrs[1, colorind] elif abs(dimensions['wavelength'][w] - 355) < 1: clr = clrs[0, colorind] else: clr = 'rgb(0,0,0)' data_values = data[variables[v]][w, t, :] error_values = positive_error_array[variables[v]][w, t, :] vertical_levels = y_array["data"][t, :] # Add trace fig.add_trace(go.Scatter( x=data_values, y=vertical_levels, error_x=dict(array=error_values, width=0, thickness=0.1), # ToDo Include distinction between positive and negative errors mode='lines', line=dict(width=2, color=clr), name=lgnd), row=rown, col=v + 1, ) # ToDo Check formatting options (showlegend, mapbox) fig.update_xaxes({"title": xaxis_label[v]}, row=rown, col=v + 1) fig.update_xaxes({"mirror": True, "ticks": 'outside', "showline": True, "color": "black"}, row=rown, col=v + 1) fig.update_xaxes(ticks="outside") fig.update_yaxes(ticks="outside", row=rown, col=v + 1) # ToDo check tickformat (i.e. tickformat=":.2e") if v == 0: fig.update_yaxes({"title": yaxis_label}, row=rown, col=v + 1) # Set axis labels and title fig.update_layout( yaxis_title=yaxis_label, title=title, template="seaborn", # You can change the template as per your preference # ['ggplot2', 'seaborn', 'simple_white', 'plotly', 'plotly_white', 'plotly_dark', 'presentation', # 'xgridoff', 'ygridoff', 'gridon', 'none'] height=max(600, 450 * rown), width=min(max(400 * len(variables), 750), 1400), ) return fig # Files for testing # netcdf_file = 'hpb_012_0000598_201810172100_201810172300_20181017oh00_ELDAmwl_v0.0.1.nc' # Backscatter and extinction, no error # netcdf_file = 'hpb_012_0000627_202308240200_202308240400_20230824hpb0200_ELDAmwl_v0.0.1.nc' # Extinction with error try: netcdf_file = sys.argv[1] except: print('Syntax is incorrect, you need to pass the filename of the file to be plotted.\n' 'Example: ./eldamwl_plot.py hpb_012_0000598_201810172100_201810172300_20181017oh00_ELDAmwl_v0.0.1.nc') exit() netcdf_file_path = netcdf_path + netcdf_file file_exists = exists(netcdf_file_path) if not file_exists: print('The file that you provided does not exist.') exit() # Groups of data in the NetCDF file (low and high resolution) groups = ['lowres_products', 'highres_products'] groups_names = ['Low resolution', 'High resolution'] # Variables that we want to plot variable = ['backscatter', 'extinction', 'lidarratio', 'volumedepolarization', 'particledepolarization'] # ToDo Test particledepolarization when available # Errors associated to the variables variable_error = ['error_backscatter', 'error_extinction', 'error_lidarratio', 'positive_systematic_error_volumedepolarization', 'error_particledepolarization'] variable_negative_error_volumedepol = 'negative_systematic_error_volumedepolarization' # Other variables: # cloud_mask (time, level) for g in range(len(groups)): # Read the altitude variable altitude = read_variable_in_group(netcdf_file_path, groups[g], 'altitude') if altitude['metadata']['units'] == 'm': altitude['data'] = altitude['data'] / 1000.0 altitude['metadata']['units'] = 'km' # Initialize variables data = None variables_with_data = None variables_axis = None # Read the data for v in range(len(variable)): # Initialize var var = None try: var, dim, met = read_variable_in_group2(netcdf_file_path, groups[g], variable[v]) except TypeError: print(f"The variable {variable[v]} is not present. Cannot unpack non-iterable NoneType object") if var is not None: # If the variable that we're trying to read exists if var.max() != var.min(): # If there is data in the array if variables_with_data is None: variables_with_data = [variable[v]] variables_axis = [met['long_name'] + ' [ ' + met['units'] + ' ]'] else: variables_with_data += [variable[v]] variables_axis += [met['long_name'] + ' [ ' + met['units'] + ' ]'] var_error, dim_error, met_error = read_variable_in_group2(netcdf_file_path, groups[g], variable_error[v]) if variable[v] == 'volumedepolarization': var_error_negative, dim, met = read_variable_in_group2(netcdf_file_path, groups[g], variable_negative_error_volumedepol) else: var_error_negative = var_error # Add read data to the data variable to have it all grouped if data is None: # Start populating data and data_error data = {variable[v]: var} data_error_positive = {variable[v]: var_error} data_error_negative = {variable[v]: var_error_negative} else: # Add more data to data and data_error data[variable[v]] = var data_error_positive[variable[v]] = var_error data_error_negative[variable[v]] = var_error_negative else: print(f'There is no data for {variable[v]} in the {groups_names[g]} group.') dimensions = dim # Ask user if he/she wants to plot different temporal profiles in different rows if g == 0: # Initialize timeinplots = False timeinrows = False # Ask user how he/she wants the plot if len(dimensions['time']) > 1: answer = input( f"There are {len(dimensions['time'])} temporal profiles, do you want to plot them on different plots [y/n]? ") if answer[0:1].lower() == 'y': timeinplots = True if timeinplots == False: answer = input( f"There are {len(dimensions['time'])} temporal profiles, do you want to plot them on different rows [y/n]? ") if answer[0:1].lower() == 'y': timeinrows = True if variables_with_data is not None: timeiter = dimensions['time'] if timeinplots == True: dimensions_t = dimensions for t in range(len(dimensions['time'])): dimensions_t['time'] = [timeiter[t]] for vr in range(len(variables_with_data)): if vr == 0: data_t = {variables_with_data[vr]: data[variables_with_data[vr]][:, t:t + 1, :]} else: data_t[variables_with_data[vr]] = data[variables_with_data[vr]][:, t:t + 1, :] fig = plot_vertical_profiles_plotly( variables=variables_with_data, data=data_t, dimensions=dimensions_t, y_array=altitude, positive_error_array=data_error_positive, negative_error_array=data_error_negative, xaxis_label=variables_axis, yaxis_label=altitude['metadata']['long_name'] + ' [' + altitude['metadata']['units'] + ']', title=netcdf_file[0:3].upper() + " - " + str( datetime.fromtimestamp(dimensions_t['time'][0]).strftime( '%d/%m/%Y %H:%M:%S')) + " - ELDA MWL " + groups_names[g], timeinrows=True ) # Show the plot (in Jupyter Notebook or Jupyter Lab) fig.show() # Save the plot filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp( dimensions['time'][0]).strftime('%Y%m%d-%H%M%S') + "_" + groups[g] + ".html" fig.write_html(html_path + filename) else: # Plotting in one plot (even if there are different time profiles) fig = plot_vertical_profiles_plotly( variables_with_data, data, dimensions, altitude, data_error_positive, data_error_negative, xaxis_label=variables_axis, yaxis_label=altitude['metadata']['long_name'] + ' [' + altitude['metadata']['units'] + ']', title=netcdf_file[0:3].upper() + " - " + str( datetime.fromtimestamp(dimensions['time'][0]).strftime('%d/%m/%Y')) + " - ELDA MWL " + groups_names[ g], timeinrows=timeinrows ) # Show the plot (in Jupyter Notebook or Jupyter Lab) fig.show() # Save the plot if timeinrows == True: filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp( dimensions['time'][0]).strftime('%Y%m%d%H%M%S') + '-' + datetime.fromtimestamp( dimensions['time'][-1]).strftime('%Y%m%d%H%M%S') + "_" + groups[g] + "_timeinrows.html" else: filename = "ELDAmwl_" + netcdf_file[0:3].upper() + "_" + datetime.fromtimestamp( dimensions['time'][0]).strftime('%Y%m%d%H%M%S') + '-' + datetime.fromtimestamp( dimensions['time'][-1]).strftime('%Y%m%d%H%M%S') + "_" + groups[g] + ".html" fig.write_html(html_path + filename) else: print(f'There is no data to be plotted in the {groups_names[g]} group.')