"""Generic lidar measurement and lidar channel classes.

The module provides:

* ``BaseLidarMeasurement`` -- an instrument-independent container of lidar
  channels that can be exported to the netCDF format required by the SCC.
* ``Lidar_channel`` -- the profiles of a single channel together with some
  basic selection and plotting helpers.
* ``netcdf_from_files`` -- a convenience function that reads raw files and
  writes the netCDF file in one go.

This file was reconstructed from the Mercurial changeset 9d2b98ecf23d that
adds ``generic.py``.  The code is kept compatible with Python 2 and also runs
on Python 3.
"""
# General imports
import ast
import datetime
from operator import itemgetter

# Science imports
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
import netCDF4 as netcdf

# CNR-IMAA specific imports
import milos


# Choose one of 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC' and 'NETCDF4'.
netcdf_format = 'NETCDF3_CLASSIC'

# Duration (in seconds) assumed for a profile when it cannot be derived from the data.
DEFAULT_PROFILE_DURATION = 60

try:
    _string_types = (basestring,)  # Python 2: covers both str and unicode
except NameError:
    _string_types = (str,)  # Python 3


def _total_seconds(delta):
    """Return the whole seconds in a timedelta, including its days.

    ``timedelta.seconds`` alone silently drops the ``days`` part, which makes
    offsets wrap around for measurements longer than 24 hours.
    """
    return delta.days * 86400 + delta.seconds


def _attribute_value(value):
    """Convert a parameter given as a Python-literal string to its value.

    Global attributes used to be written with ``exec('f.name = %s')``, so
    string parameters had to carry their own quotes (e.g. ``"'MUSA'"``).
    Such values are still accepted: quoted strings are unquoted and numeric
    strings become numbers.  Anything that is not a valid literal (or is not
    a string at all) is returned unchanged.
    """
    if isinstance(value, _string_types):
        try:
            return ast.literal_eval(value)
        except (ValueError, SyntaxError):
            return value
    return value


class BaseLidarMeasurement(object):
    """ This is the general measurement object.
    It is meant to become a general measurement object
    independent of the input files.

    Each subclass should implement the following:
    * the import_file method.
    * set the "extra_netcdf_parameters" variable to an object that provides
      the "general_parameters" and "channel_parameters" dictionaries.

    You can override the get_PT method to define a custom procedure to get
    ground temperature and pressure. The default implementation only stores
    fixed placeholder values.
    """

    def __init__(self, filelist=None):
        """Create the measurement and optionally import a list of files."""
        self.info = {}
        self.dimensions = {}
        self.variables = {}
        self.channels = {}
        self.attributes = {}
        self.files = []
        self.dark_measurement = None
        if filelist:
            self.import_files(filelist)

    def import_files(self, filelist):
        """Import every file in filelist and refresh the derived information."""
        for f in filelist:
            self.import_file(f)
        self.update()

    def import_file(self, filename):
        """Import a single raw file. Has to be implemented by the subclasses."""
        raise NotImplementedError('Importing files should be defined in the instrument-specific subclass.')

    def update(self):
        '''
        Update the info, variables and dimensions of the lidar measurement based
        on the information found in the channels.

        Reading of the scan_angles parameter is not implemented.

        Raises a ValueError if the measurement has no channels.
        '''
        if not self.channels:
            raise ValueError('The measurement contains no channels.')

        # Get the information from all the channels
        start_times = []
        stop_times = []
        points = []
        all_time_scales = []
        channel_names = []
        for channel_name, channel in self.channels.items():
            channel.update()
            start_times.append(channel.start_time)
            stop_times.append(channel.stop_time)
            points.append(channel.points)
            all_time_scales.append(channel.time)
            channel_names.append(channel_name)

        # Unique time scales used in the channels. They are sorted so that the
        # index of each time scale does not depend on set iteration order.
        time_scales = sorted(set(all_time_scales))

        # Update the info dictionary
        first_start = min(start_times)
        self.info['start_time'] = first_start
        self.info['stop_time'] = max(stop_times)
        self.info['duration'] = self.info['stop_time'] - self.info['start_time']

        # Update the dimensions dictionary
        self.dimensions['points'] = max(points)
        self.dimensions['channels'] = len(self.channels)
        self.dimensions['nb_of_time_scales'] = len(time_scales)

        # Update the variables dictionary: time scales in seconds since the
        # start of the measurement.
        raw_data_start_time = []
        raw_data_stop_time = []
        for time_scale in time_scales:
            start_in_seconds = np.array([_total_seconds(t - first_start) for t in time_scale])
            duration = self._get_duration(start_in_seconds)
            raw_data_start_time.append(start_in_seconds)
            raw_data_stop_time.append(start_in_seconds + duration)

        self.variables['Raw_Data_Start_Time'] = raw_data_start_time
        self.variables['Raw_Data_Stop_Time'] = raw_data_stop_time

        # Match every channel with the index of its time scale
        timescale_index = dict((ts, n) for n, ts in enumerate(time_scales))
        self.variables['id_timescale'] = dict(
            (name, timescale_index[ts]) for name, ts in zip(channel_names, all_time_scales))

    def _get_duration(self, raw_start_in_seconds):
        ''' Return the duration for a given time scale. In some files (ex. Licel) this
        can be specified from the files themselves. In others this must be guessed.

        The guess is the interval between the first two profiles. When the time
        scale holds a single profile no interval exists and
        DEFAULT_PROFILE_DURATION is returned instead.
        '''
        intervals = np.diff(raw_start_in_seconds)
        if intervals.size == 0:
            return DEFAULT_PROFILE_DURATION
        return intervals[0]

    def subset_by_channels(self, channel_subset):
        ''' Get a measurement object containing only the channels with names
        contained in the channel_subset list.

        Raises a KeyError if a requested channel does not exist.
        '''
        m = self.__class__()  # Create an object of the same type as this one.
        m.channels = dict((name, self.channels[name]) for name in channel_subset)
        m.update()
        return m

    def subset_by_time(self, start_time, stop_time):
        ''' Get a measurement object containing only the profiles recorded
        between start_time and stop_time (both included).

        Raises a ValueError if the interval is reversed or is not contained in
        the measurement.
        '''
        if start_time > stop_time:
            raise ValueError('Stop time should be after start time')

        if (start_time < self.info['start_time']) or (stop_time > self.info['stop_time']):
            raise ValueError('The time interval specified is not part of the measurement')

        m = self.__class__()  # Create an object of the same type as this one.
        for (channel_name, channel) in self.channels.items():
            m.channels[channel_name] = channel.subset_by_time(start_time, stop_time)
        m.update()
        return m

    def r_plot(self):
        """Placeholder: make a basic plot of the data (not implemented).

        Should include some dictionary with params to make plot stable.
        """
        pass

    def r_pdf(self):
        """Placeholder: create a pdf report using a basic plot and meta-data (not implemented)."""
        pass

    def save(self):
        """Placeholder: save the state of the object to continue the analysis later (not implemented)."""
        pass

    def get_PT(self):
        ''' Sets the pressure and temperature at station level.
        The results are stored in the info dictionary.

        This default implementation stores the fixed values 10.0 and 930.0;
        override it to provide measured values.
        '''
        # NOTE(review): presumably degrees Celsius and hPa -- confirm against the SCC format.
        self.info['Temperature'] = 10.0
        self.info['Pressure'] = 930.0

    def save_as_netcdf(self, filename):
        """Saves the measurement in the netcdf format as required by the SCC.

        Input: filename

        'Measurement_ID', 'Temperature' and 'Pressure' have to be present in
        the info dictionary, otherwise a ValueError is raised. The output file
        is closed even if writing fails.
        """
        params = self.extra_netcdf_parameters

        for parameter in ('Measurement_ID', 'Temperature', 'Pressure'):
            if self.info.get(parameter, None) is None:
                raise ValueError('A value needs to be specified for %s' % parameter)

        # Mandatory dimensions. The time_bck dimension is added together with
        # the dark measurements.
        dimensions = {'points': 1,
                      'channels': 1,
                      'time': None,
                      'nb_of_time_scales': 1,
                      'scan_angles': 1, }

        global_att = ('Measurement_ID',
                      'RawData_Start_Date',
                      'RawData_Start_Time_UT',
                      'RawData_Stop_Time_UT',
                      'RawBck_Start_Date',
                      'RawBck_Start_Time_UT',
                      'RawBck_Stop_Time_UT',
                      'Sounding_File_Name',
                      'LR_File_Name',
                      'Overlap_File_Name',
                      'Location',
                      'System',
                      'Latitude_degrees_north',
                      'Longitude_degrees_east',
                      'Altitude_meter_asl')

        channel_variables = \
            {'channel_ID': (('channels', ), 'i'),
             'Background_Low': (('channels', ), 'd'),
             'Background_High': (('channels', ), 'd'),
             'LR_Input': (('channels', ), 'i'),
             'DAQ_Range': (('channels', ), 'd'),
             'Depolarization_Factor': (('channels', ), 'd'), }

        # A list, so that the channel order is fixed and can be indexed.
        channels = list(self.channels)

        input_values = dict(self.dimensions, **self.variables)

        # Add some mandatory global attributes
        start_time = self.info['start_time']
        stop_time = self.info['stop_time']
        input_values['Measurement_ID'] = _attribute_value(self.info['Measurement_ID'])
        input_values['RawData_Start_Date'] = start_time.strftime('%Y%m%d')
        input_values['RawData_Start_Time_UT'] = start_time.strftime('%H%M%S')
        input_values['RawData_Stop_Time_UT'] = stop_time.strftime('%H%M%S')

        # Add some optional global attributes
        for name in ('System', 'Latitude_degrees_north',
                     'Longitude_degrees_east', 'Altitude_meter_asl'):
            input_values[name] = _attribute_value(params.general_parameters[name])

        # Open a netCDF4 file; the format is specified in the beginning of this module.
        f = netcdf.Dataset(filename, 'w', format=netcdf_format)
        try:
            # Create the dimensions in the file
            for (d, v) in dimensions.items():
                f.createDimension(d, input_values.pop(d, v))

            # Create global attributes. Only missing values are skipped, so
            # that legitimate zeros (e.g. a longitude of 0) are still written.
            for attrib in global_att:
                val = input_values.pop(attrib, None)
                if val is None or (isinstance(val, _string_types) and not val):
                    continue
                setattr(f, attrib, val)

            # Write the values of fixed channel parameters
            for (var, (var_dimensions, var_type)) in channel_variables.items():
                temp_v = f.createVariable(var, var_type, var_dimensions)
                for n, channel in enumerate(channels):
                    temp_v[n] = params.channel_parameters[channel][var]

            # Write the id_timescale values
            temp_id_timescale = f.createVariable('id_timescale', 'i', ('channels',))
            for n, channel in enumerate(channels):
                temp_id_timescale[n] = self.variables['id_timescale'][channel]

            # Laser pointing angle
            temp_v = f.createVariable('Laser_Pointing_Angle', 'd', ('scan_angles',))
            temp_v[:] = params.general_parameters['Laser_Pointing_Angle']

            # Molecular calculation
            temp_v = f.createVariable('Molecular_Calc', 'i')
            temp_v[:] = params.general_parameters['Molecular_Calc']

            # Laser pointing angles of profiles
            temp_v = f.createVariable('Laser_Pointing_Angle_of_Profiles', 'i', ('time', 'nb_of_time_scales'))
            for n, time_scale in enumerate(self.variables['Raw_Data_Start_Time']):
                temp_v[:len(time_scale), n] = 0  # The lidar has only one laser pointing angle

            # Raw data start/stop time
            temp_raw_start = f.createVariable('Raw_Data_Start_Time', 'i', ('time', 'nb_of_time_scales'))
            temp_raw_stop = f.createVariable('Raw_Data_Stop_Time', 'i', ('time', 'nb_of_time_scales'))
            for n, (raw_start, raw_stop) in enumerate(zip(self.variables['Raw_Data_Start_Time'],
                                                          self.variables['Raw_Data_Stop_Time'])):
                temp_raw_start[:len(raw_start), n] = raw_start
                temp_raw_stop[:len(raw_stop), n] = raw_stop

            # Laser shots
            temp_v = f.createVariable('Laser_Shots', 'i', ('time', 'channels'))
            for n, channel in enumerate(channels):
                timescale_id = self.variables['id_timescale'][channel]
                time_length = len(self.variables['Raw_Data_Start_Time'][timescale_id])
                temp_v[:time_length, n] = params.channel_parameters[channel]['Laser_Shots']

            # Raw lidar data
            temp_v = f.createVariable('Raw_Lidar_Data', 'd', ('time', 'channels', 'points'))
            for n, channel in enumerate(channels):
                c = self.channels[channel]
                temp_v[:len(c.time), n, :c.points] = c.matrix

            self.add_dark_measurements_to_netcdf(f, channels)

            # Pressure at lidar station
            temp_v = f.createVariable('Pressure_at_Lidar_Station', 'd')
            temp_v[:] = self.info['Pressure']

            # Temperature at lidar station
            temp_v = f.createVariable('Temperature_at_Lidar_Station', 'd')
            temp_v[:] = self.info['Temperature']

            self.save_netcdf_extra(f)
        finally:
            f.close()

    def add_dark_measurements_to_netcdf(self, f, channels):
        """Write the dark measurement, if any, to the open netCDF file f.

        If self.dark_measurement is not set, get_dark_measurements() is used
        to find it. If none is found, nothing is written.

        channels is the ordered list of channel names used for the rest of the file.
        """
        if self.dark_measurement is None:
            self.dark_measurement = self.get_dark_measurements()

        if self.dark_measurement is None:
            return

        dark_measurement = self.dark_measurement

        # The time_bck dimension has to fit the channel with the most profiles
        max_number_of_profiles = max(len(c.time) for c in dark_measurement.channels.values())
        f.createDimension('time_bck', max_number_of_profiles)

        # Save the dark measurement data
        temp_v = f.createVariable('Background_Profile', 'd', ('time_bck', 'channels', 'points'))
        for n, channel in enumerate(channels):
            c = dark_measurement.channels[channel]
            temp_v[:len(c.time), n, :c.points] = c.matrix

        # Dark profile start/stop time. These use the time_bck dimension, like
        # the background profiles they describe, and not the raw data 'time'.
        temp_raw_start = f.createVariable('Raw_Bck_Start_Time', 'i', ('time_bck', 'nb_of_time_scales'))
        temp_raw_stop = f.createVariable('Raw_Bck_Stop_Time', 'i', ('time_bck', 'nb_of_time_scales'))
        for n, (bck_start, bck_stop) in enumerate(zip(dark_measurement.variables['Raw_Data_Start_Time'],
                                                      dark_measurement.variables['Raw_Data_Stop_Time'])):
            temp_raw_start[:len(bck_start), n] = bck_start
            temp_raw_stop[:len(bck_stop), n] = bck_stop

        # Dark measurement start/stop time
        f.RawBck_Start_Date = dark_measurement.info['start_time'].strftime('%Y%m%d')
        f.RawBck_Start_Time_UT = dark_measurement.info['start_time'].strftime('%H%M%S')
        f.RawBck_Stop_Time_UT = dark_measurement.info['stop_time'].strftime('%H%M%S')

    def save_netcdf_extra(self, f):
        """Hook for subclasses to write extra content to the netCDF file f. Does nothing by default."""
        pass

    def _gettime(self, date_str, time_str):
        """Parse a 'dd/mm/YYYY' date and a 'HH.MM.SS' time into a datetime object."""
        t = datetime.datetime.strptime(date_str + time_str, '%d/%m/%Y%H.%M.%S')
        return t

    def plot(self):
        """Plot every channel in its own figure and show all the figures at once."""
        for channel in self.channels.values():
            channel.plot(show_plot=False)
        plt.show()

    def get_dark_measurements(self):
        """Return the dark measurement object, or None when there is none (the default)."""
        return None


class Lidar_channel(object):
    """The profiles of a single lidar channel.

    The profiles are stored in the ``data`` dictionary, keyed by the datetime
    of each profile. After changing ``data``, call update() to refresh the
    derived ``time``, ``matrix``, ``start_time`` and ``stop_time`` attributes.
    """

    def __init__(self, channel_parameters):
        """Create the channel from a dictionary with the keys 'name',
        'binwidth' and 'data' (one profile, used only for its length)."""
        c = 299792458  # Speed of light
        self.wavelength = channel_parameters['name']
        self.name = str(self.wavelength)
        # NOTE(review): the original comment says "in microseconds", but the
        # formula below gives a resolution in metres only for a bin width in
        # seconds -- confirm the unit against the callers.
        self.binwidth = float(channel_parameters['binwidth'])
        self.data = {}
        self.resolution = self.binwidth * c / 2
        self.points = len(channel_parameters['data'])
        # Half a bin is added so that z refers to the centre of each bin
        self.z = np.arange(self.points) * self.resolution + self.resolution / 2.0
        self.rc = []
        self.duration = DEFAULT_PROFILE_DURATION

    def calculate_rc(self, background_start_bin=4000):
        """Calculate the background-subtracted, range-corrected signal.

        The background of each profile is the mean of the bins from
        background_start_bin to the end (the default was described as
        "30000m and above" in the original code). The result is stored in self.rc.
        """
        background = np.mean(self.matrix[:, background_start_bin:], axis=1)
        self.rc = (self.matrix - background[:, np.newaxis]) * (self.z ** 2)

    def _get_signal(self, signal_type):
        """Return the range-corrected signal for 'rc' (calculating it if
        needed) and the raw matrix for any other signal_type."""
        if signal_type == 'rc':
            if len(self.rc) == 0:
                self.calculate_rc()
            return self.rc
        return self.matrix

    def update(self):
        """Refresh the time axis, the data matrix and the start/stop times from self.data."""
        sorted_data = sorted(self.data.items(), key=itemgetter(0))
        self.time = tuple(time for (time, _) in sorted_data)
        self.matrix = np.array([profile for (_, profile) in sorted_data])
        self.start_time = min(self.data.keys())
        self.stop_time = max(self.data.keys()) + datetime.timedelta(seconds=self.duration)

    def _nearest_dt(self, dtime):
        """Return the time and the integer index of the profile nearest to dtime.

        Raises a ValueError if dtime is more than 300 seconds outside the
        period covered by the channel. A message is printed if the nearest
        profile is more than 60 seconds away. In case of a tie the earliest
        profile is chosen.
        """
        margin = datetime.timedelta(seconds=300)
        if ((dtime + margin) < self.start_time) or ((dtime - margin) > self.stop_time):
            raise ValueError("Requested date not covered in this file")

        dt = [abs(t - dtime) for t in self.time]
        ind = min(range(len(dt)), key=dt.__getitem__)
        dtmin = dt[ind]

        if dtmin > datetime.timedelta(seconds=60):
            print("Nearest profile more than 60 seconds away. dt = %s." % dtmin)

        return self.time[ind], ind

    def subset_by_time(self, start_time, stop_time):
        """Return a new channel with the profiles between start_time and
        stop_time (both included).

        Raises a ValueError if no profile falls in the interval.
        """
        subset_time = [t for t in self.time if start_time <= t <= stop_time]
        if not subset_time:
            raise ValueError('No profiles found between %s and %s' % (start_time, stop_time))

        subset_data = dict((c_time, self.data[c_time]) for c_time in subset_time)

        # The values needed by the channel's __init__()
        parameters_values = {'name': self.wavelength,
                             'binwidth': self.binwidth,
                             'data': subset_data[subset_time[0]], }

        c = Lidar_channel(parameters_values)
        c.duration = self.duration  # Keep a duration that differs from the default
        c.data = subset_data
        c.update()
        return c

    def profile(self, dtime, signal_type='rc'):
        """Return the profile nearest to dtime, together with its time."""
        t, idx = self._nearest_dt(dtime)
        data = self._get_signal(signal_type)
        prof = data[idx, :]
        return prof, t

    def get_slice(self, starttime, endtime, signal_type='rc'):
        """Return the profiles between the profiles nearest to starttime and
        endtime (both included), together with their times."""
        data = self._get_signal(signal_type)
        tim = np.array(self.time)
        starttime = self._nearest_dt(starttime)[0]
        endtime = self._nearest_dt(endtime)[0]
        condition = (tim >= starttime) & (tim <= endtime)
        sl = data[condition, :]
        t = tim[condition]
        return sl, t

    def av_profile(self, tim, duration=datetime.timedelta(seconds=0), signal_type='rc'):
        """Return the mean profile in a window of the given duration centred
        on tim, together with a (middle, first, last) tuple of the times used."""
        starttime = tim - duration / 2
        endtime = tim + duration / 2
        d, t = self.get_slice(starttime, endtime, signal_type=signal_type)
        prof = np.mean(d, axis=0)
        tmin = min(t)
        tmax = max(t)
        tav = tmin + (tmax - tmin) / 2
        return prof, (tav, tmin, tmax)

    def plot(self, signal_type='rc', filename=None, zoom=(0, 12000, 0, -1), show_plot=True, cmap=plt.cm.jet):
        """Plot the channel in a new figure.

        The figure is shown when show_plot is True and no filename is given.
        NOTE: saving is not implemented; when a filename is given the figure is
        created but neither saved nor shown.
        """
        fig = plt.figure()
        ax1 = fig.add_subplot(111)
        self.draw_plot(ax1, cmap=cmap, signal_type=signal_type, zoom=zoom)
        ax1.set_title("%s signal - %s" % (signal_type.upper(), self.name))

        if filename is None and show_plot:
            plt.show()

    def draw_plot(self, ax1, cmap=plt.cm.jet, signal_type='rc', zoom=(0, 12000, 0, -1)):
        """Draw the time-height image of the signal on the axes ax1.

        zoom is (first bin, maximum height, first profile, last profile); the
        height is converted to a bin index with index_at_height().
        """
        data = self._get_signal(signal_type)

        hmax_idx = self.index_at_height(zoom[1])

        ax1.set_ylabel('Altitude (km)')
        ax1.set_xlabel('Time UTC')

        hourFormatter = mpl.dates.DateFormatter('%H.%M')
        hourlocator = mpl.dates.AutoDateLocator(interval_multiples=True)
        ax1.axes.xaxis.set_major_formatter(hourFormatter)
        ax1.axes.xaxis.set_major_locator(hourlocator)

        ts1 = mpl.dates.date2num(self.start_time)
        ts2 = mpl.dates.date2num(self.stop_time)

        # The colour scale spans 10% to 90% of the maximum found in bins 10 to 400
        reference_max = data[:, 10:400].max()

        im1 = ax1.imshow(data.transpose()[zoom[0]:hmax_idx, zoom[2]:zoom[3]],
                         aspect='auto',
                         origin='lower',
                         cmap=cmap,
                         vmin=reference_max * 0.1,
                         vmax=reference_max * 0.9,
                         extent=[ts1, ts2, self.z[zoom[0]] / 1000.0, self.z[hmax_idx] / 1000.0],
                         )

        cb1 = plt.colorbar(im1)
        cb1.ax.set_ylabel('a.u.')

    def index_at_height(self, height):
        """Return the index of the bin whose centre is nearest to height."""
        return int(np.abs(self.z - height).argmin())


def netcdf_from_files(LidarClass, filename, files, channels, measurement_ID):
    """Read the lidar files with LidarClass, keep the selected channels and
    save them to filename in the SCC netCDF format."""
    # Read the lidar files and select channels
    temp_m = LidarClass(files)
    m = temp_m.subset_by_channels(channels)
    m.get_PT()
    m.info['Measurement_ID'] = measurement_ID
    m.save_as_netcdf(filename)