diff -r 000000000000 -r 9d2b98ecf23d generic.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/generic.py	Tue May 15 16:43:26 2012 +0200
@@ -0,0 +1,543 @@
+# General imports
+import datetime
+from operator import itemgetter
+
+# Science imports
+import numpy as np
+import matplotlib as mpl
+from matplotlib import pyplot as plt
+import netCDF4 as netcdf
+
+# CNR-IMAA specific imports
+import milos
+
+
+netcdf_format = 'NETCDF3_CLASSIC' # choose one of 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC' and 'NETCDF4'
+
+class BaseLidarMeasurement():
+    """ This is the general measurement object.
+    It is meant to become a general measurement object 
+    independent of the input files.
+    
+    Each subclass should implement the following:
+    * the import_file method.
+    * set the "extra_netcdf_parameters" variable to a dictionary that includes the appropriate parameters.
+    
+    You can override the get_PT method to define a custom procedure to get ground temperature and pressure.
+    The one implemented by default is by using the MILOS meteorological station data. 
+    
+    """
+    
+    def __init__(self, filelist= None):
+        self.info = {}
+        self.dimensions = {}
+        self.variables =  {}
+        self.channels = {}
+        self.attributes = {}
+        self.files = []
+        self.dark_measurement = None
+        if filelist:
+            self.import_files(filelist)
+        
+    def import_files(self,filelist):
+        for f in filelist:
+            self.import_file(f)
+        self.update()
+
+    def import_file(self,filename):
+        raise NotImplementedError('Importing files should be defined in the instrument-specific subclass.')
+
+    def update(self):
+        '''
+        Update the the info, variables and dimensions of the lidar measurement based 
+        on the information found in the channels.
+        
+        Reading of the scan_angles parameter is not implemented.
+        '''
+        
+        # Initialize
+        start_time =[]
+        stop_time = []
+        points = []
+        all_time_scales = []
+        channel_name_list = []
+        
+        # Get the information from all the channels
+        for channel_name, channel in self.channels.items():
+            channel.update()
+            start_time.append(channel.start_time)
+            stop_time.append(channel.stop_time)
+            points.append(channel.points)
+            all_time_scales.append(channel.time)
+            channel_name_list.append(channel_name)
+            
+        # Find the unique time scales used in the channels
+        time_scales = set(all_time_scales)
+        
+        # Update the info dictionary
+        self.info['start_time'] = min(start_time)
+        self.info['stop_time'] = max(stop_time)
+        self.info['duration'] = self.info['stop_time'] - self.info['start_time']
+        
+        # Update the dimensions dictionary
+        self.dimensions['points'] = max(points)
+        self.dimensions['channels'] = len(self.channels)
+        # self.dimensions['scan angles'] = 1
+        self.dimensions['nb_of_time_scales'] = len(time_scales)
+        
+        # Update the variables dictionary
+        # Write time scales in seconds
+        raw_Data_Start_Time = []
+        raw_Data_Stop_Time = []
+        
+        for current_time_scale in list(time_scales): 
+            raw_start_time = np.array(current_time_scale) - min(start_time) # Time since start_time
+            raw_start_in_seconds = np.array([t.seconds for t in raw_start_time]) # Convert in seconds
+            raw_Data_Start_Time.append(raw_start_in_seconds) # And add to the list
+            # Check if this time scale has measurements every 30 or 60 seconds.
+            
+            duration = self._get_duration(raw_start_in_seconds)
+            
+            raw_stop_in_seconds = raw_start_in_seconds + duration
+            raw_Data_Stop_Time.append(raw_stop_in_seconds)
+            
+        self.variables['Raw_Data_Start_Time']= raw_Data_Start_Time
+        self.variables['Raw_Data_Stop_Time']= raw_Data_Stop_Time
+    
+        # Make a dictionary to match time scales and channels
+        channel_timescales = []
+        for (channel_name, current_time_scale) in zip(channel_name_list, all_time_scales):
+            # The following lines are PEARL specific. The reason they are here is not clear.
+            # if channel_name =='1064BLR':
+            #     channel_name = '1064'
+            for (ts,n) in zip(time_scales, range(len(time_scales))):
+                if current_time_scale == ts:
+                    channel_timescales.append([channel_name,n])
+        self.variables['id_timescale'] = dict(channel_timescales)
+    
+    def _get_duration(self, raw_start_in_seconds):
+        ''' Return the duration for a given time scale. In some files (ex. Licel) this
+        can be specified from the files themselves. In others this must be guessed.
+         
+        '''
+        # The old method, kept here for reference
+        #dt = np.mean(np.diff(raw_start_in_seconds))
+        #for d in duration_list:
+        #    if abs(dt - d) <15: #If the difference of measuremetns is 10s near the(30 or 60) seconds
+        #        duration = d
+        
+        duration = np.diff(raw_start_in_seconds)[0]
+
+        return duration
+    
+    def subset_by_channels(self, channel_subset):
+        ''' Get a measurement object containing only the channels with names
+        contained in the channel_sublet list '''
+        
+        m = self.__class__() # Create an object of the same type as this one.
+        m.channels = dict([(channel, self.channels[channel]) for channel 
+                            in channel_subset])
+        m.update()
+        return m
+        
+    def subset_by_time(self, start_time, stop_time):
+    
+        if start_time > stop_time:
+            raise ValueError('Stop time should be after start time')
+            
+        if (start_time < self.info['start_time']) or (stop_time > self.info['stop_time']):
+            raise ValueError('The time interval specified is not part of the measurement')
+
+        m = self.__class__() # Create an object of the same type as this one.
+        for (channel_name, channel)  in self.channels.items():
+            m.channels[channel_name] = channel.subset_by_time(start_time, stop_time)
+        m.update()
+        return m
+    
+    def r_plot(self):
+        #Make a basic plot of the data.
+        #Should include some dictionary with params to make plot stable.
+        pass 
+    
+    def r_pdf(self):
+        # Create a pdf report using a basic plot and meta-data.
+        pass
+    
+    def save(self):
+        #Save the current state of the object to continue the analysis later.
+        pass
+        
+    def get_PT(self):
+        ''' Sets the pressure and temperature at station level .
+        The results are stored in the info dictionary.        
+        '''
+    
+        self.info['Temperature'] = 10.0
+        self.info['Pressure'] = 930.0
+        
+
+    def save_as_netcdf(self, filename):
+        """Saves the measurement in the netcdf format as required by the SCC.
+        Input: filename
+        """
+        params = self.extra_netcdf_parameters
+        needed_parameters = ['Measurement_ID', 'Temperature', 'Pressure']
+        
+        for parameter in needed_parameters:
+            stored_value = self.info.get(parameter, None)
+            if stored_value is None:
+                raise ValueError('A value needs to be specified for %s' % parameter)
+        
+                
+        dimensions = {'points': 1,
+                'channels': 1,
+                'time': None,
+                'nb_of_time_scales': 1,
+                'scan_angles': 1,}  # Mandatory dimensions. Time bck not implemented
+
+        global_att = {'Measurement_ID': None,
+                    'RawData_Start_Date': None,
+                    'RawData_Start_Time_UT': None,
+                    'RawData_Stop_Time_UT': None,
+                    'RawBck_Start_Date': None,
+                    'RawBck_Start_Time_UT': None,
+                    'RawBck_Stop_Time_UT': None,
+                    'Sounding_File_Name': None,
+                    'LR_File_Name': None,
+                    'Overlap_File_Name': None,
+                    'Location': None,
+                    'System': None,
+                    'Latitude_degrees_north': None,
+                    'Longitude_degrees_east': None,
+                    'Altitude_meter_asl': None}
+
+        channel_variables = \
+                    {'channel_ID': (('channels', ), 'i'),
+                    'Background_Low': (('channels', ), 'd'),
+                    'Background_High': (('channels', ), 'd'),
+                    'LR_Input': (('channels', ), 'i'),
+                    'DAQ_Range': (('channels', ), 'd'), 
+                    'Depolarization_Factor': (('channels', ), 'd'), }
+                    
+        
+        channels = self.channels.keys()
+
+        input_values = dict(self.dimensions, **self.variables)
+        
+        # Add some mandatory global attributes
+        input_values['Measurement_ID'] = self.info['Measurement_ID']
+        input_values['RawData_Start_Date'] = '\'%s\'' % self.info['start_time'].strftime('%Y%m%d')
+        input_values['RawData_Start_Time_UT'] = '\'%s\'' % self.info['start_time'].strftime('%H%M%S')
+        input_values['RawData_Stop_Time_UT'] = '\'%s\'' % self.info['stop_time'].strftime('%H%M%S')
+        
+        # Add some optional global attributes
+        input_values['System'] = params.general_parameters['System']
+        input_values['Latitude_degrees_north'] = params.general_parameters['Latitude_degrees_north']
+        input_values['Longitude_degrees_east'] = params.general_parameters['Longitude_degrees_east']
+        input_values['Altitude_meter_asl'] = params.general_parameters['Altitude_meter_asl']
+        
+        # Open a netCDF4 file
+        f = netcdf.Dataset(filename,'w', format = netcdf_format) # the format is specified in the begining of the file
+        
+        # Create the dimensions in the file
+        for (d,v) in dimensions.iteritems():
+            v = input_values.pop(d, v)
+            f.createDimension(d,v)
+        
+        # Create global attributes
+        for (attrib,value) in global_att.iteritems():
+            val = input_values.pop(attrib,value)
+            if val: 
+                exec('f.%s = %s' % (attrib,val))
+        
+        """ Variables """
+        # Write the values of fixes channel parameters
+        for (var,t) in channel_variables.iteritems():
+            temp_v = f.createVariable(var,t[1],t[0])
+            for (channel, n) in zip(channels, range(len(channels))):
+                temp_v[n] = params.channel_parameters[channel][var]
+        
+        # Write the id_timescale values
+        temp_id_timescale = f.createVariable('id_timescale','i',('channels',))
+        for (channel, n) in zip(channels, range(len(channels))):
+            temp_id_timescale[n] = self.variables['id_timescale'][channel]
+        
+        # Laser pointing angle
+        temp_v = f.createVariable('Laser_Pointing_Angle','d',('scan_angles',))
+        temp_v[:] = params.general_parameters['Laser_Pointing_Angle']
+        
+        # Molecular calculation
+        temp_v = f.createVariable('Molecular_Calc','i')
+        temp_v[:] = params.general_parameters['Molecular_Calc']    
+
+        # Laser pointing angles of profiles
+        temp_v = f.createVariable('Laser_Pointing_Angle_of_Profiles','i',('time','nb_of_time_scales'))
+        for (time_scale,n) in zip(self.variables['Raw_Data_Start_Time'],
+                                  range(len(self.variables['Raw_Data_Start_Time']))):
+            temp_v[:len(time_scale), n] = 0  # The lidar has only one laser pointing angle
+        
+        # Raw data start/stop time
+        temp_raw_start = f.createVariable('Raw_Data_Start_Time','i',('time','nb_of_time_scales'))
+        temp_raw_stop = f.createVariable('Raw_Data_Stop_Time','i',('time','nb_of_time_scales'))
+        for (start_time, stop_time,n) in zip(self.variables['Raw_Data_Start_Time'],
+                                             self.variables['Raw_Data_Stop_Time'],
+                                             range(len(self.variables['Raw_Data_Start_Time']))):
+            temp_raw_start[:len(start_time),n] = start_time
+            temp_raw_stop[:len(stop_time),n] = stop_time
+
+        #Laser shots
+        temp_v =  f.createVariable('Laser_Shots','i',('time','channels'))
+        for (channel,n) in zip(channels, range(len(channels))):
+            time_length = len(self.variables['Raw_Data_Start_Time'][self.variables['id_timescale'][channel]])
+            temp_v[:time_length, n] = params.channel_parameters[channel]['Laser_Shots']
+        
+        #Raw lidar data
+        temp_v = f.createVariable('Raw_Lidar_Data','d',('time', 'channels','points'))
+        for (channel,n) in zip(channels, range(len(channels))):
+            c = self.channels[channel]
+            temp_v[:len(c.time),n, :c.points] = c.matrix
+        
+        self.add_dark_measurements_to_netcdf(f, channels)
+            
+        #Pressure at lidar station
+        temp_v = f.createVariable('Pressure_at_Lidar_Station','d')
+        temp_v[:] = self.info['Pressure']
+
+        #Temperature at lidar station
+        temp_v = f.createVariable('Temperature_at_Lidar_Station','d')
+        temp_v[:] = self.info['Temperature']   
+        
+        self.save_netcdf_extra(f)
+        f.close()
+   
+    def add_dark_measurements_to_netcdf(self, f, channels):
+
+        # Get dark measurements. If it is not given in self.dark_measurement
+        # try to get it using the get_dark_measurements method. If none is found
+        # return without adding something.
+        if self.dark_measurement is None:
+            self.dark_measurement = self.get_dark_measurements()
+        
+        if self.dark_measurement is None:
+            return
+        
+        dark_measurement = self.dark_measurement
+        
+        # Calculate the length of the time_bck dimensions
+        number_of_profiles = [len(c.time) for c in dark_measurement.channels.values()]
+        max_number_of_profiles = np.max(number_of_profiles)
+        
+        # Create the dimension
+        f.createDimension('time_bck', max_number_of_profiles)
+         
+        # Save the dark measurement data
+        temp_v = f.createVariable('Background_Profile','d',('time_bck', 'channels', 'points'))
+        for (channel,n) in zip(channels, range(len(channels))):
+            c = dark_measurement.channels[channel]
+            temp_v[:len(c.time),n, :c.points] = c.matrix
+        
+        # Dark profile start/stop time
+        temp_raw_start = f.createVariable('Raw_Bck_Start_Time','i',('time','nb_of_time_scales'))
+        temp_raw_stop = f.createVariable('Raw_Bck_Stop_Time','i',('time','nb_of_time_scales'))
+        for (start_time, stop_time,n) in zip(dark_measurement.variables['Raw_Data_Start_Time'],
+                                             dark_measurement.variables['Raw_Data_Stop_Time'],
+                                             range(len(dark_measurement.variables['Raw_Data_Start_Time']))):
+            temp_raw_start[:len(start_time),n] = start_time
+            temp_raw_stop[:len(stop_time),n] = stop_time
+        
+        # Dark measurement start/stop time
+        f.RawBck_Start_Date = dark_measurement.info['start_time'].strftime('%Y%m%d')
+        f.RawBck_Start_Time_UT = dark_measurement.info['start_time'].strftime('%H%M%S')
+        f.RawBck_Stop_Time_UT = dark_measurement.info['stop_time'].strftime('%H%M%S')
+
+
+
+    def save_netcdf_extra(self, f):
+        pass
+        
+    def _gettime(self, date_str, time_str):        
+        t = datetime.datetime.strptime(date_str+time_str,'%d/%m/%Y%H.%M.%S')
+        return t
+    
+    def plot(self):
+        for channel in self.channels:
+            self.channels[channel].plot(show_plot = False)
+        plt.show()
+    
+    def get_dark_measurements(self):
+        return None
+
+
+class Lidar_channel:
+
+    def __init__(self,channel_parameters):
+        c = 299792458 #Speed of light
+        self.wavelength = channel_parameters['name']
+        self.name = str(self.wavelength)
+        self.binwidth = float(channel_parameters['binwidth']) # in microseconds
+        self.data = {}
+        self.resolution = self.binwidth * c / 2
+        self.z = np.arange(len(channel_parameters['data'])) * self.resolution + self.resolution/2.0 # Change: add half bin in the z 
+        self.points = len(channel_parameters['data'])
+        self.rc = []
+        self.duration  = 60
+        
+    def calculate_rc(self):
+        background = np.mean(self.matrix[:,4000:], axis = 1) #Calculate the background from 30000m and above
+        self.rc = (self.matrix.transpose()- background).transpose() * (self.z **2)
+        
+    
+    def update(self):
+        self.start_time = min(self.data.keys())
+        self.stop_time = max(self.data.keys()) + datetime.timedelta(seconds = self.duration)
+        self.time = tuple(sorted(self.data.keys()))
+        sorted_data = sorted(self.data.iteritems(), key=itemgetter(0))
+        self.matrix =  np.array(map(itemgetter(1),sorted_data))
+    
+    def _nearest_dt(self,dtime):
+        margin = datetime.timedelta(seconds = 300)        
+        if ((dtime + margin) < self.start_time)| ((dtime - margin)   > self.stop_time):
+            print "Requested date not covered in this file"
+            raise
+        dt = abs(self.time - np.array(dtime))
+        dtmin = min(dt)
+
+        if dtmin > datetime.timedelta(seconds = 60):
+            print "Nearest profile more than 60 seconds away. dt = %s." % dtmin
+        ind_t = np.where(dt == dtmin)
+        ind_a= ind_t[0]
+        if len(ind_a) > 1:
+            ind_a = ind_a[0]
+        chosen_time = self.time[ind_a]
+        return chosen_time, ind_a
+    
+    def subset_by_time(self, start_time, stop_time):
+        
+        time_array = np.array(self.time)
+        condition = (time_array >= start_time) & (time_array <= stop_time)
+        
+        subset_time = time_array[condition]
+        subset_data = dict([(c_time, self.data[c_time]) for c_time in subset_time])
+        
+        #Create a list with the values needed by channel's __init__()
+        parameters_values = {'name': self.wavelength, 
+                             'binwidth': self.binwidth, 
+                             'data': subset_data[subset_time[0]],}
+
+        c = Lidar_channel(parameters_values)
+        c.data = subset_data
+        c.update()
+        return c
+        
+    
+    def profile(self,dtime, signal_type = 'rc'):
+        t, idx = self._nearest_dt(dtime)
+        if signal_type == 'rc':
+            data = self.rc
+        else:
+            data = self.matrix
+        
+        prof = data[idx,:][0]
+        return prof, t
+
+    def get_slice(self, starttime, endtime, signal_type = 'rc'):
+        if signal_type == 'rc':
+            data = self.rc
+        else:
+            data = self.matrix
+        tim = np.array(self.time)
+        starttime = self._nearest_dt(starttime)[0]
+        endtime = self._nearest_dt(endtime)[0]
+        condition = (tim >= starttime) & (tim <= endtime)
+        sl  = data[condition, :]
+        t = tim[condition]
+        return sl,t
+    
+    def av_profile(self, tim, duration = datetime.timedelta(seconds = 0), signal_type = 'rc'):
+        starttime = tim - duration/2
+        endtime = tim + duration/2
+        d,t = self.get_slice(starttime, endtime, signal_type = signal_type)
+        prof = np.mean(d, axis = 0)
+        tmin = min(t)
+        tmax = max(t)
+        tav = tmin + (tmax-tmin)/2
+        return prof,(tav, tmin,tmax)
+       
+    def plot(self, signal_type = 'rc', filename = None, zoom = [0,12000,0,-1], show_plot = True, cmap = plt.cm.jet):
+        #if filename is not None:
+        #    matplotlib.use('Agg')
+        
+        fig = plt.figure()
+        ax1 = fig.add_subplot(111)
+        self.draw_plot(ax1, cmap = cmap, signal_type = signal_type, zoom = zoom)
+        ax1.set_title("%s signal - %s" % (signal_type.upper(), self.name)) 
+        
+        if filename is not None:
+            pass
+            #plt.savefig(filename)
+        else:
+            if show_plot:
+                plt.show()
+        #plt.close() ???
+    
+    def draw_plot(self,ax1, cmap = plt.cm.jet, signal_type = 'rc', zoom = [0,12000,0,-1]):
+        
+        if signal_type == 'rc':
+            if len(self.rc) == 0:
+                self.calculate_rc()
+            data = self.rc
+        else:
+            data = self.matrix
+        
+        hmax_idx = self.index_at_height(zoom[1])
+        
+        ax1.set_ylabel('Altitude (km)')
+        ax1.set_xlabel('Time UTC')
+        #y axis in km, xaxis /2 to make 30s measurements in minutes. Only for 1064
+        #dateFormatter = mpl.dates.DateFormatter('%H.%M')
+        #hourlocator = mpl.dates.HourLocator()
+        
+        #dayFormatter = mpl.dates.DateFormatter('\n\n%d/%m')
+        #daylocator = mpl.dates.DayLocator()       
+        hourFormatter = mpl.dates.DateFormatter('%H.%M')
+        hourlocator = mpl.dates.AutoDateLocator(interval_multiples=True)
+
+        
+        #ax1.axes.xaxis.set_major_formatter(dayFormatter)
+        #ax1.axes.xaxis.set_major_locator(daylocator)
+        ax1.axes.xaxis.set_major_formatter(hourFormatter)
+        ax1.axes.xaxis.set_major_locator(hourlocator)
+
+        
+        ts1 = mpl.dates.date2num(self.start_time)
+        ts2 = mpl.dates.date2num(self.stop_time)
+        
+        
+        im1 = ax1.imshow(data.transpose()[zoom[0]:hmax_idx,zoom[2]:zoom[3]],
+            aspect = 'auto',
+            origin = 'lower',
+            cmap = cmap,
+            #vmin = 0,
+            vmin = data[:,10:400].max() * 0.1,
+            #vmax = 1.4*10**7,
+            vmax = data[:,10:400].max() * 0.9,
+            extent = [ts1,ts2,self.z[zoom[0]]/1000.0, self.z[hmax_idx]/1000.0],
+            )
+        
+        cb1 = plt.colorbar(im1)
+        cb1.ax.set_ylabel('a.u.')
+
+    def index_at_height(self, height):
+        idx = np.array(np.abs(self.z - height).argmin())
+        if idx.size >1:
+            idx =idx[0]
+        return idx
+    
+def netcdf_from_files(LidarClass, filename, files, channels, measurement_ID):
+    #Read the lidar files and select channels
+    temp_m = LidarClass(files)
+    m = temp_m.subset_by_channels(channels)
+    m.get_PT()
+    m.info['Measurement_ID'] = measurement_ID
+    m.save_as_netcdf(filename)
+