atmospheric_lidar/generic.py

changeset 116
d9703af687aa
parent 103
e954eecc217f
child 117
44a43b0e452f
--- a/atmospheric_lidar/generic.py	Thu Jan 04 14:47:36 2018 +0200
+++ b/atmospheric_lidar/generic.py	Sat Jan 06 10:08:46 2018 +0200
@@ -1,6 +1,7 @@
 import datetime
 import logging
 from operator import itemgetter
+import itertools
 
 import matplotlib as mpl
 import netCDF4 as netcdf
@@ -87,7 +88,7 @@
         stop_time = []
         points = []
         all_time_scales = []
-        channel_name_list = []
+        channel_names = []
 
         # Get the information from all the channels
         for channel_name, channel in self.channels.items():
@@ -96,7 +97,7 @@
             stop_time.append(channel.stop_time)
             points.append(channel.points)
             all_time_scales.append(channel.time)
-            channel_name_list.append(channel_name)
+            channel_names.append(channel_name)
 
         # Find the unique time scales used in the channels
         time_scales = set(all_time_scales)
@@ -112,6 +113,17 @@
         # self.dimensions['scan angles'] = 1
         self.dimensions['nb_of_time_scales'] = len(time_scales)
 
+        # Make a dictionaries to match time scales and channels
+        channel_timescales = {}
+        timescale_channels = dict((ts, []) for ts in time_scales)
+        for (channel_name, current_time_scale) in zip(channel_names, all_time_scales):
+            for (ts, n) in zip(time_scales, range(len(time_scales))):
+                if current_time_scale == ts:
+                    channel_timescales[channel_name] = n
+                    timescale_channels[ts].append(channel_name)
+
+        self.variables['id_timescale'] = channel_timescales
+
         # Update the variables dictionary
         # Write time scales in seconds
         raw_data_start_time = []
@@ -122,44 +134,16 @@
             raw_start_in_seconds = np.array([t.seconds for t in raw_start_time])  # Convert in seconds
             raw_data_start_time.append(raw_start_in_seconds)  # And add to the list
 
-            duration = self._get_duration(raw_start_in_seconds)
+            channel_name = timescale_channels[current_time_scale][0]
+            channel = self.channels[channel_name]
 
             # TODO: Define stop time for each measurement based on real data
-            raw_stop_in_seconds = raw_start_in_seconds + duration
+            raw_stop_in_seconds = raw_start_in_seconds + channel.get_duration()
             raw_data_stop_time.append(raw_stop_in_seconds)
 
         self.variables['Raw_Data_Start_Time'] = raw_data_start_time
         self.variables['Raw_Data_Stop_Time'] = raw_data_stop_time
 
-        # Make a dictionary to match time scales and channels
-        channel_timescales = []
-        for (channel_name, current_time_scale) in zip(channel_name_list, all_time_scales):
-            for (ts, n) in zip(time_scales, range(len(time_scales))):
-                if current_time_scale == ts:
-                    channel_timescales.append([channel_name, n])
-        self.variables['id_timescale'] = dict(channel_timescales)
-
-    def _get_duration(self, raw_start_in_seconds):
-        """ 
-        Return the duration for a given time scale. 
-       
-        In some files (e.g. Licel) this can be specified from the files themselves. 
-        In others this must be guessed.
-        
-        Parameters
-        ----------
-        raw_start_in_seconds : array
-           An array of floats, representing the start time of each measurement profile.
-        
-        Returns
-        -------
-        duration : float
-           Guess of the duration of each bin in the timescale
-        """
-        duration = np.diff(raw_start_in_seconds)[0]
-
-        return duration
-
     def subset_by_channels(self, channel_subset):
         """ 
         Create a measurement object containing only a subset of  channels. 
@@ -205,8 +189,8 @@
         common_channels = list(set(scc_channels).intersection(self.channels.keys()))
 
         if not common_channels:
-            logging.debug("Config channels: %s." % ','.join(scc_channels))
-            logging.debug("Licel channels: %s." % ','.join(self.channels.keys()))
+            logger.debug("Config channels: %s." % ','.join(scc_channels))
+            logger.debug("Licel channels: %s." % ','.join(self.channels.keys()))
             raise ValueError('No common channels between licel and configuration files.')
 
         return self.subset_by_channels(common_channels)
@@ -362,20 +346,20 @@
         filename : str
            Output file name. If None, <measurement_id>.nc will be used. 
         """
-        params = self.extra_netcdf_parameters
+        parameters = self.extra_netcdf_parameters
 
         # Guess measurement ID if none is provided
         if 'Measurement_ID' not in self.info:
             self.set_measurement_id()
 
         # Check if temperature and pressure are defined
-        for parameter in ['Temperature', 'Pressure']:
-            stored_value = self.info.get(parameter, None)
+        for attribute in ['Temperature', 'Pressure']:
+            stored_value = self.info.get(attribute, None)
             if stored_value is None:
                 try:
                     self.set_PT()
                 except:
-                    raise ValueError('A value needs to be specified for %s' % parameter)
+                    raise ValueError('No value specified for %s' % attribute)
 
         if not filename:
             filename = "%s.nc" % self.info['Measurement_ID']
@@ -388,25 +372,23 @@
                       'nb_of_time_scales': 1,
                       'scan_angles': 1, }  # Mandatory dimensions. Time bck not implemented
 
-        global_att = {'Measurement_ID': None,
-                      'RawData_Start_Date': None,
-                      'RawData_Start_Time_UT': None,
-                      'RawData_Stop_Time_UT': None,
-                      'RawBck_Start_Date': None,
-                      'RawBck_Start_Time_UT': None,
-                      'RawBck_Stop_Time_UT': None,
-                      'Sounding_File_Name': None,
-                      'LR_File_Name': None,
-                      'Overlap_File_Name': None,
-                      'Location': None,
-                      'System': None,
-                      'Latitude_degrees_north': None,
-                      'Longitude_degrees_east': None,
-                      'Altitude_meter_asl': None}
+        global_attributes = {'Measurement_ID': None,
+                             'RawData_Start_Date': None,
+                             'RawData_Start_Time_UT': None,
+                             'RawData_Stop_Time_UT': None,
+                             'RawBck_Start_Date': None,
+                             'RawBck_Start_Time_UT': None,
+                             'RawBck_Stop_Time_UT': None,
+                             'Sounding_File_Name': None,
+                             'LR_File_Name': None,
+                             'Overlap_File_Name': None,
+                             'Location': None,
+                             'System': None,
+                             'Latitude_degrees_north': None,
+                             'Longitude_degrees_east': None,
+                             'Altitude_meter_asl': None}
 
-        channel_variables = self._get_scc_mandatory_channel_variables()
-
-        channels = self.channels.keys()
+        channel_names = self.channels.keys()
 
         input_values = dict(self.dimensions, **self.variables)
 
@@ -416,16 +398,17 @@
         input_values['RawData_Start_Time_UT'] = self.info['start_time'].strftime('%H%M%S')
         input_values['RawData_Stop_Time_UT'] = self.info['stop_time'].strftime('%H%M%S')
 
-        # Add some optional global attributes
-        input_values['System'] = params.general_parameters['System']
-        input_values['Latitude_degrees_north'] = params.general_parameters['Latitude_degrees_north']
-        input_values['Longitude_degrees_east'] = params.general_parameters['Longitude_degrees_east']
-        input_values['Altitude_meter_asl'] = params.general_parameters['Altitude_meter_asl']
+        # Add any global attributes provided by the subclass
+        for attribute in self._get_custom_global_attributes():
+            input_values[attribute["name"]] = attribute["value"]
 
-        # Override general parameters with those provided by any subclass
-        # in a custom fashion
-        for param in self.get_custom_general_parameters():
-            input_values[param["name"]] = param["value"]
+        # Override global attributes, if provided in the settings file.
+        for attribute_name in ['System', 'Latitude_degrees_north', 'Longitude_degrees_east', 'Altitude_meter_asl']:
+            if attribute_name in parameters.general_parameters.keys():
+                if attribute_name in input_values:
+                    logger.info("Overriding {0} attribute, using the value provided in the parameter file.".format(
+                        attribute_name))
+                input_values[attribute_name] = parameters.general_parameters[attribute_name]
 
         # Open a netCDF file. The format is specified in the beginning of this module.
         with netcdf.Dataset(filename, 'w', format=NETCDF_FORMAT) as f:
@@ -436,69 +419,70 @@
                 f.createDimension(d, v)
 
             # Create global attributes
-            for (attrib, value) in global_att.iteritems():
+            for (attrib, value) in global_attributes.iteritems():
                 val = input_values.pop(attrib, value)
                 if val:
                     setattr(f, attrib, val)
 
             # Variables
-
             # Write either channel_id or string_channel_id in the file
-            first_channel_keys = params.channel_parameters.items()[0][1].keys()
+            first_channel_keys = parameters.channel_parameters.items()[0][1].keys()
             if "channel_ID" in first_channel_keys:
                 channel_var = 'channel_ID'
                 variable_type = 'i'
-            elif "channel string ID" in first_channel_keys:
-                channel_var = 'channel string ID'
+            elif "channel_string_ID" in first_channel_keys:
+                channel_var = 'channel_string_ID'
                 variable_type = str
             else:
                 raise ValueError('Channel parameters should define either "chanel_id" or "channel_string_ID".')
 
             temp_v = f.createVariable(channel_var, variable_type, ('channels',))
-            for n, channel in enumerate(channels):
-                temp_v[n] = params.channel_parameters[channel][channel_var]
+            for n, channel in enumerate(channel_names):
+                temp_v[n] = parameters.channel_parameters[channel][channel_var]
 
-            # Write the custom subclass parameters:
-            for param in self.get_custom_channel_parameters():
-                temp_v = f.createVariable(param["name"], param["type"], param["dimensions"])
-
-                for (value, n) in zip(param["values"], range(len(param["values"]))):
-                    temp_v[n] = value
+            # Write the custom subclass variables:
+            for variable in self._get_custom_variables(channel_names):
+                temp_v = f.createVariable(variable["name"], variable["type"], variable["dimensions"])
+                temp_v[:] = variable["values"]
 
             # Write the values of fixed channel parameters:
-            fill_value = -9999
-            for param in self._get_provided_extra_parameters():
-                if param in channel_variables.keys():
-                    try:
-                        temp_v = f.createVariable(param, channel_variables[param][1], channel_variables[param][0])
-                    except RuntimeError:
-                        logging.warning("NetCDF variable \"%s\" ignored because it was read from the input files!" % param)
-                        continue
-                else:
+            channel_variable_specs = self._get_scc_channel_variables()
+
+            for variable_name in self._get_provided_variable_names():
+
+                # Check if the variable already defined (e.g. from values in Licel files).
+                if variable_name in f.variables.keys():
+                    logger.warning(
+                        "Provided values of \"{0}\" were ignored because they were read from other source.".format(
+                            variable_name))
+                    continue
+
+                if variable_name not in channel_variable_specs.keys():
+                    logger.warning("Provided variable {0} is not parts of the specs: {1}".format(variable_name, channel_variable_specs.keys()))
+                    continue
+
+                temp_v = f.createVariable(variable_name,
+                                          channel_variable_specs[variable_name][1],
+                                          channel_variable_specs[variable_name][0])
+
+                for n, channel_name in enumerate(channel_names):
                     try:
-                        temp_v = f.createVariable(param, 'd', ('channels',), fill_value=fill_value)
-                    except RuntimeError:
-                        logging.warning("NetCDF variable \"%s\" ignored because it was read from the input files!" % param)
-                        continue
-
-                for (channel, n) in zip(channels, range(len(channels))):
-                    try:
-                        temp_v[n] = params.channel_parameters[channel][param]
+                        temp_v[n] = parameters.channel_parameters[channel_name][variable_name]
                     except KeyError:  # The parameter was not provided for this channel so we mask the value.
-                        temp_v[n] = fill_value
+                        pass  # Default value should be a missing value  # TODO: Check this.
 
             # Write the id_timescale values
             temp_id_timescale = f.createVariable('id_timescale', 'i', ('channels',))
-            for (channel, n) in zip(channels, range(len(channels))):
+            for (channel, n) in zip(channel_names, range(len(channel_names))):
                 temp_id_timescale[n] = self.variables['id_timescale'][channel]
 
             # Laser pointing angle
             temp_v = f.createVariable('Laser_Pointing_Angle', 'd', ('scan_angles',))
-            temp_v[:] = params.general_parameters['Laser_Pointing_Angle']
+            temp_v[:] = parameters.general_parameters['Laser_Pointing_Angle']
 
             # Molecular calculation
             temp_v = f.createVariable('Molecular_Calc', 'i')
-            temp_v[:] = params.general_parameters['Molecular_Calc']
+            temp_v[:] = parameters.general_parameters['Molecular_Calc']
 
             # Laser pointing angles of profiles
             temp_v = f.createVariable('Laser_Pointing_Angle_of_Profiles', 'i', ('time', 'nb_of_time_scales'))
@@ -516,22 +500,23 @@
                 temp_raw_stop[:len(stop_time), n] = stop_time
 
             # Laser shots
-            try:
+            if "Laser_Shots" in f.variables.keys():
+                logger.warning("Provided values of \"Laser_Shots\" were ignored because they were read from other source.")
+            else:
                 temp_v = f.createVariable('Laser_Shots', 'i', ('time', 'channels'))
-                for (channel, n) in zip(channels, range(len(channels))):
+                for (channel, n) in zip(channel_names, range(len(channel_names))):
                     time_length = len(self.variables['Raw_Data_Start_Time'][self.variables['id_timescale'][channel]])
-                    # Array slicing stoped working as usual ex. temp_v[:10] = 100 does not work. ??? np.ones was added.
-                    temp_v[:time_length, n] = np.ones(time_length) * params.channel_parameters[channel]['Laser_Shots']
-            except RuntimeError:
-                logging.warning("NetCDF variable \"%s\" ignored because it was read from the input files!" % "LaserShots")
+                    # Array slicing stopped working as usual ex. temp_v[:10] = 100 does not work. ??? np.ones was added.
+                    temp_v[:time_length, n] = np.ones(time_length) * parameters.channel_parameters[channel][
+                        'Laser_Shots']
 
             # Raw lidar data
             temp_v = f.createVariable('Raw_Lidar_Data', 'd', ('time', 'channels', 'points'))
-            for (channel, n) in zip(channels, range(len(channels))):
+            for (channel, n) in zip(channel_names, range(len(channel_names))):
                 c = self.channels[channel]
                 temp_v[:len(c.time), n, :c.points] = c.matrix
 
-            self.add_dark_measurements_to_netcdf(f, channels)
+            self.add_dark_measurements_to_netcdf(f, channel_names)
 
             # Pressure at lidar station
             temp_v = f.createVariable('Pressure_at_Lidar_Station', 'd')
@@ -543,42 +528,44 @@
 
             self.save_netcdf_extra(f)
 
-    def _get_scc_mandatory_channel_variables(self):
+    def _get_scc_channel_variables(self):
         channel_variables = \
             {'Background_Low': (('channels',), 'd'),
              'Background_High': (('channels',), 'd'),
+             'Laser_Repetition_Rate': (('channels',), 'i'),
+             'Scattering_Mechanism': (('channels',), 'i'),
+             'Signal_Type': (('channels',), 'i'),
+             'Emitted_Wavelength': (('channels',), 'd'),
+             'Detected_Wavelength': (('channels',), 'd'),
+             'Raw_Data_Range_Resolution': (('channels',), 'd'),
+             'Background_Mode': (('channels',), 'i'),
+             'Dead_Time_Corr_Type': (('channels',), 'i'),
+             'Dead_Time': (('channels',), 'd'),
+             'Acquisition_Mode': (('channels',), 'i'),
+             'Trigger_Delay': (('channels',), 'd'),
+             'Pol_Calib_Range_Min': (('channels',), 'i'),
+             'Pol_Calib_Range_Max': (('channels',), 'i'),
              'LR_Input': (('channels',), 'i'),
              'DAQ_Range': (('channels',), 'd'),
              }
         return channel_variables
 
-    def _get_provided_extra_parameters(self):
-        # When looking for non-mandatory channel parameters, ignore the following parameter names:
-        ignore = ['channel_ID', 'channel string ID', 'Depolarization_Factor', 'Laser_Shots']
+    def _get_provided_variable_names(self):
+        """ Return a list of """
+        # When looking for channel parameters, ignore the following parameter names:
+        ignore = {'channel_ID', 'channel_string_ID', 'Depolarization_Factor', 'Laser_Shots'}  # Set
 
         channels = self.channels.keys()
-        params = self.extra_netcdf_parameters.channel_parameters
-        mandatory = self._get_scc_mandatory_channel_variables()
+        channel_parameters = self.extra_netcdf_parameters.channel_parameters
 
-        # Get all the provided extra parameters (both mandatory and optional):
-        provided_extra_parameters = []
-        for (channel, n) in zip(channels, range(len(channels))):
-            # Check all the mandatory parameters are provided for each of the channels:
-            for var in mandatory.keys():
-                if var not in params[channel].keys():
-                    raise ValueError("Mandatory parameter '{0}' not provided for channel {1}!".format(
-                        var,
-                        channel
-                    ))
+        # Get all the provided parameters (both mandatory and optional):
+        all_provided_variables = [channel_parameters[channel].keys() for channel in channels]
+        provided_variables = set(itertools.chain.from_iterable(all_provided_variables))
 
-            provided_extra_parameters.extend(params[channel].keys())
+        # Discard certain parameter names:
+        provided_variables -= ignore
 
-        provided_extra_parameters = set(provided_extra_parameters)
-        # Discard certain parameter names:
-        for param in ignore:
-            provided_extra_parameters.discard(param)
-
-        return provided_extra_parameters
+        return provided_variables
 
     def add_dark_measurements_to_netcdf(self, f, channels):
         """
@@ -644,19 +631,18 @@
     def get_dark_measurements(self):
         return None
 
-    def get_custom_general_parameters(self):
+    def _get_custom_global_attributes(self):
         """
-        Abstract method to provide custom NetCDF parameters  that should be included 
+        Abstract method to provide NetCDF global attributes that should be included
         in the final NetCDF file.
         
         If required, this method should be implemented by subclasses of BaseLidarMeasurement.
         """
         return []
 
-    def get_custom_channel_parameters(self):
+    def _get_custom_variables(self, channel_names=None):
         """
-        Abstract method to provide custom NetCDF parameters for the channels in this 
-        measurement that should be included in the final NetCDF file. 
+        Abstract method to provide custom NetCDF variables that should be included in the final NetCDF file.
         
         If required, this method should be implemented by subclasses of BaseLidarMeasurement.
         """
@@ -693,7 +679,29 @@
             len(channel_parameters['data'])) * self.resolution + self.resolution / 2.0  # Change: add half bin in the z
         self.points = len(channel_parameters['data'])
         self.rc = []
-        self.duration = 60
+
+    def get_duration(self):
+        """ Get an array with the duration of each profile in seconds.
+
+        If the duration property already exists, returns this.
+        If not, it tries to estimate it based on the time difference of the profiles.
+
+        Returns
+        -------
+        duration : ndarray
+           1D array containing profile durations.
+        """
+
+        current_value = getattr(self, 'duration', None)
+
+        if current_value:
+            return np.array(current_value)
+
+        time_diff = np.diff(self.time)
+        durations = [dt.seconds for dt in time_diff]
+        # Assume the last two profiles have the same duration
+        duration = np.array(durations)
+        return duration
 
     def calculate_rc(self, idx_min=4000, idx_max=5000):
         """ Calculate range corrected signal.
@@ -717,7 +725,7 @@
         Update the time parameters and data according to the raw input data. 
         """
         self.start_time = min(self.data.keys())
-        self.stop_time = max(self.data.keys()) + datetime.timedelta(seconds=self.duration)
+        self.stop_time = max(self.data.keys()) + datetime.timedelta(seconds=self.duration[-1])
         self.time = tuple(sorted(self.data.keys()))
         sorted_data = sorted(self.data.iteritems(), key=itemgetter(0))
         self.matrix = np.array(map(itemgetter(1), sorted_data))
@@ -738,10 +746,12 @@
         profile_idx : int
            The index of the selected profile.
         """
-        margin = datetime.timedelta(seconds=self.duration * 5)
+        max_duration = np.max(self.duration)
+
+        margin = datetime.timedelta(seconds=max_duration * 5)
 
         if ((input_time + margin) < self.start_time) | ((input_time - margin) > self.stop_time):
-            logging.error("Requested date not covered in this file")
+            logger.error("Requested date not covered in this file")
             raise ValueError("Requested date not covered in this file")
         dt = np.abs(np.array(self.time) - input_time)
 
@@ -749,8 +759,8 @@
         profile_datetime = self.time[profile_idx]
 
         dt_min = dt[profile_idx]
-        if dt_min > datetime.timedelta(seconds=self.duration):
-            logging.warning("Nearest profile more than 60 seconds away. dt = %s." % dt_min)
+        if dt_min > datetime.timedelta(seconds=max_duration):
+            logger.warning("Nearest profile more than %s seconds away. dt = %s." % (max_duration, dt_min))
 
         return profile_datetime, profile_idx
 
@@ -1078,7 +1088,7 @@
             ax1.axes.xaxis.set_major_locator(hourlocator)
 
         # Get the values of the time axis
-        dt = datetime.timedelta(seconds=self.duration)
+        dt = datetime.timedelta(seconds=self.duration[-1])
 
         time_cut = self.time[zoom[2]:zoom[3]]
         time_last = time_cut[-1] + dt  # The last element needed for pcolormesh

mercurial