Added save_as_csv method to Licel files.

Wed, 25 Mar 2020 11:51:35 +0200

author
ioannis <ioannis@ioannis-VirtualBox>
date
Wed, 25 Mar 2020 11:51:35 +0200
changeset 194
809190df0dc8
parent 193
bea35be85f99
child 195
3e652f8063d4

Added save_as_csv method to Licel files.

Channels and Photodiodes are now ordered dictionaries.

atmospheric_lidar/__init__.py file | annotate | diff | comparison | revisions
atmospheric_lidar/generic.py file | annotate | diff | comparison | revisions
atmospheric_lidar/licel.py file | annotate | diff | comparison | revisions
atmospheric_lidar/raymetrics.py file | annotate | diff | comparison | revisions
changelog.rst file | annotate | diff | comparison | revisions
--- a/atmospheric_lidar/__init__.py	Sun Mar 15 16:45:04 2020 +0200
+++ b/atmospheric_lidar/__init__.py	Wed Mar 25 11:51:35 2020 +0200
@@ -1,1 +1,1 @@
-__version__ = '0.4.4'
\ No newline at end of file
+__version__ = '0.5.0'
\ No newline at end of file
--- a/atmospheric_lidar/generic.py	Sun Mar 15 16:45:04 2020 +0200
+++ b/atmospheric_lidar/generic.py	Wed Mar 25 11:51:35 2020 +0200
@@ -2,6 +2,7 @@
 import logging
 from operator import itemgetter
 import itertools
+import collections
 
 import matplotlib as mpl
 import netCDF4 as netcdf
@@ -40,7 +41,7 @@
         self.info = {}
         self.dimensions = {}
         self.variables = {}
-        self.channels = {}
+        self.channels = collections.OrderedDict()
         self.attributes = {}
         self.files = []
         self.dark_measurement = None
--- a/atmospheric_lidar/licel.py	Sun Mar 15 16:45:04 2020 +0200
+++ b/atmospheric_lidar/licel.py	Wed Mar 25 11:51:35 2020 +0200
@@ -2,6 +2,7 @@
 import logging
 import copy
 import os
+import collections
 
 import numpy as np
 import pytz
@@ -132,7 +133,7 @@
 
         In addition, some ancillary variables are also calculated (z, dz, number_of_bins).
         """
-        
+
         norm = self.raw_data / float(self.number_of_shots)
         dz = self.bin_width
 
@@ -141,13 +142,15 @@
             ADCrange = self.discriminator  # Discriminator value already in mV
 
             if self.is_photodiode and (self.adcbits == 0):
-                logger.info("Assuming adcbits equal 1. This is a bug in current licel format when storing photodiode data.")
+                logger.info(
+                    "Assuming adcbits equal 1. This is a bug in current licel format when storing photodiode data.")
                 channel_data = norm * ADCrange / (2 ** self.adcbits)
             else:
-                channel_data = norm * ADCrange / ((2 ** self.adcbits) - 1)  # Licel LabView code has a bug (does not account -1).
+                channel_data = norm * ADCrange / (
+                            (2 ** self.adcbits) - 1)  # Licel LabView code has a bug (does not account -1).
 
         else:
-             channel_data = norm * self.number_of_shots
+            channel_data = norm * self.number_of_shots
 
         # Calculate Z
         self.z = np.array([dz * bin_number + dz / 2.0 for bin_number in range(self.data_points)])
@@ -158,7 +161,6 @@
     def is_analog(self):
         return self.analog_photon == '0'
 
-
     @property
     def laser_shots(self):
         """ Alias for number_of_shots """
@@ -210,6 +212,8 @@
         self.stop_time = None
         self.licel_timezone = licel_timezone
 
+        self.header_lines = []  # Store raw header lines, to be used in save_as_txt
+
         if import_now:
             self.import_file()
         else:
@@ -218,8 +222,8 @@
     def import_file(self):
         """ Read the header info and data of the Licel file.
         """
-        channels = {}
-        photodiodes = {}
+        channels = collections.OrderedDict()
+        photodiodes = collections.OrderedDict()
 
         with open(self.file_path, 'rb') as f:
 
@@ -276,7 +280,9 @@
         channel_info = []
 
         # Read first line
-        raw_info['Filename'] = f.readline().decode().strip()
+        first_line = f.readline().decode().strip()
+        raw_info['Filename'] = first_line
+        self.header_lines.append(first_line)
 
         raw_info.update(self._read_second_header_line(f))
 
@@ -303,7 +309,9 @@
 
         # Read the rest of the header.
         for c1 in range(int(raw_info['number_of_datasets'])):
-            channel_info.append(self.match_lines(f.readline().decode(), self.licel_file_channel_format))
+            channel_line = f.readline().decode()
+            channel_info.append(self.match_lines(channel_line, self.licel_file_channel_format))
+            self.header_lines.append(channel_line.strip())
 
         self.raw_info = raw_info
         self.channel_info = channel_info
@@ -349,6 +357,7 @@
         raw_info = {}
 
         second_line = f.readline().decode()
+        self.header_lines.append(second_line.strip())
         # Many Licel files don't follow the licel standard. Specifically, the
         # measurement site is not always 8 characters, and can include white
         # spaces. For this, the site name is detect everything before the first
@@ -367,6 +376,8 @@
         """ Read the rest of the header lines, after line 2. """
         # Read third line
         third_line = f.readline().decode()
+        self.header_lines.append(third_line.strip())
+
         raw_dict = self.match_lines(third_line, self.licel_file_header_format[2])
         return raw_dict
 
@@ -400,6 +411,7 @@
 
     @staticmethod
     def match_lines(f1, f2):
+        # TODO: Change this to regex?
         list1 = f1.split()
         list2 = f2.split()
 
@@ -412,6 +424,76 @@
         combined = dict(combined)
         return combined
 
+    def save_as_csv(self, file_path=None, fill_value=-999):
+        """ Save the Licel file in csv format.
+
+        The format roughly follows the txt files created by Licel software. There are two main differences:
+        a) Channel names are used as headers.
+        b) Photon-counting data are given in shots, not in MHz.
+
+        Parameters
+        ----------
+        file_path : str or None
+           The output file path. If None, the input file path with ".csv" appended is used.
+        fill_value : float
+           A fill value to be used in case of different length columns, e.g. when saving photodiode data.
+
+        Returns
+        -------
+        file_path : str
+           The file path that was written. This is useful when input file_path is None.
+        """
+        if file_path is None:
+            file_path = self.file_path + ".csv"
+
+        # Collect column names and data: channels first, then photodiodes
+        column_names = []
+        column_data = []
+
+        for name, channel in self.channels.items():
+            if channel.is_analog:
+                column_name = "{0} (mV)".format(name)
+            else:
+                column_name = "{0} (counts)".format(name)
+
+            column_names.append(column_name)
+            column_data.append(channel.data)
+
+        for name, photodiode in self.photodiodes.items():
+            if 'PD' not in name:
+                name = 'PD_' + name
+
+            column_names.append(name)
+            column_data.append(photodiode.data)
+
+        column_data = self._common_length_array(column_data, fill_value)
+
+        header_text = '\n'.join(self.header_lines) + '\n'
+        column_header = ', '.join(column_names)
+
+        np.savetxt(file_path, column_data.T,  fmt='%.4f', delimiter=',', header=header_text + column_header, comments='')
+
+        return file_path
+
+    @staticmethod
+    def _common_length_array(array_list, fill_value):
+        """ Stack several 1D arrays of, possibly, different length into one 2D array padded with fill_value. """
+
+        lengths = [len(a) for a in array_list]
+
+        if len(set(lengths)) == 1:
+            output_array = np.array(array_list)
+        else:
+            dimensions = (len(lengths), max(lengths))
+            output_array = np.ma.masked_all(dimensions)
+
+            for n, array in enumerate(array_list):
+                output_array[n, :len(array)] = array
+            # filled() returns a new array (it is not in-place); keep it so the padding holds fill_value.
+            output_array = output_array.filled(fill_value)
+
+        return output_array
+
 
 class LicelChannel(LidarChannel):
 
@@ -503,7 +585,6 @@
 
 
 class LicelLidarMeasurement(BaseLidarMeasurement):
-
     file_class = LicelFile
     channel_class = LicelChannel
     photodiode_class = PhotodiodeChannel
@@ -516,7 +597,7 @@
         self.use_id_as_name = use_id_as_name
         self.get_name_by_order = get_name_by_order
         self.licel_timezone = licel_timezone
-        self.photodiodes = {}
+        self.photodiodes = collections.OrderedDict()
 
         super(LicelLidarMeasurement, self).__init__(file_list)
 
@@ -581,7 +662,8 @@
             if channel.is_analog:
                 unique_values = list(set(channel.discriminator))
                 if len(unique_values) > 1:
-                    logger.warning('More than one discriminator levels for channel {0}: {1}'.format(channel_name, unique_values))
+                    logger.warning(
+                        'More than one discriminator levels for channel {0}: {1}'.format(channel_name, unique_values))
                 daq_ranges[n] = unique_values[0]
 
         laser_shots = []
@@ -682,4 +764,4 @@
 
 
 class LicelDivaLidarMeasurement(DivaConverterMixin, LicelLidarMeasurement):
-    pass
\ No newline at end of file
+    pass
--- a/atmospheric_lidar/raymetrics.py	Sun Mar 15 16:45:04 2020 +0200
+++ b/atmospheric_lidar/raymetrics.py	Wed Mar 25 11:51:35 2020 +0200
@@ -86,9 +86,13 @@
         raw_info = {}
 
         third_line = f.readline().decode()
+        self.header_lines.append(third_line.strip())
+
         raw_info.update(self.match_lines(third_line, self.licel_file_header_format[2]))
 
         fourth_line = f.readline().decode()
+        self.header_lines.append(fourth_line.strip())
+
         raw_info.update(self.match_lines(fourth_line, self.licel_file_header_format[3]))
         return raw_info
 
--- a/changelog.rst	Sun Mar 15 16:45:04 2020 +0200
+++ b/changelog.rst	Wed Mar 25 11:51:35 2020 +0200
@@ -4,7 +4,18 @@
 Unreleased
 ----------
 
-0.4.3 - 2020‑03‑15
+0.5.0 - 2020‑03‑25
+------------------
+Added
+~~~~~
+- Save as csv method on LicelFiles
+
+Changed
+~~~~~~~
+- Changes channel dictionaries to OrderedDict
+
+
+0.4.4 - 2020‑03‑15
 ------------------
 Fixed
 ~~~~~

mercurial