Mercurial > public > scc_access / changeset

--- a/scc_access/scc_access.py	Fri May 21 16:27:08 2021 +0300
+++ b/scc_access/scc_access.py	Mon May 24 11:43:31 2021 +0300
@@ -1,3 +1,17 @@
+""" This is a script that allows interaction with the SCC through the command line.
+
+It is based on the requests module for accessing the server.
+
+Most of the interactions are done through the web interface, i.e. by mimicking user interaction with the
+SCC website (e.g. user login, data submission, etc.). In a few cases, the SCC API is also used.
+
+Most of the functionality is included in the SCC class. The class is used to log in to the SCC website and automate
+interaction with the site (e.g. upload a file, get measurement status, etc.).
+
+Two other classes (Measurement, AncillaryFile) are used in some cases to handle the output of the SCC API.
+
+Several shortcut functions are defined to perform specific tasks using the SCC class (e.g. process_file, delete_measurements etc).
+"""
 import sys

 import requests
@@ -31,11 +45,11 @@


 class SCC:
-    """A simple class that will attempt to upload a file on the SCC server.
+    """A class that will attempt to interact with the SCC server.

-    The uploading is done by simulating a normal browser session. In the current
-    version no check is performed, and no feedback is given if the upload
-    was successful. If everything is setup correctly, it will work.
+    Most interactions are performed by simulating a normal browser session. In the current
+    version, few checks are performed before uploading a file, and no feedback is given in case the upload
+    fails.
     """

     def __init__(self, auth, output_dir, base_url):
@@ -47,6 +61,7 @@
         self.session.auth = auth
         self.session.verify = False

+        # Set up SCC server URLs for later use
         self.login_url = urlparse.urljoin(self.base_url, 'accounts/login/')
         self.logout_url = urlparse.urljoin(self.base_url, 'accounts/logout/')
         self.list_measurements_url = urlparse.urljoin(self.base_url, 'data_processing/measurements/')
@@ -54,20 +69,21 @@
         self.upload_url = urlparse.urljoin(self.base_url, 'data_processing/measurements/quick/')
         self.measurement_page_pattern = urlparse.urljoin(self.base_url, 'data_processing/measurements/{0}/')
         self.download_hirelpp_pattern = urlparse.urljoin(self.base_url,
-                                                             'data_processing/measurements/{0}/download-hirelpp/')
+                                                         'data_processing/measurements/{0}/download-hirelpp/')
         self.download_cloudmask_pattern = urlparse.urljoin(self.base_url,
-                                                         'data_processing/measurements/{0}/download-cloudmask/')
+                                                           'data_processing/measurements/{0}/download-cloudmask/')

         self.download_elpp_pattern = urlparse.urljoin(self.base_url,
-                                                              'data_processing/measurements/{0}/download-preprocessed/')
+                                                      'data_processing/measurements/{0}/download-preprocessed/')
         self.download_elda_pattern = urlparse.urljoin(self.base_url,
-                                                         'data_processing/measurements/{0}/download-optical/')
+                                                      'data_processing/measurements/{0}/download-optical/')
         self.download_plots_pattern = urlparse.urljoin(self.base_url,
                                                        'data_processing/measurements/{0}/download-plots/')
         self.download_elic_pattern = urlparse.urljoin(self.base_url,
-                                                       'data_processing/measurements/{0}/download-elic/')
+                                                      'data_processing/measurements/{0}/download-elic/')
         self.delete_measurement_pattern = urlparse.urljoin(self.base_url, 'admin/database/measurements/{0}/delete/')

+        # Set up API URLs for later use
         self.api_base_url = urlparse.urljoin(self.base_url, 'api/v1/')
         self.api_measurement_pattern = urlparse.urljoin(self.api_base_url, 'measurements/{0}/')
         self.api_measurements_url = urlparse.urljoin(self.api_base_url, 'measurements')
@@ -76,14 +92,20 @@
         self.api_overlap_search_pattern = urlparse.urljoin(self.api_base_url, 'overlap_files/?filename={0}')

     def login(self, credentials):
-        """ Login to SCC. """
+        """ Login to the SCC.
+
+        Parameters
+        ----------
+        credentials : tuple or list
+           A list or tuple in the form (username, password).
+        """
         logger.debug("Attempting to login to SCC, username %s." % credentials[0])
         login_credentials = {'username': credentials[0],
                              'password': credentials[1]}

         logger.debug("Accessing login page at %s." % self.login_url)

-        # Get upload form
+        # Get login form
         login_page = self.session.get(self.login_url)

         if not login_page.ok:
@@ -98,14 +120,35 @@
         return login_submit

     def logout(self):
-        """ Logout from SCC """
+        """ Logout from the SCC """
         return self.session.get(self.logout_url, stream=True)

-    def upload_file(self, filename, system_id, force_upload, delete_related, delay=0, rs_filename=None, ov_filename=None, lr_filename=None):
-        """ Upload a filename for processing with a specific system. If the
-        upload is successful, it returns the measurement id. """
+    def upload_file(self, filename, system_id, force_upload, delete_related, delay=0, rs_filename=None,
+                    ov_filename=None, lr_filename=None):
+        """ Upload a file for processing.
+
+        If the upload is successful, it returns the measurement id.
+
+
+        Parameters
+        ----------
+        filename : str
+           File path of the file to upload
+        system_id : int
+           System id to be used in the processing
+        force_upload : bool
+           If True and a measurement with the same ID is found on the server, it will first be deleted and the
+           current file will be uploaded. If False, the file will not be uploaded if the measurement ID is
+           already present on the SCC server.
+        delete_related : bool
+           Answer to delete related question when deleting existing measurements from the SCC.
+        rs_filename, ov_filename, lr_filename : str
+           Paths of the ancillary files to be uploaded.
+        """
+        # Get the measurement ID from the netcdf file
         measurement_id = self.measurement_id_from_file(filename)

+        # Handle possible existing measurements with the same ID on the SCC server.
         logger.debug('Checking if a measurement with the same id already exists on the SCC server.')
         existing_measurement, _ = self.get_measurement(measurement_id)

@@ -118,9 +161,12 @@
                 logger.error(
                     "Measurement with id {} already exists on the SCC. Use --force_upload flag to overwrite it.".format(
                         measurement_id))
-                # TODO: Implement handling at the proper place. This does not allow the SCC class to be used by external programs.
+                # TODO: Implement handling at the proper place. Exiting here does not allow the SCC class to be
+                # used by external programs. Instead an exception should be raised.
                 sys.exit(1)

+        # Upload the file(s)
+
         # Get submit page
         upload_page = self.session.get(self.upload_url)

@@ -132,11 +178,13 @@

         files = {'data': open(filename, 'rb')}

+        # Add ancillary files to be uploaded
         if rs_filename is not None:
             ancillary_file, _ = self.get_ancillary(rs_filename, 'sounding')

             if ancillary_file.already_on_scc:
-                logger.warning("Sounding file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file))
+                logger.warning(
+                    "Sounding file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file))
             else:
                 logger.debug('Adding sounding file %s' % rs_filename)
                 files['sounding_file'] = open(rs_filename, 'rb')
@@ -145,7 +193,8 @@
             ancillary_file, _ = self.get_ancillary(ov_filename, 'overlap')

             if ancillary_file.already_on_scc:
-                logger.warning("Overlap file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file))
+                logger.warning(
+                    "Overlap file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file))
             else:
                 logger.debug('Adding overlap file %s' % ov_filename)
                 files['overlap_file'] = open(ov_filename, 'rb')
@@ -155,11 +204,13 @@

             if ancillary_file.already_on_scc:
                 logger.warning(
-                    "Lidar ratio file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file))
+                    "Lidar ratio file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(
+                        ancillary_file))
             else:
                 logger.debug('Adding lidar ratio file %s' % lr_filename)
                 files['lidar_ratio_file'] = open(lr_filename, 'rb')

+        # Upload the files
         logger.info("Uploading of file %s started." % filename)

         upload_submit = self.session.post(self.upload_url,
@@ -172,19 +223,27 @@
             logger.warning("Connection error. Status code: %s" % upload_submit.status_code)
             return False

-        # Check if there was a redirect to a new page.
+        # Check if there was a redirect to a new page. If not, something went wrong
         if upload_submit.url == self.upload_url:
             measurement_id = False
             logger.error("Uploaded file(s) rejected! Try to upload manually to see the error.")
         else:
-            measurement_id = re.findall(regex, upload_submit.text)[0]
+            # TODO: Check if this is needed. This was used when the measurement ID was not read from the input file.
+            measurement_id = re.findall(regex, upload_submit.text)[0]  # Get the measurement ID from the output page
             logger.info("Successfully uploaded measurement with id %s." % measurement_id)
-            logger.info("You can monitor the processing progress online: {}".format(self.measurement_page_pattern.format(measurement_id)))
+            logger.info("You can monitor the processing progress online: {}".format(
+                self.measurement_page_pattern.format(measurement_id)))
         return measurement_id

     @staticmethod
     def measurement_id_from_file(filename):
-        """ Get the measurement id from the input file. """
+        """ Get the measurement id from the input file.
+
+        Parameters
+        ----------
+        filename : str
+           File path of the input file.
+        """

         if not os.path.isfile(filename):
             logger.error("File {} does not exist.".format(filename))
@@ -203,8 +262,10 @@

     def download_files(self, measurement_id, subdir, download_url):
         """ Downloads some files from the download_url to the specified
-        subdir. This method is used to download preprocessed file, optical
-        files etc.
+        subdir.
+
+        This is a general method used by other, file-specific, methods to download
+        preprocessed files, optical files etc.
         """
         # TODO: Make downloading more robust (e.g. in case that files do not exist on server).
         # Get the file
@@ -345,7 +406,8 @@
             if monitor:
                 self.monitor_processing(measurement_id)

-    def process(self, filename, system_id, monitor,  force_upload, delete_related, delay=0, rs_filename=None, lr_filename=None, ov_filename=None):
+    def process(self, filename, system_id, monitor, force_upload, delete_related, delay=0, rs_filename=None,
+                lr_filename=None, ov_filename=None):
         """ Upload a file for processing and wait for the processing to finish.
         If the processing is successful, it will download all produced files.
         """
@@ -375,7 +437,7 @@
         attempts_count = 0
         max_attempts = retry_max + 1

-        # try to wait for measurement to appear in API
+        # try to wait for measurement to appear in API. A user has reported that this does not happen immediately.
         measurement = None
         logger.info("Looking for measurement %s on the SCC.", measurement_id)

@@ -424,7 +486,7 @@
         if measurement.elic == 127:
             logger.info("Downloading ELIC files.")
             self.download_elic(measurement_id)
-        if measurement.is_calibration and measurement.eldec==0:
+        if measurement.is_calibration and measurement.eldec == 0:
             logger.info("Downloading ELDEC files.")
             self.download_eldec(measurement_id)
         logger.info("--- Processing finished. ---")
@@ -432,10 +494,24 @@
         return measurement

     def get_measurement(self, measurement_id):
+        """ Get information about a measurement from the SCC API.

-        if measurement_id is None:  # Is this still required?
+        Parameters
+        ----------
+        measurement_id : str
+           The measurement ID to search.
+
+        Returns
+        -------
+        : Measurement object or None
+           If the measurement is found, a Measurement object is returned. If not, it returns None
+        """
+        # TODO: Consider homogenizing with the get_ancillary method (i.e. always return a Measurement object).
+
+        if measurement_id is None:  # TODO: Is this still required?
             return None

+        # Access the API
         measurement_url = self.api_measurement_pattern.format(measurement_id)
         logger.debug("Measurement API URL: %s" % measurement_url)

@@ -461,7 +537,7 @@

     def delete_measurement(self, measurement_id, delete_related):
         """ Deletes a measurement with the provided measurement id. The user
-        should have the appropriate permissions.
+        should have the appropriate permissions (i.e. access to the admin site).

         The procedures is performed directly through the web interface and
         NOT through the API.
@@ -506,7 +582,10 @@
         return True

     def available_measurements(self):
-        """ Get a list of available measurement on the SCC. """
+        """ Get a list of available measurement on the SCC.
+
+        This method is currently not used; it could be merged with list_measurements.
+        """
         response = self.session.get(self.api_measurements_url)
         response_dict = response.json()

@@ -521,8 +600,9 @@
         return measurements

     def list_measurements(self, id_exact=None, id_startswith=None):
+        """ Get the response text from the API. """

-        # TODO: Change this to work through the API
+        # TODO: Add some error handling, e.g. as per available_measurements method

         # Need to set to empty string if not specified, we won't get any results
         params = {}
@@ -540,6 +620,7 @@
         """ Give the first available measurement id on the SCC for the specific
         date.
         """
+        # TODO: Check if this method needs updating to handle all measurement_ID formats.
         date_str = t1.strftime('%Y%m%d')
         base_id = "%s%s" % (date_str, call_sign)
         search_url = urlparse.urljoin(self.api_base_url, 'measurements/?id__startswith=%s' % base_id)
@@ -742,6 +823,7 @@
 class AncillaryFile(ApiObject):
     """ This class represents the ancilalry file object as returned in the SCC API.
     """
+
     @property
     def already_on_scc(self):
         if self.exists is False:
@@ -755,6 +837,7 @@
                                self.status)


+# Functions that use the SCC class to perform specific tasks.
 def process_file(filename, system_id, settings, force_upload, delete_related,
                  delay=0, monitor=True, rs_filename=None, lr_filename=None, ov_filename=None):
     """ Shortcut function to process a file to the SCC. """
@@ -881,6 +964,7 @@

 def setup_upload_file(parser):
     """ Upload but do not monitor processing progress. """
+
     def upload_file_from_args(parsed):
         process_file(parsed.filename, parsed.system, parsed.config,
                      delay=parsed.delay,
@@ -933,8 +1017,10 @@
         download_measurements(parsed.IDs, parsed.max_retries, parsed.ignore_errors, parsed.config)

     parser.add_argument("IDs", help="Measurement IDs that should be downloaded.", nargs="+")
-    parser.add_argument("--max_retries", help="Number of times to retry in cases of missing measurement id.", default=0, type=int)
-    parser.add_argument("--ignore_errors", help="Ignore errors when downloading multiple measurements.", action="store_false")
+    parser.add_argument("--max_retries", help="Number of times to retry in cases of missing measurement id.", default=0,
+                        type=int)
+    parser.add_argument("--ignore_errors", help="Ignore errors when downloading multiple measurements.",
+                        action="store_false")
     parser.set_defaults(execute=download_measurements_from_args)


@@ -944,10 +1030,12 @@
     subparsers = parser.add_subparsers()

     delete_parser = subparsers.add_parser("delete", help="Deletes a measurement.")
-    rerun_all_parser = subparsers.add_parser("rerun-all", help="Rerun all processing steps for the provided measurement IDs.")
+    rerun_all_parser = subparsers.add_parser("rerun-all",
+                                             help="Rerun all processing steps for the provided measurement IDs.")
     rerun_processing_parser = subparsers.add_parser("rerun-elpp",
                                                     help="Rerun low-resolution processing steps for the provided measurement ID.")
-    upload_file_parser = subparsers.add_parser("upload-file", help="Submit a file and, optionally, download the output products.")
+    upload_file_parser = subparsers.add_parser("upload-file",
+                                               help="Submit a file and, optionally, download the output products.")
     list_parser = subparsers.add_parser("list", help="List measurements registered on the SCC.")
     download_parser = subparsers.add_parser("download", help="Download selected measurements.")