SCC access: comparison scc_access/scc

-:58006f895f8a
+:0106aeed80d8
+""" This is a script that allows interaction with the SCC through the command line.
+It is based on the requests module for accessing the server.
+Most of the interactions are done through the web interface, i.e. by mimicking user interaction with the
+SCC website (e.g. user login, data submission, etc.). In a few cases, the SCC API is also used.
+Most of the functionality is included in the SCC class. The class is used to log in to the SCC website and automate
+interaction with the site (e.g. upload a file, get measurement status, etc.).
+Two other classes (Measurement, AncillaryFile) are used in some cases to handle the output of the SCC API.
+Several shortcut functions are defined to perform specific tasks using the SCC class (e.g. process_file, delete_measurements etc).
+"""
 import sys
 import requests
 try:
 # This should be read from the uploaded file, but would require an extra NetCDF module.
 regex = "<h3>Measurement (?P<measurement_id>.{12,15}) <small>"  # {12, 15} to handle both old- and new-style measurement ids.
 class SCC:
-"""A simple class that will attempt to upload a file on the SCC server.
+"""A  class that will attempt to interact SCC server.
-The uploading is done by simulating a normal browser session. In the current
+Most interactions are performed by simulating a normal browser session. In the current
-version no check is performed, and no feedback is given if the upload
+version few checks are performed before uploading a file, and no feedback is given in case the upload
-was successful. If everything is setup correctly, it will work.
+fails.
 """
 def __init__(self, auth, output_dir, base_url):
 self.auth = auth
 self.base_url = base_url
 self.session = requests.Session()
 self.session.auth = auth
 self.session.verify = False
+# Setup SCC server URLS for later use
 self.login_url = urlparse.urljoin(self.base_url, 'accounts/login/')
 self.logout_url = urlparse.urljoin(self.base_url, 'accounts/logout/')
 self.list_measurements_url = urlparse.urljoin(self.base_url, 'data_processing/measurements/')
 self.upload_url = urlparse.urljoin(self.base_url, 'data_processing/measurements/quick/')
 self.measurement_page_pattern = urlparse.urljoin(self.base_url, 'data_processing/measurements/{0}/')
 self.download_hirelpp_pattern = urlparse.urljoin(self.base_url,
 'data_processing/measurements/{0}/download-hirelpp/')
 self.download_cloudmask_pattern = urlparse.urljoin(self.base_url,
 'data_processing/measurements/{0}/download-cloudmask/')
 self.download_elpp_pattern = urlparse.urljoin(self.base_url,
 'data_processing/measurements/{0}/download-preprocessed/')
 self.download_elda_pattern = urlparse.urljoin(self.base_url,
 'data_processing/measurements/{0}/download-optical/')
 self.download_plots_pattern = urlparse.urljoin(self.base_url,
 'data_processing/measurements/{0}/download-plots/')
 self.download_elic_pattern = urlparse.urljoin(self.base_url,
 'data_processing/measurements/{0}/download-elic/')
 self.delete_measurement_pattern = urlparse.urljoin(self.base_url, 'admin/database/measurements/{0}/delete/')
+# Setup API URLs for later use
 self.api_base_url = urlparse.urljoin(self.base_url, 'api/v1/')
 self.api_measurement_pattern = urlparse.urljoin(self.api_base_url, 'measurements/{0}/')
 self.api_measurements_url = urlparse.urljoin(self.api_base_url, 'measurements')
 self.api_sounding_search_pattern = urlparse.urljoin(self.api_base_url, 'sounding_files/?filename={0}')
 self.api_lidarratio_search_pattern = urlparse.urljoin(self.api_base_url, 'lidarratio_files/?filename={0}')
 self.api_overlap_search_pattern = urlparse.urljoin(self.api_base_url, 'overlap_files/?filename={0}')
 def login(self, credentials):
-""" Login to SCC. """
+""" Login to the SCC.
+Parameters
+----------
+credentials : tuple or list
+A list or tuple in the form (username, password).
+"""
 logger.debug("Attempting to login to SCC, username %s." % credentials[0])
 login_credentials = {'username': credentials[0],
 'password': credentials[1]}
 logger.debug("Accessing login page at %s." % self.login_url)
-# Get upload form
+# Get login form
 login_page = self.session.get(self.login_url)
 if not login_page.ok:
 raise self.PageNotAccessibleError('Could not access login pages. Status code %s' % login_page.status_code)
 headers={'X-CSRFToken': login_page.cookies['csrftoken'],
 'referer': self.login_url})
 return login_submit
 def logout(self):
-""" Logout from SCC """
+""" Logout from the SCC """
 return self.session.get(self.logout_url, stream=True)
-def upload_file(self, filename, system_id, force_upload, delete_related, delay=0, rs_filename=None, ov_filename=None, lr_filename=None):
+def upload_file(self, filename, system_id, force_upload, delete_related, delay=0, rs_filename=None,
-""" Upload a filename for processing with a specific system. If the
+ov_filename=None, lr_filename=None):
-upload is successful, it returns the measurement id. """
+""" Upload a file for processing.
+If the upload is successful, it returns the measurement id.
+Parameters
+----------
+filename : str
+File path of the file to upload
+system_id : int
+System id to be used in the processing
+force_upload : bool
+If True and a measurement with the same ID is found on the server, it will first be deleted and the
+current file will then be uploaded. If False, the file will not be uploaded if the measurement ID is
+already present on the SCC server.
+delete_related : bool
+Answer to delete related question when deleting existing measurements from the SCC.
+rs_filename, ov_filename, lr_filename : str
+Paths of the ancillary files to be uploaded.
+"""
+# Get the measurement ID from the netcdf file
 measurement_id = self.measurement_id_from_file(filename)
+# Handle possible existing measurements with the same ID on the SCC server.
 logger.debug('Checking if a measurement with the same id already exists on the SCC server.')
 existing_measurement, _ = self.get_measurement(measurement_id)
 if existing_measurement:
 if force_upload:
 self.delete_measurement(measurement_id, delete_related)
 else:
 logger.error(
 "Measurement with id {} already exists on the SCC. Use --force_upload flag to overwrite it.".format(
 measurement_id))
-# TODO: Implement handling at the proper place. This does not allow the SCC class to be used by external programs.
+# TODO: Implement handling at the proper place. Exiting here does not allow the SCC class to be
+# used by external programs. Instead an exception should be raised.
 sys.exit(1)
+# Upload the file(s)
 # Get submit page
 upload_page = self.session.get(self.upload_url)
 # Submit the data
 logger.debug("Submitted processing parameters - System: {}, Delay: {}".format(system_id, delay))
 files = {'data': open(filename, 'rb')}
+# Add ancillary files to be uploaded
 if rs_filename is not None:
 ancillary_file, _ = self.get_ancillary(rs_filename, 'sounding')
 if ancillary_file.already_on_scc:
-logger.warning("Sounding file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file))
+logger.warning(
+"Sounding file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file))
 else:
 logger.debug('Adding sounding file %s' % rs_filename)
 files['sounding_file'] = open(rs_filename, 'rb')
 if ov_filename is not None:
 ancillary_file, _ = self.get_ancillary(ov_filename, 'overlap')
 if ancillary_file.already_on_scc:
-logger.warning("Overlap file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file))
+logger.warning(
+"Overlap file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file))
 else:
 logger.debug('Adding overlap file %s' % ov_filename)
 files['overlap_file'] = open(ov_filename, 'rb')
 if lr_filename is not None:
 ancillary_file, _ = self.get_ancillary(lr_filename, 'lidarratio')
 if ancillary_file.already_on_scc:
 logger.warning(
-"Lidar ratio file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file))
+"Lidar ratio file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(
+ancillary_file))
 else:
 logger.debug('Adding lidar ratio file %s' % lr_filename)
 files['lidar_ratio_file'] = open(lr_filename, 'rb')
+# Upload the files
 logger.info("Uploading of file %s started." % filename)
 upload_submit = self.session.post(self.upload_url,
 data=upload_data,
 files=files,
 if upload_submit.status_code != 200:
 logger.warning("Connection error. Status code: %s" % upload_submit.status_code)
 return False
-# Check if there was a redirect to a new page.
+# Check if there was a redirect to a new page. If not, something went wrong
 if upload_submit.url == self.upload_url:
 measurement_id = False
 logger.error("Uploaded file(s) rejected! Try to upload manually to see the error.")
 else:
-measurement_id = re.findall(regex, upload_submit.text)[0]
+# TODO: Check if this is needed. This was used when the measurement ID was not read from the input file.
+measurement_id = re.findall(regex, upload_submit.text)[0]  # Get the measurement ID from the output page
 logger.info("Successfully uploaded measurement with id %s." % measurement_id)
-logger.info("You can monitor the processing progress online: {}".format(self.measurement_page_pattern.format(measurement_id)))
+logger.info("You can monitor the processing progress online: {}".format(
+self.measurement_page_pattern.format(measurement_id)))
 return measurement_id
 @staticmethod
 def measurement_id_from_file(filename):
-""" Get the measurement id from the input file. """
+""" Get the measurement id from the input file.
+Parameters
+----------
+filename : str
+File path of the input file.
+"""
 if not os.path.isfile(filename):
 logger.error("File {} does not exist.".format(filename))
 sys.exit(1)
 return measurement_id
 def download_files(self, measurement_id, subdir, download_url):
 """ Downloads some files from the download_url to the specified
-subdir. This method is used to download preprocessed file, optical
+subdir.
-files etc.
+This is a general method, used by other file-specific methods to download
+preprocessed files, optical files, etc.
 """
 # TODO: Make downloading more robust (e.g. in case that files do not exist on server).
 # Get the file
 request = self.session.get(download_url, stream=True)
 logger.info("Rerun-all command submitted successfully for id {}.".format(measurement_id))
 if monitor:
 self.monitor_processing(measurement_id)
-def process(self, filename, system_id, monitor,  force_upload, delete_related, delay=0, rs_filename=None, lr_filename=None, ov_filename=None):
+def process(self, filename, system_id, monitor, force_upload, delete_related, delay=0, rs_filename=None,
+lr_filename=None, ov_filename=None):
 """ Upload a file for processing and wait for the processing to finish.
 If the processing is successful, it will download all produced files.
 """
 logger.info("--- Processing started on %s. ---" % datetime.datetime.now())
 # Upload file
 # try to deal with error 404
 attempts_count = 0
 max_attempts = retry_max + 1
-# try to wait for measurement to appear in API
+# try to wait for measurement to appear in API. A user has reported that this does not happen immediately.
 measurement = None
 logger.info("Looking for measurement %s on the SCC.", measurement_id)
 while attempts_count < max_attempts:
 attempts_count += 1
 logger.info("Downloading ELDA plots.")
 self.download_plots(measurement_id)
 if measurement.elic == 127:
 logger.info("Downloading ELIC files.")
 self.download_elic(measurement_id)
-if measurement.is_calibration and measurement.eldec==0:
+if measurement.is_calibration and measurement.eldec == 0:
 logger.info("Downloading ELDEC files.")
 self.download_eldec(measurement_id)
 logger.info("--- Processing finished. ---")
 return measurement
 def get_measurement(self, measurement_id):
+""" Get a measurement information from the SCC API.
-if measurement_id is None:  # Is this still required?
+Parameters
+----------
+measurement_id : str
+The measurement ID to search.
+Returns
+-------
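+measurement : Measurement object or None
+If the measurement is found, a Measurement object is returned. If not, it is None.
+status_code : int
+The HTTP status code of the API response.
+Note: if measurement_id is None, a single None is returned instead of this pair.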
+"""
+# TODO: Consider to homogenize with get_ancillary method (i.e. always return a Measurement object).
+if measurement_id is None:  # TODO: Is this still required?
 return None
+# Access the API
 measurement_url = self.api_measurement_pattern.format(measurement_id)
 logger.debug("Measurement API URL: %s" % measurement_url)
 response = self.session.get(measurement_url)
 return measurement, response.status_code
 def delete_measurement(self, measurement_id, delete_related):
 """ Deletes a measurement with the provided measurement id. The user
-should have the appropriate permissions.
+should have the appropriate permissions (i.e. access to the admin site).
 The procedure is performed directly through the web interface and
 NOT through the API.
 """
 # Get the measurement object
 logger.info("Deleted measurement {0}".format(measurement_id))
 return True
 def available_measurements(self):
-""" Get a list of available measurement on the SCC. """
+""" Get a list of available measurement on the SCC.
+This method is currently not used; it could be merged with list_measurements.
+"""
 response = self.session.get(self.api_measurements_url)
 response_dict = response.json()
 if response_dict:
 measurement_list = response_dict['objects']
 measurements = None
 return measurements
 def list_measurements(self, id_exact=None, id_startswith=None):
+""" Get the response text from the API. """
-# TODO: Change this to work through the API
+# TODO: Add some error handling, e.g. as per available_measurements method
 # Need to set to empty string if not specified, we won't get any results
 params = {}
 if id_exact is not None:
 def measurement_id_for_date(self, t1, call_sign, base_number=0):
 """ Give the first available measurement id on the SCC for the specific
 date.
 """
+# TODO: Check if this method needs updating to handle all measurement_ID formats.
 date_str = t1.strftime('%Y%m%d')
 base_id = "%s%s" % (date_str, call_sign)
 search_url = urlparse.urljoin(self.api_base_url, 'measurements/?id__startswith=%s' % base_id)
 response = self.session.get(search_url)
 class AncillaryFile(ApiObject):
 """ This class represents the ancilalry file object as returned in the SCC API.
 """
 @property
 def already_on_scc(self):
 if self.exists is False:
 return False
 return "%s: %s, %s" % (self.id,
 self.filename,
 self.status)
+# Methods that use the SCC class to perform specific tasks.
 def process_file(filename, system_id, settings, force_upload, delete_related,
 delay=0, monitor=True, rs_filename=None, lr_filename=None, ov_filename=None):
 """ Shortcut function to process a file to the SCC. """
 logger.info("Processing file %s, using system %s" % (filename, system_id))
 parser.set_defaults(execute=rerun_processing_from_args)
 def setup_upload_file(parser):
 """ Upload but do not monitor processing progress. """
 def upload_file_from_args(parsed):
 process_file(parsed.filename, parsed.system, parsed.config,
 delay=parsed.delay,
 monitor=parsed.process,
 force_upload=parsed.force_upload,
 def setup_download_measurements(parser):
 def download_measurements_from_args(parsed):
 download_measurements(parsed.IDs, parsed.max_retries, parsed.ignore_errors, parsed.config)
 parser.add_argument("IDs", help="Measurement IDs that should be downloaded.", nargs="+")
-parser.add_argument("--max_retries", help="Number of times to retry in cases of missing measurement id.", default=0, type=int)
+parser.add_argument("--max_retries", help="Number of times to retry in cases of missing measurement id.", default=0,
-parser.add_argument("--ignore_errors", help="Ignore errors when downloading multiple measurements.", action="store_false")
+type=int)
+parser.add_argument("--ignore_errors", help="Ignore errors when downloading multiple measurements.",
+action="store_false")
 parser.set_defaults(execute=download_measurements_from_args)
 def main():
 # Define the command line arguments.
 parser = argparse.ArgumentParser()
 subparsers = parser.add_subparsers()
 delete_parser = subparsers.add_parser("delete", help="Deletes a measurement.")
-rerun_all_parser = subparsers.add_parser("rerun-all", help="Rerun all processing steps for the provided measurement IDs.")
+rerun_all_parser = subparsers.add_parser("rerun-all",
+help="Rerun all processing steps for the provided measurement IDs.")
 rerun_processing_parser = subparsers.add_parser("rerun-elpp",
 help="Rerun low-resolution processing steps for the provided measurement ID.")
-upload_file_parser = subparsers.add_parser("upload-file", help="Submit a file and, optionally, download the output products.")
+upload_file_parser = subparsers.add_parser("upload-file",
+help="Submit a file and, optionally, download the output products.")
 list_parser = subparsers.add_parser("list", help="List measurements registered on the SCC.")
 download_parser = subparsers.add_parser("download", help="Download selected measurements.")
 setup_delete(delete_parser)
 setup_rerun_all(rerun_all_parser)

comparison: scc_access/scc_access.py

scc_access/scc_access.py

Mercurial > public > scc_access / file comparison

comparison: scc_access/scc_access.py

scc_access/scc_access.py