Mon, 24 May 2021 11:43:31 +0300
Slightly improved documentation.
scc_access/scc_access.py | file | annotate | diff | comparison | revisions |
--- a/scc_access/scc_access.py Fri May 21 16:27:08 2021 +0300 +++ b/scc_access/scc_access.py Mon May 24 11:43:31 2021 +0300 @@ -1,3 +1,17 @@ +""" This is a script that allows interaction with the SCC through the command line. + +It is based on the requests module for accessing the server. + +Most of the interactions are done through the web interface, i.e. by mimicking user interaction with the +SCC website (e.g. user login, data submission, etc.). In a few cases, the SCC API is also used. + +Most of the functionality is included in the SCC class. The class is used to log in to the SCC website and automate +interaction with the site (e.g. upload a file, get measurement status, etc.). + +Two other classes (Measurement, AncillaryFile) are used in some cases to handle the output of the SCC API. + +Several shortcut functions are defined to perform specific tasks using the SCC class (e.g. process_file, delete_measurements etc). +""" import sys import requests @@ -31,11 +45,11 @@ class SCC: - """A simple class that will attempt to upload a file on the SCC server. + """A class that will attempt to interact with the SCC server. - The uploading is done by simulating a normal browser session. In the current - version no check is performed, and no feedback is given if the upload - was successful. If everything is setup correctly, it will work. + Most interactions are done by simulating a normal browser session. In the current + version few checks are performed before uploading a file, and no feedback is given in case the upload + fails. 
""" def __init__(self, auth, output_dir, base_url): @@ -47,6 +61,7 @@ self.session.auth = auth self.session.verify = False + # Setup SCC server URLS for later use self.login_url = urlparse.urljoin(self.base_url, 'accounts/login/') self.logout_url = urlparse.urljoin(self.base_url, 'accounts/logout/') self.list_measurements_url = urlparse.urljoin(self.base_url, 'data_processing/measurements/') @@ -54,20 +69,21 @@ self.upload_url = urlparse.urljoin(self.base_url, 'data_processing/measurements/quick/') self.measurement_page_pattern = urlparse.urljoin(self.base_url, 'data_processing/measurements/{0}/') self.download_hirelpp_pattern = urlparse.urljoin(self.base_url, - 'data_processing/measurements/{0}/download-hirelpp/') + 'data_processing/measurements/{0}/download-hirelpp/') self.download_cloudmask_pattern = urlparse.urljoin(self.base_url, - 'data_processing/measurements/{0}/download-cloudmask/') + 'data_processing/measurements/{0}/download-cloudmask/') self.download_elpp_pattern = urlparse.urljoin(self.base_url, - 'data_processing/measurements/{0}/download-preprocessed/') + 'data_processing/measurements/{0}/download-preprocessed/') self.download_elda_pattern = urlparse.urljoin(self.base_url, - 'data_processing/measurements/{0}/download-optical/') + 'data_processing/measurements/{0}/download-optical/') self.download_plots_pattern = urlparse.urljoin(self.base_url, 'data_processing/measurements/{0}/download-plots/') self.download_elic_pattern = urlparse.urljoin(self.base_url, - 'data_processing/measurements/{0}/download-elic/') + 'data_processing/measurements/{0}/download-elic/') self.delete_measurement_pattern = urlparse.urljoin(self.base_url, 'admin/database/measurements/{0}/delete/') + # Setup API URLs for later use self.api_base_url = urlparse.urljoin(self.base_url, 'api/v1/') self.api_measurement_pattern = urlparse.urljoin(self.api_base_url, 'measurements/{0}/') self.api_measurements_url = urlparse.urljoin(self.api_base_url, 'measurements') @@ -76,14 +92,20 @@ 
self.api_overlap_search_pattern = urlparse.urljoin(self.api_base_url, 'overlap_files/?filename={0}') def login(self, credentials): - """ Login to SCC. """ + """ Login to the SCC. + + Parameters + ---------- + credentials : tuple or list + A list or tuple in the form (username, password). + """ logger.debug("Attempting to login to SCC, username %s." % credentials[0]) login_credentials = {'username': credentials[0], 'password': credentials[1]} logger.debug("Accessing login page at %s." % self.login_url) - # Get upload form + # Get login form login_page = self.session.get(self.login_url) if not login_page.ok: @@ -98,14 +120,35 @@ return login_submit def logout(self): - """ Logout from SCC """ + """ Logout from the SCC """ return self.session.get(self.logout_url, stream=True) - def upload_file(self, filename, system_id, force_upload, delete_related, delay=0, rs_filename=None, ov_filename=None, lr_filename=None): - """ Upload a filename for processing with a specific system. If the - upload is successful, it returns the measurement id. """ + def upload_file(self, filename, system_id, force_upload, delete_related, delay=0, rs_filename=None, + ov_filename=None, lr_filename=None): + """ Upload a file for processing. + + If the upload is successful, it returns the measurement id. + + + Parameters + ---------- + filename : str + File path of the file to upload + system_id : int + System id to be used in the processing + force_upload : bool + If True, if a measurement with the same ID is found on the server, it will be first deleted and the + current file will be uploaded. If False, the file will not be uploaded if the measurement ID is + already present on the SCC server. + delete_related : bool + Answer to delete related question when deleting existing measurements from the SCC. + rs_filename, ov_filename, lr_filename : str + Ancillary file paths to be uploaded. 
+ """ + # Get the measurement ID from the netcdf file measurement_id = self.measurement_id_from_file(filename) + # Handle possible existing measurements with the same ID on the SCC server. logger.debug('Checking if a measurement with the same id already exists on the SCC server.') existing_measurement, _ = self.get_measurement(measurement_id) @@ -118,9 +161,12 @@ logger.error( "Measurement with id {} already exists on the SCC. Use --force_upload flag to overwrite it.".format( measurement_id)) - # TODO: Implement handling at the proper place. This does not allow the SCC class to be used by external programs. + # TODO: Implement handling at the proper place. Exiting here does not allow the SCC class to be + # used by external programs. Instead an exception should be raised. sys.exit(1) + # Upload the file(s) + # Get submit page upload_page = self.session.get(self.upload_url) @@ -132,11 +178,13 @@ files = {'data': open(filename, 'rb')} + # Add ancillary files to be uploaded if rs_filename is not None: ancillary_file, _ = self.get_ancillary(rs_filename, 'sounding') if ancillary_file.already_on_scc: - logger.warning("Sounding file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file)) + logger.warning( + "Sounding file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file)) else: logger.debug('Adding sounding file %s' % rs_filename) files['sounding_file'] = open(rs_filename, 'rb') @@ -145,7 +193,8 @@ ancillary_file, _ = self.get_ancillary(ov_filename, 'overlap') if ancillary_file.already_on_scc: - logger.warning("Overlap file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file)) + logger.warning( + "Overlap file {0.filename} already on the SCC with id {0.id}. 
Ignoring it.".format(ancillary_file)) else: logger.debug('Adding overlap file %s' % ov_filename) files['overlap_file'] = open(ov_filename, 'rb') @@ -155,11 +204,13 @@ if ancillary_file.already_on_scc: logger.warning( - "Lidar ratio file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format(ancillary_file)) + "Lidar ratio file {0.filename} already on the SCC with id {0.id}. Ignoring it.".format( + ancillary_file)) else: logger.debug('Adding lidar ratio file %s' % lr_filename) files['lidar_ratio_file'] = open(lr_filename, 'rb') + # Upload the files logger.info("Uploading of file %s started." % filename) upload_submit = self.session.post(self.upload_url, @@ -172,19 +223,27 @@ logger.warning("Connection error. Status code: %s" % upload_submit.status_code) return False - # Check if there was a redirect to a new page. + # Check if there was a redirect to a new page. If not, something went wrong if upload_submit.url == self.upload_url: measurement_id = False logger.error("Uploaded file(s) rejected! Try to upload manually to see the error.") else: - measurement_id = re.findall(regex, upload_submit.text)[0] + # TODO: Check if this is needed. This was used when the measurement ID was not read from the input file. + measurement_id = re.findall(regex, upload_submit.text)[0] # Get the measurement ID from the output page logger.info("Successfully uploaded measurement with id %s." % measurement_id) - logger.info("You can monitor the processing progress online: {}".format(self.measurement_page_pattern.format(measurement_id))) + logger.info("You can monitor the processing progress online: {}".format( + self.measurement_page_pattern.format(measurement_id))) return measurement_id @staticmethod def measurement_id_from_file(filename): - """ Get the measurement id from the input file. """ + """ Get the measurement id from the input file. + + Parameters + ---------- + filename : str + File path of the input file. 
+ """ if not os.path.isfile(filename): logger.error("File {} does not exist.".format(filename)) @@ -203,8 +262,10 @@ def download_files(self, measurement_id, subdir, download_url): """ Downloads some files from the download_url to the specified - subdir. This method is used to download preprocessed file, optical - files etc. + subdir. + + This is a general method used to download preprocessed file, optical + files by other, file-specific, methods. """ # TODO: Make downloading more robust (e.g. in case that files do not exist on server). # Get the file @@ -345,7 +406,8 @@ if monitor: self.monitor_processing(measurement_id) - def process(self, filename, system_id, monitor, force_upload, delete_related, delay=0, rs_filename=None, lr_filename=None, ov_filename=None): + def process(self, filename, system_id, monitor, force_upload, delete_related, delay=0, rs_filename=None, + lr_filename=None, ov_filename=None): """ Upload a file for processing and wait for the processing to finish. If the processing is successful, it will download all produced files. """ @@ -375,7 +437,7 @@ attempts_count = 0 max_attempts = retry_max + 1 - # try to wait for measurement to appear in API + # try to wait for measurement to appear in API. A user has reported that this does not happen immediately. measurement = None logger.info("Looking for measurement %s on the SCC.", measurement_id) @@ -424,7 +486,7 @@ if measurement.elic == 127: logger.info("Downloading ELIC files.") self.download_elic(measurement_id) - if measurement.is_calibration and measurement.eldec==0: + if measurement.is_calibration and measurement.eldec == 0: logger.info("Downloading ELDEC files.") self.download_eldec(measurement_id) logger.info("--- Processing finished. ---") @@ -432,10 +494,24 @@ return measurement def get_measurement(self, measurement_id): + """ Get a measurement information from the SCC API. - if measurement_id is None: # Is this still required? 
+ Parameters + ---------- + measurement_id : str + The measurement ID to search. + + Returns + ------- + : Measurement object or None + If the measurement is found, a Measurement object is returned. If not, None is returned. + """ + # TODO: Consider homogenizing with get_ancillary method (i.e. always return a Measurement object). + + if measurement_id is None: # TODO: Is this still required? return None + # Access the API measurement_url = self.api_measurement_pattern.format(measurement_id) logger.debug("Measurement API URL: %s" % measurement_url) @@ -461,7 +537,7 @@ def delete_measurement(self, measurement_id, delete_related): """ Deletes a measurement with the provided measurement id. The user - should have the appropriate permissions. + should have the appropriate permissions (i.e. access to the admin site). The procedures is performed directly through the web interface and NOT through the API. @@ -506,7 +582,10 @@ return True def available_measurements(self): - """ Get a list of available measurement on the SCC. """ + """ Get a list of available measurements on the SCC. + + This method is currently not used and could be merged with list_measurements. + """ response = self.session.get(self.api_measurements_url) response_dict = response.json() @@ -521,8 +600,9 @@ return measurements def list_measurements(self, id_exact=None, id_startswith=None): + """ Get the response text from the API. """ - # TODO: Change this to work through the API + # TODO: Add some error handling, e.g. as per available_measurements method # Need to set to empty string if not specified, we won't get any results params = {} @@ -540,6 +620,7 @@ """ Give the first available measurement id on the SCC for the specific date. """ + # TODO: Check if this method needs updating to handle all measurement_ID formats. 
date_str = t1.strftime('%Y%m%d') base_id = "%s%s" % (date_str, call_sign) search_url = urlparse.urljoin(self.api_base_url, 'measurements/?id__startswith=%s' % base_id) @@ -742,6 +823,7 @@ class AncillaryFile(ApiObject): """ This class represents the ancilalry file object as returned in the SCC API. """ + @property def already_on_scc(self): if self.exists is False: @@ -755,6 +837,7 @@ self.status) +# Methods that use the SCC class to perform specific tasks. def process_file(filename, system_id, settings, force_upload, delete_related, delay=0, monitor=True, rs_filename=None, lr_filename=None, ov_filename=None): """ Shortcut function to process a file to the SCC. """ @@ -881,6 +964,7 @@ def setup_upload_file(parser): """ Upload but do not monitor processing progress. """ + def upload_file_from_args(parsed): process_file(parsed.filename, parsed.system, parsed.config, delay=parsed.delay, @@ -933,8 +1017,10 @@ download_measurements(parsed.IDs, parsed.max_retries, parsed.ignore_errors, parsed.config) parser.add_argument("IDs", help="Measurement IDs that should be downloaded.", nargs="+") - parser.add_argument("--max_retries", help="Number of times to retry in cases of missing measurement id.", default=0, type=int) - parser.add_argument("--ignore_errors", help="Ignore errors when downloading multiple measurements.", action="store_false") + parser.add_argument("--max_retries", help="Number of times to retry in cases of missing measurement id.", default=0, + type=int) + parser.add_argument("--ignore_errors", help="Ignore errors when downloading multiple measurements.", + action="store_false") parser.set_defaults(execute=download_measurements_from_args) @@ -944,10 +1030,12 @@ subparsers = parser.add_subparsers() delete_parser = subparsers.add_parser("delete", help="Deletes a measurement.") - rerun_all_parser = subparsers.add_parser("rerun-all", help="Rerun all processing steps for the provided measurement IDs.") + rerun_all_parser = subparsers.add_parser("rerun-all", + 
help="Rerun all processing steps for the provided measurement IDs.") rerun_processing_parser = subparsers.add_parser("rerun-elpp", help="Rerun low-resolution processing steps for the provided measurement ID.") - upload_file_parser = subparsers.add_parser("upload-file", help="Submit a file and, optionally, download the output products.") + upload_file_parser = subparsers.add_parser("upload-file", + help="Submit a file and, optionally, download the output products.") list_parser = subparsers.add_parser("list", help="List measurements registered on the SCC.") download_parser = subparsers.add_parser("download", help="Download selected measurements.")