Mon, 08 Jan 2018 14:59:21 +0100
Refactor argument parsing + New commands
* Restructured argument parsing to use subcommands.
* Move some common options to requests session (auth + verify)
* Allow for a common location for settings file (~/.scc_access.yaml)
* Update Readme
* Add option to list available files
* Add option to download existing files
README.rst | file | annotate | diff | comparison | revisions | |
scc_access/scc_access.py | file | annotate | diff | comparison | revisions |
--- a/README.rst Fri Dec 15 22:53:17 2017 +0200 +++ b/README.rst Mon Jan 08 14:59:21 2018 +0100 @@ -36,14 +36,16 @@ Settings -------- -You will need to change some user-defined settings in a settings.py file. You -can rename the settings.sample.py file to settings.py and follow the instructions -there. +You will need to change some user-defined settings in a settings.yaml file. You +can copy the settings_sample.yaml file to settings.py and follow the instructions +there. You can copy the resulting file to your home directory as `.scc_access.yaml`. +This is the default location, `scc_access` will search there if no other location was +specified. Specifically, you will need to: -1. Change the BASIC_LOGIN and DJANGO_LOGIN to your credentials. -2. Change the OUTPUT_DIR to the location were the results will be stored. +1. Change the `basic_credentials` and `website_credentials` to your credentials. +2. Change the `output_dir` to the location were the results will be stored. Please not that it's not a good idea to store your own credentials in the settings file. The standard user has "Station Management" privileges and if the credentials @@ -57,17 +59,22 @@ You can upload a file specifying the username and the system id:: - scc_access 20110101po01.nc 125 + scc_access upload-file 20110101po01.nc 125 If you want to wait for the processing to finish and download the resulting files -you need to define the -p flag:: +you need to define the `process-file` command. Use the `-p` flag to wait for the +result:: - scc_access 20110101po01.nc 125 -p + scc_access process-file 20110101po01.nc 125 -p -If you want to delete an existing measurement id from the database use the -d flag and -the measurement id:: +If you want to delete an existing measurement id from the database use the `delete` +command and the measurement id:: - scc_access -d 20110101po01 + scc_access delete 20110101po01 + +You can list available measurements with the `list` command:: + + scc_access list For more information on the syntax type::
--- a/scc_access/scc_access.py Fri Dec 15 22:53:17 2017 +0200 +++ b/scc_access/scc_access.py Mon Jan 08 14:59:21 2018 +0100 @@ -1,7 +1,7 @@ import requests + requests.packages.urllib3.disable_warnings() -import sys import urlparse import argparse import os @@ -13,7 +13,6 @@ import logging import yaml - logger = logging.getLogger(__name__) # The regex to find the measurement id from the measurement page @@ -30,47 +29,42 @@ """ def __init__(self, auth, output_dir, base_url): + self.auth = auth self.output_dir = output_dir self.base_url = base_url self.session = requests.Session() - self.construct_urls() - - def construct_urls(self): - """ Construct all URLs needed for processing. """ - # Construct the absolute URLs + self.session.auth = auth + self.session.verify = False self.login_url = urlparse.urljoin(self.base_url, 'accounts/login/') self.upload_url = urlparse.urljoin(self.base_url, 'data_processing/measurements/quick/') - self.download_preprocessed_pattern = urlparse.urljoin(self.base_url, 'data_processing/measurements/{0}/download-preprocessed/') - self.download_optical_pattern = urlparse.urljoin(self.base_url, 'data_processing/measurements/{0}/download-optical/') - self.download_graph_pattern = urlparse.urljoin(self.base_url, 'data_processing/measurements/{0}/download-plots/') + self.download_preprocessed_pattern = urlparse.urljoin(self.base_url, + 'data_processing/measurements/{0}/download-preprocessed/') + self.download_optical_pattern = urlparse.urljoin(self.base_url, + 'data_processing/measurements/{0}/download-optical/') + self.download_graph_pattern = urlparse.urljoin(self.base_url, + 'data_processing/measurements/{0}/download-plots/') self.delete_measurement_pattern = urlparse.urljoin(self.base_url, 'admin/database/measurements/{0}/delete/') self.api_base_url = urlparse.urljoin(self.base_url, 'api/v1/') + self.list_measurements_pattern = urlparse.urljoin(self.base_url, 'data_processing/measurements/') def login(self, credentials): """ Login the the website. """ logger.debug("Attempting to login to SCC, username %s." % credentials[0]) - self.login_credentials = {'username': credentials[0], - 'password': credentials[1]} + login_credentials = {'username': credentials[0], + 'password': credentials[1]} logger.debug("Accessing login page at %s." % self.login_url) # Get upload form - login_page = self.session.get(self.login_url, auth=self.auth, verify=False) - - if login_page.status_code != 200: - logger.error('Could not access login pages. Status code %s' % login_page.status_code) - sys.exit(1) + login_page = self.session.get(self.login_url) logger.debug("Submiting credentials.") - # Submit the login data login_submit = self.session.post(self.login_url, - data=self.login_credentials, + data=login_credentials, headers={'X-CSRFToken': login_page.cookies['csrftoken'], - 'referer': self.login_url}, - verify=False, - auth=self.auth) + 'referer': self.login_url}) return login_submit def logout(self): @@ -80,9 +74,7 @@ """ Upload a filename for processing with a specific system. If the upload is successful, it returns the measurement id. """ # Get submit page - upload_page = self.session.get(self.upload_url, - auth=self.auth, - verify=False) + upload_page = self.session.get(self.upload_url) # Submit the data upload_data = {'system': system_id} @@ -94,9 +86,7 @@ data=upload_data, files=files, headers={'X-CSRFToken': upload_page.cookies['csrftoken'], - 'referer': self.upload_url,}, - verify=False, - auth=self.auth) + 'referer': self.upload_url}) if upload_submit.status_code != 200: logger.warning("Connection error. Status code: %s" % upload_submit.status_code) @@ -118,9 +108,10 @@ files etc. """ # Get the file - request = self.session.get(download_url, auth=self.auth, - verify=False, - stream=True) + request = self.session.get(download_url, stream=True) + + if not request.ok: + raise Exception("Could not download files for measurement '%s'" % measurement_id) # Create the dir if it does not exist local_dir = os.path.join(self.output_dir, measurement_id, subdir) @@ -154,7 +145,7 @@ def download_optical(self, measurement_id): """ Download optical files for the measurement id. """ # Construct the download url - download_url = self.download_optical.format(measurement_id) + download_url = self.download_optical_pattern.format(measurement_id) self.download_files(measurement_id, 'scc_optical', download_url) def download_graphs(self, measurement_id): @@ -167,9 +158,7 @@ measurement = self.get_measurement(measurement_id) if measurement: - request = self.session.get(measurement.rerun_processing_url, auth=self.auth, - verify=False, - stream=True) + request = self.session.get(measurement.rerun_processing_url, stream=True) if request.status_code != 200: logger.error( @@ -188,9 +177,7 @@ if measurement: logger.debug("Attempting to rerun all processing through %s." % measurement.rerun_all_url) - request = self.session.get(measurement.rerun_all_url, auth=self.auth, - verify=False, - stream=True) + request = self.session.get(measurement.rerun_all_url, stream=True) if request.status_code != 200: logger.error("Could not rerun pre processing for %s. Status code: %s" % @@ -200,7 +187,7 @@ if monitor: self.monitor_processing(measurement_id) - def process(self, filename, system_id): + def process(self, filename, system_id, monitor): """ Upload a file for processing and wait for the processing to finish. If the processing is successful, it will download all produced files. """ @@ -208,8 +195,9 @@ # Upload file measurement_id = self.upload_file(filename, system_id) - measurement = self.monitor_processing(measurement_id) - return measurement + if monitor: + return self.monitor_processing(measurement_id) + return None def monitor_processing(self, measurement_id): """ Monitor the processing progress of a measurement id""" @@ -240,9 +228,7 @@ """ Get the processing status for a measurement id through the API. """ measurement_url = urlparse.urljoin(self.api_base_url, 'measurements/?id__exact=%s' % measurement_id) - response = self.session.get(measurement_url, - auth=self.auth, - verify=False) + response = self.session.get(measurement_url) response_dict = response.json() @@ -257,18 +243,16 @@ def get_measurement(self, measurement_id): measurement_url = urlparse.urljoin(self.api_base_url, 'measurements/%s/' % measurement_id) - response = self.session.get(measurement_url, - auth=self.auth, - verify=False) + response = self.session.get(measurement_url) - if response.status_code != 200: + if not response.ok: logger.error('Could not access API. Status code %s.' % response.status_code) - sys.exit(1) + return None response_dict = response.json() if response_dict: - measurement = Measurement(self.base_url,response_dict) + measurement = Measurement(self.base_url, response_dict) return measurement else: logger.error("No measurement with id %s found on the SCC." % measurement_id) @@ -290,11 +274,9 @@ return None # Go the the page confirming the deletion - delete_url = self.delete_measurement_pattern.format(measurement.id) + delete_url = self.delete_measurement_pattern.format(measurement_id) - confirm_page = self.session.get(delete_url, - auth=self.auth, - verify=False) + confirm_page = self.session.get(delete_url) # Check that the page opened properly if confirm_page.status_code != 200: @@ -303,8 +285,6 @@ # Delete the measurement delete_page = self.session.post(delete_url, - auth=self.auth, - verify=False, data={'post': 'yes'}, headers={'X-CSRFToken': confirm_page.cookies['csrftoken'], 'referer': delete_url} @@ -320,9 +300,7 @@ def available_measurements(self): """ Get a list of available measurement on the SCC. """ measurement_url = urlparse.urljoin(self.api_base_url, 'measurements') - response = self.session.get(measurement_url, - auth=self.auth, - verify=False) + response = self.session.get(measurement_url) response_dict = response.json() measurements = None @@ -335,6 +313,40 @@ return measurements + def list_measurements(self, station=None, system=None, start=None, stop=None, upload_status=None, + processing_status=None, optical_processing=None): + + # Need to set to empty string if not specified, we won't get any results + params = { + "station": station if station is not None else "", + "system": system if system is not None else "", + "stop": stop if stop is not None else "", + "start": start if start is not None else "", + "upload_status": upload_status if upload_status is not None else "", + "preprocessing_status": processing_status if processing_status is not None else "", + "optical_processing_status": optical_processing if optical_processing is not None else "" + } + resp = self.session.get(self.list_measurements_pattern, params=params).text + tbl_rgx = re.compile(r'<table id="measurements">(.*?)</table>', re.DOTALL) + entry_rgx = re.compile(r'<tr>(.*?)</tr>', re.DOTALL) + measurement_rgx = re.compile( + r'.*?<td><a[^>]*>(\w+)</a>.*?<td>.*?<td>([\w-]+ [\w:]+)</td>.*<td data-order="([-]?\d+),([-]?\d+),([-]?\d+)".*', + re.DOTALL) + matches = tbl_rgx.findall(resp) + if len(matches) != 1: + return [] + + ret = [] + for entry in entry_rgx.finditer(matches[0]): + m = measurement_rgx.match(entry.string[entry.start(0):entry.end(0)]) + if m: + name, date, upload, preproc, optical = m.groups() + ret.append( + Measurement(self.base_url, {"id": name, "upload": int(upload), "pre_processing": int(preproc), + "processing": int(optical)})) + + return ret + def measurement_id_for_date(self, t1, call_sign='bu', base_number=0): """ Give the first available measurement id on the SCC for the specific date. @@ -342,9 +354,7 @@ date_str = t1.strftime('%Y%m%d') search_url = urlparse.urljoin(self.api_base_url, 'measurements/?id__startswith=%s' % date_str) - response = self.session.get(search_url, - auth=self.auth, - verify=False) + response = self.session.get(search_url) response_dict = response.json() @@ -426,63 +436,94 @@ return measurement_id -def process_file(filename, system_id, settings): +def process_file(filename, system_id, monitor, settings): """ Shortcut function to process a file to the SCC. """ logger.info("Processing file %s, using sytem %s" % (filename, system_id)) - scc = SCC(settings['basic_credentials'], settings['output_dir'], settings['base_url']) scc.login(settings['website_credentials']) - measurement = scc.process(filename, system_id) + measurement = scc.process(filename, system_id, monitor) scc.logout() return measurement -def delete_measurement(measurement_id, settings): - """ Shortcut function to delete a measurement from the SCC. """ - logger.info("Deleting %s" % measurement_id) +def delete_measurement(measurement_ids, settings): + """ Shortcut function to delete measurements from the SCC. """ + scc = SCC(settings['basic_credentials'], settings['output_dir'], settings['base_url']) + scc.login(settings['website_credentials']) + for m_id in measurement_ids: + logger.info("Deleting %s" % m_id) + scc.delete_measurement(m_id) + scc.logout() + + +def rerun_all(measurement_ids, monitor, settings): + """ Shortcut function to rerun measurements from the SCC. """ scc = SCC(settings['basic_credentials'], settings['output_dir'], settings['base_url']) scc.login(settings['website_credentials']) - scc.delete_measurement(measurement_id) + for m_id in measurement_ids: + logger.info("Rerunning all products for %s" % m_id) + scc.rerun_all(m_id, monitor) scc.logout() -def rerun_all(measurement_id, monitor, settings): +def rerun_processing(measurement_ids, monitor, settings): """ Shortcut function to delete a measurement from the SCC. """ - logger.info("Rerunning all products for %s" % measurement_id) scc = SCC(settings['basic_credentials'], settings['output_dir'], settings['base_url']) scc.login(settings['website_credentials']) - scc.rerun_all(measurement_id, monitor) + for m_id in measurement_ids: + logger.info("Rerunning (optical) processing for %s" % m_id) + scc.rerun_processing(m_id, monitor) + scc.logout() + + +def list_measurements(settings, station=None, system=None, start=None, stop=None, upload_status=None, + preprocessing_status=None, + optical_processing=None): + """List all available measurements""" + scc = SCC(settings['basic_credentials'], settings['output_dir'], settings['base_url']) + scc.login(settings['website_credentials']) + ret = scc.list_measurements(station=station, system=system, start=start, stop=stop, upload_status=upload_status, + processing_status=preprocessing_status, optical_processing=optical_processing) + for entry in ret: + print("%s" % entry.id) scc.logout() -def rerun_processing(measurement_id, monitor, settings): - """ Shortcut function to delete a measurement from the SCC. """ - logger.info("Rerunning (optical) processing for %s" % measurement_id) - +def download_measurements(measurement_ids, download_preproc, download_optical, download_graph, settings): + """Download all measurements for the specified IDs""" scc = SCC(settings['basic_credentials'], settings['output_dir'], settings['base_url']) scc.login(settings['website_credentials']) - scc.rerun_processing(measurement_id, monitor) - scc.logout() + for m_id in measurement_ids: + if download_preproc: + logger.info("Downloading preprocessed files for '%s'" % m_id) + scc.download_preprocessed(m_id) + logger.info("Complete") + if download_optical: + logger.info("Downloading optical files for '%s'" % m_id) + scc.download_optical(m_id) + logger.info("Complete") + if download_graph: + logger.info("Downloading profile graph files for '%s'" % m_id) + scc.download_graphs(m_id) + logger.info("Complete") -def import_settings(config_file_path): +def settings_from_path(config_file_path): """ Read the configuration file. The file should be in YAML syntax.""" if not os.path.isfile(config_file_path): - logger.error("Wrong path for configuration file (%s)" % config_file_path) - sys.exit(1) + raise argparse.ArgumentTypeError("Wrong path for configuration file (%s)" % config_file_path) with open(config_file_path) as yaml_file: try: settings = yaml.safe_load(yaml_file) logger.debug("Read settings file(%s)" % config_file_path) - except: - logger.error("Could not parse YAML file (%s)" % config_file_path) - sys.exit(1) + except Exception: + raise argparse.ArgumentTypeError("Could not parse YAML file (%s)" % config_file_path) # YAML limitation: does not read tuples settings['basic_credentials'] = tuple(settings['basic_credentials']) @@ -490,17 +531,122 @@ return settings +# Setup for command specific parsers +def setup_delete(parser): + def delete_from_args(parsed): + delete_measurement(parsed.IDs, parsed.config) + + parser.add_argument("IDs", nargs="+", help="measurement IDs to delete.") + parser.set_defaults(execute=delete_from_args) + + +def setup_rerun_all(parser): + def rerun_all_from_args(parsed): + rerun_all(parsed.IDs, parsed.process, parsed.config) + + parser.add_argument("IDs", nargs="+", help="Measurement IDs to rerun.") + parser.add_argument("-p", "--process", help="Wait for the results of the processing.", + action="store_true") + parser.set_defaults(execute=rerun_all_from_args) + + +def setup_rerun_processing(parser): + def rerun_processing_from_args(parsed): + rerun_processing(parsed.IDs, parsed.process, parsed.config) + + parser.add_argument("IDs", nargs="+", help="Measurement IDs to rerun the processing on.") + parser.add_argument("-p", "--process", help="Wait for the results of the processing.", + action="store_true") + parser.set_defaults(execute=rerun_processing_from_args) + + +def setup_process_file(parser): + def process_file_from_args(parsed): + process_file(parsed.file, parsed.system, parsed.process, parsed.config) + + parser.add_argument("filename", help="Measurement file name or path.") + parser.add_argument("system", help="Processing system id.") + parser.add_argument("-p", "--process", help="Wait for the results of the processing.", + action="store_true") + parser.set_defaults(execute=process_file_from_args) + + +def setup_upload_file(parser): + def upload_file_from_args(parsed): + upload_file(parsed.file, parsed.system, parsed.config) + + parser.add_argument("filename", help="Measurement file name or path.") + parser.add_argument("system", help="Processing system id.") + parser.set_defaults(execute=upload_file_from_args) + + +def setup_list_measurements(parser): + def list_measurements_from_args(parsed): + list_measurements(parsed.config, station=parsed.station, system=parsed.system, start=parsed.start, + stop=parsed.stop, + upload_status=parsed.upload_status, preprocessing_status=parsed.preprocessing_status, + optical_processing=parsed.optical_processing_status) + + def status(arg): + if -127 <= int(arg) <= 127: + return arg + else: + raise argparse.ArgumentTypeError("Status must be between -127 and 127") + + def date(arg): + if re.match(r'\d{4}-\d{2}-\d{2}', arg): + return arg + else: + raise argparse.ArgumentTypeError("Date must be in format 'YYYY-MM-DD'") + + parser.add_argument("--station", help="Filter for only the selected station") + parser.add_argument("--system", help="Filter for only the selected station") + parser.add_argument("--start", help="Filter for only the selected station", type=date) + parser.add_argument("--stop", help="Filter for only the selected station", type=date) + parser.add_argument("--upload-status", help="Filter for only the selected station", type=status) + parser.add_argument("--preprocessing-status", help="Filter for only the selected station", type=status) + parser.add_argument("--optical-processing-status", help="Filter for only the selected station", type=status) + parser.set_defaults(execute=list_measurements_from_args) + + +def setup_download_measurements(parser): + def download_measurements_from_args(parsed): + preproc = parsed.download_preprocessed + optical = parsed.download_optical + graphs = parsed.download_profile_graphs + if not preproc and not graphs: + optical = True + download_measurements(parsed.IDs, preproc, optical, graphs, parsed.config) + + parser.add_argument("IDs", help="Measurement IDs that should be downloaded.", nargs="+") + parser.add_argument("--download-preprocessed", action="store_true", help="Download preprocessed files.") + parser.add_argument("--download-optical", action="store_true", + help="Download optical files (default if no other download is used).") + parser.add_argument("--download-profile-graphs", action="store_true", help="Download profile graph files.") + parser.set_defaults(execute=download_measurements_from_args) + + def main(): # Define the command line arguments. parser = argparse.ArgumentParser() - parser.add_argument("filename", nargs='?', help="Measurement file name or path.", default='') - parser.add_argument("system", nargs='?', help="Processing system id.", default=0) - parser.add_argument("-c", "--config", nargs='?', help="Path to configuration file") - parser.add_argument("-p", "--process", help="Wait for the results of the processing.", - action="store_true") - parser.add_argument("--delete", help="Measurement ID to delete.") - parser.add_argument("--rerun-all", help="Measurement ID to rerun.") - parser.add_argument("--rerun-processing", help="Measurement ID to rerun processing routines.") + subparsers = parser.add_subparsers() + + delete_parser = subparsers.add_parser("delete", help="Deletes a measurement.") + rerun_all_parser = subparsers.add_parser("rerun-all", help="Rerun a measurement.") + rerun_processing_parser = subparsers.add_parser("rerun-processing", + help="Rerun processing routings for a measurement.") + process_file_parser = subparsers.add_parser("process-file", help="Process a file.") + upload_file_parser = subparsers.add_parser("upload-file", help="Upload a file.") + list_parser = subparsers.add_parser("list", help="List all measurements.") + download_parser = subparsers.add_parser("download", help="Download selected measurements.") + + setup_delete(delete_parser) + setup_rerun_all(rerun_all_parser) + setup_rerun_processing(rerun_processing_parser) + setup_process_file(process_file_parser) + setup_upload_file(upload_file_parser) + setup_list_measurements(list_parser) + setup_download_measurements(download_parser) # Verbosity settings from http://stackoverflow.com/a/20663028 parser.add_argument('-d', '--debug', help="Print debugging information.", action="store_const", @@ -510,26 +656,20 @@ dest="loglevel", const=logging.WARNING ) + home = os.path.expanduser("~") + default_config_location = os.path.abspath(os.path.join(home, ".scc_access.yaml")) + parser.add_argument("-c", "--config", help="Path to the config file.", type=settings_from_path, + default=default_config_location) + args = parser.parse_args() # Get the logger with the appropriate level logging.basicConfig(format='%(levelname)s: %(message)s', level=args.loglevel) - settings = import_settings(args.config) + # Dispatch to appropriate function + args.execute(args) + - # If the arguments are OK, try to log-in to SCC and upload. - if args.delete: - # If the delete is provided, do nothing else - delete_measurement(args.delete, settings) - elif args.rerun_all: - rerun_all(args.rerun_all, args.process, settings) - elif args.rerun_processing: - rerun_processing(args.rerun_processing, args.process, settings) - else: - if (args.filename == '') or (args.system == 0): - parser.error('Provide a valid filename and system parameters.\nRun with -h for help.\n') - - if args.process: - process_file(args.filename, args.system, settings) - else: - upload_file(args.filename, args.system, settings) +# When running through terminal +if __name__ == '__main__': + main()