Thu, 11 Oct 2018 12:11:52 +0300
Minor: changed one error level.
import requests

# NOTE(review): every request in this module is sent with verify=False, i.e.
# without TLS certificate verification. The call below only silences the
# warnings that urllib3 would otherwise emit for each such request.
requests.packages.urllib3.disable_warnings()

import sys
import urlparse
import argparse
import os
import re
import time
import StringIO
from zipfile import ZipFile
import datetime
import logging
import yaml

logger = logging.getLogger(__name__)

# The regex to find the measurement id from the measurement page
# This should be read from the uploaded file, but would require an extra NetCDF module.
regex = "<h3>Measurement (?P<measurement_id>.{12}) <small>"


class SCC:
    """
    A simple class that will attempt to upload a file on the SCC server.

    The uploading is done by simulating a normal browser session: pages are
    fetched with a `requests` session and forms are posted back together with
    the CSRF token found in the page cookies.
    """

    def __init__(self, auth, output_dir, base_url):
        """
        Store the connection settings and prepare the session.

        Parameters
        ----------
        auth :
            Passed as the ``auth`` argument of every request made.
        output_dir :
            Local directory under which downloaded products are saved.
        base_url :
            Base URL of the SCC web site; all other URLs are joined to it.
        """
        self.auth = auth
        self.output_dir = output_dir
        self.base_url = base_url
        self.session = requests.Session()
        self.construct_urls()

    def construct_urls(self):
        """ Construct all URLs needed for processing. """
        # Construct the absolute URLs
        self.login_url = urlparse.urljoin(self.base_url, 'accounts/login/')
        self.upload_url = urlparse.urljoin(self.base_url, 'data_processing/measurements/quick/')
        self.download_preprocessed_pattern = urlparse.urljoin(
            self.base_url, 'data_processing/measurements/{0}/download-preprocessed/')
        self.download_optical_pattern = urlparse.urljoin(
            self.base_url, 'data_processing/measurements/{0}/download-optical/')
        self.download_graph_pattern = urlparse.urljoin(
            self.base_url, 'data_processing/measurements/{0}/download-plots/')
        self.delete_measurement_pattern = urlparse.urljoin(
            self.base_url, 'admin/database/measurements/{0}/delete/')
        self.api_base_url = urlparse.urljoin(self.base_url, 'api/v1/')

    def login(self, credentials):
        """
        Login to the website.

        `credentials` is indexed as (username, password). The process exits
        with status 1 if the login page cannot be fetched. Returns the
        response of the login form submission.
        """
        logger.debug("Attempting to login to SCC, username %s." % credentials[0])
        self.login_credentials = {'username': credentials[0],
                                  'password': credentials[1]}

        logger.debug("Accessing login page at %s." % self.login_url)

        # Get upload form
        login_page = self.session.get(self.login_url, auth=self.auth, verify=False)

        if login_page.status_code != 200:
            logger.error('Could not access login pages. Status code %s' % login_page.status_code)
            sys.exit(1)

        logger.debug("Submiting credentials.")

        # Submit the login data
        login_submit = self.session.post(self.login_url,
                                         data=self.login_credentials,
                                         headers={'X-CSRFToken': login_page.cookies['csrftoken'],
                                                  'referer': self.login_url},
                                         verify=False,
                                         auth=self.auth)
        return login_submit

    def logout(self):
        """ Placeholder; currently does nothing. """
        pass

    def upload_file(self, filename, system_id):
        """
        Upload a filename for processing with a specific system.

        Returns the measurement id (as found in the page the server redirects
        to) if the upload is successful, and False otherwise.
        """
        # Get submit page
        upload_page = self.session.get(self.upload_url, auth=self.auth, verify=False)

        # Submit the data
        upload_data = {'system': system_id}

        # The context manager guarantees the file handle is released even if
        # the request raises.
        with open(filename, 'rb') as data_file:
            logger.info("Uploading of file %s started." % filename)

            upload_submit = self.session.post(self.upload_url,
                                              data=upload_data,
                                              files={'data': data_file},
                                              headers={'X-CSRFToken': upload_page.cookies['csrftoken'],
                                                       'referer': self.upload_url},
                                              verify=False,
                                              auth=self.auth)

        if upload_submit.status_code != 200:
            logger.warning("Connection error. Status code: %s" % upload_submit.status_code)
            return False

        # Check if there was a redirect to a new page.
        if upload_submit.url == self.upload_url:
            logger.error("Uploaded file rejected! Try to upload manually to see the error.")
            return False

        found_ids = re.findall(regex, upload_submit.text)
        if not found_ids:
            # Redirected, but the page does not look like a measurement page.
            logger.error("Could not find the measurement id in the page returned after the upload.")
            return False

        measurement_id = found_ids[0]
        logger.info("Successfully uploaded measurement with id %s." % measurement_id)
        return measurement_id

    def download_files(self, measurement_id, subdir, download_url):
        """
        Downloads some files from the download_url to the specified subdir.

        The response is read as a zip archive and every member is written,
        flattened to its base name, in <output_dir>/<measurement_id>/<subdir>.
        This method is used to download preprocessed file, optical files etc.
        """
        # Get the file
        request = self.session.get(download_url, auth=self.auth, verify=False, stream=True)

        # Create the dir if it does not exist
        local_dir = os.path.join(self.output_dir, measurement_id, subdir)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)

        # Save the file by chunk, needed if the file is big.
        memory_file = StringIO.StringIO()

        for chunk in request.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                memory_file.write(chunk)

        with ZipFile(memory_file) as zip_file:
            for ziped_name in zip_file.namelist():
                basename = os.path.basename(ziped_name)
                local_file = os.path.join(local_dir, basename)

                with open(local_file, 'wb') as f:
                    f.write(zip_file.read(ziped_name))

    def download_preprocessed(self, measurement_id):
        """ Download preprocessed files for the measurement id. """
        # Construct the download url
        download_url = self.download_preprocessed_pattern.format(measurement_id)
        self.download_files(measurement_id, 'scc_preprocessed', download_url)

    def download_optical(self, measurement_id):
        """ Download optical files for the measurement id. """
        # Construct the download url
        download_url = self.download_optical_pattern.format(measurement_id)
        self.download_files(measurement_id, 'scc_optical', download_url)

    def download_graphs(self, measurement_id):
        """ Download profile graphs for the measurement id. """
        # Construct the download url
        download_url = self.download_graph_pattern.format(measurement_id)
        self.download_files(measurement_id, 'scc_plots', download_url)

    def rerun_processing(self, measurement_id, monitor=True):
        """
        Request the rerun-optical page of a measurement.

        If `monitor` is true and the request succeeded, wait for the
        processing to finish (see `monitor_processing`).
        """
        measurement = self.get_measurement(measurement_id)

        if measurement:
            request = self.session.get(measurement.rerun_processing_url, auth=self.auth,
                                       verify=False, stream=True)

            if request.status_code != 200:
                logger.error("Could not rerun processing for %s. Status code: %s" %
                             (measurement_id, request.status_code))
                return

            if monitor:
                self.monitor_processing(measurement_id)

    def rerun_all(self, measurement_id, monitor=True):
        """
        Request the rerun-all page of a measurement.

        If `monitor` is true and the request succeeded, wait for the
        processing to finish (see `monitor_processing`).
        """
        logger.debug("Started rerun_all procedure.")

        logger.debug("Getting measurement %s" % measurement_id)
        measurement = self.get_measurement(measurement_id)

        if measurement:
            logger.debug("Attempting to rerun all processing through %s." % measurement.rerun_all_url)

            request = self.session.get(measurement.rerun_all_url, auth=self.auth,
                                       verify=False, stream=True)

            if request.status_code != 200:
                logger.error("Could not rerun pre processing for %s. Status code: %s" %
                             (measurement_id, request.status_code))
                return

            if monitor:
                self.monitor_processing(measurement_id)

    def process(self, filename, system_id):
        """
        Upload a file for processing and wait for the processing to finish.

        If the processing is successful, it will download all produced files.
        Returns the final Measurement object, or None if the upload failed.
        """
        logger.info("--- Processing started on %s. ---" % datetime.datetime.now())

        # Upload file
        measurement_id = self.upload_file(filename, system_id)

        if not measurement_id:
            # upload_file has already logged the reason; without an id there
            # is nothing to query on the API.
            return None

        measurement = self.monitor_processing(measurement_id)
        return measurement

    def monitor_processing(self, measurement_id):
        """
        Monitor the processing progress of a measurement id.

        Polls the API every 10 seconds while the measurement is running, then
        downloads the products of every step whose status is 127. Returns the
        last Measurement object fetched, or None if it could not be fetched.
        """
        measurement = self.get_measurement(measurement_id)
        if measurement is None:
            return None

        while measurement.is_running:
            logger.info("Measurement is being processed (status: %s, %s, %s). Please wait." %
                        (measurement.upload,
                         measurement.pre_processing,
                         measurement.processing))
            time.sleep(10)
            measurement = self.get_measurement(measurement_id)
            if measurement is None:
                # get_measurement has already logged the problem.
                return None

        logger.info("Measurement processing finished (status: %s, %s, %s)." %
                    (measurement.upload,
                     measurement.pre_processing,
                     measurement.processing))

        if measurement.pre_processing == 127:
            logger.info("Downloading preprocessed files.")
            self.download_preprocessed(measurement_id)

        if measurement.processing == 127:
            logger.info("Downloading optical files.")
            self.download_optical(measurement_id)
            logger.info("Downloading graphs.")
            self.download_graphs(measurement_id)

        logger.info("--- Processing finished. ---")
        return measurement

    def get_status(self, measurement_id):
        """
        Get the processing status for a measurement id through the API.

        Returns the tuple (upload, pre_processing, processing), or None if no
        measurement with this id is listed.
        """
        measurement_url = urlparse.urljoin(self.api_base_url,
                                           'measurements/?id__exact=%s' % measurement_id)

        response = self.session.get(measurement_url, auth=self.auth, verify=False)
        response_dict = response.json()

        if response_dict['objects']:
            measurement_list = response_dict['objects']
            measurement = Measurement(self.base_url, measurement_list[0])
            return measurement.upload, measurement.pre_processing, measurement.processing
        else:
            logger.error("No measurement with id %s found on the SCC." % measurement_id)
            return None

    def get_measurement(self, measurement_id):
        """
        Fetch a measurement through the API.

        Exits the process with status 1 if the API does not answer with
        status 200. Returns a Measurement, or None if the response is empty.
        """
        measurement_url = urlparse.urljoin(self.api_base_url, 'measurements/%s/' % measurement_id)

        response = self.session.get(measurement_url, auth=self.auth, verify=False)

        if response.status_code != 200:
            logger.error('Could not access API. Status code %s.' % response.status_code)
            sys.exit(1)

        response_dict = response.json()

        if response_dict:
            measurement = Measurement(self.base_url, response_dict)
            return measurement
        else:
            logger.error("No measurement with id %s found on the SCC." % measurement_id)
            return None

    def delete_measurement(self, measurement_id):
        """
        Deletes a measurement with the provided measurement id.

        The user should have the appropriate permissions. The procedure is
        performed directly through the web interface and NOT through the API.
        Returns True on success and None otherwise.
        """
        # Get the measurement object
        measurement = self.get_measurement(measurement_id)

        # Check that it exists
        if measurement is None:
            logger.warning("Nothing to delete.")
            return None

        # Go the the page confirming the deletion
        delete_url = self.delete_measurement_pattern.format(measurement.id)

        confirm_page = self.session.get(delete_url, auth=self.auth, verify=False)

        # Check that the page opened properly
        if confirm_page.status_code != 200:
            logger.warning("Could not open delete page. Status: {0}".format(confirm_page.status_code))
            return None

        # Delete the measurement
        delete_page = self.session.post(delete_url,
                                        auth=self.auth,
                                        verify=False,
                                        data={'post': 'yes'},
                                        headers={'X-CSRFToken': confirm_page.cookies['csrftoken'],
                                                 'referer': delete_url}
                                        )
        if delete_page.status_code != 200:
            logger.warning("Something went wrong. Delete page status: {0}".format(
                delete_page.status_code))
            return None

        logger.info("Deleted measurement {0}".format(measurement_id))
        return True

    def available_measurements(self):
        """
        Get a list of available measurement on the SCC.

        Returns a list of Measurement objects, or None if the API response
        is empty.
        """
        measurement_url = urlparse.urljoin(self.api_base_url, 'measurements')
        response = self.session.get(measurement_url, auth=self.auth, verify=False)
        response_dict = response.json()

        measurements = None
        if response_dict:
            measurement_list = response_dict['objects']
            measurements = [Measurement(self.base_url, measurement_dict)
                            for measurement_dict in measurement_list]
            logger.info("Found %s measurements on the SCC." % len(measurements))
        else:
            logger.warning("No response received from the SCC when asked for available measurements.")

        return measurements

    def measurement_id_for_date(self, t1, call_sign='bu', base_number=0):
        """
        Give the first available measurement id on the SCC for the specific date.

        Ids are built as <YYYYMMDD><call_sign><two-digit number>, counting up
        from `base_number` until an id not already listed by the API is found.
        Raises ValueError when the counter reaches 100. Returns None if the
        API response is empty.
        """
        date_str = t1.strftime('%Y%m%d')
        search_url = urlparse.urljoin(self.api_base_url,
                                      'measurements/?id__startswith=%s' % date_str)

        response = self.session.get(search_url, auth=self.auth, verify=False)
        response_dict = response.json()

        measurement_id = None

        if response_dict:
            measurement_list = response_dict['objects']
            existing_ids = [measurement_dict['id'] for measurement_dict in measurement_list]

            measurement_number = base_number
            measurement_id = "%s%s%02i" % (date_str, call_sign, measurement_number)

            while measurement_id in existing_ids:
                measurement_number = measurement_number + 1
                measurement_id = "%s%s%02i" % (date_str, call_sign, measurement_number)
                if measurement_number == 100:
                    raise ValueError('No available measurement id found.')

        return measurement_id


class ApiObject:
    """ A generic class object. """

    def __init__(self, base_url, dict_response):
        """
        Copy every key/value pair of `dict_response` to an attribute.

        `exists` is set to True when `dict_response` is non-empty and to
        False otherwise.
        """
        self.base_url = base_url

        if dict_response:
            # Add the dictionary key value pairs as object properties
            for key, value in dict_response.items():
                setattr(self, key, value)
            self.exists = True
        else:
            self.exists = False


class Measurement(ApiObject):
    """ This class represents the measurement object as returned in the SCC API. """

    @property
    def is_running(self):
        """ Returns True if the processing has not finished. """
        if self.upload == 0:
            return False
        if self.pre_processing == -127:
            return False
        if self.pre_processing == 127:
            if self.processing in [127, -127]:
                return False
        return True

    @property
    def rerun_processing_url(self):
        """ URL of the rerun-optical page of this measurement. """
        url_pattern = urlparse.urljoin(self.base_url,
                                       'data_processing/measurements/{0}/rerun-optical/')
        return url_pattern.format(self.id)

    @property
    def rerun_all_url(self):
        """ URL of the rerun-all page of this measurement. """
        url_pattern = urlparse.urljoin(self.base_url,
                                       'data_processing/measurements/{0}/rerun-all/')
        return url_pattern.format(self.id)

    def __str__(self):
        return "%s: %s, %s, %s" % (self.id,
                                   self.upload,
                                   self.pre_processing,
                                   self.processing)


def _connect(settings):
    """ Build an SCC object from the settings dictionary and log it in. """
    scc = SCC(settings['basic_credentials'], settings['output_dir'], settings['base_url'])
    scc.login(settings['website_credentials'])
    return scc


def upload_file(filename, system_id, settings):
    """ Shortcut function to upload a file to the SCC. """
    logger.info("Uploading file %s, using sytem %s" % (filename, system_id))

    scc = _connect(settings)
    measurement_id = scc.upload_file(filename, system_id)
    scc.logout()
    return measurement_id


def process_file(filename, system_id, settings):
    """ Shortcut function to process a file to the SCC. """
    logger.info("Processing file %s, using sytem %s" % (filename, system_id))

    scc = _connect(settings)
    measurement = scc.process(filename, system_id)
    scc.logout()
    return measurement


def delete_measurement(measurement_id, settings):
    """ Shortcut function to delete a measurement from the SCC. """
    logger.info("Deleting %s" % measurement_id)

    scc = _connect(settings)
    scc.delete_measurement(measurement_id)
    scc.logout()


def rerun_all(measurement_id, monitor, settings):
    """ Shortcut function to rerun all processing of a measurement on the SCC. """
    logger.info("Rerunning all products for %s" % measurement_id)

    scc = _connect(settings)
    scc.rerun_all(measurement_id, monitor)
    scc.logout()


def rerun_processing(measurement_id, monitor, settings):
    """ Shortcut function to rerun the (optical) processing of a measurement on the SCC. """
    logger.info("Rerunning (optical) processing for %s" % measurement_id)

    scc = _connect(settings)
    scc.rerun_processing(measurement_id, monitor)
    scc.logout()


def import_settings(config_file_path):
    """
    Read the configuration file.

    The file should be in YAML syntax. The process exits with status 1 if
    the path is not a file or the file cannot be parsed. The
    'basic_credentials' and 'website_credentials' entries are converted to
    tuples before the settings dictionary is returned.
    """
    if not os.path.isfile(config_file_path):
        logger.error("Wrong path for configuration file (%s)" % config_file_path)
        sys.exit(1)

    with open(config_file_path) as yaml_file:
        try:
            settings = yaml.safe_load(yaml_file)
            logger.debug("Read settings file(%s)" % config_file_path)
        except yaml.YAMLError:
            # Only parsing problems are reported here; e.g. a Ctrl-C must
            # not be mistaken for a malformed file.
            logger.error("Could not parse YAML file (%s)" % config_file_path)
            sys.exit(1)

    # YAML limitation: does not read tuples
    settings['basic_credentials'] = tuple(settings['basic_credentials'])
    settings['website_credentials'] = tuple(settings['website_credentials'])
    return settings


def main():
    """ Command line entry point: parse the arguments and run the requested action. """
    # Define the command line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument("config", help="Path to configuration file")
    parser.add_argument("filename", nargs='?', help="Measurement file name or path.", default='')
    parser.add_argument("system", nargs='?', help="Processing system id.", default=0)
    parser.add_argument("-p", "--process", help="Wait for the results of the processing.",
                        action="store_true")
    parser.add_argument("--delete", help="Measurement ID to delete.")
    parser.add_argument("--rerun-all", help="Measurement ID to rerun.")
    parser.add_argument("--rerun-processing", help="Measurement ID to rerun processing routines.")

    # Verbosity settings from http://stackoverflow.com/a/20663028
    parser.add_argument('-d', '--debug', help="Print debugging information.", action="store_const",
                        dest="loglevel", const=logging.DEBUG, default=logging.INFO,
                        )
    parser.add_argument('-s', '--silent', help="Show only warning and error messages.",
                        action="store_const", dest="loglevel", const=logging.WARNING
                        )

    args = parser.parse_args()

    # Get the logger with the appropriate level
    logging.basicConfig(format='%(levelname)s: %(message)s', level=args.loglevel)

    settings = import_settings(args.config)

    # If the arguments are OK, try to log-in to SCC and upload.
    if args.delete:
        # If the delete is provided, do nothing else
        delete_measurement(args.delete, settings)
    elif args.rerun_all:
        rerun_all(args.rerun_all, args.process, settings)
    elif args.rerun_processing:
        rerun_processing(args.rerun_processing, args.process, settings)
    else:
        if (args.filename == '') or (args.system == 0):
            parser.error('Provide a valid filename and system parameters.\nRun with -h for help.\n')

        if args.process:
            process_file(args.filename, args.system, settings)
        else:
            upload_file(args.filename, args.system, settings)