victor@7: import requests
moritz@29:
ioannis@17: try:
ioannis@17: import urllib.parse as urlparse # Python 3
ioannis@17: except ImportError:
ioannis@17: import urlparse # Python 2
victor@7:
victor@7: import argparse
madrouin@24: import datetime
madrouin@24: import logging
victor@7: import os
victor@7: import re
ioannis@32: from io import BytesIO
madrouin@24: import sys
victor@7: import time
i@40:
victor@7: from zipfile import ZipFile
madrouin@24:
i@14: import yaml
victor@7:
# Silence urllib3 warnings globally. NOTE(review): presumably this hides the
# insecure-request warning triggered by the session's verify=False setting -- confirm.
madrouin@24: requests.packages.urllib3.disable_warnings()
# Module-level logger used by the code in this file.
i@14: logger = logging.getLogger(__name__)
victor@7:
# The regex to find the measurement id from the measurement page.
# This should be read from the uploaded file, but would require an extra NetCDF module.
# {12,15} handles both old- and new-style measurement ids.
# NOTE(review): this literal was damaged (split over two lines, group name lost).
# It looks like HTML tags around the text were stripped as well; if so, restore
# them from version control so the pattern is anchored to the page heading.
regex = r"Measurement (?P<measurement_id>.{12,15}) "
victor@7:
victor@7:
victor@7: class SCC:
madrouin@24: """A simple class that will attempt to upload a file on the SCC server.
i@14:
victor@7: The uploading is done by simulating a normal browser session. In the current
madrouin@20: version no check is performed, and no feedback is given if the upload
madrouin@20: was successful. If everything is setup correctly, it will work.
victor@7: """
victor@7:
def __init__(self, auth, output_dir, base_url):
    """ Store the connection settings, open an HTTP session and build all SCC URLs.

    auth is kept on the instance and is also set as the session's auth,
    output_dir is stored as the root directory for downloaded files, and
    base_url is the address every page and API URL below is resolved against.
    """
    def site_url(path):
        # Resolve a path relative to the SCC base URL.
        return urlparse.urljoin(base_url, path)

    self.auth = auth
    self.output_dir = output_dir
    self.base_url = base_url

    # One shared session; certificate verification is switched off for it.
    session = requests.Session()
    session.auth = auth
    session.verify = False
    self.session = session

    # Account pages and measurement listing.
    self.login_url = site_url('accounts/login/')
    self.logout_url = site_url('accounts/logout/')
    self.list_measurements_url = site_url('data_processing/measurements/')

    # Upload form.
    self.upload_url = site_url('data_processing/measurements/quick/')

    # Download patterns; {0} is filled with the measurement id.
    self.download_hirelpp_pattern = site_url('data_processing/measurements/{0}/download-hirelpp/')
    self.download_cloudmask_pattern = site_url('data_processing/measurements/{0}/download-cloudmask/')
    self.download_preprocessed_pattern = site_url('data_processing/measurements/{0}/download-preprocessed/')
    self.download_optical_pattern = site_url('data_processing/measurements/{0}/download-optical/')
    self.download_graph_pattern = site_url('data_processing/measurements/{0}/download-plots/')
    self.download_elic_pattern = site_url('data_processing/measurements/{0}/download-elic/')

    # Deletion goes through the admin pages.
    self.delete_measurement_pattern = site_url('admin/database/measurements/{0}/delete/')

    # API end points, all resolved against the API root.
    api_root = site_url('api/v1/')
    self.api_base_url = api_root
    self.api_measurement_pattern = urlparse.urljoin(api_root, 'measurements/{0}/')
    self.api_measurements_url = urlparse.urljoin(api_root, 'measurements')
    self.api_sounding_search_pattern = urlparse.urljoin(api_root, 'sounding_files/?filename={0}')
    self.api_lidarratio_search_pattern = urlparse.urljoin(api_root, 'lidarratio_files/?filename={0}')
    self.api_overlap_search_pattern = urlparse.urljoin(api_root, 'overlap_files/?filename={0}')
i@14:
def login(self, credentials):
    """ Log in to the SCC web interface.

    credentials is indexed as (username, password). The login page is fetched
    first and its 'csrftoken' cookie is sent back with the credentials.
    Returns the response of the credential submission and raises
    self.PageNotAccessibleError when the login page itself cannot be fetched.
    """
    logger.debug("Attempting to login to SCC, username %s." % credentials[0])
    form_data = {'username': credentials[0],
                 'password': credentials[1]}

    logger.debug("Accessing login page at %s." % self.login_url)

    # The login page is requested only to obtain the CSRF cookie.
    page = self.session.get(self.login_url)
    if not page.ok:
        raise self.PageNotAccessibleError('Could not access login pages. Status code %s' % page.status_code)

    logger.debug("Submitting credentials.")
    csrf_headers = {'X-CSRFToken': page.cookies['csrftoken'],
                    'referer': self.login_url}
    return self.session.post(self.login_url, data=form_data, headers=csrf_headers)
victor@7:
def logout(self):
    """ Log out from the SCC by requesting the logout page; return that response. """
    response = self.session.get(self.logout_url, stream=True)
    return response
victor@7:
def upload_file(self, filename, system_id, rs_filename=None, ov_filename=None, lr_filename=None):
    """ Upload a file for processing with a specific system.

    filename is the measurement file and system_id the value sent as the
    'system' form field. rs_filename, ov_filename and lr_filename are optional
    sounding, overlap and lidar ratio files; each one is looked up with
    self.get_ancillary() and only attached when it is not already on the SCC.

    Returns the measurement id found on the page the upload redirects to, or
    False when the request fails, the file is rejected, or no id can be found.
    All files opened for the upload are closed before returning.
    """
    # Get submit page; its cookies carry the CSRF token needed for the POST.
    upload_page = self.session.get(self.upload_url)

    upload_data = {'system': system_id}
    files = {}

    # (local path, type passed to get_ancillary, upload form field, label used in log messages)
    ancillaries = (
        (rs_filename, 'sounding', 'sounding_file', 'sounding'),
        (ov_filename, 'overlap', 'overlap_file', 'overlap'),
        (lr_filename, 'lidarratio', 'lidar_ratio_file', 'lidar ratio'),
    )

    try:
        files['data'] = open(filename, 'rb')

        for path, file_type, form_field, label in ancillaries:
            if path is None:
                continue

            ancillary_file, _ = self.get_ancillary(path, file_type)

            if ancillary_file.already_on_scc:
                logger.warning("%s file %s already on the SCC with id %s. Ignoring it.",
                               label.capitalize(), ancillary_file.filename, ancillary_file.id)
            else:
                logger.debug('Adding %s file %s', label, path)
                files[form_field] = open(path, 'rb')

        logger.info("Uploading of file(s) %s started.", filename)

        upload_submit = self.session.post(self.upload_url,
                                          data=upload_data,
                                          files=files,
                                          headers={'X-CSRFToken': upload_page.cookies['csrftoken'],
                                                   'referer': self.upload_url})
    finally:
        # Close every handle opened above, whether or not the upload succeeded.
        for handle in files.values():
            handle.close()

    if upload_submit.status_code != 200:
        logger.warning("Connection error. Status code: %s", upload_submit.status_code)
        return False

    # A successful upload redirects to a new page; staying on the form means rejection.
    if upload_submit.url == self.upload_url:
        logger.error("Uploaded file(s) rejected! Try to upload manually to see the error.")
        return False

    matches = re.findall(regex, upload_submit.text)
    if not matches:
        # Avoid an IndexError when the resulting page does not show an id.
        logger.error("File(s) uploaded, but no measurement id was found on the resulting page.")
        return False

    measurement_id = matches[0]
    logger.info("Successfully uploaded measurement with id %s.", measurement_id)
    return measurement_id
victor@7:
def download_files(self, measurement_id, subdir, download_url):
    """ Download a zip archive from download_url and unpack it locally.

    The archive members are written, flattened to their base names, into
    <output_dir>/<measurement_id>/<subdir>, which is created when missing.
    This method is used to download preprocessed files, optical files etc.

    Raises Exception when the server does not answer the download request
    with an OK status.
    """
    # TODO: Make downloading more robust (e.g. in case that files do not exist on server).
    request = self.session.get(download_url, stream=True)

    if not request.ok:
        raise Exception("Could not download files for measurement '%s'" % measurement_id)

    # Create the dir if it does not exist
    local_dir = os.path.join(self.output_dir, measurement_id, subdir)
    if not os.path.exists(local_dir):
        os.makedirs(local_dir)

    # Read the response chunk by chunk, needed if the file is big.
    memory_file = BytesIO()
    for chunk in request.iter_content(chunk_size=1024):
        if chunk:  # filter out keep-alive new chunks
            memory_file.write(chunk)

    with ZipFile(memory_file) as zip_file:
        for zipped_name in zip_file.namelist():
            basename = os.path.basename(zipped_name)

            # Directory entries end with '/' and have no base name; trying to
            # open them as files would fail, so skip them.
            if not basename:
                continue

            local_file = os.path.join(local_dir, basename)
            with open(local_file, 'wb') as f:
                f.write(zip_file.read(zipped_name))
victor@7:
def download_hirelpp(self, measurement_id):
    """ Fetch the HiRElPP files of a measurement into its 'hirelpp' sub-directory. """
    url = self.download_hirelpp_pattern.format(measurement_id)
    self.download_files(measurement_id, 'hirelpp', url)
ioannis@34:
def download_cloudmask(self, measurement_id):
    """ Fetch the cloudmask files of a measurement into its 'cloudmask' sub-directory. """
    url = self.download_cloudmask_pattern.format(measurement_id)
    self.download_files(measurement_id, 'cloudmask', url)
ioannis@34:
def download_preprocessed(self, measurement_id):
    """ Fetch the preprocessed files of a measurement into its 'scc_preprocessed' sub-directory. """
    url = self.download_preprocessed_pattern.format(measurement_id)
    self.download_files(measurement_id, 'scc_preprocessed', url)
victor@7:
def download_optical(self, measurement_id):
    """ Fetch the optical files of a measurement into its 'scc_optical' sub-directory. """
    url = self.download_optical_pattern.format(measurement_id)
    self.download_files(measurement_id, 'scc_optical', url)
victor@7:
def download_graphs(self, measurement_id):
    """ Fetch the profile graphs of a measurement into its 'scc_plots' sub-directory. """
    url = self.download_graph_pattern.format(measurement_id)
    self.download_files(measurement_id, 'scc_plots', url)
victor@7:
def download_elic(self, measurement_id):
    """ Fetch the ELIC files of a measurement into its 'elic' sub-directory. """
    url = self.download_elic_pattern.format(measurement_id)
    self.download_files(measurement_id, 'elic', url)
ioannis@32:
def rerun_processing(self, measurement_id, monitor=True):
    """ Ask the SCC to rerun the processing of an existing measurement.

    The measurement is looked up first; when it is found, its
    rerun_processing_url is requested. With monitor set, the processing is
    then followed with monitor_processing(). Nothing is returned.
    """
    measurement, _ = self.get_measurement(measurement_id)
    if not measurement:
        return

    response = self.session.get(measurement.rerun_processing_url, stream=True)
    if response.status_code != 200:
        logger.error(
            "Could not rerun processing for %s. Status code: %s" % (measurement_id, response.status_code))
        return

    if monitor:
        self.monitor_processing(measurement_id)
victor@7:
def rerun_all(self, measurement_id, monitor=True):
    """ Ask the SCC to rerun all processing steps of an existing measurement.

    The measurement is looked up first; when it is found, its rerun_all_url
    is requested. With monitor set, the processing is then followed with
    monitor_processing(). Nothing is returned.
    """
    logger.debug("Started rerun_all procedure.")

    logger.debug("Getting measurement %s" % measurement_id)
    measurement, _ = self.get_measurement(measurement_id)
    if not measurement:
        return

    logger.debug("Attempting to rerun all processing through %s." % measurement.rerun_all_url)
    response = self.session.get(measurement.rerun_all_url, stream=True)

    if response.status_code != 200:
        logger.error("Could not rerun pre processing for %s. Status code: %s" %
                     (measurement_id, response.status_code))
        return

    if monitor:
        self.monitor_processing(measurement_id)
victor@7:
def process(self, filename, system_id, monitor, rs_filename=None, lr_filename=None, ov_filename=None):
    """ Upload a file for processing and, optionally, follow the processing.

    The file (plus the optional sounding, lidar ratio and overlap files) is
    handed to upload_file(). When the upload yields a measurement id and
    monitor is set, the result of monitor_processing() is returned;
    otherwise None is returned.
    """
    logger.info("--- Processing started on %s. ---" % datetime.datetime.now())

    logger.info("--- Uploading file")
    ancillary_files = {'rs_filename': rs_filename,
                       'lr_filename': lr_filename,
                       'ov_filename': ov_filename}
    measurement_id = self.upload_file(filename, system_id, **ancillary_files)

    if not (measurement_id and monitor):
        return None

    logger.info("--- Monitoring processing")
    return self.monitor_processing(measurement_id)
victor@7:
def monitor_processing(self, measurement_id, poll_interval=10, max_errors=6):
    """ Monitor the processing progress of a measurement id.

    First waits for the measurement to show up in the API, asking up to
    max_errors times with poll_interval seconds before each attempt; if it
    never shows up the program exits with status 1. Then polls every
    poll_interval seconds while the measurement reports is_running, and
    finally downloads the products of every module whose status equals 127.

    Returns the last measurement object received, or None when no measurement
    data was returned or when max_errors consecutive refreshes fail while
    polling.
    """
    # try to wait for measurement to appear in API (deals with error 404 right after upload)
    measurement = None
    error_count = 0
    logger.info("Looking for measurement %s in SCC", measurement_id)
    while error_count < max_errors:
        time.sleep(poll_interval)
        measurement, status = self.get_measurement(measurement_id)
        if status == 200:
            break
        logger.error("Measurement not found. waiting %ds", poll_interval)
        error_count += 1

    if error_count == max_errors:
        logger.critical("Measurement %s doesn't seem to exist", measurement_id)
        sys.exit(1)

    logger.info('Measurement %s found', measurement_id)

    if measurement is not None:
        failed_refreshes = 0
        while measurement.is_running:
            logger.info("Measurement is being processed. Please wait.")
            time.sleep(poll_interval)
            refreshed, status = self.get_measurement(measurement_id)

            if refreshed is None:
                # A failed refresh must not replace the last known state,
                # otherwise the loop condition would fail on None.
                failed_refreshes += 1
                if failed_refreshes >= max_errors:
                    logger.error("Giving up monitoring measurement %s after %s failed status requests.",
                                 measurement_id, failed_refreshes)
                    return None
                logger.warning("Could not refresh status of measurement %s (status code %s). Retrying.",
                               measurement_id, status)
                continue

            failed_refreshes = 0
            measurement = refreshed

        logger.info("Measurement processing finished.")

        # NOTE(review): 127 is presumably the status value of a successfully
        # finished module -- confirm against the SCC documentation.
        if measurement.hirelpp == 127:
            logger.info("Downloading hirelpp files.")
            self.download_hirelpp(measurement_id)
        if measurement.cloudmask == 127:
            logger.info("Downloading cloudmask files.")
            self.download_cloudmask(measurement_id)
        if measurement.elpp == 127:
            logger.info("Downloading preprocessed files.")
            self.download_preprocessed(measurement_id)
        if measurement.elda == 127:
            logger.info("Downloading optical files.")
            self.download_optical(measurement_id)
            logger.info("Downloading graphs.")
            self.download_graphs(measurement_id)
        if measurement.elic == 127:
            logger.info("Downloading elic files.")
            self.download_elic(measurement_id)
        logger.info("--- Processing finished. ---")

    return measurement
victor@7:
def get_measurement(self, measurement_id):
    """ Look up a measurement through the API.

    Returns a (measurement, status_code) pair. measurement is a Measurement
    built from the JSON answer, or None when the request is not OK or the
    answer is empty; status_code is the HTTP status of the API response.
    """
    measurement_url = self.api_measurement_pattern.format(measurement_id)
    logger.debug("Measurement API URL: %s" % measurement_url)

    response = self.session.get(measurement_url)

    if not response.ok:
        logger.error('Could not access API. Status code %s.' % response.status_code)
        return None, response.status_code

    payload = response.json()

    if not payload:
        logger.error("No measurement with id %s found on the SCC." % measurement_id)
        return None, response.status_code

    return Measurement(self.base_url, payload), response.status_code
victor@7:
def delete_measurement(self, measurement_id):
    """ Delete the measurement with the provided measurement id.

    The user should have the appropriate permissions. The deletion is
    performed directly through the web interface and NOT through the API:
    the confirmation page is opened and then confirmed with a POST.

    Returns True when the measurement was deleted and None otherwise.
    """
    # Make sure there is something to delete.
    measurement, _ = self.get_measurement(measurement_id)
    if measurement is None:
        logger.warning("Nothing to delete.")
        return None

    # Open the page asking to confirm the deletion.
    delete_url = self.delete_measurement_pattern.format(measurement_id)
    confirm_page = self.session.get(delete_url)
    if confirm_page.status_code != 200:
        logger.warning("Could not open delete page. Status: {0}".format(confirm_page.status_code))
        return None

    # Confirm, sending back the CSRF token found in the page's cookies.
    csrf_headers = {'X-CSRFToken': confirm_page.cookies['csrftoken'],
                    'referer': delete_url}
    delete_page = self.session.post(delete_url, data={'post': 'yes'}, headers=csrf_headers)
    if not delete_page.ok:
        logger.warning("Something went wrong. Delete page status: {0}".format(
            delete_page.status_code))
        return None

    logger.info("Deleted measurement {0}".format(measurement_id))
    return True
victor@7:
def available_measurements(self):
    """ Get a list of available measurement on the SCC.

    Returns a list of Measurement objects built from the 'objects' entry of
    the API answer, or None when the API request is not OK or the answer is
    empty.
    """
    response = self.session.get(self.api_measurements_url)

    # Check the status before decoding, as get_measurement() does: an error
    # page is not guaranteed to contain JSON.
    if not response.ok:
        logger.error('Could not access API. Status code %s.', response.status_code)
        return None

    response_dict = response.json()

    if not response_dict:
        logger.warning("No response received from the SCC when asked for available measurements.")
        return None

    measurements = [Measurement(self.base_url, measurement_dict)
                    for measurement_dict in response_dict['objects']]
    logger.info("Found %s measurements on the SCC.", len(measurements))
    return measurements
victor@7:
moritz@29: def list_measurements(self, station=None, system=None, start=None, stop=None, upload_status=None,
moritz@29: processing_status=None, optical_processing=None):
moritz@29:
i@31: # TODO: Change this to work through the API
i@31:
moritz@29: # Need to set to empty string if not specified, we won't get any results
moritz@29: params = {
moritz@29: "station": station if station is not None else "",
moritz@29: "system": system if system is not None else "",
moritz@29: "stop": stop if stop is not None else "",
moritz@29: "start": start if start is not None else "",
moritz@29: "upload_status": upload_status if upload_status is not None else "",
moritz@29: "preprocessing_status": processing_status if processing_status is not None else "",
moritz@29: "optical_processing_status": optical_processing if optical_processing is not None else ""
moritz@29: }
i@31:
i@31: response_txt = self.session.get(self.list_measurements_url, params=params).text
moritz@29: tbl_rgx = re.compile(r'', re.DOTALL)
moritz@29: entry_rgx = re.compile(r'(.*?)
', re.DOTALL)
moritz@29: measurement_rgx = re.compile(
moritz@29: r'.*?]*>(\w+).*? | .*? | ([\w-]+ [\w:]+) | .* |