scc_access.py

Fri, 24 Nov 2017 18:50:29 +0200

author
Iannis <ulalume3@yahoo.com>
date
Fri, 24 Nov 2017 18:50:29 +0200
changeset 6
c02712d2ab9e
parent 4
809c63be8a40
permissions
-rw-r--r--

Updated script according to new database/api changes.

Added basic logging functions, instead of print statements.

#!/usr/bin/env python
"""
The MIT License (MIT)

Copyright (c) 2015, Ioannis Binietoglou

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

__version__ = "0.6.0"

# Try to read the settings from the settings.py file.
# Only a failed import is translated into the friendlier message below; any
# other problem inside settings.py (e.g. a syntax error) is left to propagate
# unchanged, so that the real cause is visible.
try:
    from settings import *
except ImportError:
    raise ImportError(
        """A settings file (settings.py) is required to run the script. 
         You can use settings.sample.py as a template.""")

import requests
requests.packages.urllib3.disable_warnings()

import urlparse
import argparse
import os
import re
import time
import StringIO
from zipfile import ZipFile
import datetime
import logging

logger = logging.getLogger(__name__)


# Construct the absolute URLs
# BASE_URL is not defined in this file; it is expected to be provided by the
# star-import of the settings module at the top of the script.
# URLs containing a '{0}' placeholder are templates: they are completed with
# a measurement id through str.format() before being requested.
LOGIN_URL = urlparse.urljoin(BASE_URL, 'accounts/login/')
UPLOAD_URL = urlparse.urljoin(BASE_URL, 'data_processing/measurements/quick/')
DOWNLOAD_PREPROCESSED = urlparse.urljoin(BASE_URL, 'data_processing/measurements/{0}/download-preprocessed/')
DOWNLOAD_OPTICAL = urlparse.urljoin(BASE_URL, 'data_processing/measurements/{0}/download-optical/')
DOWNLOAD_GRAPH = urlparse.urljoin(BASE_URL, 'data_processing/measurements/{0}/download-plots/')
RERUN_ALL = urlparse.urljoin(BASE_URL, 'data_processing/measurements/{0}/rerun-all/')
# Note: "rerun processing" maps to the 'rerun-optical' endpoint.
RERUN_PROCESSING = urlparse.urljoin(BASE_URL, 'data_processing/measurements/{0}/rerun-optical/')

# Deletion goes through the admin pages of the web interface, not the API.
DELETE_MEASUREMENT = urlparse.urljoin(BASE_URL, 'admin/database/measurements/{0}/delete/')
# Root of the JSON API; the API query URLs are built relative to it.
API_BASE_URL = urlparse.urljoin(BASE_URL, 'api/v1/')

# The regex to find the measurement id from the measurement page
# This should be read from the uploaded file, but would require an extra NetCDF module.
# The named group 'measurement_id' captures exactly 12 characters.
regex = "<h3>Measurement (?P<measurement_id>.{12}) <small>"


class SCC:
    """ A simple client for the SCC web site and its API.

    All requests share a single ``requests`` session, so the cookies obtained
    by `login` are reused by the later calls. Uploads, downloads, reruns and
    deletions are performed by simulating a normal browser session, while
    status queries use the JSON API. Problems are reported through the module
    logger and signalled to the caller with ``False`` or ``None`` return
    values.

    NOTE: every request is sent with ``verify=False``, i.e. the certificate of
    the server is not checked.
    """

    def __init__(self, auth=BASIC_LOGIN, output_dir=OUTPUT_DIR):
        """ Create a client with a fresh session.

        auth: value passed as ``auth=`` to every request (presumably the HTTP
            basic-auth credentials of the site; see the settings file).
        output_dir: base directory under which downloaded files are stored.
        """
        self.auth = auth
        self.output_dir = output_dir
        self.session = requests.Session()

    def login(self, credentials=DJANGO_LOGIN):
        """ Login to the website.

        credentials: sequence whose first item is the username and whose
            second item is the password.

        Returns the response of the login POST. The response is not inspected
        here, so the caller has to check it if it needs to know whether the
        login succeeded.
        """
        logger.debug("Attempting to login to SCC, username %s.", credentials[0])
        self.login_credentials = {'username': credentials[0],
                                  'password': credentials[1]}

        logger.debug("Accessing login page at %s.", LOGIN_URL)

        # The login page is fetched first because its 'csrftoken' cookie has
        # to be sent back together with the credentials.
        login_page = self.session.get(LOGIN_URL,
                                      auth=self.auth, verify=False)

        logger.debug("Submiting credentials.")
        login_submit = self.session.post(LOGIN_URL,
                                         data=self.login_credentials,
                                         headers={'X-CSRFToken': login_page.cookies['csrftoken'],
                                                  'referer': LOGIN_URL},
                                         verify=False,
                                         auth=self.auth)
        return login_submit

    def logout(self):
        """ Placeholder: currently nothing is done to end the session. """
        pass

    def upload_file(self, filename, system_id):
        """ Upload a filename for processing with a specific system.

        filename: path of the file to upload.
        system_id: id of the system, sent as the 'system' form field.

        Returns the measurement id (a string) if the upload is successful and
        False otherwise. The reason of a failure is written to the log.
        """
        # Get the submit page; its 'csrftoken' cookie is needed for the POST.
        upload_page = self.session.get(UPLOAD_URL,
                                       auth=self.auth,
                                       verify=False)

        upload_data = {'system': system_id}

        logger.info("Uploading of file %s started.", filename)

        # The context manager guarantees that the file handle is closed, even
        # if the request raises an exception.
        with open(filename, 'rb') as data_file:
            upload_submit = self.session.post(UPLOAD_URL,
                                              data=upload_data,
                                              files={'data': data_file},
                                              headers={'X-CSRFToken': upload_page.cookies['csrftoken'],
                                                       'referer': UPLOAD_URL},
                                              verify=False,
                                              auth=self.auth)

        if upload_submit.status_code != 200:
            logger.warning("Connection error. Status code: %s", upload_submit.status_code)
            return False

        # A successful upload redirects to a new page. Ending up on the
        # upload page again means that the form was rejected.
        if upload_submit.url == UPLOAD_URL:
            logger.error("Uploaded file rejected! Try to upload manually to see the error.")
            return False

        # Read the measurement id from the page we were redirected to.
        match = re.search(regex, upload_submit.text)
        if match is None:
            logger.error("Upload was accepted, but no measurement id was found in the response page.")
            return False

        measurement_id = match.group('measurement_id')
        logger.info("Successfully uploaded measurement with id %s.", measurement_id)
        return measurement_id

    def download_files(self, measurement_id, subdir, download_url):
        """ Downloads a zip archive from the download_url and extracts its
        files to the specified subdir. This method is used to download
        preprocessed files, optical files etc.

        The files are written to <output_dir>/<measurement_id>/<subdir>/. Any
        directory structure inside the archive is dropped: only the base name
        of each archive member is used.
        """
        # Get the file
        request = self.session.get(download_url, auth=self.auth,
                                   verify=False,
                                   stream=True)

        # Create the dir if it does not exist
        local_dir = os.path.join(self.output_dir, measurement_id, subdir)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)

        # Read the response chunk by chunk. The complete archive is still
        # collected in memory, because ZipFile needs a seekable file object.
        memory_file = StringIO.StringIO()

        for chunk in request.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                memory_file.write(chunk)

        zip_file = ZipFile(memory_file)

        for ziped_name in zip_file.namelist():
            basename = os.path.basename(ziped_name)

            # Directory entries end with '/' and have an empty base name;
            # there is nothing to write for them.
            if not basename:
                continue

            local_file = os.path.join(local_dir, basename)

            with open(local_file, 'wb') as f:
                f.write(zip_file.read(ziped_name))

    def download_preprocessed(self, measurement_id):
        """ Download preprocessed files for the measurement id. """
        download_url = DOWNLOAD_PREPROCESSED.format(measurement_id)
        self.download_files(measurement_id, 'scc_preprocessed', download_url)

    def download_optical(self, measurement_id):
        """ Download optical files for the measurement id. """
        download_url = DOWNLOAD_OPTICAL.format(measurement_id)
        self.download_files(measurement_id, 'scc_optical', download_url)

    def download_graphs(self, measurement_id):
        """ Download profile graphs for the measurement id. """
        download_url = DOWNLOAD_GRAPH.format(measurement_id)
        self.download_files(measurement_id, 'scc_plots', download_url)

    def rerun_processing(self, measurement_id, monitor=True):
        """ Ask the SCC to rerun the (optical) processing of a measurement.

        If monitor is true, wait for the processing to finish and download
        the produced files (see monitor_processing). Nothing is returned.
        """
        measurement = self.get_measurement(measurement_id)

        if not measurement:
            return

        request = self.session.get(measurement.rerun_processing_url, auth=self.auth,
                                   verify=False,
                                   stream=True)

        if request.status_code != 200:
            logger.error("Could not rerun processing for %s. Status code: %s",
                         measurement_id, request.status_code)
            return

        if monitor:
            self.monitor_processing(measurement_id)

    def rerun_all(self, measurement_id, monitor=True):
        """ Ask the SCC to rerun all the processing steps of a measurement.

        If monitor is true, wait for the processing to finish and download
        the produced files (see monitor_processing). Nothing is returned.
        """
        logger.debug("Started rerun_all procedure.")

        logger.debug("Getting measurement %s", measurement_id)
        measurement = self.get_measurement(measurement_id)

        if not measurement:
            return

        logger.debug("Attempting to rerun all processing through %s.", measurement.rerun_all_url)

        request = self.session.get(measurement.rerun_all_url, auth=self.auth,
                                   verify=False,
                                   stream=True)

        if request.status_code != 200:
            logger.error("Could not rerun pre processing for %s. Status code: %s",
                         measurement_id, request.status_code)
            return

        if monitor:
            self.monitor_processing(measurement_id)

    def process(self, filename, system_id):
        """ Upload a file for processing and wait for the processing to finish.
        If the processing is successful, it will download all produced files.

        Returns the final Measurement object, or None if the upload failed or
        the measurement could not be found on the SCC.
        """
        logger.info("--- Processing started on %s. ---", datetime.datetime.now())

        measurement_id = self.upload_file(filename, system_id)

        # upload_file returns False on failure. Without a measurement id
        # there is nothing that can be monitored.
        if not measurement_id:
            logger.error("Processing aborted: file %s could not be uploaded.", filename)
            return None

        return self.monitor_processing(measurement_id)

    def monitor_processing(self, measurement_id):
        """ Monitor the processing progress of a measurement id.

        The status is polled every 10 seconds, for as long as the measurement
        reports that it is running. Afterwards the preprocessed files are
        downloaded if the pre_processing status is 127, and the optical files
        and graphs are downloaded if the processing status is 127.

        Returns the last Measurement object, or None if the measurement was
        not found.
        """
        measurement = self.get_measurement(measurement_id)
        if measurement is None:
            return None

        while measurement.is_running:
            logger.info("Measurement is being processed (status: %s, %s, %s). Please wait.",
                        measurement.upload,
                        measurement.pre_processing,
                        measurement.processing)
            time.sleep(10)
            measurement = self.get_measurement(measurement_id)
            if measurement is None:
                # The measurement is no longer reported by the API;
                # get_measurement has already logged the error.
                return None

        logger.info("Measurement processing finished (status: %s, %s, %s).",
                    measurement.upload,
                    measurement.pre_processing,
                    measurement.processing)

        if measurement.pre_processing == 127:
            logger.info("Downloading preprocessed files.")
            self.download_preprocessed(measurement_id)

        if measurement.processing == 127:
            logger.info("Downloading optical files.")
            self.download_optical(measurement_id)
            logger.info("Downloading graphs.")
            self.download_graphs(measurement_id)

        logger.info("--- Processing finished. ---")
        return measurement

    def get_status(self, measurement_id):
        """ Get the processing status for a measurement id through the API.

        Returns the tuple (upload, pre_processing, processing) of the first
        matching measurement, or None if no measurement was found.
        """
        measurement_url = urlparse.urljoin(API_BASE_URL, 'measurements/?id__exact=%s' % measurement_id)

        response = self.session.get(measurement_url,
                                    auth=self.auth,
                                    verify=False)

        measurement_list = response.json()['objects']

        if not measurement_list:
            logger.error("No measurement with id %s found on the SCC.", measurement_id)
            return None

        measurement = Measurement(measurement_list[0])
        return (measurement.upload, measurement.pre_processing, measurement.processing)

    def get_measurement(self, measurement_id):
        """ Get the measurement with the given id through the API.

        Returns a Measurement object, or None if the API response is empty.
        """
        measurement_url = urlparse.urljoin(API_BASE_URL, 'measurements/%s/' % measurement_id)

        response = self.session.get(measurement_url,
                                    auth=self.auth,
                                    verify=False)

        response_dict = response.json()

        if not response_dict:
            logger.error("No measurement with id %s found on the SCC.", measurement_id)
            return None

        return Measurement(response_dict)

    def delete_measurement(self, measurement_id):
        """ Deletes a measurement with the provided measurement id. The user
        should have the appropriate permissions.

        The procedure is performed directly through the web interface and
        NOT through the API.

        Returns True if the measurement was deleted and None otherwise.
        """
        # Get the measurement object
        measurement = self.get_measurement(measurement_id)

        # Check that it exists
        if measurement is None:
            logger.warning("Nothing to delete.")
            return None

        # Go to the page confirming the deletion
        delete_url = DELETE_MEASUREMENT.format(measurement.id)

        confirm_page = self.session.get(delete_url,
                                        auth=self.auth,
                                        verify=False)

        # Check that the page opened properly
        if confirm_page.status_code != 200:
            logger.warning("Could not open delete page. Status: {0}".format(confirm_page.status_code))
            return None

        # Delete the measurement by submitting the confirmation form
        delete_page = self.session.post(delete_url,
                                        auth=self.auth,
                                        verify=False,
                                        data={'post': 'yes'},
                                        headers={'X-CSRFToken': confirm_page.cookies['csrftoken'],
                                                 'referer': delete_url}
                                        )
        if delete_page.status_code != 200:
            logger.warning("Something went wrong. Delete page status: {0}".format(
                delete_page.status_code))
            return None

        logger.info("Deleted measurement {0}".format(measurement_id))
        return True

    def available_measurements(self):
        """ Get a list of available measurements on the SCC.

        Returns a list of Measurement objects, or None if the API response is
        empty.
        """
        measurement_url = urlparse.urljoin(API_BASE_URL, 'measurements')
        response = self.session.get(measurement_url,
                                    auth=self.auth,
                                    verify=False)
        response_dict = response.json()

        if not response_dict:
            logger.warning("No response received from the SCC when asked for available measurements.")
            return None

        measurements = [Measurement(measurement_dict) for measurement_dict in response_dict['objects']]
        logger.info("Found %s measurements on the SCC.", len(measurements))
        return measurements

    def measurement_id_for_date(self, t1, call_sign='bu', base_number=0):
        """ Give the first available measurement id on the SCC for the specific
        date.

        The candidate ids have the form <YYYYMMDD><call_sign><NN>, where NN is
        a two-digit number starting from base_number.

        t1: object with a strftime method (e.g. a datetime) giving the date.
        call_sign: text placed between the date and the number.
        base_number: first number to try.

        Returns the id, or None if the API response is empty. Raises
        ValueError if the number reaches 100 without finding a free id.
        """
        date_str = t1.strftime('%Y%m%d')
        search_url = urlparse.urljoin(API_BASE_URL, 'measurements/?id__startswith=%s' % date_str)

        response = self.session.get(search_url,
                                    auth=self.auth,
                                    verify=False)

        response_dict = response.json()

        if not response_dict:
            return None

        # A set gives constant-time membership tests in the loop below.
        existing_ids = set(measurement_dict['id'] for measurement_dict in response_dict['objects'])

        measurement_number = base_number
        measurement_id = "%s%s%02i" % (date_str, call_sign, measurement_number)

        while measurement_id in existing_ids:
            measurement_number = measurement_number + 1
            measurement_id = "%s%s%02i" % (date_str, call_sign, measurement_number)
            # The number no longer fits in the two-digit field.
            if measurement_number == 100:
                raise ValueError('No available measurement id found.')

        return measurement_id


class ApiObject:
    """ Generic wrapper that exposes the items of an API response dictionary
    as attributes of the object.

    The `exists` attribute tells if a non-empty response was provided.
    """

    def __init__(self, dict_response):
        # An empty or missing response gives an object with no data.
        if not dict_response:
            self.exists = False
            return

        # Copy every key of the response to an attribute of the same name.
        for name in dict_response:
            setattr(self, name, dict_response[name])

        # Set last, so that it wins over any 'exists' key of the response.
        self.exists = True


class Measurement(ApiObject):
    """ A measurement, as described by the SCC API.

    The status attributes used here (id, upload, pre_processing, processing)
    are not declared in this class; they are expected to be set from the API
    response by ApiObject.
    """

    @property
    def is_running(self):
        """ Returns True if the processing has not finished.

        The processing is considered finished when upload is 0, when
        pre_processing is -127, or when pre_processing is 127 and processing
        is either 127 or -127. (The status codes presumably mean: 127 step
        succeeded, -127 step failed -- confirm against the SCC documentation.)
        """
        if self.upload == 0 or self.pre_processing == -127:
            return False

        finished = self.pre_processing == 127 and self.processing in [127, -127]
        return not finished

    @property
    def rerun_processing_url(self):
        """ URL that reruns the processing of this measurement. """
        return RERUN_PROCESSING.format(self.id)

    @property
    def rerun_all_url(self):
        """ URL that reruns all the processing steps of this measurement. """
        return RERUN_ALL.format(self.id)

    def __str__(self):
        status = (self.id, self.upload, self.pre_processing, self.processing)
        return "%s: %s, %s, %s" % status


def upload_file(filename, system_id, auth=BASIC_LOGIN, credential=DJANGO_LOGIN):
    """ Shortcut function to upload a file to the SCC.

    Creates a client, logs in, uploads the file and logs out. Returns the
    value returned by SCC.upload_file.
    """
    logger.info("Uploading file %s, using sytem %s", filename, system_id)

    client = SCC(auth)
    client.login(credential)
    uploaded_id = client.upload_file(filename, system_id)
    client.logout()

    return uploaded_id


def process_file(filename, system_id, auth=BASIC_LOGIN, credential=DJANGO_LOGIN):
    """ Shortcut function to process a file on the SCC.

    Creates a client, logs in, runs SCC.process on the file and logs out.
    Returns the value returned by SCC.process.
    """
    logger.info("Processing file %s, using sytem %s", filename, system_id)

    client = SCC(auth)
    client.login(credential)
    result = client.process(filename, system_id)
    client.logout()

    return result


def delete_measurement(measurement_id, auth=BASIC_LOGIN, credential=DJANGO_LOGIN):
    """ Shortcut function to delete a measurement from the SCC.

    Creates a client, logs in, deletes the measurement and logs out.
    Nothing is returned.
    """
    logger.info("Deleting %s", measurement_id)

    client = SCC(auth)
    client.login(credential)
    client.delete_measurement(measurement_id)
    client.logout()


def rerun_all(measurement_id, monitor, auth=BASIC_LOGIN, credential=DJANGO_LOGIN):
    """ Shortcut function to rerun all processing steps of a measurement.

    Creates a client, logs in, calls SCC.rerun_all and logs out. The monitor
    argument is passed on to SCC.rerun_all. Nothing is returned.
    """
    logger.info("Rerunning all products for %s", measurement_id)

    client = SCC(auth)
    client.login(credential)
    client.rerun_all(measurement_id, monitor)
    client.logout()


def rerun_processing(measurement_id, monitor, auth=BASIC_LOGIN, credential=DJANGO_LOGIN):
    """ Shortcut function to rerun the (optical) processing of a measurement.

    Creates a client, logs in, calls SCC.rerun_processing and logs out. The
    monitor argument is passed on to SCC.rerun_processing. Nothing is
    returned.
    """
    logger.info("Rerunning (optical) processing for %s", measurement_id)

    client = SCC(auth)
    client.login(credential)
    client.rerun_processing(measurement_id, monitor)
    client.logout()


# Command line entry point: parse the arguments and run exactly one action.
if __name__ == '__main__':

    cli = argparse.ArgumentParser()

    # Positional arguments. Both are optional, because they are not needed
    # by the delete and rerun actions.
    cli.add_argument("filename", nargs='?', default='',
                     help="Measurement file name or path.")
    cli.add_argument("system", nargs='?', default=0,
                     help="Processing system id.")

    # Actions and their modifiers.
    cli.add_argument("-p", "--process", action="store_true",
                     help="Wait for the results of the processing.")
    cli.add_argument("--delete", help="Measurement ID to delete.")
    cli.add_argument("--rerun-all", help="Measurement ID to rerun.")
    cli.add_argument("--rerun-processing", help="Measurement ID to rerun processing routings.")

    # Verbosity settings from http://stackoverflow.com/a/20663028
    # Both flags store to 'loglevel'; the INFO default is declared on --debug.
    cli.add_argument('-d', '--debug', action="store_const", dest="loglevel",
                     const=logging.DEBUG, default=logging.INFO,
                     help="Print debugging information.")
    cli.add_argument('-s', '--silent', action="store_const", dest="loglevel",
                     const=logging.WARNING,
                     help="Show only warning and error messages.")

    options = cli.parse_args()

    # Configure the logging with the requested level
    logging.basicConfig(format='%(levelname)s: %(message)s', level=options.loglevel)

    # The first action that was requested is performed and all others are
    # ignored. Uploading is the default action and needs both positionals.
    if options.delete:
        delete_measurement(options.delete)
    elif options.rerun_all:
        rerun_all(options.rerun_all, options.process)
    elif options.rerun_processing:
        rerun_processing(options.rerun_processing, options.process)
    elif (options.filename == '') or (options.system == 0):
        # error() prints the message and exits the script.
        cli.error('Provide a valid filename and system parameters.\nRun with -h for help.\n')
    elif options.process:
        process_file(options.filename, options.system)
    else:
        upload_file(options.filename, options.system)

mercurial