# Source code for pbcommand.utils

"""Utils for common funcs, such as setting up a log, composing functions."""
import multiprocessing
import functools
import os
import logging
import logging.config
import argparse
import pprint
import traceback
import time
import types
import subprocess
from contextlib import contextmanager
import xml.etree.ElementTree as ET

from pbcommand.models import FileTypes, DataSetMetaData

# Module-level logger named after this module. A NullHandler is attached so
# that using this module without any logging configuration does not trigger
# the logging package's "no handlers could be found" message.
log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())  # suppress the annoying no handlers msg


class Constants(object):
    """Logging format strings, ordered from least to most detailed."""

    # Message text only.
    LOG_FMT_ONLY_MSG = '%(message)s'
    # Message text only (separate name kept for error output).
    LOG_FMT_ERR = '%(message)s'
    # Level name + message.
    LOG_FMT_LVL = '[%(levelname)s] %(message)s'
    # Timestamp + message.
    LOG_FMT_MIN = '[%(asctime)-15sZ] %(message)s'
    # Level name + timestamp + message.
    LOG_FMT_SIMPLE = '[%(levelname)s] %(asctime)-15sZ %(message)s'
    # Level name + timestamp + logger name + message.
    LOG_FMT_STD = '[%(levelname)s] %(asctime)-15sZ [%(name)s] %(message)s'
    # Level name + timestamp + logger name, function and line number + message.
    LOG_FMT_FULL = '[%(levelname)s] %(asctime)-15sZ [%(name)s %(funcName)s %(lineno)d] %(message)s'
class ExternalCommandNotFoundError(Exception):
    """Raised when a required external command cannot be found in the PATH."""
def _handler_stream_d(stream, level_str, formatter_id):
    """Build a ``logging.StreamHandler`` entry for a ``dictConfig`` dict.

    :param stream: stream reference understood by dictConfig
        (e.g. "ext://sys.stdout")
    :param level_str: level of the handler
    :param formatter_id: key of the formatter the handler uses
    :return: handler configuration dict
    """
    return {'level': level_str,
            'class': "logging.StreamHandler",
            'formatter': formatter_id,
            'stream': stream}


# Stream handler builders pre-bound to stdout and stderr.
_handler_stdout_stream_d = functools.partial(_handler_stream_d, "ext://sys.stdout")
_handler_stderr_stream_d = functools.partial(_handler_stream_d, "ext://sys.stderr")


def _handler_file(level_str, path, formatter_id):
    """Build a ``logging.FileHandler`` entry for a ``dictConfig`` dict.

    :param level_str: level of the handler
    :param path: path of the log file
    :param formatter_id: key of the formatter the handler uses
    :return: handler configuration dict
    """
    return {'class': 'logging.FileHandler',
            'level': level_str,
            'formatter': formatter_id,
            'filename': path}


def _get_default_logging_config_dict(level, file_name_or_none, formatter):
    """
    Setup a logger to either a file or console. If file name is none, then
    a logger will be setup to stdout.

    :note: a stderr handler at ERROR level is always attached to the root
        logger in addition to the file/stdout handler.

    :param level: logging level, passed through ``logging.getLevelName``
    :param file_name_or_none: path to the log file, or None for stdout
    :param formatter: format string of the file/stdout handler
    :return: dict configuration of the logger
    """
    level_str = logging.getLevelName(level)

    formatter_id = 'custom_logger_fmt'
    console_handler_id = "console_handler"

    error_fmt_id = "error_fmt_id"
    error_handler_id = "error_handler"
    error_handler_d = _handler_stderr_stream_d(logging.ERROR, error_fmt_id)

    if file_name_or_none is None:
        handler_d = _handler_stdout_stream_d(level_str, formatter_id)
    else:
        handler_d = _handler_file(level_str, file_name_or_none, formatter_id)

    formatters_d = {formatter_id: {'format': formatter},
                    error_fmt_id: {'format': Constants.LOG_FMT_ERR}}

    handlers_d = {console_handler_id: handler_d,
                  error_handler_id: error_handler_d}

    # NOTE(review): the 'stderr' entry sits inside the "custom" logger's own
    # configuration instead of next to it. It looks like it was meant to be a
    # separate logger -- confirm before changing, kept as-is here.
    loggers_d = {"custom": {'handlers': [console_handler_id],
                            'stderr': {'handlers': [error_handler_id]}}}

    return {
        'version': 1,
        # keep loggers created before this configuration is applied enabled
        'disable_existing_loggers': False,
        'formatters': formatters_d,
        'handlers': handlers_d,
        'loggers': loggers_d,
        'root': {'handlers': [error_handler_id, console_handler_id],
                 'level': logging.NOTSET}
    }


def _get_console_and_file_logging_config_dict(console_level, console_formatter,
                                              path, path_level, path_formatter):
    """
    Get logging configuration that is both for console and a file.

    :note: A stderr logger handler is also added.

    :param console_level: level of the stdout handler
    :param console_formatter: format string of the stdout handler
    :param path: path of the log file
    :param path_level: level of the file handler
    :param path_formatter: format string of the file handler
    :return: dict configuration of the logger
    """

    def _to_handler_d(handlers_, level):
        return {"handlers": handlers_, "level": level, "propagate": True}

    console_handler_id = "console_handler"
    console_fmt_id = "console_fmt"
    console_handler_d = _handler_stdout_stream_d(console_level, console_fmt_id)

    stderr_handler_id = "stderr_handler"
    error_fmt_id = "error_fmt"
    # NOTE(review): the stderr handler uses the console formatter, which
    # leaves the "error_fmt" formatter registered below unused -- confirm
    # whether error_fmt_id was intended here.
    stderr_handler_d = _handler_stderr_stream_d(logging.ERROR, console_fmt_id)

    file_handler_id = "file_handler"
    file_fmt_id = "file_fmt"
    file_handler_d = _handler_file(path_level, path, file_fmt_id)

    formatters = {console_fmt_id: {"format": console_formatter},
                  file_fmt_id: {"format": path_formatter},
                  error_fmt_id: {"format": Constants.LOG_FMT_ERR}}

    handlers = {console_handler_id: console_handler_d,
                file_handler_id: file_handler_d,
                stderr_handler_id: stderr_handler_d}

    loggers = {"console": _to_handler_d([console_handler_id], console_level),
               "custom_file": _to_handler_d([file_handler_id], path_level),
               "stderr_err": _to_handler_d([stderr_handler_id], logging.ERROR)}

    return {
        'version': 1,
        # keep loggers created before this configuration is applied enabled
        'disable_existing_loggers': False,
        'formatters': formatters,
        'handlers': handlers,
        'loggers': loggers,
        # A real, deterministically ordered list: dict.keys() is only a view
        # on Python 3 (not a list, not serializable) and unordered on Python 2.
        'root': {'handlers': sorted(handlers), 'level': logging.DEBUG}
    }


def _setup_logging_config_d(d):
    """Apply the dictConfig dict ``d`` and make log timestamps use UTC.

    :param d: logging configuration dict
    :return: the same dict ``d``
    """
    logging.config.dictConfig(d)
    # This is set on the Formatter class, so it applies process-wide.
    logging.Formatter.converter = time.gmtime
    return d
def setup_logger(file_name_or_none, level, formatter=Constants.LOG_FMT_FULL):
    """
    Build the default logging configuration and apply it.

    :param file_name_or_none: Path to log file, None will default to stdout
    :param level: logging level (e.g. logging.INFO)
    :param formatter: Log Formatting string
    :return: the logging configuration dict
    """
    return _setup_logging_config_d(
        _get_default_logging_config_dict(level, file_name_or_none, formatter))
def setup_console_and_file_logger(stdout_level, stdout_formatter, path,
                                  path_level, path_formatter):
    """
    Build the combined console + file logging configuration and apply it.

    :param stdout_level: level of the console (stdout) handler
    :param stdout_formatter: format string of the console handler
    :param path: path to the log file
    :param path_level: level of the file handler
    :param path_formatter: format string of the file handler
    :return: the logging configuration dict
    """
    config_d = _get_console_and_file_logging_config_dict(
        stdout_level, stdout_formatter, path, path_level, path_formatter)
    _setup_logging_config_d(config_d)
    return config_d
def setup_log(alog, level=logging.INFO, file_name=None, log_filter=None,
              str_formatter=Constants.LOG_FMT_FULL):
    """Core Util to setup log handler

    :param alog: a log instance
    :param level: (int) Level of logging debug
    :param file_name: (str, None) if None, stdout is used, str write to file
    :param log_filter: (LogFilter, None) no longer supported; a warning is
        logged on ``alog`` when a value is given
    :param str_formatter: (str) log formatting str
    :return: ``alog``

    .. warning:: THIS NEEDS TO BE DEPRECATED
    """
    setup_logger(file_name, level, formatter=str_formatter)

    # FIXME. Keeping the interface, but the specific log instance isn't used,
    # the python logging setup mutates global state
    if log_filter is not None:
        # Logger.warn is a deprecated alias that was removed in Python 3.13
        alog.warning("log_filter kw is no longer supported")

    return alog
def get_parsed_args_log_level(pargs, default_level=logging.INFO):
    """
    Utility for handling logging setup flexibly in a variety of use cases,
    assuming standard command-line arguments.

    The options are checked in this order, and the first one that is set
    wins: ``verbosity`` (>= 2 is DEBUG, 1 is INFO), ``debug`` (DEBUG),
    ``quiet`` (ERROR), ``log_level``. Options that are missing, or set to
    None, are skipped.

    :param pargs: argparse namespace or equivalent
    :param default_level: logging level to use if the parsed arguments do not
                          specify one (int, or level name as a string)
    :return: the logging level; level names given as strings are converted
             with ``logging.getLevelName``
    """
    try:
        string_types = (basestring,)  # noqa: F821 (Python 2 only)
    except NameError:
        string_types = (str,)

    def _normalize(value):
        # Only level *names* need converting; numeric levels pass through.
        if isinstance(value, string_types):
            return logging.getLevelName(value)
        return value

    # argparse "count" options are None when the flag was not given, and
    # comparing None with an int raises TypeError on Python 3.
    verbosity = getattr(pargs, 'verbosity', None) or 0
    if verbosity > 0:
        return logging.DEBUG if verbosity >= 2 else logging.INFO
    if getattr(pargs, 'debug', False):
        return logging.DEBUG
    if getattr(pargs, 'quiet', False):
        return logging.ERROR

    log_level = getattr(pargs, 'log_level', None)
    if log_level is not None:
        return _normalize(log_level)

    return _normalize(default_level)
def log_traceback(alog, ex, ex_traceback):
    """
    Write the formatted traceback of an exception to a log at ERROR level.

    :param alog: log instance the traceback is written to
    :param ex: python Exception instance
    :param ex_traceback: exception traceback

    Example Usage (assuming you have a log instance in your scope)

    :Example:

    >>> value = 0
    >>> try:
    >>>    1 / value
    >>> except Exception as e:
    >>>    msg = "{i} failed validation. {e}".format(i=value, e=e)
    >>>    log.error(msg)
    >>>    _, _, ex_traceback = sys.exc_info()
    >>>    log_traceback(log, e, ex_traceback)

    """
    formatted = traceback.format_exception(ex.__class__, ex, ex_traceback)
    alog.error(''.join(formatted))
def validate_type_or_raise(instance, type_or_types, error_prefix=None):
    """Return ``instance`` if it is an instance of ``type_or_types``.

    :param instance: object to check
    :param type_or_types: a type or tuple of types, as accepted by isinstance
    :param error_prefix: optional text prepended to the error message
    :raises TypeError: if ``instance`` is not of the expected type(s)
    :return: ``instance``
    """
    if isinstance(instance, type_or_types):
        return instance

    prefix = "" if error_prefix is None else error_prefix
    # Report the *expected* type(s), not the offending instance.
    msg = prefix + "Expected type {t}. Got type {x} for {v}".format(
        t=type_or_types, x=type(instance), v=instance)
    raise TypeError(msg)


def _simple_validate_type(atype, instance):
    """Same as validate_type_or_raise with the type first (partial-friendly)."""
    return validate_type_or_raise(instance, atype)


# Validates that a value is an argparse.ArgumentParser instance.
_is_argparser_instance = functools.partial(_simple_validate_type,
                                           argparse.ArgumentParser)


def is_argparser_instance(func):
    """Decorator that checks the first positional argument of ``func`` is an
    ``argparse.ArgumentParser`` before calling it.

    :raises TypeError: (at call time) if the first argument is not a parser
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        _is_argparser_instance(args[0])
        return func(*args, **kwargs)
    return wrapper
def compose(*funcs):
    """
    Functional composition of a non-empty list

    [f, g, h] will be f(g(h(x)))

    Any callable is accepted (functions, methods, partials, builtins,
    classes and objects defining ``__call__``).

    :raises ValueError: if no functions are given
    :raises TypeError: if any of the arguments is not callable

    :Example:

    >>> f = lambda x: x * x
    >>> g = lambda x: x + 1
    >>> h = lambda x: x * 2
    >>> funcs = [f, g, h]
    >>> fgh = compose(*funcs)
    >>> fgh(3) # 49
    >>> compose(f, g, h)(3)

    """
    if not funcs:
        raise ValueError("Compose only supports non-empty lists")
    for func in funcs:
        if not callable(func):
            raise TypeError("Only Function types are supported")

    def compose_two(f, g):
        def c(x):
            return f(g(x))
        return c

    # reduce folds left-to-right, so the right-most function is applied first
    return functools.reduce(compose_two, funcs)
def which(exe_str):
    """walk the current PATH for exe_str to get the absolute path of the exe

    Only regular files that are executable by the current user are matched;
    directories and non-executable files with the same name are skipped.

    :param exe_str: Executable name

    :rtype: str | None
    :returns Absolute path to the executable or None if the exe is not found
    """
    paths = os.environ.get('PATH', None)

    if paths is None:
        log.error("PATH env var is not defined.")
        return None

    # os.pathsep is ":" on POSIX and ";" on Windows
    for path in paths.split(os.pathsep):
        exe_path = os.path.join(path, exe_str)
        if os.path.isfile(exe_path) and os.access(exe_path, os.X_OK):
            return exe_path

    return None
def which_or_raise(cmd):
    """Resolve ``cmd`` via the PATH, raising if it cannot be found.

    :param cmd: Executable name
    :raises ExternalCommandNotFoundError: if ``which`` returns None
    :return: the value returned by ``which``
    """
    resolved_cmd = which(cmd)
    if resolved_cmd is not None:
        return resolved_cmd
    raise ExternalCommandNotFoundError("Unable to find required cmd '{c}'".format(c=cmd))
class Singleton(type):

    """
    General Purpose singleton metaclass.

    The first call of a class using this metaclass creates the instance
    (positional and keyword arguments are forwarded to the constructor).
    Every later call returns that same instance and ignores its arguments.

    Usage (Python 2 metaclass syntax):

    >>> class MyClass(object):
    >>>     __metaclass__ = Singleton
    >>>     def __init__(self):
    >>>         self.name = 'name'

    """

    def __init__(cls, name, bases, dct):
        super(Singleton, cls).__init__(name, bases, dct)
        # Each class created by this metaclass gets its own instance slot.
        cls.instance = None

    def __call__(cls, *args, **kw):
        if cls.instance is None:
            # Forward keyword arguments too; they used to be dropped.
            cls.instance = super(Singleton, cls).__call__(*args, **kw)
        return cls.instance
def nfs_exists_check(ff):
    """
    Central place for all NFS hackery

    Return whether a file or a dir ff exists or not.
    Call listdir() instead of os.path.exists() to eliminate NFS errors.

    Added try/catch black hole exception cases to help trigger an NFS refresh

    :param ff: path to a file or a directory
    :rtype bool:
    """
    try:
        # All we really need is opendir(), but listdir() is usually fast.
        os.listdir(os.path.dirname(os.path.realpath(ff)))
        # But is it a file or a directory? We do not know until it actually exists.
        if os.path.exists(ff):
            return True
        # Might be a directory, so refresh itself too.
        # Not sure this is necessary, since we already ran this on parent,
        # but it cannot hurt.
        os.listdir(os.path.realpath(ff))
        if os.path.exists(ff):
            return True
    except OSError:
        pass

    # The rest is probably unnecessary, but it cannot hurt.

    # try to trigger refresh for File case
    try:
        with open(ff, 'r'):
            pass
    except Exception:
        # deliberate best-effort: only the access attempt matters
        pass

    # try to trigger refresh for Directory case
    try:
        os.stat(ff)
        os.listdir(ff)
    except Exception:
        # deliberate best-effort: only the access attempt matters
        pass

    # Call externally
    # this is taken from Yuan
    # The path is passed as its own argument (no shell), so spaces or shell
    # metacharacters in the path can neither break nor alter the command.
    rcode = 1
    try:
        p = subprocess.Popen(["ls", ff])
        rcode = p.wait()
    except Exception:
        pass

    return rcode == 0
def nfs_refresh(path, ntimes=3, sleep_time=1.0):
    """Check repeatedly whether ``path`` exists, sleeping between attempts.

    The path is always checked at least once; there is no sleep after the
    last failed attempt.

    :param path: path to a file or a directory
    :param ntimes: maximum number of checks
    :param sleep_time: seconds to sleep between two checks
    :return: True as soon as a check succeeds, False (after logging a
        warning) if every check failed
    """
    while True:
        if nfs_exists_check(path):
            return True
        ntimes -= 1
        if ntimes <= 0:
            break
        time.sleep(sleep_time)

    # Logger.warn is a deprecated alias that was removed in Python 3.13
    log.warning("NFS refresh failed. unable to resolve {p}".format(p=path))
    return False


@contextmanager
def ignored(*exceptions):
    """Context manager that swallows the given exception types.

    :param exceptions: exception classes to ignore inside the ``with`` body
    """
    try:
        yield
    except exceptions:
        pass
def get_dataset_metadata(path):
    """
    Returns DataSetMeta data read from the root element of the dataset XML,
    or raises ValueError if the MetaType of the root element is missing or
    is not one of the supported dataset types.

    Only the first element (the root) is parsed.

    :param path: Path to DataSet XML
    :raises: ValueError
    :return: DataSetMetaData
    """
    uuid = mt = None
    # Open the file explicitly: parsing stops after the first element, so the
    # parser never reaches the end of the file and would otherwise leave the
    # handle open until it is garbage collected.
    with open(path, 'rb') as xml_file:
        for _event, element in ET.iterparse(xml_file, events=("start",)):
            uuid = element.get("UniqueId")
            mt = element.get("MetaType")
            break

    # NOTE(review): UniqueId is not validated here; a missing value is
    # passed to DataSetMetaData as None -- confirm that is intended.
    if mt in FileTypes.ALL_DATASET_TYPES().keys():
        return DataSetMetaData(uuid, mt)

    raise ValueError("Unsupported dataset type '{t}'".format(t=mt))
def get_dataset_metadata_or_none(path):
    """
    Non-raising variant of get_dataset_metadata.

    Any exception raised while reading the dataset (e.g. the file doesn't
    exist or processing of the XML fails) results in None.

    :param path: Path to DataSet XML
    :return: DataSetMetaData or None
    """
    try:
        metadata = get_dataset_metadata(path)
    except Exception:
        metadata = None
    return metadata
def is_dataset(path):
    """Peek into the XML to get the MetaType and verify that it's a valid
    dataset.

    :param path: Path to DataSet XML
    :return: True if dataset metadata could be read from ``path``
    """
    metadata = get_dataset_metadata_or_none(path)
    return metadata is not None
def walker(root_dir, file_filter_func):
    """
    Recursively walk ``root_dir`` and lazily yield the path of every file
    accepted by the filter function.

    :param root_dir: directory to start walking from
    :param file_filter_func: filter function F(path) -> bool
    """
    for dir_path, _dir_names, file_names in os.walk(root_dir):
        candidates = (os.path.join(dir_path, name) for name in file_names)
        for candidate in candidates:
            if file_filter_func(candidate):
                yield candidate
def to_ascii(s):
    """Encode ``s`` as ASCII, dropping every character that cannot be encoded.

    :param s: text to encode
    :return: result of ``s.encode('ascii', 'ignore')``
    """
    # This is not awesome
    encoded = s.encode('ascii', 'ignore')
    return encoded
def pool_map(func, args, nproc):
    """
    Wrapper for calling a function in parallel using the multiprocessing
    module and blocking until results are available.

    The number of worker processes is the smallest of ``len(args)``,
    ``nproc`` and the CPU count; when that is 1 or less the function is
    called serially in the current process.

    :param func: function taking a single argument
    :param args: sized sequence of arguments, one per call
    :param nproc: maximum number of processes to use
    :return: list of results, in the order of ``args``
    """
    nargs = len(args)
    computed_nproc = min(nargs, nproc, multiprocessing.cpu_count())
    if computed_nproc > 1:
        log.debug("Running on %d processors", computed_nproc)
        pool = multiprocessing.Pool(processes=computed_nproc)
        try:
            result = pool.map(func, args)  # TODO try map_async instead
        finally:
            # release the worker processes even when func raises
            pool.close()
            pool.join()
    else:
        log.debug("computed_nproc=1, running serially")
        # Build a real list so both code paths return the same type
        # (the builtin map is lazy on Python 3).
        result = [func(arg) for arg in args]
    return result