Source code for glue.core.util

from __future__ import absolute_import, division, print_function

import logging
from itertools import count
from functools import partial


import numpy as np
import pandas as pd

from matplotlib.ticker import AutoLocator, MaxNLocator, LogLocator
from matplotlib.ticker import (LogFormatterMathtext, ScalarFormatter,
                               FuncFormatter)

__all__ = ["relim", "split_component_view", "join_component_view",
           "facet_subsets", "colorize_subsets", "disambiguate",
           "row_lookup", 'small_view', 'small_view_array', 'visible_limits',
           'tick_linker', 'update_ticks']


def relim(lo, hi, log=False):
    logging.getLogger(__name__).debug("Inputs to relim: %r %r", lo, hi)
    x, y = lo, hi
    if log:
        if lo < 0:
            x = 1e-5
        if hi < 0:
            y = 1e5
        return x * .95, y * 1.05
    delta = y - x
    return (x - .02 * delta, y + .02 * delta)


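# Illustrative usage sketch (not part of the original module): relim pads a
# (lo, hi) range by a small margin, scaling the bounds instead when a log
# axis is requested.
def _example_relim():
    # Linear case: 2% of the range is added on each side.
    assert np.allclose(relim(0, 10), (-0.2, 10.2))
    # Log case: the bounds are scaled by 0.95 and 1.05.
    assert np.allclose(relim(1.0, 100.0, log=True), (0.95, 105.0))

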
def split_component_view(arg):
    """Split the input to data or subset.__getitem__ into its pieces.

    :param arg:
        The input passed to data or subset.__getitem__.
        Assumed to be either a scalar or tuple

    :rtype: tuple

    The first item is the Component selection (a ComponentID or string)

    The second item is a view (tuple of slices, slice scalar, or view
    object)
    """
    if isinstance(arg, tuple):
        if len(arg) == 1:
            raise TypeError("Expected a scalar or >length-1 tuple, "
                            "got length-1 tuple")
        if len(arg) == 2:
            return arg[0], arg[1]
        return arg[0], arg[1:]
    else:
        return arg, None


def join_component_view(component, view):
    """Pack a componentID and optional view into single tuple

    Returns an object compatible with data.__getitem__ and related
    methods.  Handles edge cases of when view is None, a scalar, a
    tuple, etc.

    :param component: ComponentID
    :param view: view into data, or None
    """
    if view is None:
        return component
    result = [component]
    try:
        result.extend(view)
    except TypeError:  # view is a scalar
        result = [component, view]
    return tuple(result)


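# Illustrative usage sketch (not part of the original module):
# join_component_view and split_component_view are inverses for the keys
# passed to data.__getitem__. The 'mass' component name is hypothetical.
def _example_join_split():
    key = join_component_view('mass', (slice(0, 10), 5))
    assert key == ('mass', slice(0, 10), 5)
    component, view = split_component_view(key)
    assert component == 'mass'
    assert view == (slice(0, 10), 5)
    # With no view, the component passes through unchanged.
    assert join_component_view('mass', None) == 'mass'
    assert split_component_view('mass') == ('mass', None)

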
def facet_subsets(data_collection, cid, lo=None, hi=None, steps=5,
                  prefix='', log=False):
    """Create a series of subsets that partition the values of
    a particular attribute into several bins

    This creates `steps` new subset groups, adds them to the data
    collection, and returns the list of newly created subset groups.

    :param data_collection: DataCollection object to use
    :type data_collection: :class:`~glue.core.data_collection.DataCollection`

    :param cid: ComponentID to facet on
    :type cid: :class:`~glue.core.component_id.ComponentID`

    :param lo: The lower bound for the faceting. Defaults to minimum value
               in data
    :type lo: float

    :param hi: The upper bound for the faceting. Defaults to maximum value
               in data
    :type hi: float

    :param steps: The number of subsets to create. Defaults to 5
    :type steps: int

    :param prefix: If present, the new subset labels will begin with `prefix`
    :type prefix: str

    :param log: If True, space divisions logarithmically. Default=False
    :type log: bool

    :returns: List of :class:`~glue.core.subset_group.SubsetGroup` instances
              added to `data_collection`

    Example::

        facet_subsets(data, data.id['mass'], lo=0, hi=10, steps=2)

    creates 2 new subsets. The first represents the constraint
    0 <= mass < 5. The second represents 5 <= mass < 10::

        facet_subsets(data, data.id['mass'], lo=10, hi=0, steps=2)

    creates 2 new subsets. The first represents the constraint
    10 >= mass > 5. The second represents 5 >= mass > 0::

        facet_subsets(data, data.id['mass'], lo=0, hi=10, steps=2, prefix='m')

    labels the subsets ``m_1`` and ``m_2``
    """
    from glue.core.exceptions import IncompatibleAttribute

    if lo is None or hi is None:
        for data in data_collection:
            try:
                vals = data[cid]
                break
            except IncompatibleAttribute:
                continue
        else:
            raise ValueError("Cannot infer data limits for ComponentID %s"
                             % cid)
        if lo is None:
            lo = np.nanmin(vals)
        if hi is None:
            hi = np.nanmax(vals)

    reverse = lo > hi
    if log:
        rng = np.logspace(np.log10(lo), np.log10(hi), steps + 1)
    else:
        rng = np.linspace(lo, hi, steps + 1)

    states = []
    labels = []
    for i in range(steps):
        if reverse:
            states.append((cid <= rng[i]) & (cid > rng[i + 1]))
            labels.append(prefix + '{0}<{1}<={2}'.format(rng[i + 1], cid,
                                                         rng[i]))
        else:
            states.append((cid >= rng[i]) & (cid < rng[i + 1]))
            labels.append(prefix + '{0}<={1}<{2}'.format(rng[i], cid,
                                                         rng[i + 1]))

    result = []
    for lbl, s in zip(labels, states):
        sg = data_collection.new_subset_group(label=lbl, subset_state=s)
        result.append(sg)

    return result


def colorize_subsets(subsets, cmap, lo=0, hi=1):
    """Re-color a list of subsets according to a colormap

    :param subsets: List of subsets
    :param cmap: Matplotlib colormap instance
    :param lo: Start location in colormap. 0-1. Defaults to 0
    :param hi: End location in colormap. 0-1. Defaults to 1

    The colormap will be sampled at `len(subsets)` even intervals
    between `lo` and `hi`. The color at the `i`th interval will be
    applied to `subsets[i]`
    """
    from matplotlib import cm

    sm = cm.ScalarMappable(cmap=cmap)
    sm.norm.vmin = 0
    sm.norm.vmax = 1

    vals = np.linspace(lo, hi, len(subsets))
    rgbas = sm.to_rgba(vals)

    for color, subset in zip(rgbas, subsets):
        r, g, b, a = color
        r = int(255 * r)
        g = int(255 * g)
        b = int(255 * b)
        subset.style.color = '#%2.2x%2.2x%2.2x' % (r, g, b)


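# Illustrative usage sketch (not part of the original module):
# colorize_subsets only needs objects exposing a ``style.color`` attribute,
# so duck-typed stand-ins are used here instead of real glue subsets, and a
# registered colormap name ('viridis') is passed where a Colormap instance
# would normally be used (ScalarMappable accepts either).
def _example_colorize_subsets():
    from types import SimpleNamespace
    subsets = [SimpleNamespace(style=SimpleNamespace(color=None))
               for _ in range(3)]
    colorize_subsets(subsets, 'viridis', lo=0.2, hi=0.8)
    # Each stand-in now carries a hex color sampled from the colormap.
    assert all(s.style.color.startswith('#') for s in subsets)

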
def disambiguate(label, taken):
    """If necessary, add a suffix to label to avoid name conflicts

    :param label: desired label
    :param taken: set of taken names

    Returns label if it is not in the taken set. Otherwise, returns
    label_NN where NN is the lowest integer such that label_NN not in
    taken.
    """
    if label not in taken:
        return label
    suffix = "_%2.2i"
    label = str(label)
    for i in count(1):
        candidate = label + (suffix % i)
        if candidate not in taken:
            return candidate


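# Illustrative usage sketch (not part of the original module): disambiguate
# appends the first free two-digit suffix when a label is already taken.
def _example_disambiguate():
    assert disambiguate('data', set()) == 'data'
    assert disambiguate('data', {'data'}) == 'data_01'
    assert disambiguate('data', {'data', 'data_01'}) == 'data_02'

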
def row_lookup(data, categories):
    """
    Lookup which row in categories each data item is equal to

    :param data: array-like
    :param categories: array-like of unique values

    :returns: Float array. If result[i] is finite, then
              data[i] = categories[result[i]]. Otherwise, data[i] is
              not in the categories list.
    """
    # np.searchsorted doesn't work on mixed types in Python3
    ndata, ncat = len(data), len(categories)
    data = pd.DataFrame({'data': data, 'row': np.arange(ndata)})
    cats = pd.DataFrame({'categories': categories,
                         'cat_row': np.arange(ncat)})
    m = pd.merge(data, cats, left_on='data', right_on='categories')
    result = np.zeros(ndata, dtype=float) * np.nan
    result[np.array(m.row)] = m.cat_row
    return result


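# Illustrative usage sketch (not part of the original module): row_lookup
# maps each data value to its row index in the categories array, with NaN
# marking values that are not present.
def _example_row_lookup():
    data = np.array(['a', 'b', 'a', 'z'], dtype=object)
    categories = np.array(['a', 'b', 'c'], dtype=object)
    result = row_lookup(data, categories)
    assert list(result[:3]) == [0.0, 1.0, 0.0]
    assert np.isnan(result[3])  # 'z' is not a known category

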
def small_view(data, attribute):
    """
    Extract a downsampled view from a dataset, for quick statistical
    summaries
    """
    shp = data.shape
    view = tuple([slice(None, None, np.intp(max(s / 50, 1))) for s in shp])
    return data[attribute, view]


def small_view_array(data):
    """
    Same as small_view, except using a numpy array as input
    """
    shp = data.shape
    view = tuple([slice(None, None, np.intp(max(s / 50, 1))) for s in shp])
    return np.asarray(data)[view]


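# Illustrative usage sketch (not part of the original module): both helpers
# stride the input so that no axis keeps more than roughly 50 samples;
# small_view_array shows the effect on a plain numpy array.
def _example_small_view_array():
    arr = np.arange(200 * 300).reshape(200, 300)
    small = small_view_array(arr)
    # Strides of 4 and 6 reduce the (200, 300) array to a (50, 50) view.
    assert small.shape == (50, 50)

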
def visible_limits(artists, axis):
    """
    Determines the data limits for the data in a set of artists.

    Ignores non-visible artists

    Assumes each artist has a get_data method which returns a tuple of
    x, y

    Returns a tuple of min, max for the requested axis, or None if no
    data present

    :param artists: An iterable collection of artists
    :param axis: Which axis to compute. 0=xaxis, 1=yaxis
    """
    data = []
    for art in artists:
        if not art.visible:
            continue
        xy = art.get_data()
        assert isinstance(xy, tuple)
        val = xy[axis]
        if val.size > 0:
            data.append(xy[axis])

    if len(data) == 0:
        return
    data = np.hstack(data)
    if data.size == 0:
        return

    data = data[np.isfinite(data)]
    if data.size == 0:
        return

    lo, hi = np.nanmin(data), np.nanmax(data)
    if not np.isfinite(lo):
        return

    return lo, hi


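# Illustrative usage sketch (not part of the original module): visible_limits
# normally receives glue layer artists; duck-typed stand-ins with ``visible``
# and ``get_data`` attributes are enough to show the behaviour.
def _example_visible_limits():
    from types import SimpleNamespace
    artists = [
        SimpleNamespace(visible=True,
                        get_data=lambda: (np.array([1., 2., np.nan]),
                                          np.array([5., 6., 7.]))),
        SimpleNamespace(visible=False,
                        get_data=lambda: (np.array([100.]),
                                          np.array([100.]))),
    ]
    # Only the visible artist contributes, and NaNs are dropped.
    assert visible_limits(artists, 0) == (1.0, 2.0)

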
def tick_linker(all_categories, pos, *args):
    try:
        pos = np.round(pos)
        return all_categories[int(pos)]
    except IndexError:
        return ''


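# Illustrative usage sketch (not part of the original module): tick_linker is
# used as a matplotlib FuncFormatter callback, mapping a tick position to the
# nearest category label and falling back to '' for out-of-range positions.
def _example_tick_linker():
    cats = np.array(['spam', 'eggs'], dtype=object)
    assert tick_linker(cats, 0.4) == 'spam'
    assert tick_linker(cats, 5.0) == ''

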
def update_ticks(axes, coord, components, is_log):
    """
    Changes the axes to have the proper tick formatting based on the
    type of component.

    :param axes: A matplotlib axis object to alter
    :param coord: 'x' or 'y'
    :param components: A list of components that are plotted along this axis
    :param is_log: Boolean for log-scale

    :return: None, or the number of categories if the components are
             categorical
    """
    if coord == 'x':
        axis = axes.xaxis
    elif coord == 'y':
        axis = axes.yaxis
    else:
        raise TypeError("coord must be one of x,y")

    is_cat = all(comp.categorical for comp in components)
    if is_log:
        axis.set_major_locator(LogLocator())
        axis.set_major_formatter(LogFormatterMathtext())
    elif is_cat:
        all_categories = np.empty((0,), dtype=np.object)
        for comp in components:
            all_categories = np.union1d(comp.categories, all_categories)
        locator = MaxNLocator(10, integer=True)
        locator.view_limits(0, all_categories.shape[0])
        format_func = partial(tick_linker, all_categories)
        formatter = FuncFormatter(format_func)

        axis.set_major_locator(locator)
        axis.set_major_formatter(formatter)
        return all_categories.shape[0]
    else:
        axis.set_major_locator(AutoLocator())
        axis.set_major_formatter(ScalarFormatter())