
# -*- coding: utf-8 -*-
"""Tools for the evaluation of reconstruction methods.
"""
import sys
from warnings import warn
from itertools import product
from math import ceil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from dival.util.odl_utility import CallbackStoreAfter
from dival.util.odl_utility import CallbackStore
# to be replaced by odl.solvers.util.callback.CallbackStore when
# https://github.com/odlgroup/odl/pull/1539 is included in ODL release
from dival.util.plot import plot_image, plot_images
from dival.util.std_out_err_redirect_tqdm import std_out_err_redirect_tqdm
from dival.measure import Measure
from dival.data import DataPairs
from dival import IterativeReconstructor, LearnedReconstructor


class TaskTable:
    """Task table containing reconstruction tasks to evaluate.

    Attributes
    ----------
    name : str
        Name of the task table.
    tasks : list of dict
        Tasks that shall be run. The fields of each dict are set from the
        parameters to :meth:`append` (or :meth:`append_all_combinations`).
        Cf. documentation of :meth:`append` for details.
    results : :class:`ResultTable` or `None`
        Results from the latest call to :meth:`run`.
    """

    def __init__(self, name=''):
        self.name = name
        self.tasks = []
        self.results = None

    def run(self, save_reconstructions=True, reuse_iterates=True,
            show_progress='text'):
        """Run all tasks and return the results.

        The returned :class:`ResultTable` object is also stored as
        :attr:`results`.

        Parameters
        ----------
        save_reconstructions : bool, optional
            Whether the reconstructions should be saved in the results.
            The default is ``True``.

            If measures shall be applied after this method returns, it must
            be ``True``.

            If ``False``, no iterates (intermediate reconstructions) will be
            saved, even if ``task['options']['save_iterates']==True``.
        reuse_iterates : bool, optional
            Whether to reuse iterates from other sub-tasks if possible.
            The default is ``True``.

            If there are sub-tasks whose hyper parameter choices differ only
            in the number of iterations of an :class:`IterativeReconstructor`,
            only the sub-task with the maximum number of iterations is run and
            the results for the other ones are determined by storing iterates
            if this option is ``True``.

            Note 1: If enabled, the callbacks assigned to the reconstructor
            will be run only for the above specified sub-tasks with the
            maximum number of iterations.

            Note 2: If the reconstructor is non-deterministic, this option can
            affect the results as the same realization is used for multiple
            sub-tasks.
        show_progress : str, optional
            Whether and how to show progress. Options are:

                ``'text'`` (default)
                    print a line before running each task
                ``'tqdm'``
                    show a progress bar with ``tqdm``
                `None`
                    do not show progress

        Returns
        -------
        results : :class:`ResultTable`
            The results.
        """
        row_list = []
        with std_out_err_redirect_tqdm(None if show_progress == 'tqdm'
                                       else sys.stdout) as orig_stdout:
            for i, task in enumerate(tqdm(self.tasks, desc='task',
                                          file=orig_stdout,
                                          disable=(show_progress != 'tqdm'))):
                if show_progress == 'text':
                    print('running task {i}/{num_tasks} ...'.format(
                        i=i, num_tasks=len(self.tasks)))
                test_data = task['test_data']
                reconstructor = task['reconstructor']
                if test_data.ground_truth is None and task['measures']:
                    raise ValueError('missing ground truth, cannot apply '
                                     'measures')
                measures = [(measure if isinstance(measure, Measure)
                             else Measure.get_by_short_name(measure))
                            for measure in task['measures']]
                options = task['options']
                skip_training = options.get('skip_training', False)
                save_best_reconstructor = options.get(
                    'save_best_reconstructor')
                save_iterates = (save_reconstructions and
                                 options.get('save_iterates'))

                hp_choices = task.get('hyper_param_choices')
                if hp_choices:
                    # run all hyper param choices as sub-tasks
                    retrain_param_keys = [
                        k for k, v in reconstructor.HYPER_PARAMS.items()
                        if v.get('retrain', False)]
                    orig_hyper_params = reconstructor.hyper_params.copy()

                    def _warn_if_invalid_keys(keys):
                        for k in keys:
                            if k not in reconstructor.HYPER_PARAMS.keys():
                                warn("choice for unknown hyper parameter '{}' "
                                     "for reconstructor of type '{}' will be "
                                     'ignored'.format(k, type(reconstructor)))

                    if isinstance(hp_choices, dict):
                        _warn_if_invalid_keys(hp_choices.keys())
                        keys_retrain_first = sorted(
                            hp_choices.keys(),
                            key=lambda k: k not in retrain_param_keys)
                        # if isinstance(reconstructor, IterativeReconstructor):
                        #     # 'iterations' treated specially to re-use
                        #     # iterates
                        #     keys_retrain_first.remove('iterations')
                        #     hp_choices_iterations = hp_choices.get(
                        #         'iterations',
                        #         [orig_hyper_params['iterations']])
                        param_values = [
                            hp_choices.get(k, [orig_hyper_params[k]])
                            for k in keys_retrain_first]
                        hp_choice_list = [
                            dict(zip(keys_retrain_first, v))
                            for v in product(*param_values)]
                    else:
                        hp_choice_list = hp_choices
                        for hp_choice in hp_choice_list:
                            _warn_if_invalid_keys(hp_choice.keys())
                        # if isinstance(reconstructor, IterativeReconstructor):
                        #     # no special support for re-using iterates
                        #     hp_choices_iterations = []

                    if (isinstance(reconstructor, IterativeReconstructor)
                            and reuse_iterates):
                        # for each sub-task j, find the sub-task k_max that
                        # differs only in 'iterations' and runs the most
                        # iterations, so its iterates can be re-used
                        reuse_iterates_from = []
                        for j, hp_choice_j in enumerate(hp_choice_list):
                            iter_j = hp_choice_j.get(
                                'iterations', orig_hyper_params['iterations'])
                            (k_max, iter_max) = (-1, iter_j)
                            for k, hp_choice_k in enumerate(hp_choice_list):
                                iter_k = hp_choice_k.get(
                                    'iterations',
                                    orig_hyper_params['iterations'])
                                if iter_k > iter_max:
                                    hp_choice_j_rem = hp_choice_j.copy()
                                    hp_choice_j_rem.pop('iterations')
                                    hp_choice_k_rem = hp_choice_k.copy()
                                    hp_choice_k_rem.pop('iterations')
                                    if hp_choice_j_rem == hp_choice_k_rem:
                                        (k_max, iter_max) = (k, iter_k)
                            reuse_iterates_from.append(k_max)

                    if save_best_reconstructor:
                        if len(measures) == 0 and len(hp_choice_list) > 1:
                            warn("No measures are chosen to be evaluated, so "
                                 "no best reconstructor can be selected. Will "
                                 "not save like requested by "
                                 "'save_best_reconstructor' option.")
                            save_best_reconstructor = None
                        else:
                            best_loss = np.inf

                    row_sub_list = [None] * len(hp_choice_list)
                    # run sub-tasks
                    for j, hp_choice in enumerate(
                            tqdm(hp_choice_list, desc='sub-task',
                                 file=orig_stdout,
                                 disable=(show_progress != 'tqdm'),
                                 leave=False)):
                        if show_progress == 'text':
                            print('sub-task {j}/{n} ...'
                                  .format(j=j, n=len(hp_choice_list)))
                        train = (
                            isinstance(reconstructor, LearnedReconstructor)
                            and (j == 0 or any(
                                (hp_choice.get(k, orig_hyper_params[k]) !=
                                 reconstructor.hyper_params[k]
                                 for k in retrain_param_keys))))
                        reconstructor.hyper_params = orig_hyper_params.copy()
                        reconstructor.hyper_params.update(hp_choice)
                        # if (isinstance(reconstructor,
                        #                IterativeReconstructor)
                        #         and hp_choices_iterations):
                        #     reconstructor.hyper_params['iterations'] = max(
                        #         hp_choices_iterations)  # only largest number
                        if train and not skip_training:
                            reconstructor.train(task['dataset'])
                        run_sub_task = not (
                            isinstance(reconstructor, IterativeReconstructor)
                            and reuse_iterates
                            and reuse_iterates_from[j] != -1)
                        if run_sub_task:
                            return_rows_iterates = None
                            if (isinstance(reconstructor,
                                           IterativeReconstructor)
                                    and reuse_iterates):
                                # determine the iteration numbers needed for
                                # other sub-tasks
                                return_iterates_for = [
                                    k for k, from_k in enumerate(
                                        reuse_iterates_from)
                                    if from_k == j]  # sub-task indices
                                return_rows_iterates = [
                                    hp_choice_list[k].get(
                                        'iterations',
                                        orig_hyper_params['iterations'])
                                    for k in return_iterates_for]  # iterations
                            row = self._run_task(
                                reconstructor=reconstructor,
                                test_data=test_data,
                                measures=measures,
                                hp_choice=hp_choice,
                                return_rows_iterates=return_rows_iterates,
                                options=options,
                                save_reconstructions=save_reconstructions,
                                save_iterates=save_iterates,
                                )
                            if return_rows_iterates is not None:
                                (row, rows_iterates) = row
                                # assign rows for other sub-tasks
                                for r_i, k in enumerate(return_iterates_for):
                                    rows_iterates[r_i]['task_ind'] = i
                                    rows_iterates[r_i]['sub_task_ind'] = k
                                    row_sub_list[k] = rows_iterates[r_i]
                            # assign row for current sub-task
                            row['task_ind'] = i
                            row['sub_task_ind'] = j
                            row_sub_list[j] = row

                            if save_best_reconstructor:
                                def save_if_best_reconstructor(
                                        measure_values, iterations=None):
                                    measure = save_best_reconstructor.get(
                                        'measure', measures[0])
                                    if isinstance(measure, str):
                                        measure = Measure.get_by_short_name(
                                            measure)
                                    loss_sign = (
                                        1 if measure.measure_type == 'distance'
                                        else -1)
                                    cur_loss = (
                                        loss_sign * np.mean(measure_values[
                                            measure.short_name]))
                                    if cur_loss < best_loss:
                                        if iterations is not None:
                                            reconstructor.hyper_params[
                                                'iterations'] = iterations
                                        reconstructor.save_params(
                                            save_best_reconstructor['path'])
                                        return cur_loss
                                    return best_loss

                                best_loss = save_if_best_reconstructor(
                                    row['measure_values'])
                                if return_rows_iterates is not None:
                                    for row_iterates, iterations in zip(
                                            rows_iterates,
                                            return_rows_iterates):
                                        best_loss = save_if_best_reconstructor(
                                            row_iterates['measure_values'],
                                            iterations=iterations)

                    reconstructor.hyper_params = orig_hyper_params.copy()
                    row_list += row_sub_list
                else:
                    # run task (with hyper params as they are)
                    if (isinstance(reconstructor, LearnedReconstructor) and
                            not skip_training):
                        reconstructor.train(task['dataset'])
                    row = self._run_task(
                        reconstructor=reconstructor,
                        test_data=test_data,
                        measures=measures,
                        hp_choice=None,
                        return_rows_iterates=None,
                        options=options,
                        save_reconstructions=save_reconstructions,
                        save_iterates=save_iterates,
                        )
                    row['task_ind'] = i
                    row['sub_task_ind'] = 0
                    row_list.append(row)
                    if save_best_reconstructor:
                        reconstructor.save_params(
                            save_best_reconstructor['path'])

        self.results = ResultTable(row_list)
        return self.results
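
    # Example (illustrative sketch, not part of the library API): how
    # `reuse_iterates` groups sub-tasks. Assume a hypothetical
    # `IterativeReconstructor` with hyper parameters 'iterations' and 'lam',
    # and choices appended as
    #
    #     task_table.append(reconstructor, test_data, measures=['psnr'],
    #                       hyper_param_choices={'iterations': [10, 50, 100],
    #                                            'lam': [0.1, 1.0]})
    #     task_table.run(reuse_iterates=True)
    #
    # Of the 6 sub-tasks, only the two with iterations=100 (one per value of
    # 'lam') are actually reconstructed; the rows for iterations=10 and 50 are
    # filled from iterates stored via `CallbackStoreAfter` during those runs.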

    def _run_task(self, reconstructor, test_data, measures, options,
                  hp_choice, return_rows_iterates, save_reconstructions,
                  save_iterates):
        # Parameters
        # ----------
        # return_rows_iterates : list of int or `None`
        #     If specified, also return rows for the specified iterates.
        #     Must be `None` if reconstructor is no `IterativeReconstructor`.
        #
        # Returns
        # -------
        # row [, rows_iterates] : dict or (dict, list of dict)
        #     The resulting row, and if `return_rows_iterates` is specified,
        #     as second output a list of rows for the iterates.
        reconstructions = [] if save_reconstructions else None
        if isinstance(reconstructor, IterativeReconstructor):
            if save_iterates:
                iterates = []
            if options.get('save_iterates_measure_values'):
                iterates_measure_values = {m.short_name: []
                                           for m in measures}
            save_iterates_step = options.get('save_iterates_step', 1)
            if return_rows_iterates is not None:
                iterates_for_rows = []
        measure_values = {m.short_name: [] for m in measures}

        for observation, ground_truth in zip(test_data.observations,
                                             test_data.ground_truth):
            if isinstance(reconstructor, IterativeReconstructor):
                callbacks = []
                if return_rows_iterates is not None:
                    iters_for_rows = []
                    store_after_iters = return_rows_iterates.copy()
                    if 0 in store_after_iters:
                        warn('reporting a dummy zero reconstruction as '
                             'zero-th iterate of `IterativeReconstructor`')
                        # there is no general way to obtain the 0-th iterate,
                        # the first callback call is after first iteration
                        iters_for_rows.append(reconstructor.reco_space.zero())
                        store_after_iters.remove(0)
                    iterates_for_rows.append(iters_for_rows)
                    callback_store_after = CallbackStoreAfter(
                        iters_for_rows, store_after_iters=store_after_iters)
                    callbacks.append(callback_store_after)
                if save_iterates:
                    iters = []
                    iterates.append(iters)
                    callback_store = CallbackStore(
                        iters, step=save_iterates_step)
                    callbacks.append(callback_store)
                if options.get('save_iterates_measure_values'):
                    for measure in measures:
                        iters_mvs = []
                        iterates_measure_values[
                            measure.short_name].append(iters_mvs)
                        callback_store = CallbackStore(
                            iters_mvs, step=save_iterates_step)
                        callbacks.append(
                            callback_store *
                            measure.as_operator_for_fixed_ground_truth(
                                ground_truth))
                callback = None
                if len(callbacks) > 0:
                    callback = callbacks[-1]
                    for c in callbacks[-2::-1]:
                        callback &= c
                reconstruction = reconstructor.reconstruct(
                    observation, callback=callback)
            else:
                reconstruction = reconstructor.reconstruct(observation)
            for measure in measures:
                measure_values[measure.short_name].append(
                    measure.apply(reconstruction, ground_truth))
            if save_reconstructions:
                reconstructions.append(reconstruction)

        misc = {}
        if isinstance(reconstructor, IterativeReconstructor):
            if save_iterates:
                misc['iterates'] = iterates
            if options.get('save_iterates_measure_values'):
                misc['iterates_measure_values'] = iterates_measure_values
        if hp_choice:
            misc['hp_choice'] = hp_choice

        row = {'reconstructions': reconstructions,
               'reconstructor': reconstructor,
               'test_data': test_data,
               'measure_values': measure_values,
               'misc': misc}

        if return_rows_iterates is not None:
            # create rows for iterates given by return_rows_iterates
            rows_iterates = []
            # convert
            # iterates_for_rows[reconstructions_idx][rows_iterates_idx]
            # to
            # reconstructions_iterates[rows_iterates_idx][reconstructions_idx]
            reconstructions_iterates = [
                list(it) for it in zip(*iterates_for_rows)]
            for iterations, recos_iterates in zip(
                    return_rows_iterates, reconstructions_iterates):
                measure_values_iterates = {}
                for measure in measures:
                    measure_values_iterates[measure.short_name] = [
                        measure.apply(r, g) for r, g in zip(
                            recos_iterates, test_data.ground_truth)]
                misc_iterates = {}
                # number of iterates to keep
                n_iterates = ceil(iterations / save_iterates_step)
                if save_iterates:
                    misc_iterates['iterates'] = iterates[:n_iterates]
                if options.get('save_iterates_measure_values'):
                    misc_iterates['iterates_measure_values'] = {
                        short_name: values[:n_iterates]
                        for short_name, values
                        in iterates_measure_values.items()}
                if hp_choice:
                    misc_iterates['hp_choice'] = hp_choice.copy()
                    # specify 'iterations' hyper param, which was emulated by
                    # using CallbackStoreAfter while running for more iters
                    misc_iterates['hp_choice']['iterations'] = iterations
                row_iterates = {'reconstructions': recos_iterates,
                                'reconstructor': reconstructor,
                                'test_data': test_data,
                                'measure_values': measure_values_iterates,
                                'misc': misc_iterates}
                if not save_reconstructions:
                    row_iterates['reconstructions'] = None
                rows_iterates.append(row_iterates)

        return row if return_rows_iterates is None else (row, rows_iterates)
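
    # Note on the callback composition above (sketch): ODL-style callbacks can
    # be combined with ``&`` so that each iterate is passed to all of them,
    # and composed with an operator via ``*`` so that the operator is applied
    # to the iterate before storing. Minimal sketch with hypothetical lists
    # and a hypothetical operator `measure_op`:
    #
    #     iters, losses = [], []
    #     callback = (CallbackStore(iters) &
    #                 CallbackStore(losses) * measure_op)
    #     callback(x)  # appends x to iters and measure_op(x) to losses
    #
    # The loop ``for c in callbacks[-2::-1]: callback &= c`` above builds the
    # same kind of combined callback from a variable number of stores.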

    def append(self, reconstructor, test_data, measures=None, dataset=None,
               hyper_param_choices=None, options=None):
        """Append a task.

        Parameters
        ----------
        reconstructor : :class:`.Reconstructor`
            The reconstructor.
        test_data : :class:`.DataPairs`
            The test data.
        measures : sequence of (:class:`.Measure` or str), optional
            Measures that will be applied. Either :class:`.Measure` objects or
            their short names can be passed.
        dataset : :class:`.Dataset`, optional
            The dataset that will be passed to
            :meth:`reconstructor.train <LearnedReconstructor.train>` if it is
            a :class:`.LearnedReconstructor`.
        hyper_param_choices : dict of list or list of dict, optional
            Choices of hyper parameter combinations to try as sub-tasks.

                * If a dict of lists is specified, all combinations of the
                  list elements (cartesian product space) are tried.
                * If a list of dicts is specified, each dict is taken as a
                  parameter combination to try.

            The current parameter values are read from
            :attr:`Reconstructor.hyper_params` in the beginning and used as
            default values for all parameters not specified in the passed
            dicts. Afterwards, the original values are restored.
        options : dict
            Options that will be used. Options are:

            ``'skip_training'`` : bool, optional
                Whether to skip training. Can be used for manual training
                of reconstructors (or loading of a stored state).
                Default: ``False``.
            ``'save_best_reconstructor'`` : dict, optional
                If specified, save the best reconstructor from the sub-tasks
                (cf. `hyper_param_choices`) by calling
                :meth:`Reconstructor.save_params`. For
                ``hyper_param_choices=None``, the reconstructor from the
                single sub-task is saved. This option requires `measures` to
                be non-empty if there are multiple sub-tasks.
                The fields are:

                    ``'path'`` : str
                        The path to save the best reconstructor at (argument
                        to :meth:`save_params`). Note that this path is used
                        during execution of the task to store the best
                        reconstructor params so far, so the file(s) are most
                        likely updated multiple times.
                    ``'measure'`` : :class:`.Measure` or str, optional
                        The measure used to define the "best" reconstructor
                        (in terms of mean performance). Must be one of the
                        `measures`. By default ``measures[0]`` is used.
                        This field is ignored if there is only one sub-task.

            ``'save_iterates'`` : bool, optional
                Whether to save the intermediate reconstructions of iterative
                reconstructors. Default: ``False``. Will be ignored if
                ``save_reconstructions=False`` is passed to `run`. If
                ``reuse_iterates=True`` is passed to `run` and there are
                sub-tasks for which iterates are reused, these iterates are
                the same objects for all of those sub-tasks (i.e. no copies).
            ``'save_iterates_measure_values'`` : bool, optional
                Whether to compute and save the measure values for each
                intermediate reconstruction of iterative reconstructors
                (the default is ``False``).
            ``'save_iterates_step'`` : int, optional
                Step size for ``'save_iterates'`` and
                ``'save_iterates_measure_values'`` (the default is 1).
        """
        if measures is None:
            measures = []
        if options is None:
            options = {}
        if (isinstance(reconstructor, LearnedReconstructor) and
                not options.get('skip_training', False) and dataset is None):
            raise ValueError('in order to use a learned reconstructor you '
                             'must specify a `dataset` for training (or set '
                             '``skip_training: True`` in `options` and train '
                             'manually)')
        self.tasks.append({'reconstructor': reconstructor,
                           'test_data': test_data,
                           'measures': measures,
                           'dataset': dataset,
                           'hyper_param_choices': hyper_param_choices,
                           'options': options})
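
    # Example (usage sketch with hypothetical objects): append a task that
    # tries a grid of hyper parameter choices and keeps the best parameters
    # on disk. `reconstructor`, `test_data` and `dataset` are assumed to be
    # set up elsewhere; 'psnr' and 'ssim' refer to registered measure short
    # names.
    #
    #     task_table = TaskTable()
    #     task_table.append(
    #         reconstructor, test_data, measures=['psnr', 'ssim'],
    #         dataset=dataset,
    #         hyper_param_choices={'lam': [0.1, 1.0, 10.0]},
    #         options={'save_best_reconstructor': {
    #             'path': 'params/best_reconstructor', 'measure': 'psnr'}})
    #     results = task_table.run()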

    def append_all_combinations(self, reconstructors, test_data,
                                measures=None, datasets=None,
                                hyper_param_choices=None, options=None):
        """Append tasks of all combinations of test data, reconstructors and
        optionally datasets.

        The order is taken from the lists, with test data changing slowest
        and reconstructor changing fastest.

        Parameters
        ----------
        reconstructors : list of `Reconstructor`
            Reconstructor list.
        test_data : list of `DataPairs`
            Test data list.
        measures : sequence of (`Measure` or str)
            Measures that will be applied. The same measures are used for all
            combinations of test data and reconstructors. Either `Measure`
            objects or their short names can be passed.
        datasets : list of `Dataset`, optional
            Dataset list. Required if `reconstructors` contains at least one
            `LearnedReconstructor`.
        hyper_param_choices : list of (dict of list or list of dict), optional
            Choices of hyper parameter combinations for each reconstructor,
            which are tried as sub-tasks. The i-th element of this list is
            used for the i-th reconstructor. See `append` for documentation
            of how the choices are passed.
        options : dict
            Options that will be used. The same options are used for all
            combinations of test data and reconstructors. See `append` for
            documentation of the options.
        """
        if datasets is None:
            datasets = [None]
        if hyper_param_choices is None:
            hyper_param_choices = [None] * len(reconstructors)
        for test_data_ in test_data:
            for dataset in datasets:
                for reconstructor, hp_choices in zip(reconstructors,
                                                     hyper_param_choices):
                    self.append(reconstructor=reconstructor,
                                test_data=test_data_,
                                measures=measures,
                                dataset=dataset,
                                hyper_param_choices=hp_choices,
                                options=options)
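
    # Example (sketch with hypothetical reconstructors and test data): with
    # two reconstructors and two test data sets,
    #
    #     task_table.append_all_combinations([reco_a, reco_b],
    #                                        [test_data_1, test_data_2],
    #                                        measures=['psnr'])
    #
    # appends four tasks in the order (test_data_1, reco_a),
    # (test_data_1, reco_b), (test_data_2, reco_a), (test_data_2, reco_b),
    # i.e. test data changes slowest and reconstructor fastest.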

    def __repr__(self):
        return "TaskTable(name='{name}', tasks={tasks})".format(
            name=self.name, tasks=self.tasks)


class ResultTable:
    """The results of a :class:`.TaskTable`.

    Cf. :attr:`TaskTable.results`.

    Attributes
    ----------
    results : :class:`pandas.DataFrame`
        The results. The index is given by ``'task_ind'`` and
        ``'sub_task_ind'``, and the columns are ``'reconstructions'``,
        ``'reconstructor'``, ``'test_data'``, ``'measure_values'`` and
        ``'misc'``.
    """

    def __init__(self, row_list):
        """
        Usually, objects of this type are constructed by
        :meth:`TaskTable.run`, which sets :attr:`TaskTable.results`, rather
        than by manually calling this constructor.

        Parameters
        ----------
        row_list : list of dict
            Result rows. Used to build :attr:`results` of type
            :class:`pandas.DataFrame`.
        """
        self.results = pd.DataFrame(row_list).set_index(['task_ind',
                                                         'sub_task_ind'])

    def apply_measures(self, measures, task_ind=None):
        """Apply (additional) measures to reconstructions.

        This is not possible if the reconstructions were not saved, in which
        case a :class:`ValueError` is raised.

        Parameters
        ----------
        measures : list of :class:`.Measure`
            Measures to apply.
        task_ind : int or sequence of ints, optional
            Indexes of tasks to which the measures shall be applied.
            If `None`, this is interpreted as "all results".

        Raises
        ------
        ValueError
            If reconstructions are missing or `task_ind` is not valid.
        """
        if task_ind is None:
            indexes = self.results.index.levels[0]
        elif np.isscalar(task_ind):
            indexes = [task_ind]
        elif isinstance(task_ind, list):
            indexes = task_ind
        else:
            raise ValueError('`task_ind` must be a scalar, a list of ints or '
                             '`None`')
        for i in indexes:
            rows = self.results.loc[i]
            for j in range(len(rows)):
                row = rows.loc[j]
                if row['reconstructions'] is None:
                    raise ValueError('reconstructions missing in task {}{}'
                                     .format(i,
                                             '.{}'.format(j) if len(rows) > 1
                                             else ''))
                for measure in measures:
                    if isinstance(measure, str):
                        measure = Measure.get_by_short_name(measure)
                    row['measure_values'][measure.short_name] = [
                        measure.apply(r, g) for r, g in zip(
                            row['reconstructions'],
                            row['test_data'].ground_truth)]
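
    # Example (usage sketch): measures can be added after running the tasks
    # as long as reconstructions were saved. Assuming `results` was returned
    # by :meth:`TaskTable.run` with ``save_reconstructions=True``:
    #
    #     results.apply_measures(['ssim'], task_ind=0)
    #     print(results)  # measure_values now also contain 'ssim'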

    def plot_reconstruction(self, task_ind, sub_task_ind=0, test_ind=-1,
                            plot_ground_truth=True, **kwargs):
        """Plot the reconstruction at the specified index.
        Supports only 1d and 2d reconstructions.

        Parameters
        ----------
        task_ind : int
            Index of the task.
        sub_task_ind : int, optional
            Index of the sub-task (default ``0``).
        test_ind : sequence of int or int, optional
            Index in test data. If ``-1``, plot all reconstructions (the
            default).
        plot_ground_truth : bool, optional
            Whether to show the ground truth next to the reconstruction.
            The default is ``True``.
        kwargs : dict
            Keyword arguments that are passed to
            :func:`~dival.util.plot.plot_image` if the reconstruction is 2d.

        Returns
        -------
        ax_list : list of :class:`np.ndarray` of :class:`matplotlib.axes.Axes`
            The axes in which the reconstructions and eventually the ground
            truth were plotted.
        """
        row = self.results.loc[task_ind, sub_task_ind]
        test_data = row.at['test_data']
        reconstructor = row.at['reconstructor']
        ax_list = []
        if isinstance(test_ind, int):
            if test_ind == -1:
                test_ind = range(len(test_data))
            else:
                test_ind = [test_ind]
        for i in test_ind:
            title = 'reconstruction for task {}{}, test_data[{}]'.format(
                task_ind,
                '.{}'.format(sub_task_ind)
                if len(self.results.loc[task_ind]) > 1 else '',
                i)
            reconstruction = row.at['reconstructions'][i]
            ground_truth = test_data.ground_truth[i]
            if reconstruction is None:
                raise ValueError('reconstruction is `None`')
            if reconstruction.asarray().ndim > 2:
                print('only 1d and 2d reconstructions can be plotted')
                return
            if reconstruction.asarray().ndim == 1:
                x = reconstruction.space.points()
                _, ax = plt.subplots()
                ax.plot(x, reconstruction, label=reconstructor.name)
                if plot_ground_truth:
                    ax.plot(x, ground_truth, label='ground truth')
                ax.legend()
                ax.set_title(title)
                ax = np.array(ax)
            elif reconstruction.asarray().ndim == 2:
                if plot_ground_truth:
                    _, ax = plot_images([reconstruction, ground_truth],
                                        **kwargs)
                    ax[1].set_title('ground truth')
                else:
                    _, ax = plot_image(reconstruction, **kwargs)
                ax[0].set_title(reconstructor.name)
                ax[0].figure.suptitle(title)
            ax_list.append(ax)
        return ax_list

    def plot_all_reconstructions(self, **kwargs):
        """Plot all reconstructions.

        Parameters
        ----------
        kwargs : dict
            Keyword arguments that are forwarded to
            :meth:`plot_reconstruction`.

        Returns
        -------
        ax : :class:`np.ndarray` of :class:`matplotlib.axes.Axes`
            The axes the reconstructions were plotted in.
        """
        ax = []
        for i, j in self.results.index:
            ax_ = self.plot_reconstruction(task_ind=i, sub_task_ind=j,
                                           **kwargs)
            ax.append(ax_)
        return np.vstack(ax)

    def plot_convergence(self, task_ind, sub_task_ind=0, measures=None,
                         fig_size=None, gridspec_kw=None):
        """
        Plot measure values for saved iterates.

        This shows the convergence behavior with respect to the measures.

        Parameters
        ----------
        task_ind : int
            Index of the task.
        sub_task_ind : int, optional
            Index of the sub-task (default ``0``).
        measures : [list of ] :class:`.Measure`, optional
            Measures to apply. Each measure is plotted in a subplot.
            If `None` is passed, all measures in ``result['measure_values']``
            are used.
        fig_size : tuple of float, optional
            Figure size in inches, passed to
            :meth:`matplotlib.figure.Figure.set_size_inches`.
        gridspec_kw : dict, optional
            Keyword arguments passed to :func:`matplotlib.pyplot.subplots`.

        Returns
        -------
        ax : :class:`np.ndarray` of :class:`matplotlib.axes.Axes`
            The axes the measure values were plotted in.
        """
        row = self.results.loc[task_ind, sub_task_ind]
        iterates_measure_values = row['misc'].get('iterates_measure_values')
        if not iterates_measure_values:
            iterates = row['misc'].get('iterates')
            if not iterates:
                raise ValueError(
                    "no 'iterates_measure_values' or 'iterates' in results "
                    "of task {}{}".format(
                        task_ind,
                        '.{}'.format(sub_task_ind)
                        if len(self.results.loc[task_ind]) > 1 else ''))
        if measures is None:
            measures = row['measure_values'].keys()
        elif isinstance(measures, Measure):
            measures = [measures]
        fig, ax = plt.subplots(len(measures), 1, gridspec_kw=gridspec_kw)
        if not isinstance(ax, np.ndarray):
            ax = np.array([ax])
        if fig_size is not None:
            fig.set_size_inches(fig_size)
        fig.suptitle('convergence of {}'.format(row['reconstructor'].name))
        for measure, ax_ in zip(measures, ax.flat):
            if isinstance(measure, str):
                measure = Measure.get_by_short_name(measure)
            if iterates_measure_values:
                errors = np.mean([iters_mvs[measure.short_name]
                                  for iters_mvs in iterates_measure_values],
                                 axis=0)
            else:
                ground_truth = row['test_data'].ground_truth
                errors = np.mean([[measure.apply(x, g) for x in iters]
                                  for iters, g in zip(iterates,
                                                      ground_truth)],
                                 axis=0)
            ax_.plot(errors)
            ax_.set_title(measure.short_name)
        return ax
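
    # Example (sketch): plotting convergence requires saved iterate
    # information. One way is to store the iterates themselves when appending
    # the task:
    #
    #     task_table.append(reconstructor, test_data, measures=['psnr'],
    #                       options={'save_iterates': True})
    #     results = task_table.run()
    #     results.plot_convergence(task_ind=0, measures=['psnr'])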

    def plot_performance(self, measure, reconstructors=None, test_data=None,
                         weighted_average=False, **kwargs):
        """
        Plot average measure values for different reconstructors.

        The values have to be computed previously, e.g. by
        :meth:`apply_measures`.

        The average is computed over all rows of :attr:`results` with the
        specified `test_data` that store the requested `measure` value.

        Note that for tasks with multiple sub-tasks, all of them are used
        when computing the average (i.e., the measure values for all hyper
        parameter choices are averaged).

        Parameters
        ----------
        measure : :class:`.Measure` or str
            The measure to plot (or its :attr:`~.Measure.short_name`).
        reconstructors : sequence of :class:`.Reconstructor`, optional
            The reconstructors to compare. If `None` (default), all
            reconstructors that are found in the results are compared.
        test_data : [sequence of ] :class:`.DataPairs`, optional
            Test data to take into account for computing the mean value.
            By default, all test data is used.
        weighted_average : bool, optional
            Whether to weight the rows according to the number of pairs in
            their test data.
            Default: ``False``, i.e. all rows are weighted equally.
            If ``True``, all test data pairs are weighted equally.

        Returns
        -------
        ax : `matplotlib.axes.Axes`
            The axes the performance was plotted in.
        """
        if not isinstance(measure, Measure):
            measure = Measure.get_by_short_name(measure)
        if reconstructors is None:
            reconstructors = self.results['reconstructor'].unique()
        if isinstance(test_data, DataPairs):
            test_data = [test_data]
        mask = [measure.short_name in row['measure_values'].keys() and
                row['reconstructor'] in reconstructors and
                (test_data is None or row['test_data'] in test_data)
                for _, row in self.results.iterrows()]
        rows = self.results[mask]
        v = []
        for reconstructor in reconstructors:
            r_rows = rows[rows['reconstructor'] == reconstructor]
            values = [mvs[measure.short_name]
                      for mvs in r_rows['measure_values']]
            weights = None
            if weighted_average:
                weights = [len(test_data.observations)
                           for test_data in r_rows['test_data']]
            v.append(np.average(values, weights=weights))
        fig, ax = plt.subplots(**kwargs)
        ax.bar(range(len(v)), v)
        ax.set_xticks(range(len(v)))
        ax.set_xticklabels([r.name for r in reconstructors], rotation=30)
        ax.set_title('{measure_name}'.format(measure_name=measure.name))
        return ax
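
    # Example (sketch): compare the mean PSNR of all reconstructors that
    # appear in the results, weighting every test data pair equally;
    # 'performance_psnr.png' is a hypothetical output path.
    #
    #     ax = results.plot_performance('psnr', weighted_average=True)
    #     ax.figure.savefig('performance_psnr.png')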

    def to_string(self, max_colwidth=70, formatters=None, hide_columns=None,
                  show_columns=None, **kwargs):
        """Convert to string. Used by :meth:`__str__`.

        Parameters
        ----------
        max_colwidth : int, optional
            Maximum width of the columns, cf. the option
            ``'display.max_colwidth'`` of pandas.
        formatters : dict of functions, optional
            Custom formatter functions for the columns, passed to
            :meth:`results.to_string <pandas.DataFrame.to_string>`.
        hide_columns : list of str, optional
            Columns to hide. Default: ``['reconstructions', 'misc']``.
        show_columns : list of str, optional
            Columns to show. Overrides `hide_columns`.
        kwargs : dict
            Keyword arguments passed to
            :meth:`results.to_string <pandas.DataFrame.to_string>`.

        Returns
        -------
        string : str
            The string.
        """
        def measure_values_formatter(measure_values):
            means = ['{}: {:.4g}'.format(k, np.mean(v))
                     for k, v in measure_values.items()]
            return 'mean: {{{}}}'.format(', '.join(means))

        def name_or_repr_formatter(x):
            return x.name or x.__repr__()

        formatters_ = {}
        formatters_['measure_values'] = measure_values_formatter
        formatters_['test_data'] = name_or_repr_formatter
        formatters_['reconstructor'] = name_or_repr_formatter
        if formatters is not None:
            formatters_.update(formatters)
        if hide_columns is None:
            hide_columns = ['reconstructions', 'misc']
        if show_columns is None:
            show_columns = []
        columns = [c for c in self.results.columns
                   if c not in hide_columns or c in show_columns]
        with pd.option_context('display.max_colwidth', max_colwidth):
            return "ResultTable(results=\n{}\n)".format(
                self.results.to_string(formatters=formatters_,
                                       columns=columns, **kwargs))

    def print_summary(self):
        """Print a summary of the results.
        """
        print('ResultTable with {:d} tasks.'.format(
            len(self.results.index.levels[0])))
        if len(self.results.index.levels[1]) > 1:
            print('Total count of sub-tasks: {}'.format(len(self.results)))
        test_data_list = pd.unique(self.results['test_data'])
        if len(test_data_list) == 1:
            print('Test data: {}'.format(test_data_list[0]))

    def __repr__(self):
        return "ResultTable(results=\n{results})".format(results=self.results)

    def __str__(self):
        return self.to_string()
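

# Example (end-to-end usage sketch with hypothetical objects):
# `reconstructor` is any dival Reconstructor, and `observations` /
# `ground_truths` are assumed to be sequences of matching elements of its
# observation and reconstruction spaces.
#
#     from dival.data import DataPairs
#     from dival.evaluation import TaskTable
#
#     test_data = DataPairs(observations, ground_truths, name='demo')
#     task_table = TaskTable(name='demo_eval')
#     task_table.append(reconstructor, test_data, measures=['psnr', 'ssim'])
#     results = task_table.run(show_progress='text')
#     results.print_summary()
#     print(results)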