Source code for ada.utils.experimentation_results

import os.path
import logging
import shutil
import pandas as pd
import numpy as np
import ada.utils.experimentation as xp


class XpResults:
    """Accumulate per-(method, seed, split) metric results and report summary scores."""

    @staticmethod
    def from_file(metrics, filepath):
        """Load previous results from ``filepath`` if it exists, backing the file up first."""
        if os.path.exists(filepath):
            df = pd.read_csv(filepath, index_col=0)
            time_str = xp.create_timestamp_string()
            backup_file = f"{filepath}.{time_str}.bak"
            logging.info(f"Copying {filepath} to {backup_file}")
            shutil.copyfile(filepath, backup_file)
            return XpResults(metrics, df)
        else:
            return XpResults(metrics)
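
    # A minimal usage sketch (the file name is hypothetical): loading an
    # existing CSV first writes a timestamped ``.bak`` copy, so results
    # already on disk survive a crashed or repeated run.
    #
    #     results = XpResults.from_file(["accuracy"], "results.csv")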

    def __init__(self, metrics, df=None):
        """
        Args:
            metrics (list of string): Which metrics to record.
            df (pandas.DataFrame, optional): columns are:
                metrics + [seed, method, split]. Defaults to None.
        """
        self._metrics = metrics[:]
        if df is None:
            self._df = pd.DataFrame(columns=metrics + ["seed", "method", "split"])
        else:
            self._df = df.copy()

    def __len__(self):
        return len(self._df)

    def already_computed(self, method_name, seed):
        """Return True if every metric is already recorded for this (method, seed) pair."""
        if len(self._df) == 0:
            return False
        # @-references let pandas compare seeds with their native dtype
        # (the original string interpolation never matched integer seeds).
        ms_df = self._df.query("method == @method_name and seed == @seed")
        if len(ms_df) == 0:
            return False
        for m in self._metrics:
            if m not in ms_df.columns:
                return False
        return True
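
    # Usage sketch (method name and seed range hypothetical): resume an
    # experiment loop without recomputing finished (method, seed) pairs.
    #
    #     for seed in range(5):
    #         if results.already_computed("baseline", seed):
    #             continue
    #         # ... train, evaluate, then call results.update(...)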

    def remove(self, method_names):
        """Drop all rows recorded for the given method names."""
        logging.info(f"Removing results for methods: {method_names}")
        self._df = self._df[~self._df["method"].isin(method_names)]

    def update(self, is_validation, method_name, seed, metric_values):
        """Record one run's metric values for the given method, seed and split."""
        split, prefix = ("Validation", "V") if is_validation else ("Test", "Te")
        results = pd.DataFrame(
            {
                k: metric_values.get(f"{prefix}_{k.replace(' ', '_')}", None)
                for k in self._metrics
            },
            index=[0],
        )
        results["seed"] = seed
        results["method"] = method_name
        results["split"] = split
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        self._df = pd.concat([self._df, results], ignore_index=True)
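
    # Note the key convention: ``update`` looks each metric up under a
    # split-prefixed name with spaces replaced by underscores, so metric
    # "domain acc" is read from ``metric_values["V_domain_acc"]`` on the
    # validation split and ``metric_values["Te_domain_acc"]`` on test.
    # A sketch with made-up names and values:
    #
    #     results.update(
    #         is_validation=True,
    #         method_name="baseline",
    #         seed=0,
    #         metric_values={"V_domain_acc": 0.87},
    #     )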

    def get_data(self):
        return self._df

    def get_best_archi_seed(self):
        """Return the seed of the single best run, sorting by all recorded metrics."""
        return (
            self._df.sort_values(by=self._metrics, ascending=False)
            .head(1)
            .seed.values[0]
        )

    def get_last_seed(self):
        return self._df.tail(1).seed.values[0]

    def get_mean_seed(self, mean_metric):
        """Return the seed whose results are closest to the per-method mean results."""
        if mean_metric not in self._metrics:
            raise ValueError(f"Unknown metric: {mean_metric}")
        all_res_valid = self._df.query("split=='Validation'").dropna()
        if all_res_valid.empty:
            all_res_valid = self._df.query("split=='Test'").dropna()
        # Copy before mutating to avoid pandas' SettingWithCopyWarning, and cast
        # with the builtin float (np.float was removed in NumPy 1.24).
        all_res_valid = all_res_valid.copy()
        all_res_valid[mean_metric] = all_res_valid[mean_metric].astype(float)
        tres_means = all_res_valid.groupby("method")[mean_metric].mean()
        all_seed_res = all_res_valid.pivot(
            index="seed", columns="method", values=mean_metric
        )

        def dist_to_mean(row):
            return np.mean((row - tres_means) ** 2)

        all_seed_res["dist"] = all_seed_res.apply(dist_to_mean, axis=1)
        return all_seed_res.sort_values(by="dist", ascending=True).head(1).index[0]
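
    # Selection rule, spelled out: for each seed s this computes
    # dist(s) = mean over methods m of (result[s, m] - mean_m) ** 2, where
    # mean_m averages the metric over every seed of method m, and returns
    # the seed with the smallest distance, i.e. the most "typical" run.
    #
    #     typical_seed = results.get_mean_seed("accuracy")  # metric name hypothetical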

    def to_csv(self, filepath):
        self._df.to_csv(filepath)

    def print_scores(
        self,
        method_name,
        split="Validation",
        stdout=True,
        fdout=None,
        print_func=print,
        file_format="markdown",
    ):
        """Report mean +- 95% CI (1.96 x standard error) for each metric."""
        mres = self._df.query(f"method == '{method_name}' and split == '{split}'")
        nsamples = len(mres)
        mmres = [
            (mres[m].mean(), mres[m].std() / np.sqrt(nsamples)) for m in self._metrics
        ]
        if stdout:
            print_func(split, end=" ")
            print_func(method_name, end="")
            print_func(" " * (10 - len(method_name)), end="")
            print_func(
                "\t\t".join(
                    (
                        f"{m * 100:.1f}% +- {1.96 * s * 100:.2f} ({nsamples} runs)"
                        for m, s in mmres
                    )
                )
            )
        if fdout is not None:
            if file_format == "markdown":
                fdout.write(f"|{method_name}|")
                fdout.write(
                    "|".join(
                        (f"{m * 100:.1f}% +- {1.96 * s * 100:.2f}" for m, s in mmres)
                    )
                )
                fdout.write("|\n")
            else:
                fdout.write(method_name)
                fdout.write(" " * (10 - len(method_name)))
                fdout.write(
                    "\t\t".join(
                        (f"{m * 100:.1f}% +- {1.96 * s * 100:.2f}" for m, s in mmres)
                    )
                )
                fdout.write(f" ({split})")
                fdout.write("\n")
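
    # Usage sketch (names and numbers are illustrative): the reported interval
    # is a 95% confidence interval, 1.96 times the standard error of the mean.
    #
    #     with open("scores.md", "a") as fd:  # hypothetical path
    #         results.print_scores("baseline", split="Test", fdout=fd)
    #     # stdout: Test baseline  91.2% +- 0.45 (5 runs)
    #     # file:   |baseline|91.2% +- 0.45|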

    def append_to_txt(self, filepath, test_params, nseeds, splits=None):
        if splits is None:
            splits = ["Validation", "Test"]
        with open(filepath, "a") as fd:
            fd.write(xp.param_to_str(test_params))
            fd.write("\n")
            print(" " * 10, "\t\t".join(self._metrics))
            fd.write("nseeds = ")
            fd.write(str(nseeds))
            fd.write("\n")
            fd.write("method\t")
            fd.write("\t\t".join(self._metrics))
            fd.write("\n")
            for name in self._df.method.unique():
                for split in splits:
                    self.print_scores(
                        method_name=name,
                        split=split,
                        stdout=True,
                        fdout=fd,
                        file_format="text",
                    )
            fd.write("\n")

    def append_to_markdown(self, filepath, test_params, nseeds, splits=None):
        if splits is None:
            splits = ["Validation", "Test"]
        with open(filepath, "a") as fd:
            fd.write(xp.param_to_str(test_params))
            fd.write("\n")
            print(" " * 10, "\t\t".join(self._metrics))
            fd.write("nseeds = ")
            fd.write(str(nseeds))
            fd.write("\n")
            fd.write(f"|Method|{'|'.join(self._metrics)}|\n")
            fd.write("|:----|")
            for i in range(len(self._metrics)):
                fd.write(":---:|")
            fd.write("\n")
            for name in self._df.method.unique():
                for split in splits:
                    self.print_scores(
                        method_name=name, split=split, stdout=True, fdout=fd
                    )
            fd.write("\n")
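
# End-to-end sketch (all names and values hypothetical; test_params is whatever
# the experiment config passes to xp.param_to_str): record two seeds for one
# method, persist the table, then append a markdown summary.
#
#     results = XpResults.from_file(["accuracy"], "results.csv")
#     for seed in (0, 1):
#         if results.already_computed("baseline", seed):
#             continue
#         results.update(True, "baseline", seed, {"V_accuracy": 0.90 + 0.01 * seed})
#         results.update(False, "baseline", seed, {"Te_accuracy": 0.88 + 0.01 * seed})
#     results.to_csv("results.csv")
#     results.append_to_markdown("summary.md", test_params, nseeds=2)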