# Source code for ada.utils.plotting

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from collections import defaultdict
from tqdm import tqdm
import logging


# Names of the seaborn palettes used by the plotting helpers; the position in
# this list selects the role (index 0 and 1 are used for the source and target
# scatter points, index 2 for labelled target points).
PALETTES = ["deep", "pastel", "bright", "dark", "colorblind"]

# One list of 4 colours per entry of PALETTES, in the same order.
COLORS = [sns.color_palette(palette_name, n_colors=4) for palette_name in PALETTES]

# 10-step diverging palette used for the entropy contour levels.
DIV_PAL = sns.diverging_palette(h_neg=180, h_pos=0, l=60, n=10)


def colored_scattered_plot2x2(
    X_s,
    X_t,
    y_sparse_train_s,
    y_sparse_train_t,
    figsize=(12, 6),
    set_aspect_equal=False,
):
    """Scatter-plot 2D source and target samples, coloured by class label.

    Source samples are drawn with "o" markers using the first palette of
    ``PALETTES`` and target samples with "x" markers using the second one.
    Every class present in a domain gets its own legend entry, named
    ``"<label>_source"`` or ``"<label>_target"``.

    Args:
        X_s: source samples; columns 0 and 1 are used as x and y coordinates.
        X_t: target samples, same layout as ``X_s``.
        y_sparse_train_s: class label of each source sample. Must support
            ``.squeeze()``. The label value is used as an index into the
            colour palette, so labels must be non-negative integers
            (integer-valued floats are accepted).
        y_sparse_train_t: class label of each target sample, same
            requirements as ``y_sparse_train_s``.
        figsize (tuple, optional): defaults to (12, 6).
            Size passed to ``plt.subplots``.
        set_aspect_equal (bool, optional): defaults to False.
            If True, the axes use an equal aspect ratio.

    Returns:
        tuple: the ``(fig, ax)`` pair the points were drawn on.
    """
    df_s = pd.DataFrame(
        dict(x=X_s[:, 0], y=X_s[:, 1], label=y_sparse_train_s.squeeze())
    )
    df_t = pd.DataFrame(
        dict(x=X_t[:, 0], y=X_t[:, 1], label=y_sparse_train_t.squeeze())
    )
    marker_s = "o"
    marker_t = "x"
    fig, ax = plt.subplots(figsize=figsize)
    grouped_s = df_s.groupby("label")
    grouped_t = df_t.groupby("label")

    # Colours are looked up by label value, so the palette must reach the
    # highest label of *either* domain, not just the number of source classes
    # (a class present only in the target batch would otherwise overflow it).
    label_keys = list(grouped_s.groups) + list(grouped_t.groups)
    highest_label = max((int(key) for key in label_keys), default=-1)
    n_colors = max(len(grouped_s), highest_label + 1)
    if n_colors <= 4:
        # The precomputed 4-colour palettes are enough.
        colors = COLORS
    else:
        colors = [sns.color_palette(pal_name, n_colors) for pal_name in PALETTES]

    for key, group in grouped_s:
        group.plot(
            ax=ax,
            kind="scatter",
            x="x",
            y="y",
            label=str(key) + "_source",
            color=colors[0][int(key)],
            marker=marker_s,
        )
    for key, group in grouped_t:
        group.plot(
            ax=ax,
            kind="scatter",
            x="x",
            y="y",
            label=str(key) + "_target",
            color=colors[1][int(key)],
            marker=marker_t,
        )
    if set_aspect_equal:
        ax.set_aspect("equal")
    fig.set_tight_layout(tight=None)
    return fig, ax


def plot_archi_data(
    domain_archi,
    tag,
    save_prefix=None,
    plot_features=None,
    plot_f_lines=False,
    do_domain_boundary=False,
    do_entropy=False,
    num_samples=600,
):
    """This method generates a series of figures depending on the model and
    on the dataset used.

    For toy data, more figures are available:

        - the classifier boundary
        - the domain boundary
        - the entropy values
        - the lines corresponding to the hidden neurons of the first feature layer
        - a PCA or TSNE or UMAP projection of the features

    For other datasets with more than 2 dimensions,
        - only the feature projections

    are available.

    Note that the architecture is moved to the CPU (``domain_archi.to("cpu")``)
    as a side effect.

    Args:
        domain_archi (BaseAdaptTrainer):
            the trained architecture.
        tag (string):
            the name of the method used both in the generated image titles and file names.
        save_prefix (string, optional): defaults to None.
            images will be saved to "{save_prefix}_{auto-gen-name}.png"
            If save_prefix is None, the images are not saved to disk.
        plot_features (string or list of strings, optional): defaults to None
            None or string or list of strings from ("pca", "tsne", "umap"),
            case-insensitive.
        plot_f_lines (bool, optional): defaults to False.
            If True, plot the lines corresponding to the first neurons for 2D data.
        do_domain_boundary (bool, optional): defaults to False
            If True, plot the domain boundary for 2D toy data.
            Ignored for MMD methods.
        do_entropy (bool, optional): defaults to False
            If True, plots the level of entropy values between 0 and 1
        num_samples (int, optional): defaults to 600
            Number of random samples use for plotting; half of it is used as
            the batch size requested from the domain loaders.

    Returns:
        dict: the generated figures, keyed by "classifier boundary",
        "entropy", "domain boundary", "neurons" and the upper-cased
        projection names ("PCA", "TSNE", "UMAP"), for those that were drawn.

    Raises:
        ValueError: if ``plot_features`` contains a name other than
            "pca", "tsne" or "umap".
    """
    import torch

    dataset = domain_archi._dataset
    model = domain_archi.to("cpu")
    feat_extract = domain_archi.feat
    figs = {}

    num_src = num_samples // 2

    # Only the first batch of the loaders is plotted.
    dl = iter(dataset.get_domain_loaders(batch_size=num_src))
    if dataset.is_semi_supervised():
        (X_source, y_source), (X_tl, y_tl), (X_target, y_target) = next(dl)
    else:
        (X_source, y_source), (X_target, y_target) = next(dl)

    # Flattened input dimension decides which figures make sense.
    dim = X_source.view(X_source.shape[0], -1).shape[1]
    h = 0.1  # step of the evaluation grid

    if dim > 2:
        do_domain_boundary = False
        do_classifier_boundary = False
        do_entropy = False
        plot_f_lines = False
    else:
        do_classifier_boundary = True
        do_domain_boundary = (
            do_domain_boundary and not domain_archi.method.is_mmd_method()
        )

    if do_classifier_boundary:
        x_min, x_max = (
            np.minimum(X_source[:, 0].min(), X_target[:, 0].min()),
            np.maximum(X_source[:, 0].max(), X_target[:, 0].max()),
        )
        y_min, y_max = (
            np.minimum(X_source[:, 1].min(), X_target[:, 1].min()),
            np.maximum(X_source[:, 1].max(), X_target[:, 1].max()),
        )
        # Increase margin: pad each bound by 10% of its magnitude plus 0.1,
        # always moving away from the data. (Multiplying the bound itself by
        # 1.1 would shrink the range for a positive minimum or a negative
        # maximum and cut samples out of the grid.)
        x_min, x_max = x_min - 0.1 * abs(x_min) - 0.1, x_max + 0.1 * abs(x_max) + 0.1
        y_min, y_max = y_min - 0.1 * abs(y_min) - 0.1, y_max + 0.1 * abs(y_max) + 0.1

        fig, ax = colored_scattered_plot2x2(X_source, X_target, y_source, y_target)

        if dataset.is_semi_supervised():
            plt.scatter(X_tl[:, 0], X_tl[:, 1], c=np.array(COLORS[2])[y_tl])

        # Evaluate the model on a regular grid covering the data.
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
        Z = np.c_[xx.ravel(), yy.ravel()]
        Z_torch = torch.from_numpy(np.atleast_2d(Z)).float()

        # MMD methods have no domain classifier output.
        if not domain_archi.method.is_mmd_method():
            Z_feat, Z_class, D_class = model.forward(Z_torch)
        else:
            Z_feat, Z_class = model.forward(Z_torch)

        # Predicted class = argmax over the class outputs.
        classe = Z_class.data.max(1)[1].numpy()

        classe = classe.reshape(xx.shape)
        n_classes = len(np.unique(y_source))
        plt.contour(xx, yy, classe, alpha=0.8, colors=COLORS[0], levels=n_classes - 2)
        plt.contourf(xx, yy, classe, alpha=0.2, colors=COLORS[0], levels=n_classes - 1)
        plt.title(f"{tag} Classifier boundary")
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
        if save_prefix is not None:
            plt.savefig(
                f"{save_prefix}_{tag}_classifier_boundary.png", bbox_inches="tight"
            )
        figs["classifier boundary"] = fig

    # The entropy and domain-boundary plots reuse the grid (xx, yy) and the
    # model outputs computed for the classifier boundary above; they are only
    # enabled when that section ran (dim <= 2).
    if do_entropy:
        from torch.nn import functional as F

        class_output = F.softmax(Z_class, dim=1)
        # Entropy of the predicted class distribution at each grid point;
        # the 1e-9 offset keeps log() finite for zero probabilities.
        loss_ent = (
            -1.0
            * torch.sum(class_output * (torch.log(class_output + 1e-9)), 1)
            .detach()
            .numpy()
        )
        entropy = loss_ent.reshape(xx.shape)
        fig, _ = colored_scattered_plot2x2(X_source, X_target, y_source, y_target)
        plt.contourf(xx, yy, entropy, alpha=0.2, colors=DIV_PAL, levels=10)
        plt.title(f"{tag} Entropy")
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
        if save_prefix is not None:
            plt.savefig(f"{save_prefix}_{tag}_entropy.png", bbox_inches="tight")

        figs["entropy"] = fig

    if do_domain_boundary:
        fig, _ = colored_scattered_plot2x2(X_source, X_target, y_source, y_target)

        dom_class = D_class.data.max(1)[1].numpy()
        dom_class = dom_class.reshape(xx.shape)
        plt.contour(xx, yy, dom_class, levels=[0], colors="black")
        plt.contourf(xx, yy, dom_class, alpha=0.2, colors=COLORS[0], levels=1)
        plt.title(f"{tag} Domain classifier boundary")
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
        if save_prefix is not None:
            plt.savefig(f"{save_prefix}_{tag}_domain_boundary.png", bbox_inches="tight")

        figs["domain boundary"] = fig

    if plot_f_lines and dim == 2:
        fig, ax = colored_scattered_plot2x2(X_source, X_target, y_source, y_target)
        W = feat_extract.feature[0].weight.data.numpy()
        B = feat_extract.feature[0].bias.data.numpy()
        for i in range(W.shape[0]):
            # Line where neuron i's pre-activation is zero:
            # W[i, 0] * x + W[i, 1] * y + B[i] = 0, solved for y.
            x0 = np.linspace(x_min, x_max)
            y0 = -W[i, 0] / W[i, 1] * x0 - B[i] / W[i, 1]
            ax.plot(x0, y0, alpha=0.3, c="black")
        ax.set_ylim([y_min, y_max])
        plt.title(f"{tag} Hidden neurons (first feature layer)")
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
        if save_prefix is not None:
            plt.savefig(f"{save_prefix}_{tag}_neurons.png", bbox_inches="tight")

        figs["neurons"] = fig

    # Get embeddings
    if plot_features:
        if isinstance(plot_features, str):
            plot_features = [plot_features]

        # Fail early on unsupported names: otherwise the loop below would hit
        # an undefined projection, or silently reuse the previous one under a
        # wrong title and file name.
        supported = ("PCA", "TSNE", "UMAP")
        unknown = [proj for proj in plot_features if proj.upper() not in supported]
        if unknown:
            raise ValueError(
                f"Unknown feature projection(s) {unknown}; "
                'expected any of ("pca", "tsne", "umap")'
            )

        X_source_map = feat_extract(X_source).data.numpy()
        X_target_map = feat_extract(X_target).data.numpy()

        # Project source and target features together so both share one space.
        emb_all = np.vstack([X_source_map, X_target_map])

        for proj in plot_features:
            proj_name = proj.upper()
            comment = f"{proj_name} projection of feature layer"

            if proj_name == "PCA":
                from sklearn.decomposition import PCA

                pca = PCA(n_components=2)
                fea_plot = pca.fit_transform(emb_all)
            elif proj_name == "TSNE":
                from sklearn.manifold import TSNE

                tsne = TSNE(n_components=2, init="random", random_state=9365)
                fea_plot = tsne.fit_transform(emb_all)
            elif proj_name == "UMAP":
                import umap

                um = umap.UMAP(n_neighbors=10, min_dist=0.1, metric="euclidean")
                um.fit(emb_all)
                fea_plot = um.transform(emb_all)

            # The first `num` rows of the projection are the source samples.
            num = X_source.shape[0]

            fig, _ = colored_scattered_plot2x2(
                fea_plot[:num, :], fea_plot[num:, :], y_source.numpy(), y_target.numpy()
            )
            plt.title(f"{tag} {comment}")
            ax = plt.gca()
            ax.axes.xaxis.set_visible(False)
            ax.axes.yaxis.set_visible(False)
            if len(np.unique(y_source)) < 4:
                # legend becomes useless when there are many classes
                plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
            if save_prefix is not None:
                plt.savefig(
                    f"{save_prefix}_{tag}_features_{proj_name}.png",
                    bbox_inches="tight",
                )

            figs[proj_name] = fig
    return figs
# Source code for ada.utils.plotting
#
# Ada
#
# Navigation
#
# Related Topics