Plotting

Plotting helpers and dimensionality-reduction preparation functions.

Result Plots

plot_qqplot

plot_qqplot(
    df: DataFrame,
    exp_col: str = "FC",
    stat_col: str = "adjusted p-value",
    plot_qqline: bool = True,
    sig_threshold: float = 0.1,
) -> None

Plot QQ-plot using a d_regulation dataframe

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	A d_regulation dataframe	required
`exp_col`	`str`	Name of the column whose values are plotted on the y-axis	`'FC'`
`stat_col`	`str`	Column name of data used to check significance	`'adjusted p-value'`
`plot_qqline`	`bool`	Plot Q-Q line on the plot	`True`
`sig_threshold`	`float`	Significance threshold; rows whose `stat_col` value is below it are flagged as significant	`0.1`

Returns:

Type	Description
`None`

Source code in scTenifold/plotting/_plotting.py

def plot_qqplot(df: pd.DataFrame,
                exp_col: str = "FC",
                stat_col: str = "adjusted p-value",
                plot_qqline: bool = True,
                sig_threshold: float = 0.1) -> None:
    """
    Plot a Q-Q plot from a d_regulation dataframe.

    The sorted values of ``exp_col`` are plotted against the theoretical
    quantiles of a chi-square distribution with one degree of freedom.
    Points are coloured by whether their ``stat_col`` value is below
    ``sig_threshold``. The input dataframe is not modified.

    Parameters
    ----------
    df: pd.DataFrame
        A d_regulation dataframe
    exp_col: str
        Name of the column whose values are plotted on the y-axis
    stat_col: str
        Name of the column used to check significance
    plot_qqline: bool
        If True, draw a reference line through the first and third
        quartiles of the theoretical and observed values
    sig_threshold: float
        Significance threshold; rows with ``stat_col`` below this value
        are flagged as significant
    Returns
    -------
    None
    """
    the_col = "Theoretical quantiles"
    len_x = df.shape[0]
    # Work on an explicit copy so the helper columns added below can never
    # write through to (or raise chained-assignment warnings about) ``df``.
    data = df.loc[:, [exp_col, stat_col]].copy()
    data["significant"] = data[stat_col] < sig_threshold
    data = data.sort_values(exp_col)
    # Drop the 0 and 1 end points: chi2.ppf is 0 / inf there.
    data[the_col] = chi2.ppf(q=np.linspace(0, 1, len_x + 2)[1:-1], df=1)
    sns.scatterplot(data=data, x=the_col, y=exp_col, hue="significant")
    if plot_qqline:
        xl_1, xl_2 = plt.gca().get_xlim()
        x1, x2 = data[the_col].quantile(0.25), data[the_col].quantile(0.75)
        y1, y2 = data[exp_col].quantile(0.25), data[exp_col].quantile(0.75)
        # With coinciding quartiles (e.g. a single row) the slope is
        # undefined, so no reference line is drawn.
        if x2 != x1:
            slope = (y2 - y1) / (x2 - x1)
            intercept = y1 - slope * x1
            plt.plot([xl_1, xl_2],
                     [slope * xl_1 + intercept, slope * xl_2 + intercept])
            # Restore the limits captured before the line was added.
            plt.xlim([xl_1, xl_2])
    plt.show()

plot_hist

plot_hist(
    df_1: DataFrame,
    df_1_name: str,
    df_2: Optional[DataFrame] = None,
    df_2_name: Optional[str] = None,
    sum_axis: int = 0,
    label: str = "Sample",
    figsize: Tuple[int, int] = (10, 8),
) -> None

Plot library-size histograms for one or two QC matrices.

Parameters:

Name	Type	Description	Default
`df_1`	`DataFrame`	Genes-by-cells (or cells-by-genes) DataFrame.	required
`df_1_name`	`str`	Legend label for `df_1`.	required
`df_2`	`Optional[DataFrame]`	Optional second DataFrame plotted on the same axes.	`None`
`df_2_name`	`Optional[str]`	Legend label for `df_2`.	`None`
`sum_axis`	`int`	Axis to sum over before histogramming (`0` for genes-by-cells, `1` for cells-by-genes).	`0`
`label`	`str`	X-axis label for the histogram.	`'Sample'`
`figsize`	`Tuple[int, int]`	Figure size in inches.	`(10, 8)`

Source code in scTenifold/plotting/_plotting.py

def plot_hist(df_1: pd.DataFrame,
              df_1_name: str,
              df_2: Optional[pd.DataFrame] = None,
              df_2_name: Optional[str] = None,
              sum_axis: int = 0,
              label: str = "Sample",
              figsize: Tuple[int, int] = (10, 8)) -> None:
    """Plot library-size histograms for one or two QC matrices.

    Each DataFrame is summed along ``sum_axis`` and the resulting totals are
    drawn as a histogram. When ``df_2`` is given, both sets of totals share
    one axes and are distinguished by hue. The inputs are not modified.

    Parameters
    ----------
    df_1
        Genes-by-cells (or cells-by-genes) DataFrame.
    df_1_name
        Legend label for ``df_1``.
    df_2
        Optional second DataFrame plotted on the same axes.
    df_2_name
        Legend label for ``df_2``.
    sum_axis
        Axis to sum over before histogramming (``0`` for genes-by-cells,
        ``1`` for cells-by-genes).
    label
        X-axis label for the histogram.
    figsize
        Figure size in inches.

    Raises
    ------
    ValueError
        If ``sum_axis`` is neither ``0`` nor ``1``.
    """
    # Validate before creating the figure so a bad argument does not leave
    # an empty figure behind.
    if sum_axis != 0 and sum_axis != 1:
        raise ValueError(f"sum_axis should be 0 or 1, got {sum_axis!r}")
    axis = 0 if sum_axis == 0 else 1

    fig, ax = plt.subplots(figsize=figsize)
    frames = []
    for df, name in ((df_1, df_1_name), (df_2, df_2_name)):
        if df is None:
            continue
        totals = df.sum(axis=axis).to_frame(name=label)
        totals["name"] = name
        frames.append(totals)

    if df_2 is not None:
        # The two matrices may share labels (e.g. the same cells in both);
        # a fresh index avoids duplicate labels in the frame given to seaborn.
        combined = pd.concat(frames, ignore_index=True)
        sns.histplot(data=combined, x=label, hue="name", ax=ax)
    else:
        sns.histplot(data=frames[0], x=label, ax=ax)
    plt.show()

plot_embedding

plot_embedding(
    df: DataFrame,
    groups: Optional[Dict[str, List[str]]],
    method: str = "UMAP",
    plot_2D: bool = True,
    figsize: Tuple[int, int] = (8, 8),
    size: int = 10,
    title: Optional[str] = None,
    palette: str = "muted",
    **kwargs: object,
) -> None

Perform dimension reduction and plot the embeddings on a 2D or 3D scatter plot

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	A dataframe to perform dimension reduction	required
`groups`	`Optional[Dict[str, List[str]]]`	A dict indicating the groups	required
`method`	`str`	Name of the dimension-reduction method; one of: PCA, TSNE, UMAP, Isomap, MDS, SpectralEmbedding, LocallyLinearEmbedding	`'UMAP'`
`plot_2D`	`bool`	Draw a 2D or 3D (if false) plot	`True`
`figsize`	`Tuple[int, int]`	The figure size of the plot: (width, height)	`(8, 8)`
`size`	`int`	Marker size passed to the scatter plot	`10`
`title`	`Optional[str]`	The subplot's title	`None`
`palette`	`str`	Name of the seaborn color palette to use, reference: https://seaborn.pydata.org/generated/seaborn.color_palette.html	`'muted'`
`kwargs`	`object`	Keyword arguments forwarded to the dimension-reduction function	`{}`

Returns:

Type	Description
`None`

Source code in scTenifold/plotting/_plotting.py

def plot_embedding(df: pd.DataFrame,
                   groups: Optional[Dict[str, List[str]]],
                   method: str = "UMAP",
                   plot_2D: bool = True,
                   figsize: Tuple[int, int] = (8, 8),
                   size: int = 10,
                   title: Optional[str] = None,
                   palette: str = "muted",
                   **kwargs: object) -> None:
    """
    Do dimension reduction and plot the embeddings onto a 2D or 3D plot

    Parameters
    ----------
    df: pd.DataFrame
        A dataframe to perform dimension reduction
    groups: dict(str, list)
        A dict mapping each group name to the sample names (columns of
        ``df``) in that group. If None, all columns form one group "all".
    method: str
        The name of used method, could be: PCA, TSNE, UMAP, Isomap, MDS, SpectralEmbedding, LocallyLinearEmbedding
    plot_2D: bool
        Draw a 2D or 3D (if false) plot
    figsize: tuple of int
        The figure size of the plot: (width, height)
    size: int
        Marker size passed to the scatter plot
    title: str
        The subplot's title
    palette: str
        The name of used seaborn color palette,
        reference: https://seaborn.pydata.org/generated/seaborn.color_palette.html
        Colors are reused cyclically when there are more groups than colors.
    kwargs: keyword arguments of doing dimension reduction

    Returns
    -------
    None
    """
    n_dims = 2 if plot_2D else 3

    if method == "PCA":
        # Only the sample scores are needed for plotting.
        feature_df, _, _ = prepare_PCA_dfs(df, **kwargs)
        emb_name = "PC"
    else:
        if not plot_2D:
            # prepare_embedding_dfs defaults to 2 components, which would
            # leave no third coordinate for a 3D plot.
            kwargs.setdefault("n_components", 3)
        feature_df = prepare_embedding_dfs(df, reducer=method, **kwargs)
        emb_name = method

    if groups is None:
        groups = {"all": df.columns.to_list()}
    colors = sns.color_palette(palette)
    axis_labels = ['{} {}'.format(emb_name, k) for k in range(1, n_dims + 1)]

    if plot_2D:
        fig, ax = plt.subplots(figsize=figsize)
    else:
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(111, projection="3d")

    for i, (group_name, sample_names) in enumerate(groups.items()):
        # One labelled lookup per group; rows of ``coords.T`` are the
        # per-axis coordinate arrays (x, y[, z]).
        coords = feature_df.loc[list(sample_names), axis_labels].to_numpy()
        # Cycle through the palette so extra groups do not raise IndexError.
        ax.scatter(*coords.T, s=size, label=group_name,
                   c=[colors[i % len(colors)]])

    ax.set_xlabel(axis_labels[0])
    ax.set_ylabel(axis_labels[1])
    if not plot_2D:
        ax.set_zlabel(axis_labels[2])
    if title is not None:
        ax.set_title(title)
    ax.legend()
    ax.grid()
    plt.tight_layout()
    plt.show()

Network Plots

plot_network_graph

plot_network_graph(
    network: ndarray,
    weight_thres: float = 0.1,
    con_thres: float = 0,
) -> None

Plot graph of a PCnet

Parameters:

Name	Type	Description	Default
`network`	`ndarray`	A pc net	required
`weight_thres`	`float`	Minimum threshold of the pcnet's weights	`0.1`
`con_thres`	`float`	Minimum threshold of sum of weights	`0`

Returns:

Type	Description
`None`

Source code in scTenifold/plotting/_plotting.py

def plot_network_graph(network: np.ndarray,
                       weight_thres: float = 0.1,
                       con_thres: float = 0) -> None:
    """
    Plot graph of a PCnet

    Absolute weights below ``weight_thres`` are zeroed. A node is kept when
    the sum of its remaining row weights or of its column weights exceeds
    ``con_thres``. The input array is not modified.

    Parameters
    ----------
    network: np.ndarray
        A square PC network matrix
    weight_thres: float
        Minimum threshold of the pcnet's (absolute) weights
    con_thres: float or int
        Minimum threshold of sum of weights
    Returns
    -------
    None
    """
    # np.abs returns a new array, so the caller's matrix is left untouched.
    network = np.abs(network)
    network[network < weight_thres] = 0
    # Use one shared mask for rows and columns: filtering them independently
    # can yield a non-square matrix (or rows/columns referring to different
    # nodes) when the network is not symmetric. For a symmetric network this
    # selects exactly the same nodes as separate row/column masks.
    keep = (network.sum(axis=1) > con_thres) | (network.sum(axis=0) > con_thres)
    network = network[keep, :][:, keep]
    G = nx.from_numpy_array(network)
    pos = nx.kamada_kawai_layout(G)
    fig, ax = plt.subplots(figsize=(8, 8))
    nx.draw_networkx_edges(G, pos,
                           ax=ax, nodelist=[0], alpha=0.4)
    nx.draw_networkx_nodes(G, pos,
                           ax=ax,
                           node_size=10,
                           cmap=plt.cm.Reds_r)
    plt.show()

plot_network_heatmap

plot_network_heatmap(
    network: ndarray, figsize: Tuple[int, int] = (12, 12)
) -> None

Plot a heatmap of a PC network

Parameters:

Name	Type	Description	Default
`network`	`ndarray`	A pcnet	required
`figsize`	`Tuple[int, int]`	output figure size	`(12, 12)`

Returns:

Type	Description
`None`

Source code in scTenifold/plotting/_plotting.py

def plot_network_heatmap(network: np.ndarray,
                         figsize: Tuple[int, int] = (12, 12)) -> None:
    """
    Draw a PC network matrix as a heatmap on a newly created figure

    The colormap is centered at 0.0. This function does not call
    ``plt.show()``; displaying or saving the figure is left to the caller.

    Parameters
    ----------
    network: np.ndarray
        A pcnet
    figsize: tuple of ints
        Size of the created figure
    Returns
    -------
    None
    """
    _, heatmap_ax = plt.subplots(figsize=figsize)
    sns.heatmap(data=network, ax=heatmap_ax, center=0.0)

Embedding Preparation

prepare_PCA_dfs

prepare_PCA_dfs(
    feature_df: DataFrame,
    transform_func: Optional[
        Callable[[DataFrame], DataFrame]
    ] = None,
    n_components: Optional[int] = None,
    standardize: bool = True,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]

Run PCA on a genes-by-cells DataFrame.

Parameters:

Name	Type	Description	Default
`feature_df`	`DataFrame`	Input expression DataFrame (rows are features, columns are samples).	required
`transform_func`	`Optional[Callable[[DataFrame], DataFrame]]`	Optional pre-PCA transform applied to `feature_df`.	`None`
`n_components`	`Optional[int]`	Number of components; defaults to `min(n_samples, n_features)`.	`None`
`standardize`	`bool`	If True, standardize each feature (row of `feature_df`) across samples before PCA.	`True`

Returns:

Type	Description
`Tuple[DataFrame, DataFrame, DataFrame]`	Tuple `(scores, explained_variance, loadings)` as DataFrames.

Source code in scTenifold/plotting/_dim_reduction.py

def prepare_PCA_dfs(feature_df: pd.DataFrame,
                    transform_func: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None,
                    n_components: Optional[int] = None,
                    standardize: bool = True) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Run PCA on a genes-by-cells DataFrame.

    Parameters
    ----------
    feature_df
        Input expression DataFrame (rows are features, columns are samples).
    transform_func
        Optional pre-PCA transform applied to ``feature_df``. It must return
        a DataFrame; its index and columns label the outputs.
    n_components
        Number of components, passed to ``PCA``. If None, PCA chooses the
        number itself.
    standardize
        If True, the samples-by-features matrix (the transposed input) is
        passed through ``StandardScaler`` before PCA, i.e. each feature is
        standardized across samples.

    Returns
    -------
    Tuple ``(scores, explained_variance, loadings)`` as DataFrames:
    scores are samples x PCs, explained_variance holds the explained
    variance ratio per PC, and loadings are features x PCs.
    """
    x_df = transform_func(feature_df) if transform_func is not None else feature_df
    # PCA expects samples as rows, hence the transpose.
    x = StandardScaler().fit_transform(x_df.values.T) if standardize else x_df.values.T
    pca = PCA(n_components=n_components)
    principal_components = pca.fit_transform(x)
    # Label components from what PCA actually produced rather than from the
    # requested value, so the labels always match the fitted arrays.
    pc_names = [f'PC {num + 1}' for num in range(principal_components.shape[1])]
    # Use the transformed frame's labels: transform_func may have dropped or
    # reordered features/samples.
    final_df = pd.DataFrame(data=principal_components,
                            columns=pc_names,
                            index=x_df.columns)
    exp_var_df = pd.DataFrame(data=pca.explained_variance_ratio_,
                              index=pc_names)
    component_df = pd.DataFrame(data=pca.components_.T,
                                columns=pc_names,
                                index=x_df.index)
    return final_df, exp_var_df, component_df

prepare_embedding_dfs

prepare_embedding_dfs(
    feature_df: DataFrame,
    transform_func: Optional[
        Callable[[ndarray], ndarray]
    ] = None,
    n_components: int = 2,
    reducer: Union[str, Reducer] = "TSNE",
    standardize: bool = True,
    **kwargs: object,
) -> pd.DataFrame

Run a non-PCA dimensionality reducer on a feature DataFrame.

Parameters:

Name	Type	Description	Default
`feature_df`	`DataFrame`	Input expression DataFrame (features x samples).	required
`transform_func`	`Optional[Callable[[ndarray], ndarray]]`	Optional pre-embedding transform applied to `feature_df.values`.	`None`
`n_components`	`int`	Number of embedding dimensions.	`2`
`reducer`	`Union[str, Reducer]`	Reducer name or `Reducer` member. `"UMAP"` requires the optional `umap-learn` package.	`'TSNE'`
`standardize`	`bool`	If True, standardize each feature (row of `feature_df`) across samples before reduction.	`True`
`**kwargs`	`object`	Forwarded to the underlying reducer class.	`{}`

Returns:

Type	Description
`DataFrame`	Sample-by-component DataFrame.

Source code in scTenifold/plotting/_dim_reduction.py

def prepare_embedding_dfs(feature_df: pd.DataFrame,
                          transform_func: Optional[Callable[[np.ndarray], np.ndarray]] = None,
                          n_components: int = 2,
                          reducer: Union[str, "Reducer"] = "TSNE",
                          standardize: bool = True, **kwargs: object) -> pd.DataFrame:
    """Embed the samples of a feature DataFrame with a non-PCA reducer.

    Parameters
    ----------
    feature_df
        Input expression DataFrame (features x samples).
    transform_func
        Optional callable applied to ``feature_df.values`` before anything
        else.
    n_components
        Number of embedding dimensions.
    reducer
        Reducer name or :class:`Reducer` member. ``"UMAP"`` requires
        the optional ``umap-learn`` package.
    standardize
        If True, the transposed (samples x features) matrix is passed
        through ``StandardScaler`` before reduction.
    **kwargs
        Forwarded to the underlying reducer class.

    Returns
    -------
    DataFrame indexed by sample name with one ``"<reducer> <i>"`` column
    per embedding dimension.

    Raises
    ------
    ImportError
        If ``reducer`` is UMAP and the ``umap`` module cannot be imported.
    """
    matrix = transform_func(feature_df.values) if transform_func else feature_df.values
    if isinstance(reducer, str):
        reducer = Reducer(reducer)

    # Samples are the columns of feature_df, so the reducer sees the transpose.
    samples_by_features = matrix.T
    if standardize:
        samples_by_features = StandardScaler().fit_transform(samples_by_features)

    if reducer != Reducer.UMAP:
        reducer_cls = REDUCER_DICT[reducer]
    else:
        # umap is imported lazily so it stays an optional dependency.
        try:
            from importlib import import_module
            umap = import_module("umap")
        except ImportError as exc:
            raise ImportError("Install umap-learn to use reducer='UMAP'.") from exc
        reducer_cls = umap.UMAP

    model = reducer_cls(n_components=n_components, **kwargs)
    embedding = model.fit_transform(samples_by_features)
    column_names = [f"{reducer.value} {i}" for i in range(1, n_components + 1)]
    return pd.DataFrame(embedding,
                        columns=column_names,
                        index=feature_df.columns.to_list())