Skip to content

Plotting

Plotting helpers and dimensionality-reduction preparation functions.

Result Plots

plot_qqplot

plot_qqplot(
    df: DataFrame,
    exp_col: str = "FC",
    stat_col: str = "adjusted p-value",
    plot_qqline: bool = True,
    sig_threshold: float = 0.1,
) -> None

Draw a Q-Q plot of a d_regulation dataframe column against chi-square (1 degree of freedom) theoretical quantiles

Parameters:

Name Type Description Default
df DataFrame

A d_regulation dataframe

required
exp_col str

Name of the column whose values are plotted on the y-axis

'FC'
stat_col str

Column name of data used to check significance

'adjusted p-value'
plot_qqline bool

Plot Q-Q line on the plot

True
sig_threshold float

Significance threshold; rows whose stat_col value is below it are flagged as significant

0.1

Returns:

Type Description
None
Source code in scTenifold/plotting/_plotting.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def plot_qqplot(df: pd.DataFrame,
                exp_col: str = "FC",
                stat_col: str = "adjusted p-value",
                plot_qqline: bool = True,
                sig_threshold: float = 0.1) -> None:
    """
    Plot QQ-plot using a d_regulation dataframe

    The sorted values of ``exp_col`` are plotted against the quantiles of a
    chi-square distribution with one degree of freedom. Points are coloured
    by whether their ``stat_col`` value is below ``sig_threshold``.

    Parameters
    ----------
    df: pd.DataFrame
        A d_regulation dataframe; it is not modified
    exp_col: str
        Name of the column whose values are plotted on the y-axis
    stat_col: str
        Name of the column used to check significance
    plot_qqline: bool
        If True, draw the straight line passing through the first and third
        quartiles of the theoretical and observed values
    sig_threshold: float
        Rows whose ``stat_col`` value is below this threshold are flagged
        as significant
    Returns
    -------
    None
    """
    the_col = "Theoretical quantiles"
    len_x = df.shape[0]
    # Explicit copy: the columns added below must never leak into the caller's dataframe.
    data = df.loc[:, [exp_col, stat_col]].copy()
    data["significant"] = data[stat_col] < sig_threshold
    data = data.sort_values(exp_col)
    # len_x evenly spaced probabilities strictly between 0 and 1 (both endpoints dropped).
    data[the_col] = chi2.ppf(q=np.linspace(0, 1, len_x + 2)[1:-1], df=1)
    ax = sns.scatterplot(data=data, x=the_col, y=exp_col, hue="significant")
    if plot_qqline:
        xl_1, xl_2 = ax.get_xlim()
        x1, x2 = data[the_col].quantile(0.25), data[the_col].quantile(0.75)
        y1, y2 = data[exp_col].quantile(0.25), data[exp_col].quantile(0.75)
        # With a single row both quartiles coincide and the slope is undefined; skip the line.
        if x2 != x1:
            slope = (y2 - y1) / (x2 - x1)
            intercept = y1 - slope * x1
            ax.plot([xl_1, xl_2],
                    [slope * xl_1 + intercept, slope * xl_2 + intercept])
            # Restore the limits so the line does not stretch the x-axis.
            ax.set_xlim([xl_1, xl_2])
    plt.show()

plot_hist

plot_hist(
    df_1: DataFrame,
    df_1_name: str,
    df_2: Optional[DataFrame] = None,
    df_2_name: Optional[str] = None,
    sum_axis: int = 0,
    label: str = "Sample",
    figsize: Tuple[int, int] = (10, 8),
) -> None

Plot library-size histograms for one or two QC matrices.

Parameters:

Name Type Description Default
df_1 DataFrame

Genes-by-cells (or cells-by-genes) DataFrame.

required
df_1_name str

Legend label for df_1.

required
df_2 Optional[DataFrame]

Optional second DataFrame plotted on the same axes.

None
df_2_name Optional[str]

Legend label for df_2.

None
sum_axis int

Axis to sum over before histogramming (0 for genes-by-cells, 1 for cells-by-genes).

0
label str

X-axis label for the histogram.

'Sample'
figsize Tuple[int, int]

Figure size in inches.

(10, 8)
Source code in scTenifold/plotting/_plotting.py
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def plot_hist(df_1: pd.DataFrame,
              df_1_name: str,
              df_2: Optional[pd.DataFrame] = None,
              df_2_name: Optional[str] = None,
              sum_axis: int = 0,
              label: str = "Sample",
              figsize: Tuple[int, int] = (10, 8)) -> None:
    """Plot library-size histograms for one or two QC matrices.

    Each DataFrame is summed along ``sum_axis`` and the resulting totals are
    drawn as a histogram. When ``df_2`` is given, both sets of totals share
    the same axes and are distinguished by colour.

    Parameters
    ----------
    df_1
        Genes-by-cells (or cells-by-genes) DataFrame.
    df_1_name
        Legend label for ``df_1``.
    df_2
        Optional second DataFrame plotted on the same axes.
    df_2_name
        Legend label for ``df_2``.
    sum_axis
        Axis to sum over before histogramming (``0`` for genes-by-cells,
        ``1`` for cells-by-genes).
    label
        X-axis label for the histogram.
    figsize
        Figure size in inches.

    Raises
    ------
    ValueError
        If ``sum_axis`` is neither ``0`` nor ``1``.
    """
    # Validate first so a bad argument does not leave an empty figure behind.
    if sum_axis not in (0, 1):
        raise ValueError(f"sum_axis must be 0 or 1, got {sum_axis!r}")

    def _totals(df: pd.DataFrame, name: Optional[str]) -> pd.DataFrame:
        # Summing along sum_axis directly avoids copying and transposing the matrix.
        totals = df.sum(axis=sum_axis).to_frame(name=label)
        totals["name"] = name
        return totals

    data = _totals(df_1, df_1_name)
    hue = None
    if df_2 is not None:
        # Both frames may share index labels; drop them so the combined
        # frame has a unique index when handed to seaborn.
        data = pd.concat([data, _totals(df_2, df_2_name)], ignore_index=True)
        hue = "name"

    fig, ax = plt.subplots(figsize=figsize)
    sns.histplot(data=data, x=label, hue=hue, ax=ax)
    plt.show()

plot_embedding

plot_embedding(
    df: DataFrame,
    groups: Optional[Dict[str, List[str]]],
    method: str = "UMAP",
    plot_2D: bool = True,
    figsize: Tuple[int, int] = (8, 8),
    size: int = 10,
    title: Optional[str] = None,
    palette: str = "muted",
    **kwargs: object,
) -> None

Perform dimensionality reduction and plot the resulting embedding as a 2D or 3D scatter plot

Parameters:

Name Type Description Default
df DataFrame

A dataframe to perform dimension reduction

required
groups Optional[Dict[str, List[str]]]

A dict indicating the groups

required
method str

Name of the dimensionality-reduction method to use; one of: PCA, TSNE, UMAP, Isomap, MDS, SpectralEmbedding, LocallyLinearEmbedding

'UMAP'
plot_2D bool

Draw a 2D plot if True, otherwise a 3D plot

True
figsize Tuple[int, int]

The figure size of the plot: (width, height)

(8, 8)
title Optional[str]

The subplot's title

None
palette str

Name of the seaborn color palette to use, reference: https://seaborn.pydata.org/generated/seaborn.color_palette.html

'muted'
kwargs object
{}

Returns:

Type Description
None
Source code in scTenifold/plotting/_plotting.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
def plot_embedding(df: pd.DataFrame,
                   groups: Optional[Dict[str, List[str]]],
                   method: str = "UMAP",
                   plot_2D: bool = True,
                   figsize: Tuple[int, int] = (8, 8),
                   size: int = 10,
                   title: Optional[str] = None,
                   palette: str = "muted",
                   **kwargs: object) -> None:
    """
    Do dimension reduction and plot the embeddings onto a 2D or 3D plot

    Parameters
    ----------
    df: pd.DataFrame
        A dataframe to perform dimension reduction
    groups: dict(str, list)
        A dict mapping each group name to the sample names (columns of ``df``)
        it contains. If None, all columns are plotted as a single group "all"
    method: str
        The name of used method, could be: PCA, TSNE, UMAP, Isomap, MDS, SpectralEmbedding, LocallyLinearEmbedding
    plot_2D: bool
        Draw a 2D plot if True, otherwise a 3D plot
    figsize: tuple of int
        The figure size of the plot: (width, height)
    size: int
        Marker size passed to the scatter call
    title: str
        The subplot's title
    palette: str
        The name of used seaborn color palette,
        reference: https://seaborn.pydata.org/generated/seaborn.color_palette.html
    kwargs: keyword arguments of doing dimension reduction

    Returns
    -------
    None
    """
    n_axes = 2 if plot_2D else 3
    if method == "PCA":
        feature_df, _, _ = prepare_PCA_dfs(df, **kwargs)
        emb_name = "PC"
    else:
        reducer_kwargs = dict(kwargs)
        if not plot_2D:
            # prepare_embedding_dfs produces 2 components by default; a 3D
            # plot needs a third unless the caller chose a value explicitly.
            reducer_kwargs.setdefault("n_components", 3)
        feature_df = prepare_embedding_dfs(df, reducer=method, **reducer_kwargs)
        emb_name = method
    axis_labels = ['{} {}'.format(emb_name, i) for i in range(1, n_axes + 1)]

    if groups is None:
        groups = {"all": df.columns.to_list()}
    colors = sns.color_palette(palette)
    if plot_2D:
        fig, ax = plt.subplots(figsize=figsize)
    else:
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(111, projection="3d")
    for i, (group_name, sample_names) in enumerate(groups.items()):
        # One vectorized lookup per group; rows are samples, columns the plotted axes.
        coords = feature_df.loc[list(sample_names), axis_labels].to_numpy()
        # Cycle through the palette so more groups than colours does not raise IndexError.
        ax.scatter(*coords.T, s=size, label=group_name, c=[colors[i % len(colors)]])

    ax.set_xlabel(axis_labels[0])
    ax.set_ylabel(axis_labels[1])
    if not plot_2D:
        ax.set_zlabel(axis_labels[2])
    if title is not None:
        ax.set_title(title)
    ax.legend()
    ax.grid()
    plt.tight_layout()
    plt.show()

Network Plots

plot_network_graph

plot_network_graph(
    network: ndarray,
    weight_thres: float = 0.1,
    con_thres: float = 0,
) -> None

Plot graph of a PCnet

Parameters:

Name Type Description Default
network ndarray

A pc net

required
weight_thres float

Minimum threshold of the pcnet's weights

0.1
con_thres float

Minimum threshold of sum of weights

0

Returns:

Type Description
None
Source code in scTenifold/plotting/_plotting.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def plot_network_graph(network: np.ndarray,
                       weight_thres: float = 0.1,
                       con_thres: float = 0) -> None:
    """
    Plot graph of a PCnet

    Absolute weights below ``weight_thres`` are zeroed, weakly connected
    nodes are dropped, and the remaining adjacency matrix is drawn with a
    Kamada-Kawai layout. The input array is not modified.

    Parameters
    ----------
    network: np.ndarray
        A pc net, given as a square adjacency matrix
    weight_thres: float
        Minimum threshold of the pcnet's absolute weights
    con_thres: float or int
        Minimum threshold of sum of weights; a node is kept when the sum of
        its row or of its column exceeds this value
    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``network`` is not a square 2D array
    """
    # np.abs returns a new array, so the masking below never touches the caller's data.
    network = np.abs(np.asarray(network))
    if network.ndim != 2 or network.shape[0] != network.shape[1]:
        raise ValueError("network must be a square 2D array, got shape {}".format(network.shape))
    network[network < weight_thres] = 0
    # Use ONE mask for rows and columns: filtering them independently can
    # yield a non-square (or misaligned) matrix when the network is asymmetric.
    keep = (network.sum(axis=1) > con_thres) | (network.sum(axis=0) > con_thres)
    network = network[np.ix_(keep, keep)]
    G = nx.from_numpy_array(network)
    pos = nx.kamada_kawai_layout(G)
    fig, ax = plt.subplots(figsize=(8, 8))
    nx.draw_networkx_edges(G, pos,
                           ax=ax, nodelist=[0], alpha=0.4)
    nx.draw_networkx_nodes(G, pos,
                           ax=ax,
                           node_size=10,
                           cmap=plt.cm.Reds_r)
    plt.show()

plot_network_heatmap

plot_network_heatmap(
    network: ndarray, figsize: Tuple[int, int] = (12, 12)
) -> None

Plot a heatmap of a PC network

Parameters:

Name Type Description Default
network ndarray

A pcnet

required
figsize Tuple[int, int]

output figure size

(12, 12)

Returns:

Type Description
None
Source code in scTenifold/plotting/_plotting.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def plot_network_heatmap(network: np.ndarray,
                         figsize: Tuple[int, int] = (12, 12)) -> None:
    """
    Plot a heatmap of a PC network

    The colour map is centred on zero.

    Parameters
    ----------
    network: np.ndarray
        A pcnet
    figsize: tuple of ints
        output figure size
    Returns
    -------
    None
    """
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(network, center=0.0, ax=ax)
    # Display the figure, as every other plotting helper in this module does.
    plt.show()

Embedding Preparation

prepare_PCA_dfs

prepare_PCA_dfs(
    feature_df: DataFrame,
    transform_func: Optional[
        Callable[[DataFrame], DataFrame]
    ] = None,
    n_components: Optional[int] = None,
    standardize: bool = True,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]

Run PCA on a genes-by-cells DataFrame.

Parameters:

Name Type Description Default
feature_df DataFrame

Input expression DataFrame (rows are features, columns are samples).

required
transform_func Optional[Callable[[DataFrame], DataFrame]]

Optional pre-PCA transform applied to feature_df.

None
n_components Optional[int]

Number of components; defaults to min(n_samples, n_features).

None
standardize bool

If True, z-score columns before PCA.

True

Returns:

Type Description
Tuple ``(scores, explained_variance, loadings)`` as DataFrames.
Source code in scTenifold/plotting/_dim_reduction.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def prepare_PCA_dfs(feature_df: pd.DataFrame,
                    transform_func: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None,
                    n_components: Optional[int] = None,
                    standardize: bool = True) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Run PCA on a genes-by-cells DataFrame.

    The matrix is transposed before fitting, so the columns of
    ``feature_df`` are the observations and its rows are the variables.

    Parameters
    ----------
    feature_df
        Input expression DataFrame (rows are features, columns are samples).
    transform_func
        Optional pre-PCA transform applied to ``feature_df``.
    n_components
        Number of components; defaults to ``min(n_samples, n_features)``.
    standardize
        If True, the transposed (samples-by-features) matrix is passed
        through ``StandardScaler`` before PCA.

    Returns
    -------
    Tuple ``(scores, explained_variance, loadings)`` as DataFrames.
    ``scores`` is indexed by the columns of ``feature_df``, ``loadings`` by
    its index, and ``explained_variance`` by component name (``"PC 1"``, ...).
    """
    x = transform_func(feature_df) if transform_func is not None else feature_df
    samples_by_features = x.values.T
    if standardize:
        samples_by_features = StandardScaler().fit_transform(samples_by_features)
    pca = PCA(n_components=n_components)
    principal_components = pca.fit_transform(samples_by_features)
    # Label the components from what PCA actually produced rather than from a
    # separately computed count, so all three frames always agree.
    pc_names = [f'PC {num + 1}' for num in range(principal_components.shape[1])]
    final_df = pd.DataFrame(data=principal_components,
                            columns=pc_names,
                            index=feature_df.columns)
    exp_var_df = pd.DataFrame(data=pca.explained_variance_ratio_,
                              index=pc_names)
    component_df = pd.DataFrame(data=pca.components_.T,
                                columns=pc_names,
                                index=feature_df.index)
    return final_df, exp_var_df, component_df

prepare_embedding_dfs

prepare_embedding_dfs(
    feature_df: DataFrame,
    transform_func: Optional[
        Callable[[ndarray], ndarray]
    ] = None,
    n_components: int = 2,
    reducer: Union[str, Reducer] = "TSNE",
    standardize: bool = True,
    **kwargs: object,
) -> pd.DataFrame

Run a non-PCA dimensionality reducer on a feature DataFrame.

Parameters:

Name Type Description Default
feature_df DataFrame

Input expression DataFrame (features x samples).

required
transform_func Optional[Callable[[ndarray], ndarray]]

Optional pre-embedding transform applied to feature_df.values.

None
n_components int

Number of embedding dimensions.

2
reducer Union[str, Reducer]

Reducer name or :class:Reducer member. "UMAP" requires the optional umap-learn package.

'TSNE'
standardize bool

If True, z-score columns before reduction.

True
**kwargs object

Forwarded to the underlying reducer class.

{}

Returns:

Type Description
Sample-by-component DataFrame.
Source code in scTenifold/plotting/_dim_reduction.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def prepare_embedding_dfs(feature_df: pd.DataFrame,
                          transform_func: Optional[Callable[[np.ndarray], np.ndarray]] = None,
                          n_components: int = 2,
                          reducer: Union[str, "Reducer"] = "TSNE",
                          standardize: bool = True, **kwargs: object) -> pd.DataFrame:
    """Embed the samples of a feature DataFrame with a non-PCA reducer.

    Parameters
    ----------
    feature_df
        Expression DataFrame laid out as features x samples.
    transform_func
        Optional function applied to ``feature_df.values`` before embedding.
    n_components
        Dimensionality of the embedding.
    reducer
        Either a reducer name or a :class:`Reducer` member. Choosing
        ``"UMAP"`` needs the optional ``umap-learn`` package.
    standardize
        If True, the transposed (samples x features) matrix is passed
        through ``StandardScaler`` before reduction.
    **kwargs
        Passed on to the constructor of the selected reducer class.

    Returns
    -------
    DataFrame indexed by sample name with one column per component, named
    ``"<reducer value> <i>"`` for ``i`` starting at 1.

    Raises
    ------
    ImportError
        If UMAP is requested but the ``umap`` module cannot be imported.
    """
    matrix = transform_func(feature_df.values) if transform_func else feature_df.values
    if isinstance(reducer, str):
        # Resolve the name to its enum member.
        reducer = Reducer(reducer)
    index_labels = feature_df.columns.to_list()

    # Reducers expect samples in rows, so flip the features x samples layout.
    matrix = matrix.T
    if standardize:
        matrix = StandardScaler().fit_transform(matrix)

    if reducer == Reducer.UMAP:
        # UMAP is an optional dependency, hence the lazy import.
        try:
            from importlib import import_module
            umap = import_module("umap")
        except ImportError as exc:
            raise ImportError("Install umap-learn to use reducer='UMAP'.") from exc
        model_cls = umap.UMAP
    else:
        model_cls = REDUCER_DICT[reducer]

    model = model_cls(n_components=n_components, **kwargs)
    embedded = model.fit_transform(matrix)
    column_names = []
    for idx in range(1, n_components + 1):
        column_names.append("{reducer} {i}".format(reducer=reducer.value, i=idx))
    return pd.DataFrame(embedded, columns=column_names, index=index_labels)