spateo

Publish Date: 2022-09-16

Word Count: 1.1k

Read Count:

def score_and_mask_pixels(
    adata: AnnData,
    layer: str,
    k: int,
    method: Literal["gauss", "moran", "EM", "EM+gauss", "EM+BP", "VI+gauss", "VI+BP"],
    moran_kwargs: Optional[dict] = None,
    em_kwargs: Optional[dict] = None,
    vi_kwargs: Optional[dict] = None,
    bp_kwargs: Optional[dict] = None,
    threshold: Optional[float] = None,
    use_knee: Optional[bool] = False,
    mk: Optional[int] = None,
    bins_layer: Optional[Union[Literal[False], str]] = None,
    certain_layer: Optional[str] = None,
    scores_layer: Optional[str] = None,
    mask_layer: Optional[str] = None,
):

Segmentation approaches

All approaches follow the following general steps.

Apply a 2D convolution (which may be Gaussian or summation) to the UMI count image. The size of the convolution is controlled with the k parameter.
Obtain per-pixel scores, usually in the range [0, 1], indicating how likely each pixel is occupied by a cell.
Apply a threshold to these scores, which is either computed using Otsu’s method or manually provided with the threshold parameter.
Apply morphological opening and closing with size mk to fill in holes and remove noise. By default, this value is set to k+2 when using the Negative binomial mixture model, otherwise to k-2.

Each of the supported methods differ in how the per-pixel scores (step 2) are calculated.

Gaussian Blur
Moran’s I
NB mixture model
Belief Propgadation

The document description about the `score_pixels`

# All methods other than gauss requires EM
    if method == "gauss":
        # For just "gauss" method, we should rescale to [0, 1] because all the
        # other methods eventually produce an array of [0, 1] values.
        res = utils.scale_to_01(res)
    elif method == "moran":
        res = moran.run_moran(res, mask=None if bins is None else bins > 0, **moran_kwargs)
        # Rescale
        res /= res.max()
    else:
        # Obtain initial parameter estimates with Otsu thresholding.
        # These may be overridden by providing the appropriate kwargs.
        nb_kwargs = dict(params=_initial_nb_params(res, bins=bins))
        if "em" in method:
            nb_kwargs.update(em_kwargs)
            lm.main_debug(f"Running EM with kwargs {nb_kwargs}.")
            em_results = em.run_em(res, bins=bins, **nb_kwargs)
            conditional_func = partial(em.conditionals, em_results=em_results, bins=bins)
        else:
            nb_kwargs.update(vi_kwargs)
            lm.main_debug(f"Running VI with kwargs {nb_kwargs}.")
            vi_results = vi.run_vi(res, bins=bins, **nb_kwargs)
            conditional_func = partial(vi.conditionals, vi_results=vi_results, bins=bins)

        if "bp" in method:
            lm.main_debug("Computing conditionals.")
            background_cond, cell_cond = conditional_func(res)
            if certain_mask is not None:
                background_cond[certain_mask] = 1e-2
                cell_cond[certain_mask] = 1 - (1e-2)
            lm.main_debug(f"Running BP with kwargs {bp_kwargs}.")
            res = bp.run_bp(background_cond, cell_cond, **bp_kwargs)
        else:
            lm.main_debug("Computing confidences.")
            res = em.confidence(res, em_results=em_results, bins=bins)
            if certain_mask is not None:
                res = np.clip(res + certain_mask, 0, 1)

        if "gauss" in method:
            lm.main_debug("Computing Gaussian blur.")
            res = utils.conv2d(res, k, mode="gauss", bins=bins)

"""
Score each pixel by how likely it is a cell. Values returned are in [0, 1].
    Args:
        X: UMI counts per pixel as either a sparse or dense array.
        k: Kernel size for convolution.
        method: Method to use. Valid methods are:
            gauss: Gaussian blur
            moran: Moran's I based method
            EM: EM algorithm to estimate cell and background expression
                parameters.
            EM+gauss: Negative binomial EM algorithm followed by Gaussian blur.
            EM+BP: EM algorithm followed by belief propagation to estimate the
                marginal probabilities of cell and background.
            VI+gauss: Negative binomial VI algorithm followed by Gaussian blur.
                Note that VI also supports the zero-inflated negative binomial (ZINB)
                by providing `zero_inflated=True`.
            VI+BP: VI algorithm followed by belief propagation. Note that VI also
                supports the zero-inflated negative binomial (ZINB) by providing
                `zero_inflated=True`.
        moran_kwargs: Keyword arguments to the :func:`moran.run_moran` function.
        em_kwargs: Keyword arguments to the :func:`em.run_em` function.
        bp_kwargs: Keyword arguments to the :func:`bp.run_bp` function.
        certain_mask: A boolean Numpy array indicating which pixels are certain
            to be occupied, a-priori. For example, if nuclei staining is available,
            this would be the nuclei segmentation mask.
        bins: Pixel bins to segment separately. Only takes effect when the EM
            algorithm is run.

    Returns:
        [0, 1] score of each pixel being a cell.
"""

score_and_mask_pixels() code

X = SKM.select_layer_data(adata, layer, make_dense=True)
    certain_mask = None
    if certain_layer:
        certain_mask = SKM.select_layer_data(adata, certain_layer).astype(bool)
    bins = None
    if bins_layer is not False:
        bins_layer = bins_layer or SKM.gen_new_layer_key(layer, SKM.BINS_SUFFIX)
        if bins_layer in adata.layers:
            bins = SKM.select_layer_data(adata, bins_layer)
    method = method.lower()
    lm.main_info(f"Scoring pixels with {method} method.")
    scores = _score_pixels(X, k, method, moran_kwargs, em_kwargs, vi_kwargs, bp_kwargs, certain_mask, bins)
    scores_layer = scores_layer or SKM.gen_new_layer_key(layer, SKM.SCORES_SUFFIX)
    SKM.set_layer_data(adata, scores_layer, scores)

The packages is using SKM to manage its data_loading and data_saving.

SKM.select_layer_data(adata, layer, make_dense=True) means to select the layer of anndata and using dense matrix rather than sparse matrix.

SKM.set_layer_data(adata, scores_layer, scores) means to save the data in layer of anndata and name it scores_layer, scores is the data matrix.

Practice problems

Read the file

There are many read file function in st.io, but all of the function are warp of st.io.read_bgi_agg.

adata = st.io.read_bgi_agg("D2_bgi_new.tsv", "CN13_D2_HE.tiff",prealigned=False)
# prealinged=False to tell the tsv to align with image

Obtain cellxgene adata

cell_adata = st.io.read_bgi(
    # 'SS200000135TL_D1_all_bin1.txt.gz',
    "D2_bgi_new.tsv",
    segmentation_adata=adata,
    labels_layer='watershed_labels',
    add_props=True,
)

Drawing

# you could use the mathplotlib.pyplot to draw the img of interest
# and here is the default setting for spateo
%config InlineBackend.print_figure_kwargs = {'facecolor' : "w"}
%config InlineBackend.figure_format = 'retina'

Drawing different layers in a picture

You could use axes in pyplot to draw figure

Note, for the adata directly read from

fig, axes = plt.subplots(figsize=(8, 8), tight_layout=True)
# The [x:x+x_delta, y:y+y_delta] could used to observe the ROI
st.pl.imshow(adata[400:400+520,1100:1100+667], 'stain', ax=axes, use_scale=False, save_show_or_return = 'return', cmap = clr.LinearSegmentedColormap.from_list('custom blue',['#000000FF', '#FFFFFFFF'], N=256))
st.pl.imshow(adata[400:400+520,1100:1100+667], 'cell_labels_boundary', ax=axes, alpha=1, cmap=clr.LinearSegmentedColormap.from_list('custom blue', ['#FFFFFF00','#FF0000FF'], N=256), use_scale=False, save_show_or_return='return')
plt.show()
# fig.savefig("{path}/{name_of_file}.pdf/.png", dpi=300)

Import the figure or cell mask from other methods

img = plt.imread("{file}")
# img = np.flip(img, axis=0)
y_int = np.array(list(adata.var_names),dtype=np.int16)
x_int = np.array(list(adata.obs_names),dtype=np.int16)
y_max, y_min = y_int.max(), y_int.min()
x_max, x_min = x_int.max(), x_int.min()
print(y_max, y_min, x_max, x_min)

# set the weight of border
ax.spines['top'].set_linewidth(2.5)
ax.spines['bottom'].set_linewidth(2.5)
ax.spines['left'].set_linewidth(2.5)
ax.spines['right'].set_linewidth(2.5)

Statistic Test

# ranksums
from scipy.stats import ranksums

ranksums(df[df["cell_or_bin"] == "Cell"]["density"], df[df["cell_or_bin"] == "Cytoplasma"]["density"])
ranksums(df[df["cell_or_bin"] == "Cell"]["n_counts"], df[df["cell_or_bin"] == "Cell"]["n_counts"])

# T-test
from scipy.stats import ttest_ind

ttest_ind(df[df["cell_or_bin"] == "Cell"]["n_counts"], df[df["cell_or_bin"] == "Cytoplasma"]["n_counts"])