def score_and_mask_pixels(
adata: AnnData,
layer: str,
k: int,
method: Literal["gauss", "moran", "EM", "EM+gauss", "EM+BP", "VI+gauss", "VI+BP"],
moran_kwargs: Optional[dict] = None,
em_kwargs: Optional[dict] = None,
vi_kwargs: Optional[dict] = None,
bp_kwargs: Optional[dict] = None,
threshold: Optional[float] = None,
use_knee: Optional[bool] = False,
mk: Optional[int] = None,
bins_layer: Optional[Union[Literal[False], str]] = None,
certain_layer: Optional[str] = None,
scores_layer: Optional[str] = None,
mask_layer: Optional[str] = None,
):
Segmentation approaches
All approaches follow the following general steps.
- Apply a 2D convolution (which may be Gaussian or summation) to the UMI count image. The size of the convolution is controlled with the
k
parameter. - Obtain per-pixel scores, usually in the range
[0, 1]
, indicating how likely each pixel is occupied by a cell. - Apply a threshold to these scores, which is either computed using Otsu’s method or manually provided with the
threshold
parameter. - Apply morphological opening and closing with size
mk
to fill in holes and remove noise. By default, this value is set tok+2
when using the Negative binomial mixture model, otherwise tok-2
.
Each of the supported methods differ in how the per-pixel scores (step 2) are calculated.
- Gaussian Blur
- Moran’s I
- NB mixture model
- Belief Propgadation
The document description about the score_pixels
# All methods other than gauss requires EM
if method == "gauss":
# For just "gauss" method, we should rescale to [0, 1] because all the
# other methods eventually produce an array of [0, 1] values.
res = utils.scale_to_01(res)
elif method == "moran":
res = moran.run_moran(res, mask=None if bins is None else bins > 0, **moran_kwargs)
# Rescale
res /= res.max()
else:
# Obtain initial parameter estimates with Otsu thresholding.
# These may be overridden by providing the appropriate kwargs.
nb_kwargs = dict(params=_initial_nb_params(res, bins=bins))
if "em" in method:
nb_kwargs.update(em_kwargs)
lm.main_debug(f"Running EM with kwargs {nb_kwargs}.")
em_results = em.run_em(res, bins=bins, **nb_kwargs)
conditional_func = partial(em.conditionals, em_results=em_results, bins=bins)
else:
nb_kwargs.update(vi_kwargs)
lm.main_debug(f"Running VI with kwargs {nb_kwargs}.")
vi_results = vi.run_vi(res, bins=bins, **nb_kwargs)
conditional_func = partial(vi.conditionals, vi_results=vi_results, bins=bins)
if "bp" in method:
lm.main_debug("Computing conditionals.")
background_cond, cell_cond = conditional_func(res)
if certain_mask is not None:
background_cond[certain_mask] = 1e-2
cell_cond[certain_mask] = 1 - (1e-2)
lm.main_debug(f"Running BP with kwargs {bp_kwargs}.")
res = bp.run_bp(background_cond, cell_cond, **bp_kwargs)
else:
lm.main_debug("Computing confidences.")
res = em.confidence(res, em_results=em_results, bins=bins)
if certain_mask is not None:
res = np.clip(res + certain_mask, 0, 1)
if "gauss" in method:
lm.main_debug("Computing Gaussian blur.")
res = utils.conv2d(res, k, mode="gauss", bins=bins)
"""
Score each pixel by how likely it is a cell. Values returned are in [0, 1].
Args:
X: UMI counts per pixel as either a sparse or dense array.
k: Kernel size for convolution.
method: Method to use. Valid methods are:
gauss: Gaussian blur
moran: Moran's I based method
EM: EM algorithm to estimate cell and background expression
parameters.
EM+gauss: Negative binomial EM algorithm followed by Gaussian blur.
EM+BP: EM algorithm followed by belief propagation to estimate the
marginal probabilities of cell and background.
VI+gauss: Negative binomial VI algorithm followed by Gaussian blur.
Note that VI also supports the zero-inflated negative binomial (ZINB)
by providing `zero_inflated=True`.
VI+BP: VI algorithm followed by belief propagation. Note that VI also
supports the zero-inflated negative binomial (ZINB) by providing
`zero_inflated=True`.
moran_kwargs: Keyword arguments to the :func:`moran.run_moran` function.
em_kwargs: Keyword arguments to the :func:`em.run_em` function.
bp_kwargs: Keyword arguments to the :func:`bp.run_bp` function.
certain_mask: A boolean Numpy array indicating which pixels are certain
to be occupied, a-priori. For example, if nuclei staining is available,
this would be the nuclei segmentation mask.
bins: Pixel bins to segment separately. Only takes effect when the EM
algorithm is run.
Returns:
[0, 1] score of each pixel being a cell.
"""
score_and_mask_pixels() code
X = SKM.select_layer_data(adata, layer, make_dense=True)
certain_mask = None
if certain_layer:
certain_mask = SKM.select_layer_data(adata, certain_layer).astype(bool)
bins = None
if bins_layer is not False:
bins_layer = bins_layer or SKM.gen_new_layer_key(layer, SKM.BINS_SUFFIX)
if bins_layer in adata.layers:
bins = SKM.select_layer_data(adata, bins_layer)
method = method.lower()
lm.main_info(f"Scoring pixels with {method} method.")
scores = _score_pixels(X, k, method, moran_kwargs, em_kwargs, vi_kwargs, bp_kwargs, certain_mask, bins)
scores_layer = scores_layer or SKM.gen_new_layer_key(layer, SKM.SCORES_SUFFIX)
SKM.set_layer_data(adata, scores_layer, scores)
The packages is using SKM to manage its data_loading and data_saving.
SKM.select_layer_data(adata, layer, make_dense=True)
means to select the layer of anndata and using dense matrix rather than sparse matrix.
SKM.set_layer_data(adata, scores_layer, scores)
means to save the data in layer of anndata and name it scores_layer, scores is the data matrix.
Practice problems
Read the file
There are many read file function in st.io
, but all of the function are warp of st.io.read_bgi_agg
.
adata = st.io.read_bgi_agg("D2_bgi_new.tsv", "CN13_D2_HE.tiff",prealigned=False)
# prealinged=False to tell the tsv to align with image
Obtain cellxgene adata
cell_adata = st.io.read_bgi(
# 'SS200000135TL_D1_all_bin1.txt.gz',
"D2_bgi_new.tsv",
segmentation_adata=adata,
labels_layer='watershed_labels',
add_props=True,
)
Drawing
# you could use the mathplotlib.pyplot to draw the img of interest
# and here is the default setting for spateo
%config InlineBackend.print_figure_kwargs = {'facecolor' : "w"}
%config InlineBackend.figure_format = 'retina'
Drawing different layers in a picture
You could use axes
in pyplot to draw figure
Note, for the adata
directly read from
fig, axes = plt.subplots(figsize=(8, 8), tight_layout=True)
# The [x:x+x_delta, y:y+y_delta] could used to observe the ROI
st.pl.imshow(adata[400:400+520,1100:1100+667], 'stain', ax=axes, use_scale=False, save_show_or_return = 'return', cmap = clr.LinearSegmentedColormap.from_list('custom blue',['#000000FF', '#FFFFFFFF'], N=256))
st.pl.imshow(adata[400:400+520,1100:1100+667], 'cell_labels_boundary', ax=axes, alpha=1, cmap=clr.LinearSegmentedColormap.from_list('custom blue', ['#FFFFFF00','#FF0000FF'], N=256), use_scale=False, save_show_or_return='return')
plt.show()
# fig.savefig("{path}/{name_of_file}.pdf/.png", dpi=300)
Import the figure or cell mask from other methods
img = plt.imread("{file}")
# img = np.flip(img, axis=0)
y_int = np.array(list(adata.var_names),dtype=np.int16)
x_int = np.array(list(adata.obs_names),dtype=np.int16)
y_max, y_min = y_int.max(), y_int.min()
x_max, x_min = x_int.max(), x_int.min()
print(y_max, y_min, x_max, x_min)
# set the weight of border
ax.spines['top'].set_linewidth(2.5)
ax.spines['bottom'].set_linewidth(2.5)
ax.spines['left'].set_linewidth(2.5)
ax.spines['right'].set_linewidth(2.5)
Statistic Test
# ranksums
from scipy.stats import ranksums
ranksums(df[df["cell_or_bin"] == "Cell"]["density"], df[df["cell_or_bin"] == "Cytoplasma"]["density"])
ranksums(df[df["cell_or_bin"] == "Cell"]["n_counts"], df[df["cell_or_bin"] == "Cell"]["n_counts"])
# T-test
from scipy.stats import ttest_ind
ttest_ind(df[df["cell_or_bin"] == "Cell"]["n_counts"], df[df["cell_or_bin"] == "Cytoplasma"]["n_counts"])