如何从R中的函数打印多个二项式测试结果

如何解决如何从R中的函数打印多个二项式测试结果

我制作了一个在 R 中进行二项式测试的函数。我在不使用循环（for i 语句）的情况下成功运行了它。然而，我一直试图通过申请 i 声明来简化它，但只完成了我想要的一半。我的功能是：

binom_test_2b = function(spot_id,cluster_id,marker_gene){
# Finding the cluster and marker gene in scRNA-seq dataset
cluster_id  = sample(as.data.frame(Idents(glioblastoma))[,1],1)
marker = FindMarkers(glioblastoma,ident.1 = cluster_id)
marker = cbind(gene = rownames(marker),marker)
rownames(marker) = 1:nrow(marker) 

# Finding the cluster and the spot in spatial dataset
cluster = Idents(glio_spatial)
df_cluster = as.data.frame(cluster)
df_cluster = cbind(spot_id = rownames(df_cluster),df_cluster)
rownames(df_cluster) = 1:nrow(df_cluster)
df_cluster = df_cluster[df_cluster$cluster == sample(df_cluster[,ncol(df_cluster)],1),]
spot_id = sample(df_cluster[,1)
spatial_count = as.data.frame(glio_spatial@assays$Spatial@counts)
spatial_count = cbind(gene = rownames(spatial_count),spatial_count)
rownames(spatial_count) = 1:nrow(spatial_count) 
spatial_count = spatial_count[c("gene",spot_id)]
spatial_count = rename(spatial_count,"count" = spot_id)

#Finding the percentage of the marker gene expressed in the spot
# List of genes with not zero count at the chosen spot
gene_spot  = spatial_count[spatial_count$count != 0,]

#Finding gene intersection between scRNA-seq cluster and spot
intersection = inner_join(gene_spot,marker)

#Finding marker_gene sample
marker_gene = sample(head(intersection[order(intersection[2],decreasing = TRUE),],5)[,5)

for (i in 1:length(marker_gene)){
    ratio_spot = gene_spot[which(gene_spot$gene == marker_gene[i]),][,2]/sum(gene_spot[,ncol(gene_spot)])
    #Finding the percentage of the marker gene expressed outside the cluster
    all_cells_one_gene = glioblastoma@assays$RNA@counts[marker_gene[i],]
    selected_cells_one_gene = all_cells_one_gene[cluster_id!=Idents(glioblastoma)]
    gene_count_out_cluster = glioblastoma@assays$RNA@counts[,cluster_id!=Idents(glioblastoma)]
    ratio_out_cluster = sum(selected_cells_one_gene)/sum(gene_count_out_cluster)
    result_1 = sprintf("The gene %s is a marker gene for scRNA-seq cluster %s and is expressed in the spot %s.",marker_gene,spot_id)
    result_2 = binom.test(as.integer(ratio_spot*sum(gene_spot[,ncol(gene_spot)])),as.integer(sum(gene_spot[,as.integer(ratio_out_cluster))
}
return(list(head(intersection[order(intersection[2],1:2],result_1,result_2))
}

这个结果是：

我希望该函数将打印五个给定标记基因的二项式测试结果（提供标记基因的精确二项式测试 5 次，而不是仅显示一个精确二项式测试）。请任何人告诉我如何修复以获得五个给定标记基因的二项式结果？

我一直在尝试做一些修改，例如result_2[i]、ratio_spot[i]、ratio_out_cluster[i]，但不起作用。

更新#2 参考@Sirius 的回答。我是这样修改的：

binom_test_2c = function(spot_id,5)

l <- lapply( marker_gene,function(gene) {
    ratio_spot = gene_spot[which(gene_spot$gene == gene),ncol(gene_spot)])
    #Finding the percentage of the marker gene expressed outside the cluster
    all_cells_one_gene = glioblastoma@assays$RNA@counts[gene,as.integer(ratio_out_cluster))
    list( result_1=result_1,result_2=result_2)
}
result_1 <- sapply(l,function(el) el$result_1 )
result_2 <- sapply(l,function(el) el$result_2 )

return(
list(
    head(intersection[order(intersection[2],result_2
))
}

更新#3 参考@Sirius 的回答。我得到的结果如下：我试图根据标记基因为 binom 精确结果数据框列命名，以使其易于阅读。

解决方法

这将是一种更有意义的方法：


## change your for block rest of the function to this:

l <- lapply( marker_gene,function(gene) {
    ratio_spot = gene_spot[which(gene_spot$gene == gene),][,2]/sum(gene_spot[,ncol(gene_spot)])
    #Finding the percentage of the marker gene expressed outside the cluster
    all_cells_one_gene = glioblastoma@assays$RNA@counts[gene,]
    selected_cells_one_gene = all_cells_one_gene[cluster_id!=Idents(glioblastoma)]
    gene_count_out_cluster = glioblastoma@assays$RNA@counts[,cluster_id!=Idents(glioblastoma)]
    ratio_out_cluster = sum(selected_cells_one_gene)/sum(gene_count_out_cluster)
    result_1 = sprintf("The gene %s is a marker gene for scRNA-seq cluster %s and is expressed in the spot %s.",marker_gene,cluster_id,spot_id)
    result_2 = binom.test(as.integer(ratio_spot*sum(gene_spot[,ncol(gene_spot)])),as.integer(sum(gene_spot[,as.integer(ratio_out_cluster))
    list( result_1=result_1,result_2=result_2 )
}) # <----- I forgot this closing ')'

## fetch the result_1 strings
result_1 <- sapply( l,function(el) el$result_1 )
result_2 <- sapply( l,function(el) el$result_2 )

return(
    list(
        head(intersection[order(intersection[2],decreasing = TRUE),],5)[,1:2],result_1,result_2
    )
)

使用 lapply 从每个 for 循环中获取内容，将其填充到列表中并稍后解包。还有其他方法，但如果您不知道如何从 for 循环的每次迭代中获取内容，这是一个好的开始。