运行时错误：索引超出范围，取 theta 的子集

如何解决运行时错误：索引超出范围，取 theta 的子集

我有一个如下所示的数据框：

y   s
1   cat
0   cat
0   dog
0   dog
0   dog
0   dog
0   dog
0   dog
0   dog
1   dog
1   cat
0   dog
0   dog
1   dog
0   dog
1   dog
0   dog
0   dog
0   dog
0   dog
0   dog
0   dog
1   cat
1   dog
0   cat
1   dog
0   dog
1   dog
1   cat
1   dog
1   dog
1   dog
1   cat
1   cat
1   dog
1   dog
0   cat
1   dog
0   dog
0   cat
1   dog
1   dog
0   dog
1   dog
0   cat
1   dog
1   dog
1   cat
1   cat
1   dog
0   dog
1   cat
1   dog
0   dog
0   dog
1   dog
1   cat
1   dog
0   dog
0   dog
0   dog
0   dog
1   cat
1   dog
1   dog
1   dog
1   cat
1   cat
1   dog
1   dog
0   cat
1   dog
0   dog
0   cat
1   dog
1   dog
0   dog
1   dog
0   cat
1   dog
1   dog
1   cat
1   cat
1   dog
0   dog
1   cat
1   dog
0   dog
0   dog
1   dog
1   cat
1   dog
0   dog
0   dog
0   dog
0   dog
1   cat

我正在使用 jags 执行 MCMC 算法，但我总是收到错误：

Error in jags.model("TEMPmodel.txt",data = dataList,inits = initsList,: RUNTIME ERROR: Compilation error on line 4. Index out of range taking subset of theta

这是源文件：

Ssubj-Mbernbeta.R 
# Accompanies the book:
#   Kruschke,J. K. (2014). Doing Bayesian Data Analysis: 
#   A Tutorial with R,JAGS,and Stan. 2nd Edition. Academic Press / Elsevier.
source("DBDA2E-utilities.R")
#===============================================================================

genMCMC = function( data,numSavedSteps=50000,saveName=NULL ) { 
  require(rjags)
  #-----------------------------------------------------------------------------
  # THE DATA.
  # N.B.: This function expects the data to be a data frame,# with one component named y being a vector of integer 0,1 values,# and one component named s being a factor of subject identifiers.
  y = data$y
  s = as.numeric(data$s) # converts character to consecutive integer levels
  # Do some checking that data make sense:
  if ( any( y!=0 & y!=1 ) ) { stop("All y values must be 0 or 1.") }
  Ntotal = length(y)
  Nsubj = length(unique(s))
  # Specify the data in a list,for later shipment to JAGS:
  dataList = list(
    y = y,s = s,Ntotal = Ntotal,Nsubj = Nsubj
  )
  #-----------------------------------------------------------------------------
  # THE MODEL.
  modelString = "
  model {
    for ( i in 1:Ntotal ) {
      y[i] ~ dbern( theta[s[i]] )
    }
    for ( sIdx in 1:Nsubj ) {
      theta[sIdx] ~ dbeta( 2,2 ) # N.B.: 2,2 prior; change as appropriate.
    }
  }
  "
  # close quote for modelString
  writeLines( modelString,con="TEMPmodel.txt" )
  #-----------------------------------------------------------------------------
  # INTIALIZE THE CHAINS.
  # Initial values of MCMC chains based on data:
  # Option 1: Use single initial value for all chains:
  #  thetaInit = rep(0,Nsubj)
  #  for ( sIdx in 1:Nsubj ) { # for each subject
  #    includeRows = ( s == sIdx ) # identify rows of this subject
  #    yThisSubj = y[includeRows]  # extract data of this subject
  #    thetaInit[sIdx] = sum(yThisSubj)/length(yThisSubj) # proportion
  #  }
  #  initsList = list( theta=thetaInit )
  # Option 2: Use function that generates random values near MLE:
  initsList = function() {
    thetaInit = rep(0,Nsubj)
    for ( sIdx in 1:Nsubj ) { # for each subject
      includeRows = ( s == sIdx ) # identify rows of this subject
      yThisSubj = y[includeRows]  # extract data of this subject
      resampledY = sample( yThisSubj,replace=TRUE ) # resample
      thetaInit[sIdx] = sum(resampledY)/length(resampledY) 
    }
    thetaInit = 0.001+0.998*thetaInit # keep away from 0,1
    return( list( theta=thetaInit ) )
  }
  #-----------------------------------------------------------------------------
  # RUN THE CHAINS
  parameters = c( "theta")     # The parameters to be monitored
  adaptSteps = 500             # Number of steps to adapt the samplers
  burnInSteps = 500            # Number of steps to burn-in the chains
  nChains = 4                  # nChains should be 2 or more for diagnostics 
  thinSteps = 1
  nIter = ceiling( ( numSavedSteps * thinSteps ) / nChains )
  # Create,initialize,and adapt the model:
  jagsModel = jags.model( "TEMPmodel.txt",data=dataList,inits=initsList,n.chains=nChains,n.adapt=adaptSteps )
  # Burn-in:
  cat( "Burning in the MCMC chain...\n" )
  update( jagsModel,n.iter=burnInSteps )
  # The saved MCMC chain:
  cat( "Sampling final MCMC chain...\n" )
  codaSamples = coda.samples( jagsModel,variable.names=parameters,n.iter=nIter,thin=thinSteps )
  # resulting codaSamples object has these indices: 
  #   codaSamples[[ chainIdx ]][ stepIdx,paramIdx ]
  if ( !is.null(saveName) ) {
    save( codaSamples,file=paste(saveName,"Mcmc.Rdata",sep="") )
  }
  return( codaSamples )
} # end function

#===============================================================================

smryMCMC = function(  codaSamples,compVal=0.5,rope=NULL,compValDiff=0.0,ropeDiff=NULL,saveName=NULL ) {
  mcmcMat = as.matrix(codaSamples,chains=TRUE)
  Ntheta = length(grep("theta",colnames(mcmcMat)))
  summaryInfo = NULL
  rowIdx = 0
  for ( tIdx in 1:Ntheta ) {
    parName = paste0("theta[",tIdx,"]")
    summaryInfo = rbind( summaryInfo,summarizePost( mcmcMat[,parName],compVal=compVal,ROPE=rope ) )
    rowIdx = rowIdx+1
    rownames(summaryInfo)[rowIdx] = parName
  }
  for ( t1Idx in 1:(Ntheta-1) ) {
    for ( t2Idx in (t1Idx+1):Ntheta ) {
      parName1 = paste0("theta[",t1Idx,"]")
      parName2 = paste0("theta[",t2Idx,"]")
      summaryInfo = rbind( summaryInfo,parName1]-mcmcMat[,parName2],compVal=compValDiff,ROPE=ropeDiff ) )
      rowIdx = rowIdx+1
      rownames(summaryInfo)[rowIdx] = paste0(parName1,"-",parName2)
    }
  }
  if ( !is.null(saveName) ) {
    write.csv( summaryInfo,"SummaryInfo.csv",sep="") )
  }
  show( summaryInfo )
  return( summaryInfo )
}

#===============================================================================

plotMCMC = function( codaSamples,data,saveName=NULL,saveType="jpg" ) {
  #-----------------------------------------------------------------------------
  # N.B.: This function expects the data to be a data frame,# and one component named s being a factor of subject identifiers.
  y = data$y
  s = as.numeric(data$s) # converts character to consecutive integer levels
  # Now plot the posterior:
  mcmcMat = as.matrix(codaSamples,chains=TRUE)
  chainLength = NROW( mcmcMat )
  Ntheta = length(grep("theta",colnames(mcmcMat)))
  openGraph(width=2.5*Ntheta,height=2.0*Ntheta)
  par( mfrow=c(Ntheta,Ntheta) )
  for ( t1Idx in 1:(Ntheta) ) {
    for ( t2Idx in (1):Ntheta ) {
      parName1 = paste0("theta[","]")
      if ( t1Idx > t2Idx) {  
        # plot.new() # empty plot,advance to next
        par( mar=c(3.5,3.5,1,1),mgp=c(2.0,0.7,0) )
        nToPlot = 700
        ptIdx = round(seq(1,chainLength,length=nToPlot))
        plot ( mcmcMat[ptIdx,mcmcMat[ptIdx,parName1],cex.lab=1.75,xlab=parName2,ylab=parName1,col="skyblue" )
      } else if ( t1Idx == t2Idx ) {
        par( mar=c(3.5,0) )
        postInfo = plotPost( mcmcMat[,cex.lab = 1.75,ROPE=rope,cex.main=1.5,xlab=parName1,main="" )
        includeRows = ( s == t1Idx ) # identify rows of this subject in data
        dataPropor = sum(y[includeRows])/sum(includeRows) 
        points( dataPropor,pch="+",col="red",cex=3 )
      } else if ( t1Idx < t2Idx ) {
        par( mar=c(3.5,0) )
        postInfo = plotPost(mcmcMat[,ROPE=ropeDiff,xlab=paste0(parName1,parName2),main="" )
        includeRows1 = ( s == t1Idx ) # identify rows of this subject in data
        dataPropor1 = sum(y[includeRows1])/sum(includeRows1) 
        includeRows2 = ( s == t2Idx ) # identify rows of this subject in data
        dataPropor2 = sum(y[includeRows2])/sum(includeRows2) 
        points( dataPropor1-dataPropor2,cex=3 )
      }
    }
  }
  #-----------------------------------------------------------------------------  
  if ( !is.null(saveName) ) {
    saveGraph( file=paste(saveName,"Post",sep=""),type=saveType)
  }
}

#===============================================================================

这是我从原始源文件中使用的代码：

# Load the data
myData = read.csv("dogcats_polliberal.csv") # myData is a data frame.

# Load the functions genMCMC,smryMCMC,and plotMCMC:
source("Jags-Ydich-XnomSsubj-MbernBeta.R")
# Generate the MCMC chain:
mcmcCoda = genMCMC( data = myData,numSavedSteps=50000 )
# display diagnostics of chain,for specified parameter:
diagMCMC( mcmcCoda,parName="theta[1]" )
# display numerical summary statistics of chain:
smryMCMC( mcmcCoda,compVal=NULL,compValDiff=0.0 )
# display graphical posterior information:
plotMCMC( mcmcCoda,data=myData,compValDiff=0.0 )

回溯错误来自原始源文件中的行

jagsModel = jags.model( "TEMPmodel.txt",n.adapt=adaptSteps )

谁能帮我弄清楚为什么我会收到错误：

此外，我在数据文件中将 s 变量更改为数字，但我没有在此处显示。将其保留为字符也会导致另一个错误。

解决方法

我使用的 R 版本不支持代码。源文件是使用 R 版本 4.0.0 及以下版本构建的。

问题可能是这样的：

DBDA2E 附带的脚本运行良好，“开箱即用”，多年。但是最近一些脚本出现了问题。为什么？ R 有改变了。在 R 4.0.0 中，read.csv() 等各种函数不再自动将字符串转换为因子。假设了一些 DBDA2E 脚本这些函数的结果包含因素，但如果你现在使用 R 4.0.0（或更新版本）这些脚本会犹豫不决。

那么，怎么办？这是一个临时修复。当您打开 R 会话时，输入这个全局选项：options(stringsAsFactors = TRUE)

不幸的是，此选项最终将被弃用。我将不得不修改每个受影响的脚本并发布更新版本。这会总有一天会发生。我希望。

为什么R会改变？你可以在这里阅读：

https://developer.r-project.org/Blog/public/2020/02/16/stringsasfactors/index.html

以上文字复制自这篇博文，http://doingbayesiandataanalysis.blogspot.com/2020/09/fixing-new-problem-in-some-dbda2e.html

我以前遇到过这个问题，有两种可能的解决方案（都在不同的场合发生在我身上）：

dbern(0) 未定义。

尝试使用此处概述的（非常非常小的）校正因子重新运行：

    for ( i in 1:Ntotal ) {
      y[i] ~ dbern( theta[s[i]]**+0.00000000001** )
    }
    for ( sIdx in 1:Nsubj ) {
      theta[sIdx] ~ dbeta( 2,2 ) # N.B.: 2,2 prior; change as appropriate.
    }
  }
  "

这应该可以解决问题。

另一种选择是您在列表、数据框、输入变量等中错误指定了某些内容。

我不太确定 JAGS 是否正确理解了粗体部分 1:Nsubj 是 1:2，也许可以尝试在 c("cat","dog") 中使用名称 i 或类似的名称。

    for ( i in 1:Ntotal ) {
      y[i] ~ dbern( theta[s[i]] )
    }
    for ( **sIdx in 1:Nsubj** ) {
      theta[sIdx] ~ dbeta( 2,2 prior; change as appropriate.
    }
  }
  "

这些示例很棒，可以使您的模型明确，但有时一些上下文（即您的模型应该做什么？？）会有所帮助。由于这是一个教科书示例，我怀疑您是否存在严重的先前错误指定或类似问题。但是，如果您提供一些关于为什么使用最终具有相同结构的数字和字符列的上下文，那就太好了！