微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

GCalignR

如何解决GCalignR

尝试获取多个.txt(GC-FID)文件提取两列数据并将其分配给一个对象,我最终可以将该对象传递给GCalignR。有没有更好的方法来为GCalignR处理这些数据?

引起问题的辅助功能

'''

# nonempty strings after splitting a row by one space
get_nonempty_splits = function(row) {
  s = strsplit(row,"[[:space:]]")
  s = s[[1]]
  l = c()
  for (i in 1:length(s)) {
    if (s[i] != "") {
      l = c(l,s[i])
    }
  }
  return(l)
}

# filenames have .txt,names do not
filenames_to_names = function(x) {
  l = c()
  for (i in 1:length(x)) {
    x1 = strsplit(x,"[.]")[[i]][1]
    l = c(l,x1)
  }
  return(l)
}

# get data row indices
get_data_row_inds = function(df) {
  ind_start = 0
  ind_end = 0
  for (i in 1:length(df)) {
    row = df[i]
    # find start
    if (grepl("----",row)) {
      stopifnot(ind_start == 0)  # assert ind_start not set
      ind_start = i+1
    }
    # find end
    if (i == length(df) && ind_end == 0) {
      ind_end = length(df)
    } else if (grepl("Totals",row) && grepl(":",row)) {
      stopifnot(ind_end == 0)  # assert ind_end not set
      ind_end = i-1
    }
  }
  stopifnot(ind_start != 0)
  stopifnot(ind_end != 0)
  return(ind_start:ind_end)
}

'''

'''

path_to_raw_data = "/Users/input"
path_to_processed_data = "."
# get paths for all .txt files in pathToRawData directory
paths = list.files(path_to_raw_data,full.name=TRUE,pattern = ".txt")
filenames = list.files(path_to_raw_data,full.name=FALSE,pattern = ".txt")
names = filenames_to_names(filenames)  # without .txt extension

# get data from text file
processed_data = list()
df_lengths = c()
for (i in 1:length(paths)) {  # i indexes the raw files
  path = paths[i]
  df = read.delim(path,fileEncoding= 'UTF-16LE',header=TRUE)
  df = df[[1]]
  inds = get_data_row_inds(df)
  df_lengths = c(df_lengths,length(inds))
  times = c()
  areas = c()
  for (j in inds) {  # j indexes the data rows of a raw file
    row = df[j]
    row = get_nonempty_splits(row)
    time = row_to_time(row)
    area = row_to_area(row)
    times = c(times,time)
    areas = c(areas,area)
  }
  pairs = data.frame(time = times,area = areas)
  processed_data[[i]] = pairs
}

'''

获取错误:strsplit(row,“ [[:space:]]”)中的错误:非字符参数

任何建议如何解决这个问题?它是文件编码吗?已处理数据= list()什么都不返回...?

输入标题: '''

dput(head(df)) 结构(c(59L,53L,45L,48L,47L,52L),.Label = c(“注入量:1 µl”, “ ***报告结束***”,“面积百分比报告”, “ 1 1.353 1 BB 2.85703 2.453e-5?”, “ 2 1.952 1 BV 4411.39551 0.03787吗?”, “ 3 2.058 1 VV 4693.20215 0.04029吗?”, “ 4 2.089 1 VV 6614.89502 0.05679吗?”, “ 5 2.139 2 0.00000 0.00000 NG”, “ 6 2.452 2 0.00000 0.00000 1,3-DNB”, “ 7 3.149 2 0.00000 0.00000 2,4-DNT”, “ 8 3.315 1 VV S 1.15784e7 99.39858吗?”, “ 9 3.347 1 VV S 5169.44629 0.04438吗?”, “#[min]%”,“ 10 3.372 1 VV S 2.09449e4 0.17981?”, “ 11 3.466 1 VV S 2535.17432 0.02176?”, “ 12 3.547 1 VB S 2.45685e4 0.21092吗?”, “ 13 3.602 1 BV T 451.00174 0.00387?”, “ 14 3.686 1 VV T 40.45324 0.00035?”, “ 15 3.734 1 VV T 13.40936 0.00012?”, “ 16 3.819 1 VB T 508.57788 0.00437?”, “ 17 4.119 1 BB 13.01144 0.00011吗?”, “ 18 4.856 2 0.00000 0.00000 TNT”, “ 19 4.975 2 0.00000 0.00000 TNB”, “ 20 5.549 2 0.00000 0.00000 4-Am-DNT”, “ 21 5.869 2 0.00000 0.00000 RDX”, “ 22 5.943 2 0.00000 0.00000 2-Am-DNT”, “ 23 6.516 2 0.00000 0.00000 Tetryl”, “ 24 11.716 1 BB 1.75858 1.510e-5?”, “ 25 14.243 1 BB 2.55644 2.195e-5?”, “ 26 16.654 1 BB 3.81723 3.277e-5吗?”, “ 27 18.826 1 BB 2.58369 2.218e-5?”, “ 28 20.800 1 BB 1.51171 1.298e-5?”, “ 29 24.159 1 BB 1.78975 1.536e-5?”, “ 30 24.269 1 BB 1.81180 1.555e-5?”, “ 31 25.053 1 BB 2.96617 2.546e-5?”, “ 32 25.658 1 BB 6.15337 5.283e-5?”, “ 33 25.809 1 BB 3.89435 3.343e-5?”, “ 34 26.577 1 BB 4.02199 3.453e-5?”, “ 35 26.885 1 BB 2.48416 2.133e-5?”, “ 36 27.219 1 BB 14.88012 0.00013?”, “ 37 27.465 1 BB 3.59732 3.088e-5?”, “ 38 29.377 1 BB 18.55422 0.00016?”, “ 39 32.554 1 BB 17.15620 0.00015?”, “ --------------- | --- || ---- | ---------- | -------- | ----- --------------------“, “ ================================================= ===================, “ 2警告或错误:”,“购买仪器:仪器1位置:样品瓶11”, “ Acq。操作员:HHV序列行:2”, “ Calib。数据已修改:2019年3月12日,星期二,下午6:13:25”, “稀释度:1.0000”,“请勿在ISTD中使用乘数和稀释系数”, “注入日期:20年2月24日,14:37:34注入量:1”, “乐器1 H2 / 24/2020 3:13:35 HHV”,“最后更改时间:HHV于2/2/2020 12:59:45 PM”, “方法:C:\ Chem32 \ 1 \ DATA \ IPOULIN \ VOC_TEST_1 2020-02-24 13-49-15 \ VOC_TEST_HV.M”, “方法信息:VOC”,“乘数:1.000”, “ Peak RetTime Sig类型区域名称”,“样本名称:P1U1 hex 022420”, “序列文件:C:\ Chem32 \ 1 \ DATA \ IPOULIN \ VOC_TEST_1 2020-02-24 13-49-15 \ VOC_TEST_1.S”, “信号1:FID1 B”,“排序:保留时间”, “总数:1.16485e7”,“警告:未找到校准化合物”, “警告:校准警告(请参阅校准表列表)” ),class =“ factor”) '''

解决方法

通过添加以下内容解决:df = read.delim(path,fileEncoding ='UTF-16LE',header = TRUE,stringsAsFactors = FALSE)。解决方案为“ stringsAsFactors = FALSE”。谢谢。

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。