将RNA_seq原始数据存放在raw_data文件夹,经过去除接头的数据存放在clean_data中。
1、 使用Trim galore软件对两次数据进行质控,去掉20bp以下的reads
vim新建RNA_seq_script_1对2022_08_23测序数据进行质控分析
#!/bin/bash
# The line above declares that this script is written in bash syntax.
# Program
#   Adapter and quality trimming of the paired-end RNA-seq reads with Trim Galore.
# History
#   2022/08/23 zexing First release

# Stop at the first failing command or unset variable, so a sample that failed
# trimming is not silently skipped and later consumed by downstream steps.
set -euo pipefail

# Working directory of the 2022_08_23 sequencing run.
dir=/home/customer/lizexing/projects/xindi/TreatData/2022_08_23

# Optional FastQC report on the raw reads (left disabled, as in the original).
# fastqc -t 8 -o "${dir}/fastqc_report/" "${dir}"/raw_data/*.fq.gz

# Process every sample of the run.
for i in T1 T2 T3 V1 V2 V3; do
  # -q 20 trims low-quality ends (Phred < 20); --length 20 discards reads that
  # end up shorter than 20 bp after trimming (it is reads, not adapters, that
  # are filtered by length); --paired keeps both mates in sync.
  trim_galore -q 20 --phred33 --stringency 3 --length 20 -e 0.1 -j 4 --paired \
    "${dir}/raw_data/${i}_Clean_Data1.fq.gz" \
    "${dir}/raw_data/${i}_Clean_Data2.fq.gz" \
    -o "${dir}/clean_data/"
done
后台运行RNA_seq_script_1:
# Run detached from the terminal; stdout and stderr both go to the log file.
nohup bash RNA_seq_script_1 > RNA_seq_script_1_log 2>&1 &
2. 使用STAR软件对45S rRNA构建索引、对GRCh38.dna.primary_assembly、GRCh38.ncRNA、GRCh38.cds.all构建索引
# 参数说明
--runThreadN是指你要用几个cpu来运行;
--genomeDir构建索引输出文件的目录;
--genomeFastaFiles 基因组fasta文件的路径(是fasta文件本身,而不是其所在的目录);
--limitGenomeGenerateRAM 43749387189 STAR构建索引时消耗内存太大,用该参数(注意大小写:RAM)设置可用内存上限(单位:字节)防止出错,感谢孙小雨帮忙
# Build the four STAR indexes used by the alignment steps.
# The memory-limit option is spelled --limitGenomeGenerateRAM (upper-case "RAM");
# STAR parameter names are case-sensitive and an unrecognized name aborts the run.

# 1) 45S rRNA reference (U13369.1).
# NOTE(review): the STAR manual recommends lowering --genomeSAindexNbases for
# very small references such as this one - confirm whether it is needed here.
STAR --runMode genomeGenerate --runThreadN 16 --limitGenomeGenerateRAM 43749387189 \
  --genomeDir /home/customer/lizexing/references/Human_45S/star_index \
  --genomeFastaFiles /home/customer/lizexing/references/Human_45S/U13369.1.fasta

# 2) GRCh38 primary assembly (index is written next to the FASTA file).
STAR --runMode genomeGenerate --runThreadN 16 \
  --genomeDir /home/customer/lizexing/references/Ensembl/Human \
  --genomeFastaFiles /home/customer/lizexing/references/Ensembl/Human/Homo_sapiens.GRCh38.dna.primary_assembly.fa

# 3) GRCh38 ncRNA sequences.
STAR --runMode genomeGenerate --runThreadN 16 --limitGenomeGenerateRAM 43749387189 \
  --genomeDir /home/customer/lizexing/references/Ensembl/Human/star_ncrna_index/ \
  --genomeFastaFiles /home/customer/lizexing/references/Ensembl/Human/Homo_sapiens.GRCh38.ncrna.fa

# 4) GRCh38 CDS sequences (a larger RAM limit is used for this one).
STAR --runMode genomeGenerate --runThreadN 8 --limitGenomeGenerateRAM 82424365322 \
  --genomeDir /home/customer/lizexing/references/Ensembl/Human/star_cds_index/ \
  --genomeFastaFiles /home/customer/lizexing/references/Ensembl/Human/Homo_sapiens.GRCh38.cds.all.fa
3. 使用STAR软件对测序数据与45S rRNA进行比对
vim新建RNA_seq_script_2对2022_08_23测序数据进行处理
#!/bin/bash
# The line above declares that this script is written in bash syntax.
# Program
#   Align the trimmed paired-end reads to the 45S rRNA reference with STAR.
# History
#   2022/08/23 zexing First release

# Stop at the first failing command or unset variable instead of carrying on
# with the remaining samples after an error.
set -euo pipefail

# Working directory of the 2022_08_23 sequencing run.
dir=/home/customer/lizexing/projects/xindi/TreatData/2022_08_23

# The output prefix below points into this directory, so make sure it exists.
mkdir -p "${dir}/45S_RNA"

# Process every sample of the run.
for i in T1 T2 T3 V1 V2 V3; do
  # --readFilesCommand zcat: the trimmed inputs are gzip-compressed.
  # --outReadsUnmapped Fastx: reads that do not align are written to
  #   <prefix>Unmapped.out.mate1/2, which the ncRNA and CDS alignment scripts
  #   take as their input.
  STAR --runThreadN 8 --runMode alignReads --readFilesCommand zcat \
    --quantMode TranscriptomeSAM GeneCounts --twopassMode Basic --outSAMtype BAM Unsorted \
    --sjdbGTFfile /home/customer/lizexing/references/Human_45S/U13369.1.gtf \
    --genomeDir /home/customer/lizexing/references/Human_45S/star_index/ \
    --readFilesIn "${dir}/clean_data/${i}_Clean_Data1_val_1.fq.gz" "${dir}/clean_data/${i}_Clean_Data2_val_2.fq.gz" \
    --outFileNamePrefix "${dir}/45S_RNA/${i}-val" \
    --outReadsUnmapped Fastx
done
后台运行RNA_seq_script_2:
# Run detached from the terminal; stdout and stderr both go to the log file.
nohup bash RNA_seq_script_2 > RNA_seq_script_2_log 2>&1 &
4. 使用STAR软件对数据中未比对上45S_RNA的序列与GRCh38.ncRNA进行比对
vim新建RNA_seq_script_3 对2022_08_23测序数据进行处理
#!/bin/bash
# The line above declares that this script is written in bash syntax.
# Program
#   Align the reads that did not map to 45S rRNA against the GRCh38 ncRNA index.
# History
#   2022/08/23 zexing First release

# Stop at the first failing command or unset variable instead of carrying on
# with the remaining samples after an error.
set -euo pipefail

# Working directory of the 2022_08_23 sequencing run.
dir=/home/customer/lizexing/projects/xindi/TreatData/2022_08_23

# The output prefix below points into this directory, so make sure it exists.
mkdir -p "${dir}/ncRNA"

# Process every sample of the run.
for i in T1 T2 T3 V1 V2 V3; do
  # Inputs are the unmapped mates written by the 45S rRNA alignment; no
  # --readFilesCommand is given, so they are read as plain (uncompressed) files.
  STAR --runThreadN 8 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted \
    --genomeDir /home/customer/lizexing/references/Ensembl/Human/star_ncrna_index/ \
    --readFilesIn "${dir}/45S_RNA/${i}-valUnmapped.out.mate1" "${dir}/45S_RNA/${i}-valUnmapped.out.mate2" \
    --outFileNamePrefix "${dir}/ncRNA/${i}_ncrna_val" \
    --outReadsUnmapped Fastx
done
后台运行RNA_seq_script_3:
# Run detached from the terminal; stdout and stderr both go to the log file.
nohup bash RNA_seq_script_3 > RNA_seq_script_3_log 2>&1 &
5. 使用STAR软件对数据中未比对上45S_RNA的序列与GRCh38.cds.all进行比对
vim新建RNA_seq_script_4 对2022_08_23测序数据进行处理
#!/bin/bash
# The line above declares that this script is written in bash syntax.
# Program
#   Align the reads that did not map to 45S rRNA against the GRCh38 CDS index.
# History
#   2022/08/23 zexing First release

# Stop at the first failing command or unset variable instead of carrying on
# with the remaining samples after an error.
set -euo pipefail

# Working directory of the 2022_08_23 sequencing run.
dir=/home/customer/lizexing/projects/xindi/TreatData/2022_08_23

# The output prefix below points into this directory, so make sure it exists.
mkdir -p "${dir}/cds"

# Process every sample of the run.
for i in T1 T2 T3 V1 V2 V3; do
  # Inputs are the unmapped mates written by the 45S rRNA alignment; no
  # --readFilesCommand is given, so they are read as plain (uncompressed) files.
  STAR --runThreadN 8 --runMode alignReads --twopassMode Basic --outSAMtype BAM Unsorted \
    --genomeDir /home/customer/lizexing/references/Ensembl/Human/star_cds_index/ \
    --readFilesIn "${dir}/45S_RNA/${i}-valUnmapped.out.mate1" "${dir}/45S_RNA/${i}-valUnmapped.out.mate2" \
    --outFileNamePrefix "${dir}/cds/${i}_cds_val" \
    --outReadsUnmapped Fastx
done
后台运行RNA_seq_script_4:
# Run detached from the terminal; stdout and stderr both go to the log file.
nohup bash RNA_seq_script_4 > RNA_seq_script_4_log 2>&1 &
6. 使用Samtools软件对三组数据进行排序
vim新建RNA_seq_script_5 对2022_08_23测序数据进行处理
#!/bin/bash
# The line above declares that this script is written in bash syntax.
# Program
#   Sort the STAR alignments of the three reference sets (45S rRNA, ncRNA, CDS)
#   with samtools.
# History
#   2022/08/23 zexing First release

# Stop at the first failing command or unset variable, so a failed sort does
# not leave a missing or partial file for the counting step to pick up.
set -euo pipefail

# Working directory of the 2022_08_23 sequencing run.
dir=/home/customer/lizexing/projects/xindi/TreatData/2022_08_23

# Process every sample of the run.
for i in T1 T2 T3 V1 V2 V3; do
  # -@ 8: extra sorting threads; -l 5: output compression level.
  # The sorted files keep the ".bam.sort" suffix because the featureCounts
  # scripts read them under exactly that name.
  samtools sort -@ 8 -l 5 \
    -o "${dir}/45S_RNA/${i}-valAligned.out.bam.sort" \
    "${dir}/45S_RNA/${i}-valAligned.out.bam"
  samtools sort -@ 8 -l 5 \
    -o "${dir}/ncRNA/${i}_ncrna_valAligned.out.bam.sort" \
    "${dir}/ncRNA/${i}_ncrna_valAligned.out.bam"
  samtools sort -@ 8 -l 5 \
    -o "${dir}/cds/${i}_cds_valAligned.out.bam.sort" \
    "${dir}/cds/${i}_cds_valAligned.out.bam"
done
后台运行RNA_seq_script_5:
# Run detached from the terminal; stdout and stderr both go to the log file.
nohup bash RNA_seq_script_5 > RNA_seq_script_5_log 2>&1 &
7. 使用featureCounts软件对三组数据read summarization
Step 1 - 对测序数据进行计数:5.8S_RNA_bin=10bp, 3’ETS_RNA_bin=100bp, others_RNA_bin=200bp
vim新建RNA_seq_script_6 对2022_08_23细胞数据进行处理
# Multimapping reads : not counted
#!/bin/bash
# The line above declares that this script is written in bash syntax.
# Program
#   Count reads per annotation bin on the 45S rRNA alignments with
#   featureCounts, using the U13369.1.2.gtf annotation.
# History
#   2022/08/23 zexing First release

# Stop at the first failing command or unset variable instead of carrying on
# with the remaining samples after an error.
set -euo pipefail

# Working directory of the 2022_08_23 sequencing run.
dir=/home/customer/lizexing/projects/xindi/TreatData/2022_08_23

# Process every sample of the run.
for i in T1 T2 T3 V1 V2 V3; do
  # -p: paired-end input; -B: require both ends to be mapped; -C: do not count
  # chimeric fragments; -f: summarize at feature level; -t/-g: feature type and
  # grouping attribute. -M is not given, so multi-mapping reads are not counted.
  # NOTE(review): in recent Subread releases -p alone only declares paired-end
  # input and fragments are counted only with --countReadPairs - confirm
  # against the installed version.
  featureCounts -T 8 \
    -a /home/customer/lizexing/references/Human_45S/U13369.1.2.gtf -p -B -C -f -t exon -g gene_id \
    -o "${dir}/45S_RNA/${i}.read.count" \
    "${dir}/45S_RNA/${i}-valAligned.out.bam.sort"
done
后台运行RNA_seq_script_6:
# Run detached from the terminal; stdout and stderr both go to the log file.
nohup bash RNA_seq_script_6 > RNA_seq_script_6_log 2>&1 &
Step 2 - 对测序数据进行计数:5.8S_RNA_bin=10bp, 3’ETS_RNA_bin=100bp, others_RNA_bin=100bp
vim新建RNA_seq_script_7 对2022_08_23细胞数据进行处理
# Multimapping reads : not counted
#!/bin/bash
# The line above declares that this script is written in bash syntax.
# Program
#   Count reads per annotation bin on the 45S rRNA alignments with
#   featureCounts, using the U13369.1.3.gtf annotation.
# History
#   2022/08/23 zexing First release

# Stop at the first failing command or unset variable instead of carrying on
# with the remaining samples after an error.
set -euo pipefail

# Working directory of the 2022_08_23 sequencing run.
dir=/home/customer/lizexing/projects/xindi/TreatData/2022_08_23

# Process every sample of the run.
for i in T1 T2 T3 V1 V2 V3; do
  # -p: paired-end input; -B: require both ends to be mapped; -C: do not count
  # chimeric fragments; -f: summarize at feature level; -t/-g: feature type and
  # grouping attribute. -M is not given, so multi-mapping reads are not counted.
  # NOTE(review): in recent Subread releases -p alone only declares paired-end
  # input and fragments are counted only with --countReadPairs - confirm
  # against the installed version.
  featureCounts -T 8 \
    -a /home/customer/lizexing/references/Human_45S/U13369.1.3.gtf -p -B -C -f -t exon -g gene_id \
    -o "${dir}/45S_RNA/${i}.read.count_2" \
    "${dir}/45S_RNA/${i}-valAligned.out.bam.sort"
done
后台运行RNA_seq_script_7:
# Run detached from the terminal; stdout and stderr both go to the log file.
nohup bash RNA_seq_script_7 > RNA_seq_script_7_log 2>&1 &
Step 3 - 对测序数据进行计数:5.8S_RNA_bin=10bp, 3’ETS_RNA_bin=100bp, others_RNA_bin=200bp
vim新建RNA_seq_script_8 对2022_08_23细胞数据进行处理
# Level : feature level
# Paired-end : yes
# Multimapping reads : counted
# Multi-overlapping reads : counted
# Both ends mapped : not required
# Chimeric reads : counted
#!/bin/bash
# The line above declares that this script is written in bash syntax.
# Program
#   Count reads per annotation bin on the 45S rRNA alignments with
#   featureCounts (U13369.1.2.gtf), this time including multi-mapping and
#   multi-overlapping reads.
# History
#   2022/08/23 zexing First release

# Stop at the first failing command or unset variable instead of carrying on
# with the remaining samples after an error.
set -euo pipefail

# Working directory of the 2022_08_23 sequencing run.
dir=/home/customer/lizexing/projects/xindi/TreatData/2022_08_23

# Process every sample of the run.
for i in T1 T2 T3 V1 V2 V3; do
  # -M: count multi-mapping reads; -O: count reads overlapping several
  # features; -p: paired-end input; -f: summarize at feature level.
  # -B and -C are not given, so both ends need not be mapped and chimeric
  # fragments are counted.
  # NOTE(review): in recent Subread releases -p alone only declares paired-end
  # input and fragments are counted only with --countReadPairs - confirm
  # against the installed version.
  featureCounts -T 8 -M -O -p -f -t exon -g gene_id \
    -a /home/customer/lizexing/references/Human_45S/U13369.1.2.gtf \
    -o "${dir}/45S_RNA/${i}.read.count_multi_overmapping" \
    "${dir}/45S_RNA/${i}-valAligned.out.bam.sort"
done
后台运行RNA_seq_script_8:
# Run detached from the terminal; stdout and stderr both go to the log file.
nohup bash RNA_seq_script_8 > RNA_seq_script_8_log 2>&1 &
Step 4 - 对测序数据进行计数:5.8S_RNA_bin=10bp, 3’ETS_RNA_bin=100bp, others_RNA_bin=100bp
vim新建RNA_seq_script_9 对2022_08_23细胞数据进行处理
# Level : feature level
# Paired-end : yes
# Multimapping reads : counted
# Multi-overlapping reads : counted
# Both ends mapped : not required
# Chimeric reads : counted
#!/bin/bash
# The line above declares that this script is written in bash syntax.
# Program
#   Count reads per annotation bin on the 45S rRNA alignments with
#   featureCounts (U13369.1.3.gtf), this time including multi-mapping and
#   multi-overlapping reads.
# History
#   2022/08/23 zexing First release

# Stop at the first failing command or unset variable instead of carrying on
# with the remaining samples after an error.
set -euo pipefail

# Working directory of the 2022_08_23 sequencing run.
dir=/home/customer/lizexing/projects/xindi/TreatData/2022_08_23

# Process every sample of the run.
for i in T1 T2 T3 V1 V2 V3; do
  # -M: count multi-mapping reads; -O: count reads overlapping several
  # features; -p: paired-end input; -f: summarize at feature level.
  # -B and -C are not given, so both ends need not be mapped and chimeric
  # fragments are counted.
  # NOTE(review): in recent Subread releases -p alone only declares paired-end
  # input and fragments are counted only with --countReadPairs - confirm
  # against the installed version.
  featureCounts -T 8 -M -O -p -f -t exon -g gene_id \
    -a /home/customer/lizexing/references/Human_45S/U13369.1.3.gtf \
    -o "${dir}/45S_RNA/${i}.read.count_multi_overmapping_2" \
    "${dir}/45S_RNA/${i}-valAligned.out.bam.sort"
done
后台运行RNA_seq_script_9:
# Run detached from the terminal; stdout and stderr both go to the log file.
nohup bash RNA_seq_script_9 > RNA_seq_script_9_log 2>&1 &
原文地址:https://www.jb51.cc/wenti/3284475.html
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。