#!/usr/bin/perl use strict; use warnings; =c--------------------------------------- this perl script is edited to compute tajima's D the origin file path is : /ifs5/ST_COMG/USER/yanzengli/tajima/data/ChrB01/ChrB01.bb.snp /ifs5/ST_COMG/USER/yanzengli/tajima/data/ChrB01/ChrB01.bb.tajima /ifs5/ST_COMG/USER/yanzengli/tajima/data/filter.site.position/ChrB01.filter.comm.gz edited by bangemantou (yanzengli@genomics.org.cn) 2012-03-29 pm; =cut die "\nUsage: compute tajima's D;\ncomands:\nperl $0 snp.data tajima.outfile filter.site.data;\n\n" if (@ARGV != 3); my $snpdb = shift; my $tajima_outfile = shift; my $filter_position = shift; open AA,$snpdb or die $!; open BB,">",$tajima_outfile or die $!; open (CC,"gzip -dc $filter_position|" ) or die $!; my $window = 50000; my $bin = 0.1; my $cout = int ( 1 / $bin ); my $step = $window * $bin; my $sample = 0; my %filter = (); my %pi = (); my %snp = (); while ( <CC> ) { my @tt = split /\s+/; my $k = int ( $tt[1] / $step ); $filter{$k} ++; } close CC; my $chr_name; my $chrlen = 0; while ( <AA> ) { chomp; my @tt = split; my @line = @tt; $chr_name = $line[0]; $chrlen = $tt[1]; $sample = ($tt[6]+$tt[7]) / 2; my $k = int ( $tt[1] / $step ); $pi{$k} += pi ( $line[6],$line[7] ); if ( ($line[6]*$line[7]) != 0 ) { $snp{$k}++; } } close AA; print BB " chrname\tposition\tsum.pi/filter.sites\ttajima's D\tsum.snp\tfilter.site\n"; my $window_num = int ( $chrlen / $step ); #the number of steps for ( my $i=0; $i<=($window_num-$cout); $i++ ) { my $sum_pi = 0; my $sum_snp = 0; my $filter_site = 0; my $end = $i + $cout - 1; for ( my $aa=$i; $aa<=$end; $aa++ ){ if ( !defined($pi{$aa}) ) { $pi{$aa} = 0; } if ( !defined($snp{$aa}) ) { $snp{$aa} = 0; } if ( !defined($filter{$aa}) ) { $filter{$aa} = 0; } $sum_pi += $pi{$aa}; $sum_snp += $snp{$aa}; $filter_site += $filter{$aa}; } my $d = tajima( $sample,$sum_pi,$sum_snp ); my $id = ($i + 1) * $step; my $mean_pi = 0; my $theta = 0; if ( $filter_site ) { $mean_pi = $sum_pi / $filter_site; } print BB "$chr_name\t$id\t$mean_pi\t$d\t$sum_snp\t$filter_site\n"; } close BB; sub pi { my $a = $_[0]; my $b = $_[1]; if ( $a == 0 || $b == 0 ) { return 0; } my $pi = ( 2 * $a * $b ) / ( ($a+$b) * ($a+$b-1) ); return $pi; } sub tajima { my $n = $_[0]; my $pi = $_[1]; my $t = $_[2]; my $a1; my $a2; for ( my $i=1; $i<$n; $i++ ){ $a1 += (1 / $i); $a2 += (1 / ($i*$i)); } my $b1 = ($n + 1) / ( 3 * ($n-1) ); my $b2 = 2 * ($n*$n + $n + 3) / (9 * $n * ($n-1)); my $c1 = $b1 - (1 / $a1); my $c2 = $b2 - ($n + 2) / ($a1 * $n) + $a2 / ($a1*$a1); my $e1 = $c1 / $a1; my $e2 = $c2 / ($a1*$a1 + $a2); if ( $t == 0) { return "NA"; next; } my $d = ( $pi - ($t/$a1) ) / sqrt ( $e1 * $t + $e2 * $t * ($t-1) ); return $d; }
说明:
1、 snp.data format
chr_name position reference_base ancestral_base major minor major_# minor_# HWE genotypes
ChrB01 945 A A G A 6 30 0.166728 GG/0.000003 GG/0.000198 AG/1.000000 GG/0.000000
ChrB01 946 G G T G 6 30 0.166734 TT/0.000003 TT/0.000394 GT/1.000000 TT/0.000000
ChrB01 1660 G G A G 5 31 0.138889 AA/0.000000 AA/0.000000 AG/1.000000 AA/0.000000
this is the standard format of snp.
2、 filter.site.data
ChrB01 190
ChrB01 191
ChrB01 192
ChrB01 193
3、the algorithm is from tajima's paper.
[] Fumio Tajima,Statistical Method for Testing the Neutral Mutation Hypothesis by DNA polymorphism
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。