#!/usr/bin/perl BEGIN{ $home = `echo \$HOME`; chomp $home} use lib "$home/PERL_MODULES"; my $thehome = $home; use Table; use Sets; use Fasta; use ClustalW; use strict; if (scalar(@ARGV) == 0) { die "Usage: [perl] do_fastcompare_randomization -kmers -fasta1 FILE -fasta2 FILE -inmat FILE -nbrepeats INT -outz FILEcc\n"; } my $fasta1 = Sets::get_parameter(\@ARGV, "-fasta1"); my $fasta2 = Sets::get_parameter(\@ARGV, "-fasta2"); my $inmat = Sets::get_parameter(\@ARGV, "-inmat"); my $outz = Sets::get_parameter(\@ARGV, "-outz"); my $kmers = Sets::get_parameter(\@ARGV, "-kmers"); my $nbrepeats = Sets::get_parameter(\@ARGV, "-nbrepeats"); my $singlestrand = Sets::get_parameter(\@ARGV, "-singlestrand"); my $gap = undef; my $l = undef; die "please enter a list of kmers (with their scores) using -kmers\n" if (!defined($kmers)); my $fastcompare_command = ""; my $reply_fastcompare1 = system("fastcompare"); if ($reply_fastcompare1 == 0) { $fastcompare_command = "fastcompare"; } elsif ($reply_fastcompare1 == -1) { my $reply_fastcompare2 = system("./fastcompare"); if ($reply_fastcompare2 == 0) { $fastcompare_command = "./fastcompare"; } else { die "please have fastcompare installed either in your \$PATH or in the current directory\n"; } } else { die "pb with fastcompare ..\n"; } my %SCORES = (); # # read in kmers # my $ta = Table->new; $ta->loadFile($kmers); my $a_ref_res = $ta->getIndex(0); my $a_ref_kmers = $ta->getColumn(0); foreach my $k (@$a_ref_kmers) { if ($k =~ /^(.+?)(N+?)(.+?)$/) { $gap = length($2); if (length($1) != length($3)) { $l = length($1); } $k =~ s/N+//g; } } Sets::writeSet($a_ref_kmers, "kmers"); # # read in sequences # my $f1 = Fasta->new; my $f2 = Fasta->new; $f1->setFile($fasta1); $f2->setFile($fasta2); my $nseq = 0; my @NAMES1 = (); my @NAMES2 = (); my @SEQS1 = (); my @SEQS2 = (); while (1) { my $a_ref1 = $f1->nextSeq; my $a_ref2 = $f2->nextSeq; last if (!$a_ref1 || !$a_ref2); my ($name1, $seq1) = @{$a_ref1}; my ($name2, $seq2) = @{$a_ref2}; push @NAMES1, $name1; push @NAMES2, $name2; push @SEQS1 , $seq1; push @SEQS2 , $seq2; $nseq ++; } # # read in frequency files # my $ta = Table->new; $ta->loadFile($inmat); my $h_ref_freqs = $ta->getIndex(0); my %freq12 = (); my %freq21 = (); foreach my $k ( keys(%$h_ref_freqs) ) { my @a = @{ $h_ref_freqs->{$k} }; shift @a; foreach my $r (@a) { my @b = split /\//, $r; $freq12{ $k } -> { $b[0] } -> { $b[1] } = $b[2]; $freq21{ $k } -> { $b[1] } -> { $b[0] } = $b[2]; } } srand; # # traverse sequences # for (my $i=0; $i<$nbrepeats; $i++) { open OUTR1, ">random1"; open OUTR2, ">random2"; for (my $j=0; $j<$nseq; $j++) { # pick one of the sequences my $f = rand; my $newseq1 = undef; my $newseq2 = undef; if ($f < 0.5) { # mutate second sequence $newseq1 = $SEQS1[ $j ]; $newseq2 = mutate($SEQS1[ $j ], $freq12{ $NAMES1[ $j ] }); } else { # mutate first sequence $newseq1 = mutate($SEQS2[ $j ], $freq21{ $NAMES1[$j] }); $newseq2 = $SEQS2[ $j ]; } print OUTR1 ">$NAMES1[$j]\n$newseq1\n\n"; print OUTR2 ">$NAMES2[$j]\n$newseq2\n\n"; } close OUTR1; close OUTR2; # # run fastcompare # my $todo = "./fastcompare -fasta1 random1 -fasta2 random2 -kmers kmers"; if (defined($gap)) { $todo .= " -gap $gap"; if (defined($l)) { $todo .= " -l $l"; } } my $out = `$todo`; my @b = split /[\n\r]+/, $out; foreach my $l (@b) { my @a = split /\t/, $l, -1; push @{ $SCORES { $a[0] } }, $a[4]; } print "run " . ($i+1) . " done \r"; } my @Z = (); foreach my $k ( keys(%SCORES) ) { next if ($a_ref_res->{ $k }->[ 3 ] < 10); my $avg = Sets::average( $SCORES{ $k } ); my $std = Sets::stddev ( $SCORES{ $k } ); $std += 0.1; my $z = ($a_ref_res->{ $k }->[ 4 ] - $avg) / $std; #print "$k\t$z\n"; my @a_tmp = ($k, $z); push @Z, \@a_tmp; } @Z = sort { $b->[1] <=> $a->[1] } @Z; open OUTZ, ">$outz"; foreach my $r (@Z) { print OUTZ "$r->[0]\t$r->[1]\n"; } close OUTZ; sub mutate { my ($s, $h_ref_freq) = @_; my @a = split //, $s; my $newseq2 = ""; foreach my $n (@a) { my $c = Sets::generateRandomSymbol($h_ref_freq->{$n}); if ($c eq "") { $c = 'N'; } $newseq2 .= $c; } return $newseq2; }