#!/usr/bin/perl BEGIN{ $home = `echo \$HOME`; chomp $home} use lib "$home/PERL_MODULES"; my $thehome = $home; use Table; use Sets; use Fasta; use ClustalW; use strict; if (scalar(@ARGV) == 0) { die "Usage: do_fastcompare_alignment -fasta1 FILE -fasta2 FILE\n"; } my $fasta1 = Sets::get_parameter(\@ARGV, "-fasta1"); my $fasta2 = Sets::get_parameter(\@ARGV, "-fasta2"); my $outmat = Sets::get_parameter(\@ARGV, "-outmat"); my $f1 = Fasta->new; my $f2 = Fasta->new; $f1->setFile($fasta1); $f2->setFile($fasta2); open OUTM, ">$outmat" or die "cannot open $outmat for writing\n"; while (1) { my $a_ref1 = $f1->nextSeq; my $a_ref2 = $f2->nextSeq; last if (!$a_ref1 || !$a_ref2); my ($name1, $seq1) = @{$a_ref1}; my ($name2, $seq2) = @{$a_ref2}; my $len1 = length($seq1); my $len2 = length($seq2); my %freq = (); # global alignment (CLUSTALW) my $cl = ClustalW->new; $cl->setSequencesToAlign([ $name1 . "_1", $name2 . "_2" ], [ $seq1 , $seq2 ]); $cl->run; my $a_ref_aln = $cl->getInputOrderedSeqArray; my $aln1 = shift @$a_ref_aln; my $aln2 = shift @$a_ref_aln; #print "$aln1\n$aln2\n"; # calculate the frequency of change, for each letter my @a1 = split //, $aln1; my @a2 = split //, $aln2; my $l = length($aln1); my $l_actual = 0; for (my $i=0; $i<$l; $i++) { next if (($a1[$i] =~ /[\-\.N]/) || ($a2[$i] =~ /[\-\.N]/)) ; $freq{ $a1[$i] } { $a2[$i] } ++; $l_actual ++; } foreach my $k1 (keys(%freq)) { my $sum = 0.0; foreach my $k2 (keys(%{$freq{$k1}})) { $freq{ $k1 } { $k2 } /= $l_actual; $sum += $freq{ $k1 } { $k2 }; } foreach my $k2 (keys(%{$freq{$k1}})) { $freq{ $k1 } { $k2 } /= $sum; } } my $str = "$name1\t"; foreach my $k1 (keys(%freq)) { foreach my $k2 (keys(%{$freq{$k1}})) { $str .= "$k1/$k2/" . $freq{ $k1 } { $k2 } . "\t"; } } chop $str; print OUTM "$str\n"; print "aligned $name1 and $name2.\n"; } close OUTM;