package ClustalW;
use Sets;
sub new {
my ($self) = {};
$self->{FILE} = undef;
$self->{TEXT} = undef;
#my %tmp = ();
$self->{SEQS} = undef; #\%tmp;
$self->{STARS} = "";
$self->{NAMES} = undef;
$self->{SEQ_ARRAY} = [];
$self->{SEQ_UA} = [];
$self->{NAM_UA} = [];
bless($self);
return $self;
}
sub setSequences {
my ($self, $a_ref_n, $a_ref_s) = @_;
$self->{NAMES} = $a_ref_n;
$self->{SEQ_ARRAY} = $a_ref_s;
}
sub setSequencesToAlign {
my ($self, $a_ref_n, $a_ref_s) = @_;
$self->{NAM_UA} = $a_ref_n;
$self->{SEQ_UA} = $a_ref_s;
}
sub getClustalWformat {
my ($self) = @_;
my $l = length($self->{SEQ_ARRAY}->[0]);
my $n = scalar(@{ $self->{NAMES} });
my $txt = "CLUSTAL W(1.4) multiple sequence alignment\n";
$txt .= "\n\n";
my $nb_blocks = int($l / 50) + 1;
for (my $i=0; $i<$nb_blocks; $i++) {
for (my $j=0; $j<$n; $j++) {
$txt .= $self->{NAMES}->[$j];
$txt .= substr(" ", 0, 16 - length($self->{NAMES}->[$j]));
for (my $k=0; $k<5; $k++) {
$txt .= substr($self->{SEQ_ARRAY}->[$j], $i*50 + $k*10, 10);
# $txt .= " " if ($k != 4);
}
$txt .= "\n";
}
$txt .= "\n\n";
}
return $txt;
}
sub setFile {
my ($self, $s) = @_;
$self->{FILE} = $s;
$self->{HANDLE} = Sets::getRandomString("HANDLE");
open $self->{HANDLE}, $self->{FILE} or die "Cannot open $self->{FILE} ..\n";
#system("cat $self->{FILE}\n");
my %tmp = ();
$self->{SEQS} = \%tmp;
$self->{NAMES} = [];
$self->{STARS} = "";
$self->{TEXT} = undef;
$self->_process;
close $self->{HANDLE};
}
sub setText {
my ($self, $s) = @_;
$self->{TEXT} = $s;
my @a = split /[\r\n]/, $s;
$self->{TEXT_ARRAY} = \@a;
$self->{CNT} = 0;
$self->_process;
}
sub _readline {
my ($self) = @_;
if ($self->{FILE}) {
my $tmp = $self->{HANDLE};
return <$tmp>;
} else {
return $self->{TEXT_ARRAY}->[ $self->{CNT} ++];
}
}
sub run {
my ($self) = @_;
if (defined($self->{SEQ_UA})) {
# create a temp file
my $tmpfile = Sets::getTempFile("/tmp/toto");
$tmpfile .= ".seq";
open OUTCL, ">$tmpfile";
my $i = 0;
foreach my $s (@{ $self->{SEQ_UA} }) {
my $n = $self->{NAM_UA}->[$i];
print OUTCL ">$n\n$s\n\n";
$i++;
}
close OUTCL;
my $todo = "clustalw $tmpfile -OUTORDER=INPUT > /dev/null";
system($todo);
my $outfile = $tmpfile;
$outfile =~ s/\.seq$/\.aln/;
$self->setFile($outfile);
} else {
die "no sequences to align\n";
}
}
sub _process {
my ($self) = @_;
$self->{STARS} = "";
my $cnt = 0;
while (defined(my $l = $self->_readline)) {
chomp $l;
next if ($l =~ /CLUSTAL/);
next if ($l eq "");
#print "L=$l\n";
my ($n, $s) = $l =~ /^(.{16})(.+)$/;
$n =~ s/\s//g;
$s =~ s/\s//g;
if ($n eq "") {
$self->{STARS} .= $s;
} else {
if (!defined($self->{SEQS}->{$n})) {
$self->{NAMES}->[ $cnt ] = $n;
$cnt ++;
}
#print "adding \"$s\"\n";
$self->{SEQS}->{$n} .= $s;
}
}
#
# check
#
}
sub lc {
my ($self) = @_;
foreach my $k (keys(%{ $self->{SEQS} })) {
$self->{SEQS}->{$k} = lc ($self->{SEQS}->{$k});
}
}
sub getHTMLAlignment {
my ($self) = @_;
my $i_linewidth = 90;
my $i = 0;
my $s_output = "";
my @a = keys(%{ $self->{SEQS} });
my $l = length($self->{SEQS}->{$a[0]});
$s_output .= "
\n";
while ($i < $l) {
my $i_toend = $l - $i;
foreach my $k (keys(%{ $self->{SEQS} })) {
$s_output .= "\n";
$s_output .= "| \n";
$s_output .= $k;
$s_output .= " | \n";
$s_output .= "| ";
my $subseq = substr($self->{SEQS}->{$k}, $i, ($i_linewidth<$i_toend?$i_linewidth:$i_toend));
if ($self->{REDIFY} == 1) {
$subseq =~ s/([ATGC]{1,})/\<\/td\>\ | \$1\<\/font\>\<\/td\>\ | /g;
}
$s_output .= $subseq;
$s_output .= " |
| \n";
$s_output .= "\n";
$s_output .= "
\n";
}
$s_output .= "| | ";
my $s_colorized = substr($self->{STARS}, $i, ($i_linewidth<$i_toend?$i_linewidth:$i_toend));
$s_colorized =~ s/\ /\ \;/g;
$s_output .= $s_colorized;
$s_output .= " |
\n";
$s_output .= "\n";
$s_output .= "\n";
$i += $i_linewidth;
}
$s_output .= "
\n";
return $s_output;
}
sub kmerize {
my ($self, $a_ref_kmers) = @_;
foreach my $k (keys(%{ $self->{SEQS} })) {
foreach my $r (@$a_ref_kmers) {
my $se = $r->[0];
my $suc = uc($se);
$self->{SEQS}->{$k} =~ s/$suc/$se/ig;
my $se = Sets::getComplement($se);
my $suc = uc($se);
$self->{SEQS}->{$k} =~ s/$suc/$se/ig;
}
}
}
sub redify {
my ($self) = @_;
$self->{REDIFY} = 1;
}
sub getSeqArray {
my ($self) = @_;
my @a = ();
foreach my $k (keys(%{$self->{SEQS}})) {
push @a, $self->{SEQS}->{$k};
}
return \@a; #$self->{SEQS};
}
sub getInputOrderedSeqArray {
my ($self) = @_;
my @a = ();
foreach my $k (@{ $self->{NAMES} }) {
push @a, $self->{SEQS}->{$k};
}
return \@a;
}
sub getSeqs {
my ($self) = @_;
return $self->{SEQS};
}
sub getStars {
my ($self) = @_;
return $self->{STARS};
}
1;