-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnussinov.pl
128 lines (108 loc) · 4.27 KB
/
nussinov.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/perl -w
# BioE131 HW 5: Nussinov algortithm
# Input:
# Takes in both a one line RNA argument, reads directly from STDIN, or a -f followed by a file to read from.
# Output:
# Standard output is just the lowest score that the algorithm finds.
# Included are some commented out lines which print out the complete plot of the Nussinov algorithm and the time that the program took to run.
# These lines make the program *slightly* slower, so I commented them out in the spirit of competitiveness :).
#use strict;
#use Time::HiRes qw( gettimeofday ); # Time this program
#my $startTime = gettimeofday;
# Straight forward algorithm
sub NussinovBasic
{
# Init method variables
my $seqRef = pop @_;
#my $length = length($$seqRef);
my @seq = split("", $$seqRef);
my $length = scalar(@seq);
my %WCpairing = ( "A" => "U", "U" => "A", "G" => "C", "C" => "G" );
($length >= 2 or die "Sequence is only one base long!");
my(@plot, $i, $j, $k, $temp);
# Know the size wanted for array; so give the memory manager a hand and allocate it
$#plot = $length - 1;
# Numbering starts at 0,0 in the top right corner, down to $length-2 at the left and bottom ends of the plot
# This way avoids negative indexes (for good principles), because the code doesn't explicitly error check for the end of the arrays.
for($i = $length-2; $i >= 0; --$i)
{
# $length-(1+$i) is the width of the current row
# $#{$plot[$i]} = $length-(1+$i); # uncomment to set row length (didn't seem to be worth is)
for($j = $length-(2+$i); $j >= 0; --$j)
{
# Store the diagonal step (delta(i,j) + score(i-1,j-1)) in value / converts undefined to 0 in one step (save a scrap of memory)
my $value = $plot[$i+1][$j+1];
$value += $WCpairing{$seq[$i]} eq $seq[$length-(1+$j)];
if($plot[$i][$j+1] || $plot[$i+1][$j])
{
if($plot[$i][$j+1] > $value)
{
++$value;
}
# This slow (k) step should really be avoided, if possible
# So it is buried past the conditions that allow the other steps
# $plot[$i][$j+2] both checks if that cell is defined AND nonzero
if($plot[$i+1][$j] == $plot[$i][$j+1] && $plot[$i+1][$j] == $value)
{
for($k = $length - ($i + 2 + $j); $k > 1; $k -= $temp + 1)
{
if(($temp = $value - ($plot[$i][$j+$k] + $plot[($length - $j) - $k][$j])) < 0)
{
++$value;
last; # Again, it is proveable that the score will only increase by 1 at most. If we find a k step that is one more than the adjacent cells, increment $value and stop.
}
}
}
elsif($plot[$i+1][$j] > $value)
{
++$value;
}
}
$plot[$i][$j] = $value;
}
}
# Uncomment to print plot out for kicks
#my $printBufferSpace = 2;
#print STDERR "", (" " x $printBufferSpace), join((" " x $printBufferSpace),@seq), "\n";
#for($i = 0; $i <= $length-1; ++$i)
#{
# print STDERR "", (" " x $printBufferSpace), (" " x (($printBufferSpace+1)*($i))), "0";
# for($j = $length-(2+$i); $j >= 0; --$j)
# {
# print STDERR (" " x (($printBufferSpace+1)-length($plot[$i][$j])));
# print STDERR $plot[$i][$j];
# }
# print STDERR "", (" " x $printBufferSpace), "$seq[$i]\n";
#}
print "Score: $plot[0][0] ";
}
# Allow input from STDIN or Filename (with -f) or Sequence in @ARGV
my $sequence;
# File
if($ARGV[0])
{
if($ARGV[0] eq "-f")
{
open(FILE, $ARGV[1]) or die $!." \"$ARGV[1]\"";
$sequence = <FILE>;
}
else
{
$sequence = $ARGV[0];
}
}
# STDIN
else
{
$sequence = <>;
}
# Trim the string and make it caps
$sequence =~ s/\s+$//;
$sequence = uc($sequence);
# Verify the RNA string
if( $sequence =~ /([^AUGC])/ )
{
die "Sequence contains non-ribonucleotide at index ".length($`)."! '$1' \n\t";
}
NussinovBasic(\$sequence);
#print STDERR " Executed in ", gettimeofday - $startTime, "\n";