#!/usr/bin/perl -w
# -*-Perl-*-
# Last changed Time-stamp: <2008-12-22 12:24:14 jana>
#
# @motif = ( [i,j, {"AU" => score, "GC" => score,... }], ... );
#
use strict;
use Getopt::Long;
use constant { INF => -99999};
use Data::Dumper;
use vars qw/$opt_debug/;

my $usage= << "JUS";

  USAGE:    perl $0 -i sequence-file [-a [-f][-s]] -m Motif
    
  options:  -i file:    file containing the sequence(s)
            -m motif:   either 'KTurn' or 'CLoop' (not implemented yet) [REQUIRED]
    
  note:     The input file is a Fasta file that contains the name of the
            sequence in the first line, the sequence itself in the second,
            and the last line must contain the structure information.

  results:  Program $0 searches a single sequence for 3 dimensional motifs.
            If the motif could be found, the positions relative to the
            alignment are returned.
    
  example: perl $0 -i 18S.structures.fasta
    
JUS


my ($in_file, $is_aln, $is_fasta, $s_file, $motif, $ok);
$is_aln = '';
$is_fasta = '';
$s_file = "";

#a=s a must be String
#b=i b must be Integer
#d   d = 1 =>  if 'd' is defined, esle d = undef 
usage() unless GetOptions("d|debug" => \$opt_debug,
                          "i=s" => \$in_file,
                          "a" => \$is_aln,
                          "f" => \$is_fasta,
			  "s=s" => \$s_file, # external structure file
			  "m=s" => \$motif,
			  "k" => \$ok
                          );

if($opt_debug || !$in_file || !$motif) {
  usage();
  exit(1);
}

$ok = 1;

# Watson-Crick+GU
my %WCGU = ("AU" => 100/6, "UA" => 100/6,
	    "CG" => 100/6, "GC" => 100/6,
	    "GU" => 100/6, "UG" => 100/6);
# Any base pair
my %ANY;
for my $x ('A','C','G','U') {
  for my $y ('A','C','G','U') {
    $ANY{$x.$y} = 100/16;
  }
}

# select motif
my (@Minsert, @Mlog, @Motif);

if($motif eq "CLoop") {
  @Minsert = ([[2,0,1]],[[1,0,2],[0,0,1]]);
  @Motif = ( [0,3, \%WCGU],
	     [1,2, {"GC" => 92.7, "CG" => 2.7, "AU" => 2.7, "UA" => 0.3}],
	     [2,1, {"CA" => 94.7}],
	     [4,2, {"AC" =>  0.3, "CC" => 0.1, "CG" => 22.0, "GA" => 69.3}],
	     [5,1, {"UA" => 96.8, "GC" => 2.4, "CG" => 0.4, "CA" => 0.1}],
	     [6,0, \%WCGU],
	     [3,-1, {"A" => 100 }]
	     );
}
elsif($motif eq "KTurn") {
  @Minsert = ([[3,0,2]],[[4,0,1]]);
  @Motif = ( [0,8, \%WCGU],
	     [1,7, \%ANY],
	     [2,6, {"GA" => 88.3, "AA" => 3.8, "AC" => 6}],
	     [3,5, {"AG" => 66.3, "AA" => 27.7, "AC" => 3.1, "AU" => 2.5}],
	     [4,1, {"GC" => 57.2, "AU" => 41.1}],
	     [3,2, {"AG" => 23.4, "AU" => 67.3, "AC" => 2.9, "AA" => 6.1}],
	     [4,6, {"GA" => 57.7, "AA" => 40.9}],
	     [5,0, \%WCGU]);
}
@Mlog = logify(\@Motif);
#print Dumper(\@Mlog);

#printf "first\n";
#printf "scoreM:  %s\n",score_motif("CGCAAUG", "UACG", \@Mlog);
#printf "23S C-50:%s\n",score_motif("AGCACUG", "CACU", \@Mlog);
#printf "23S C-38:%s\n",score_motif("AGCACCG", "CACU", \@Mlog);
#printf "16S C-15:%s\n",score_motif("CGCAAUG", "UACG", \@Mlog);
#printf "23S C-96:%s\n",score_motif("GCCACUG", "CAGC", \@Mlog);
#print score_motif("GUAACGC", "GCAGU", \@Mlog), "\n";
#print score_motif("GGGAGC","GCGAAGAAC", \@Mlog), "\n";
#print score_motif("GGGAGG","CCGAAGAAC", \@Mlog), "\n";
#printf "\n";
#my $nope=0;
#for (1..100000) {
#    my ($s1,$s2);
#    for my $p (1..7) {
#       $s1 .= ('A','C','G','U')[rand(4)];
#    }
#    for my $p (1..4) {
#       $s2 .= ('A','C','G','U')[rand(4)];
#    }
#    my $s = score_motif($s1, $s2, \@Mlog);
#
#    if ($s>INF) {
#       printf "%f %s %s\n",$s,$s1,$s2;
#    } else {
#       $nope++;
#    }
#}
#print $nope/10000,"\n";

#die;

my (@input, @output, @o, @gapLess, %gap2seq);



# RNAfold file is input file
if($ok) {

  my (%data, $key);

  open(F, $in_file) or die $!; my @s = <F>; close(F);

  if(substr($s[0],0,1) ne ">") {
    printf "\nERROR: no RNAfold output (1)\n\n";
    exit(0);
  }
  
  foreach my $s (@s) {
    chomp($s);
    if($s =~ /^>/) {
      $key = $s;
    }
    elsif($s =~ /^([AUGCNTaugcnt]+)/) {
      ${$data{$key}}[0] = $1;
    }
    elsif($s =~ /^([\(\.\)]*)\s+\S+/) {
      ${$data{$key}}[1] = $1;
    }
    else {
      printf "*%s*\n",$s;
      printf "\nERROR: no RNAfold output (2)\n\n";
      exit(0); 
    }
  }

  foreach my $key (keys %data) {

    my @output = searchSeq(${$data{$key}}[0],${$data{$key}}[1]);
    if(scalar @output != 0) { 
      printf "%s\n",$key;
      print @output;
    }
  }

}
else {
if($is_aln) {
   if($is_fasta) {
    # read fasta format
    # printf "\nAlignment search (FASTA):\n";
    @input = readFastaAln($in_file,$s_file);
  }
  else {
    # read clustal format
    @input = readClwAln($in_file,$s_file);
  }
  @output = search(@input);
  if(scalar @output > 0 && $output[0] ne "") {
    printf ">%s\n",$in_file;
    print @output;
    printf "\n";
  }
}
else {
  # do single sequence search
  # printf "\nSingle sequence search:\n\n";
  @input = readSingleSeq($in_file,$s_file);
  @output = searchSeq($input[0], $input[1]);
  print @input; die;
  if(scalar @output > 0) {
    print @output;
  }
  else { printf "Motif not found\n"; }
  printf "\n";
}
}

# --- start FUNCTIONS ---

###
# --- input reading functions
#
# 1) input is FASTA formatted alignment
sub readFastaAln {
  my ($file, $str_f) = @_;
  my (@ret, $is_struc, $struc);
  my $i = -1;
  my $length = 0;

  open(F, $file) or die $!; my @lines = <F>; close(F);

  foreach my $l (@lines) {
    if($l =~ /^>structure/) {
      $is_struc = 1;
      $struc = "";
    }
    elsif($l =~ /^>\S+/) {
      $is_struc = 0;
      $i++;
      $ret[$i] = "";
    }
    elsif($l =~ /^([augctnAUGCTN-]+)$/ && $is_struc == 0) {
      $ret[$i] .= $1;
      $ret[$i] =~ s/T/U/g;
    }
    elsif($l =~ /^([\.\(\)]+)$/ && $is_struc == 1) {
      $struc .= $1;
    }
  }

  if($is_struc == 0 && $str_f eq "") {
    printf STDERR "\nERROR: Your fasta file does not contains a structure \n";
    printf STDERR "       and no external structure file is given\n\n";
    usage();
    exit(0);
  }
  elsif($is_struc == 0 && $str_f ne "") {
    my @x = split /\//,$file;
    open(S, $str_f) or die $!; my @murx = <S>; close(S);
    for(my $m=0; $m<scalar @murx; $m++) {
      if($murx[$m] =~ /^>$x[scalar @x -1]/) {
        my @k = split /\s+/,$murx[$m+2];
	$struc = $k[0];
	last;
      }
    }
    close(S);
  }

  push @ret, $struc;

  $length = length $ret[0];
  foreach my $s (@ret) {
    if(length $s != $length) {
      printf STDERR "\nERROR: Unequal sequence/structure length in alignment file\n\n";
      print $usage;
      exit(0);
    }
  }

  return @ret;
  
}

# ---------------------------------------------------------------------------------------

# 2) input is CLUSTAL W formatted alignment
sub readClwAln {
  my ($file,$str_f) = @_;
  my $length = 0;
  my (@ret,%hash,$struc);
  
  if(length $str_f > 0) {
    my @x = split /\//,$file;
#    my $grep = `grep -A2 '$x[scalar @x -1]' $str_f`;
    my $bla = $file;
    $bla =~ s/\//\\\//g;
    my $grep = `grep -A2 '$bla' $str_f | awk '{if(NF>1) print \$1;}'`;
#    print $grep;
    $struc = $grep;
    $struc =~ s/\n$//;
    open(F, $file) or die $!; my @lines = <F>; close(F);
    my ($key,$seq);
    foreach my $l (@lines) {
      next if($l =~ /CLUSTAL/ || $l =~ /^\s*$/ || $l =~ /^[\s\*]*$/);
      if($l =~ /^(\S+)\s+(\S+)$/) {
	if(exists($hash{$1})) {
	  $hash{$1} .= $2;
	  $hash{$1} =~ s/T/U/g;
	}
	else {
	  $hash{$1} = $2;
	  $hash{$1} =~ s/T/U/g;
	}
      }
    }
  }
  else {
    open(F, $file) or die $!; my @lines = <F>; close(F);
    my ($key,$seq, $struc);
    $struc = "";
    foreach my $l (@lines) {
      next if($l =~ /CLUSTAL/ || $l =~ /^\s*$/ || $l =~ /^[\s\*]*$/);
      next if($l=~ /^$/);
      if($l =~ /^structure\s+(\S+)/) { $struc .= $1; }
      elsif($l =~ /^(\S+)\s+(\S+)$/) {
	if(exists($hash{$1})) {
	  $hash{$1} .= $2;
	  $hash{$1} =~ s/T/U/g;
	}
	else {
	  $hash{$1} = $2;
	  $hash{$1} =~ s/T/U/g;
	}
      }
    }
    if(length $struc == 0) {
      printf STDERR "\nERROR: Your alignment file does not contain secondary\n";
      printf STDERR "structrue information and no external structure file is given\n\n";
      print $usage;
      exit(0);
    }
  }

  @ret = values %hash;
  push @ret,$struc;    

  $length = length $ret[0];
  printf "%d\n",$length;
  #foreach my $s (@ret) {
  #  if(length($s) != $length) {
  #	printf "%d\n",length($s);
  #    printf STDERR "ERROR: Unequal sequence/structure length in alignment file\n\n";
  #    exit(0);
  #  }
  #  else {printf "%s\n",$s;}
  #}

  return @ret;
  
}

# ---------------------------------------------------------------------------------------

# 2) input is single sequence
sub readSingleSeq {
  my ($file,$str_f) = @_;
  my ($key,@ret);

  open(F, $file) or die $!; my @lines = <F>; close(F);
  if($lines[0] =~ /^>(.*)$/) { $ret[0] = ""; }
  else { print STDERR "\nInput file not a single sequence FASTA file\n\n"; }
  for(my $i=1; $i<scalar @lines; $i++) {
    if($lines[$i] =~ /^([augctAUGCT]+)$/) {
      $ret[0] .= $1;
      $ret[0] =~ s/T/U/g;
    }
    elsif($lines[$i] =~ /^>structure/) {
      $ret[1] = "";
    }
    elsif($lines[$i] =~ /^([\(\)\.]+)$/) {
      $ret[1] .= $1;
    }
  }
  if(!exists($ret[1]) && $str_f eq "") {
    printf STDERR "\nERROR: Your fasta file contains no structure \n";
    printf STDERR "       and no external structure file is given\n\n";
    print $usage;
    exit(0);
  }
  elsif(!exists($ret[1]) && $str_f ne "") {
    my @x = split /\//,$file;
    open(S, $str_f) or die $!; my @murx = <S>; close(S);
    for(my $m=0; $m<scalar @murx; $m++) {
      if($murx[$m] =~ /^>$x[scalar @x -1]/) {
        my @k = split /\s+/,$murx[$m+2];
	$ret[1] = $k[0];
	last;
      }
    }
    close(S);
  }

  return @ret;

}

# ---------------------------------------------------------------------------------------

###
# --- search functions
#

# 1) search Single sequence for Kink-turn motif
sub searchSeq {
  # searches a single sequence for the Motif motif
  # args: seqence, structure
  # return: uniq @list of positions were motif was found
  my ($seq, $struc) = @_;


#  printf "%s\n%s\n",$seq,$struc;
#  printf "%-51s%s"," ",substr($seq,51);
#  chomp $seq;
#  $_ = $struc;
#  $struc = (split)[0];
  
 # print $struc; die;
  my @pt = make_pair_table($struc);

  my ($minl, $maxl, $minr, $maxr, $nl, $nr) = (0,0,0,0,0,0);
  foreach my $i (@{$Minsert[0]}) {
    $minl += $i->[1];
    $maxl += $i->[2];
  } 
  foreach my $i (@{$Minsert[1]}) {
    $minr += $i->[1];
    $maxr += $i->[2];
  }
  foreach my $p (@Motif) {
    $nl = $p->[0] if $p->[0]>$nl;
    $nr = $p->[1] if $p->[1]>$nr;
  }
  
  my ($l_ins,$r_ins) = explode_inserts(@Minsert);

  
  
  my $res = "";
  my @ret;
  for my $i (0..length($seq)-1) {
    my $j = $pt[$i];
    next if $j<0;
    for my $il ($minl .. $maxl) {
      my $k = $i+$nl+$il;
      next if $k>=$j;
      my $l = $pt[$k];
      next if $l<$k;
      my $ir = $j-$l-$nr;
      next if (($ir<$minr) || ($ir>$maxr));
      foreach my $lins (@{$l_ins->[$il]}) {
	foreach my $rins (@{$r_ins->[$ir]}) {
	  if($i< (length $seq) - ($nl+$il+1) && $j<(length $seq) - ($nr+$ir+1)) {
	    my ($s1,$s2) = splice_ins(substr($seq,$i,$nl+$il+1),
				      substr($seq,$l,$nr+$ir+1),
				      $lins, $rins);
	    my $score = score_motif($s1,$s2,\@Mlog);
	    # scores fuer mehr motife adden + verarbeiten
	    $res = "$i,$j,$k,$l, $s1, $s2, $score\n" if $score>-10;
	    # i: start motif 5'
	    # j: end motif 3'
	    # k: end motif 5'
	    # l: start motif 3'
	    # s1: 5' sequence
	    # s2: 3' sequence
	    # score: score
	   # print $res;
	    push @ret, $res;
	  }
	}
      }
    }
  }
  return removeDbl(@ret);
  
}

# ---------------------------------------------------------------------------------------

# 2) search aligned sequence for Kink-turn motif
###
# 1. remove structure information on all positions were gaps occur in the aligned sequences
# 2. search motif in all sequences (remove double entries)
# 3. parse back to alignment coords, create cluster for each sequence
# 4. check if hits overlap with respect to the alignment
#   if yes: retain alignment as positive
# args: array of aligned sequences, such that consensus structure is last entry
# 
sub search {
  my @arr = @_;

# 1. remove structure information on all positions were gaps occur in the aligned sequences
  my $cons_struc = $arr[scalar @arr-1];
  my (@singles, $tmpSeq, $tmpStr, @gapLess, @cluster, @cl_all);

#  print join "\n",@arr;
  
  splice @arr,scalar @arr - 1,1;

  foreach my $seq (@arr) {

    my @seq = split //,$seq;
    my @struc = split //,$cons_struc;
    my %gap2seq;

    for(my $i=0; $i<scalar @seq; $i++) {
      $gap2seq{$i} = 1 if($seq[$i] eq "-");
      $gap2seq{$i} = 0 if($seq[$i] ne "-");
    }

    my $s = 0; my $m = 0;
    my ($p,$l,$r) = (0,0,0);
    do {
#      print join "",@seq;
#      printf " (%d)\n",$s;
#      print join "",@struc;
#      printf " (%d)\n",$s;
      if($seq[$s] eq "-") {
	$seq[$s] = "x";
	if($struc[$s] eq ")") {
	  $p = $s; $l = 0; $r = 1;
	  while(($r-$l)>0) {
	    $p--;
	    $l++ if($struc[$p] eq "(");
	    $r++ if($struc[$p] eq ")");
	  }
	  $struc[$p] = ".";
	  $struc[$s] = "y";
	  $s = $p;
	}
	elsif($struc[$s] eq "(") {
	  $p = $s; $l = 1; $r = 0;
	  while(($l-$r)>0) {
	    $p++;
	    $l++ if($struc[$p] eq "(");
	    $r++ if($struc[$p] eq ")");
	  }
	  $struc[$p] = ".";
	  $struc[$s] = "y";
	}
      }
      $s++;
    }while((join "",@seq) =~ /\-/);
    
    my $sequence  = join "",@seq;
    my $structure = join "",@struc;
    $sequence  =~ s/x//g;
    $structure =~ s/y//g;

#    printf "%s\n%s\n%s\n%s\n",$seq,$cons_struc,$sequence,$structure;

    # 2. search motif in current sequence (remove double entries)
    my @t = searchSeq($sequence,$structure);
    if(scalar @t == 0) {
      return "";
    }
#    print @t;
#    print "\n";
    
    # 3a. parse back to alignment coords, create cluster for each sequence
    my @parsed;
    foreach my $part (@t) {
      next if(length $part == 0);
      my @coords = split /\,/,$part;
      # before 5' stem => change all values
      my $add = 0;
      for(my $p=0; $p<=$coords[0]; $p++) {
	if($gap2seq{$p} == 1) { $add++; }
      }
      if($add > 0) {
	for(my $c=0; $c<4; $c++) { $coords[$c] += $add; }
      }
      # in 5' stem => change only last 3 values
      $add = 0;
      for(my $p=$coords[0]; $p<=$coords[2]; $p++) {
	if($gap2seq{$p} == 1) { $add++; }
      }
      if($add > 0) {
	for(my $c=1; $c<4; $c++) { $coords[$c] += $add; }
      }
      # inbetween => change only second and fourth value
      $add = 0;
      for(my $p=$coords[2]; $p<=$coords[3]; $p++) {
	if($gap2seq{$p} == 1) { $add++; }
      }
      if($add > 0) {
	$coords[1] += $add;
	$coords[3] += $add;
      }
      # in 3' stem => change only second value
      $add = 0;
      for(my $p=$coords[3]; $p<=$coords[1]; $p++) {
	if($gap2seq{$p} == 1) { $add++; }
      }
      if($add > 0) {
	$coords[1] += $add;
      }
      push @parsed,join " ",@coords;
    }
    if(scalar @parsed == 0) {
      return "";
    }

    # 3b. create cluster for each sequence
    my ($t);
    my @coords = split /\s+/,$parsed[0];
    my $s5 = scalar $coords[0]; my $e5 = scalar $coords[2];
    my $s3 = scalar $coords[3]; my $e3 = scalar $coords[1];
    my $sum_score = $coords[6];
    for(my $p=1; $p<scalar @parsed; $p++) {
      my @coords = split /\s+/,$parsed[$p];
      if(overlap($s5,$e5,scalar $coords[0],scalar $coords[2]) &&
	 overlap($s3,$e3,scalar $coords[3],scalar $coords[1])) {
	$sum_score += scalar $coords[6];
	$s5 = min($s5,scalar $coords[0]); $e5 = max($e5,scalar $coords[2]);
	$s3 = min($s3,scalar $coords[3]); $e3 = max($e3,scalar $coords[1]);
      }
      else {
	$t = $s5." ".$e5." ".$s3." ".$e3." ".$sum_score/scalar @parsed;
	push @cluster,$t;
	
      }
    }
    $t = $s5." ".$e5." ".$s3." ".$e3." ".$sum_score/scalar @parsed;
    push @cluster,$t;

  }

  # 4. check if hits overlap with respect to the alignment
  my $ol_count = 0;
  my @c = split /\s+/,$cluster[0];
  my $s5 = scalar $c[0]; my $e5 = scalar $c[1];
  my $s3 = scalar $c[2]; my $e3 = scalar $c[3];
  my $sum_score = $c[4];
  my $t;
  my $c_count = 1;
  for(my $p=1; $p<scalar @cluster; $p++) {
    my @c = split /\s+/,$cluster[$p];
    if(overlap($s5,$e5,scalar $c[0],scalar $c[1]) &&
       overlap($s3,$e3,scalar $c[2],scalar $c[3])) {
      $sum_score += scalar $c[4];
      $s5 = min($s5,scalar $c[0]); $e5 = max($e5,scalar $c[1]);
      $s3 = min($s3,scalar $c[2]); $e3 = max($e3,scalar $c[3]);
      $c_count++;
    }
    else {
      $t = $s5." ".$e5." ".$s3." ".$e3." ".$sum_score/$c_count;
      push @cl_all,$t;
      
    }
  }
  $t = $s5." ".$e5." ".$s3." ".$e3." ".$sum_score/$c_count;
  push @cl_all,$t;

  if($c_count == scalar @arr) {
    return @cl_all;
  }
  else { return ""; }
  
}

# ---------------------------------------------------------------------------------------

###
# --- utility functions
#

sub usage {
  print STDERR $usage;
  exit(0);
}

sub min {
  my ($a,$b) = @_;

  if(scalar $a < scalar $b) { return $a; }
  else { return $b; }
}

sub max {
  my ($a,$b) = @_;

  if(scalar $a > scalar $b) { return $a; }
  else { return $b; }
}

sub removeDbl {
  my @arr = @_;

  my %map;
  foreach my $a (@arr) {
    $map{$a} = 1;
  }
  return keys %map;
}

# PURPOSE: Find out if region s2-e2 overlaps to x% with region s1-e1
#          x% in (0,1)
# RETURNS: 1 in case they ovelap, otherwise 0.
sub overlap {
  my($s1, $e1, $s2, $e2) = @_;
  my $overlap = 0;
  my $type = '';
  if($e1<$s2 || $e2<$s1){ # no overlapping
    $type = "no ol"; #return (0, $type, 0, 0);
    $overlap = 0;
  }
  elsif($s1<=$s2 && $e1>=$e2){ # complete overlapping
    $overlap = $e2-$s2+1;
    $type = 'compl';
  }
  elsif($s2<=$s1 && $e2>=$e1){ # complete overlapping
    $overlap = $e1-$s1+1;
    $type = 'compl';
  }
  elsif($s1>=$s2 && $s1<=$e2){ # partly overlapping
    $overlap = $e2-$s1+1;
    $type = 'part';
  }
  elsif($s2>=$s1 && $s2<=$e1){ # partly overlapping
    $overlap = $e1-$s2+1;
    $type = 'part';
  }
  else{
    die "ERROR: One case of overlapping not detected (sub
'overlapping')!\n\n";
  }
  
  if($overlap == 0) { return 0;}
  else { return 1; }
}

# ---------------------------------------------------------------------------------------

###
# --- motif functions
#

# 1) score found motif
sub score_motif {
    # score the core motif without insertions 
    # (i.e. splice out insertions before calling score_motif())
    # $s1 = 5' stem sequence
    # $s2 = 3' stem sequence
    my ($s1, $s2, $motif) = @_;
    my $score = 0;

    foreach my $p (@$motif) {
	my ($i,$j,$hr) = @$p;
#	if($j< (length $s2) - 2 && $j<(length $s1) -2) {
	if($j< (length $s2) && $i<(length $s1)) {
	  if($i == -1) {
	    return INF unless exists($hr->{substr($s2,$j,1)});
	    $score += $hr->{substr($s2,$j,1)};
	    next;
	  }
	  if($j == -1) {
	    return INF unless exists($hr->{substr($s1,$i,1)});
	    $score += $hr->{substr($s1,$i,1)};
	    next;
	  }
	  return INF unless exists($hr->{substr($s1,$i,1) . substr($s2,$j,1)});
	  $score += $hr->{substr($s1,$i,1) . substr($s2,$j,1)};
#	  printf "score +: %f %f %s %d %d\n",$score,$hr->{substr($s1,$i,1) . substr($s2,$j,1)},substr($s1,$i,1) . substr($s2,$j,1), $i, $j;
#	 print Dumper($hr);
	}
    }

    return $score;
}

# ---------------------------------------------------------------------------------------

# 2) convert probs to log odd scores, does a deep copy on the way 
sub logify {
    my $motif = shift;
    my @new;
    foreach my $p (@$motif) {
	my ($i,$j,$hr) = @$p;
	my %h;
	my $factor = 16;
	if($i == -1 || $j == -1) {$factor = 4;}
	foreach my $k (keys %$hr) {
	    $h{$k} = log($factor*$hr->{$k}/100);
	}
	push @new, [$i,$j,\%h];
    }
    return @new;
}

# ---------------------------------------------------------------------------------------

# 3)  produce the list of all possible insertions (left and right)
sub explode_inserts {
    my ($lins, $rins) = @_;
    my @left  = explode_once(@$lins);
    my @right = explode_once(@$rins);
    return (\@left, \@right);
}

# ---------------------------------------------------------------------------------------

# 4) get all possible insertions 
sub explode_once {
    my @ins = @_;
    my @list = ();
    my $tot = 1;
    
    # compute total number of combinations
    foreach my $in (@ins) {
	my ($pos,$min,$max) = @$in;
	$tot *= $max-$min+1;
    }
    for my $i (0..$tot-1) {
	# produce combination number $index
	my $index = $i;
	my @stack = ();
	my $sum = 0;
	foreach my $in (@ins) {
	    use integer;
	    my ($pos,$min,$max) = @$in;
            # number of insertions at this position
	    my $k = ($index % ($max-$min+1)) + $min; 
	    push @stack, [$pos, $k];
	    $sum += $k;
	    $index /=  $max-$min+1;
	}
	# add this combination to list indexed by total length of insertion
	push @{$list[$sum]}, [@stack];
    }
    return @list;
} 

# ---------------------------------------------------------------------------------------

# 5) splice insertions
sub splice_ins {
    my ($s1, $s2 ,$lins, $rins) = @_;
    my @lins = sort {$a->[0] <=> $b->[0]} @{$lins};
    my @rins = sort {$a->[0] <=> $b->[0]} @{$rins};
    foreach my $i (@lins) {
	substr($s1,$i->[0],$i->[1],'');
    }
    foreach my $i (@rins) {
	substr($s2,-$i->[0],$i->[1],''); 
    }
    return ($s1,$s2);
}

# ---------------------------------------------------------------------------------------

# 6) base pair table
sub make_pair_table {
   my($structure) = shift;
   #indices start at 0 in this version!
   my($j, @olist, @table);

#   print $structure; die;
   my ($hx,$i) = (0,0);
   my @st_ar = split //,$structure;
   
   foreach my $c (@st_ar) {
     if ($c eq '.') {
       $table[$i]= -1;
     } elsif ($c eq '(') {
       $olist[$hx++]=$i;
     } elsif ($c eq ')') {
       $j = $olist[--$hx];
       if($hx<0) {
	 printf STDERR "ERROR:\nUnbalanced brackets in make_pair_table\n";
	 exit(0);
       }
       $table[$i]=$j;
       $table[$j]=$i;
     }
     $i++;
   }
   if($hx!= 0) {
     printf STDERR "ERROR:\nToo few closed brackets in make_pair_table\n";
     exit(0);
   }
   
   
   
   return @table;
}

# --- end FUNCTIONS ---
# --- end PROGRAM ---


