# License Start: 
#                    Carnegie Mellon University                      
#                      Copyright (c) 2004                            
#                       All Rights Reserved.                         
#
# Permission is hereby granted, free of charge, to use and distribute
# this software and its documentation without restriction, including 
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of this work, and to    
# permit persons to whom this work is furnished to do so, subject to 
# the following conditions:                                          
#  1. The code must retain the above copyright notice, this list of  
#     conditions and the following disclaimer.                       
#  2. Any modifications must be clearly marked as such.              
#  3. Original authors' names are not deleted.                       
#  4. The authors' names are not used to endorse or promote products 
#     derived from this software without specific prior written      
#     permission.                                                    
#
# CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK       
# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING    
# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT 
# SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE    
# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES  
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 
# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,        
# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF     
# THIS SOFTWARE.                                                     
#
# Author: Satanjeev "Bano" Banerjee satanjeev@cmu.edu
# Author: Alon Lavie alavie@cs.cmu.edu
#
# License End.



package exact;

require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(setUpDataStructures);

sub setUpDataStructures
{
    my $firstStringWordsRef = shift;
    my $secondStringWordsRef = shift;
    my $string2OriginalPosRef = shift;
    my $string2MatchedPosRef = shift;
    my $multiChoiceWordIndexesRef = shift;
    my $posChoicesRef = shift;
    my $alreadyAlignedFirstStringRef = shift;
    my $alreadyAlignedSecondStringRef = shift;

    my $i;
    
    # Take the first string and create a hash that maps words to their
    # (list of) positions in the string. Do so only for words that have 
    # not yet been aligned in previous stages

    my %string1Pos = ();
    for ($i = 0; $i <= $#{$firstStringWordsRef}; $i++) 
    { 
	# skip if this word has already been aligned in previous stages
	next if (defined ${$alreadyAlignedFirstStringRef}{$i});
	push @{$string1Pos{${$firstStringWordsRef}[$i]}}, $i; 
    }
    
    # Do the same for the second string
    my %string2Pos = ();
    for ($i = 0; $i <= $#{$secondStringWordsRef}; $i++) 
    { 
       	next if (defined ${$alreadyAlignedSecondStringRef}{$i});
	push @{$string2Pos{${$secondStringWordsRef}[$i]}}, $i; 
    }

    # Now to construct the data structures for the alignment module 
    my $index = 0;
    for ($i = 0; $i <= $#{$secondStringWordsRef}; $i++)
    {
        # skip if this word has already been aligned
       	next if (defined ${$alreadyAlignedSecondStringRef}{$i});

	# skip if this word doesn't occur in first string
	next unless (defined $string1Pos{${$secondStringWordsRef}[$i]}); 
    
        ${$string2OriginalPosRef}[$index] = $i; # position in original 2nd string

	# check if occurs only once in both first and second string
	if (($#{$string1Pos{${$secondStringWordsRef}[$i]}} == 0) && 
	    ($#{$string2Pos{${$secondStringWordsRef}[$i]}} == 0))
	{
	    ${$string2MatchedPosRef}[$index] = ${$string1Pos{${$secondStringWordsRef}[$i]}}[0]; 
        }
        else 
	{ 
	    # okay, so this word has multiple pos choices
	    ${$multiChoiceWordIndexesRef}{$index} = 1;
	    
	    ${$string2MatchedPosRef}[$index] = ${$secondStringWordsRef}[$i]; # this will be the key into the $posChoicesRef hash
	    
	    # create pos choices hash element for this word, if not already created!
	    unless (defined ${$posChoicesRef}{${$string2MatchedPosRef}[$index]})
	    {
		# put in all the first-string positions for this word
		@{${$posChoicesRef}{${$string2MatchedPosRef}[$index]}} = @{$string1Pos{${$string2MatchedPosRef}[$index]}};
		
		# let d = number of occurrences in second string minus num
		# occ in first string for this word. Need d instances of
		# "-1" (skip) choices in the choices array.
		my $k; 
		for ($k = 0; $k < ($#{$string2Pos{${$string2MatchedPosRef}[$index]}} - $#{$string1Pos{${$string2MatchedPosRef}[$index]}}); $k++)
		{
		    push @{${$posChoicesRef}{${$string2MatchedPosRef}[$index]}}, "-1";
		}
	    }
	}
    
        $index++;
    }
}

1;
