# License Start:
#                    Carnegie Mellon University
#                      Copyright (c) 2004
#                       All Rights Reserved.
#
# Permission is hereby granted, free of charge, to use and distribute
# this software and its documentation without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of this work, and to
# permit persons to whom this work is furnished to do so, subject to
# the following conditions:
#  1. The code must retain the above copyright notice, this list of
#     conditions and the following disclaimer.
#  2. Any modifications must be clearly marked as such.
#  3. Original authors' names are not deleted.
#  4. The authors' names are not used to endorse or promote products
#     derived from this software without specific prior written
#     permission.
#
# CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK
# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
# SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE
# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
# THIS SOFTWARE.
#
# Author: Satanjeev "Bano" Banerjee satanjeev@cmu.edu
# Author: Alon Lavie alavie@cs.cmu.edu
# Author: Abhaya Agarwal abhayaa@cs.cmu.edu
#
# License End.

package wn_synonymy;

# Export setUpDataStructures into the caller's namespace by default; it is
# the only entry point this module offers to other code.
require Exporter;
@ISA    = qw(Exporter);
@EXPORT = qw(setUpDataStructures);

# File-level state shared by the helper subs below. All three variables are
# (re)assigned from the last three arguments of every call to
# setUpDataStructures, so they always refer to whatever the most recent
# caller passed in.
my $wn;                         # wordnet object; the helpers call validForms, querySense and offset on it
my $wnValidFormsRef;            # hash ref: word => array ref of the forms returned by $wn->validForms
my $wnSynsetOffsetHashesRef;    # hash ref: word => hash ref whose keys are the word's synset offsets

# Create the alignment-ready data structures by matching words of the two
# strings that are synonyms of each other. Two words count as synonyms when
# the synset-offset hashes returned by create_offset_hash for them share at
# least one key.
#
# Arguments (positional):
#   $firstStringWordsRef           - array ref: words of the first string
#   $secondStringWordsRef          - array ref: words of the second string
#   $string2OriginalPosRef         - array ref (output): for each match, the
#                                    index of the word in the second string
#   $string2MatchedPosRef          - array ref (output): for each match, the
#                                    index of the matched first-string word
#   $multiChoiceWordIndexesRef     - accepted but not used by this module
#   $posChoicesRef                 - accepted but not used by this module
#   $alreadyAlignedFirstStringRef  - hash ref keyed by first-string index;
#                                    indexes with a defined value are skipped
#   $alreadyAlignedSecondStringRef - same, for the second string
#   $lang                          - accepted but not used by this module
#   $wn                            - wordnet object
#   $wnValidFormsRef               - hash ref: cache of validForms lookups
#   $wnSynsetOffsetHashesRef       - hash ref: cache of synset-offset hashes
#
# The last three arguments are stored in this file's shared variables so
# that the lookup helpers can reach them.
#
# Matching is greedy: second-string words are visited in order, each one is
# paired with the lowest-indexed first-string word that is still free and is
# a synonym, and a first-string word is used at most once. Matches are
# written to the two output arrays starting at index 0.
#
# Returns nothing; results are delivered through the output array refs.
sub setUpDataStructures {
    my (
        $firstStringWordsRef,          $secondStringWordsRef,
        $string2OriginalPosRef,        $string2MatchedPosRef,
        $multiChoiceWordIndexesRef,    $posChoicesRef,
        $alreadyAlignedFirstStringRef, $alreadyAlignedSecondStringRef,
        $lang
    ) = @_;
    ( $wn, $wnValidFormsRef, $wnSynsetOffsetHashesRef ) = @_[ 9 .. 11 ];

    # Build every synset-offset hash up front so that each word is looked up
    # only once, then search for synonymy.
    my %secondStringSynsetHash;
    for my $i ( 0 .. $#{$secondStringWordsRef} ) {
        next if defined $alreadyAlignedSecondStringRef->{$i};
        $secondStringSynsetHash{$i} =
          create_offset_hash( $secondStringWordsRef->[$i] );
    }

    my %firstStringSynsetHash;
    for my $j ( 0 .. $#{$firstStringWordsRef} ) {
        next if defined $alreadyAlignedFirstStringRef->{$j};
        $firstStringSynsetHash{$j} =
          create_offset_hash( $firstStringWordsRef->[$j] );
    }

    my %firstStringWordsUsedUp;    # first-string indexes consumed by a match
    my $index = 0;                 # next free slot in the output arrays

  SECOND_WORD:
    for my $i ( 0 .. $#{$secondStringWordsRef} ) {
        next SECOND_WORD if defined $alreadyAlignedSecondStringRef->{$i};

        # Depends only on $i, so fetch it once per outer iteration. A word
        # without an offset hash (e.g. an empty word) is treated as having
        # no synsets at all.
        my $offsets1ref = $secondStringSynsetHash{$i} || {};

        # The loop index is lexical: it used to be an undeclared package
        # global that every call overwrote.
      FIRST_WORD:
        for my $j ( 0 .. $#{$firstStringWordsRef} ) {
            next FIRST_WORD if defined $alreadyAlignedFirstStringRef->{$j};
            next FIRST_WORD if defined $firstStringWordsUsedUp{$j};

            my $offsets2ref = $firstStringSynsetHash{$j} || {};

            # Check for synonymy: is there any overlap between the offsets?
            my $syn = 0;
            for my $offset ( keys %{$offsets1ref} ) {
                if ( defined $offsets2ref->{$offset} ) {
                    $syn = 1;
                    last;
                }
            }
            next FIRST_WORD unless $syn;

            # They are synonyms: record the pair and retire the first-string
            # word so that it cannot be matched a second time.
            $string2OriginalPosRef->[$index] = $i;
            $string2MatchedPosRef->[$index]  = $j;
            $index++;

            $firstStringWordsUsedUp{$j} = 1;
            last FIRST_WORD;
        }
    }

    return;
}

# Build the hash table that represents the synsets a word belongs to, by
# delegating to the cached lookup in synset_offset_hash_lookup and handing
# back its result unchanged.
# The result is not entirely accurate because it does not differentiate
# between the various parts of speech of the word.
sub create_offset_hash {
    my ($word) = @_;

    # Return the call directly so that the caller's context reaches it.
    return synset_offset_hash_lookup($word);
}

# Cached validForms lookup: consult the validForms cache first and ask
# wordnet only when the word has no entry yet. The freshly fetched forms are
# stored in the cache as an array ref.
# Returns the cache entry itself (an array ref for entries created here),
# not a copy.
sub valid_forms_lookup {
    my ($word) = @_;

    # Cache miss: do the lookup and put the result in the cache.
    if ( !exists $wnValidFormsRef->{$word} ) {
        $wnValidFormsRef->{$word} = [ $wn->validForms($word) ];
    }

    return $wnValidFormsRef->{$word};
}

# Return the synset-offset hash of a word: a hash ref whose keys are the
# offsets of every synset that any valid form of the word belongs to (the
# values are all 1). The hash is taken from the synset-offset cache when
# present; otherwise it is built from wordnet and stored in the cache.
#
# An empty or undefined word has no synsets. It yields a cached empty hash
# ref, so that callers receive a hash ref in every case and in every calling
# context.
#
# The returned ref IS the cache entry. If you need to change it, make a
# separate copy first so that the cache remains untouched for the other
# modules that use it.
sub synset_offset_hash_lookup {
    my $w = shift;

    if ( !defined $w || $w eq "" ) {

        # Also replaces an undefined entry that may already be cached under
        # the empty key.
        $wnSynsetOffsetHashesRef->{""} = {}
          unless ref( $wnSynsetOffsetHashesRef->{""} ) eq 'HASH';
        return $wnSynsetOffsetHashesRef->{""};
    }

    unless ( exists $wnSynsetOffsetHashesRef->{$w} ) {
        my %offsets;

        # Get all the possible forms. This is a ref to the actual
        # validForms cache entry, so it is only read here.
        my $formsref = valid_forms_lookup($w);

        for my $form ( @{$formsref} ) {
            for my $synset ( $wn->querySense( $form, "syns" ) ) {
                $offsets{ $wn->offset($synset) } = 1;
            }
        }
        $wnSynsetOffsetHashesRef->{$w} = \%offsets;
    }

    return $wnSynsetOffsetHashesRef->{$w};
}

1;
