# License Start: 
#                    Carnegie Mellon University                      
#                      Copyright (c) 2004                            
#                       All Rights Reserved.                         
#
# Permission is hereby granted, free of charge, to use and distribute
# this software and its documentation without restriction, including 
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of this work, and to    
# permit persons to whom this work is furnished to do so, subject to 
# the following conditions:                                          
#  1. The code must retain the above copyright notice, this list of  
#     conditions and the following disclaimer.                       
#  2. Any modifications must be clearly marked as such.              
#  3. Original authors' names are not deleted.                       
#  4. The authors' names are not used to endorse or promote products 
#     derived from this software without specific prior written      
#     permission.                                                    
#
# CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK       
# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING    
# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT 
# SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE    
# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES  
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 
# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,        
# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF     
# THIS SOFTWARE.                                                     
#
# Author: Satanjeev "Bano" Banerjee satanjeev@cmu.edu
# Author: Alon Lavie alavie@cs.cmu.edu
#
# License End.



# This is a stand alone program that shows how to use the matcher
# module "mStageMatcher.pm". This program takes as input a text file
# containing two sentences (each on a line of its own) and uses
# mStageMatcher.pm to find an alignment between the two
# sentences. This program outputs the number of words matched and the
# alignment score at each stage, the total number of chunks and the
# average number of words per chunk overall. The main aim of this
# program, though, is to show how to use mStageMatcher.pm.


# How to use mStageMatcher.pm:
# ============================
# To create alignments and to compute matches and chunks etc between
# two strings of words, call subroutine "match" which is implemented
# in the matcher module mStageMatcher. Subroutine "match" takes as
# input a reference to a hash. This hash should have the following
# key-value pairs.
#
# Mandatory keys: 
# firstString -> the first string of words
# secondString -> the second string of words
# modules -> array of module names (1 or more)
#
# Optional keys: 
# wn -> WordNet object / undefined 
# maxComputations -> integer/undefined
# details -> 0/1/undefined
# debug -> 0/1/undefined
# prune -> 0/undefined=off, 1=on
#
# Output keys (these keys and their values will get defined as the
# output of the match subroutine): 
# matchScore -> double dimensioned array with as many rows as modules. For row i, col 0 = number of matches, col 1 = score, for module i
# numChunks -> number of chunks in final match
# avgChunkLength -> average words in each chunk
# detailString -> string containing all text output due to "details" field being set to 1
# debugString -> string containing all text output due to "debug" field being set to 1


use mStageMatcher; # the actual matcher module 
use Getopt::Long; # for processing the command line
use WordNet::QueryData; # to interface with WordNet

# Process the command line options first. GetOptions removes every
# option it recognizes from @ARGV, so afterwards @ARGV holds only the
# positional arguments: the input file name followed by the module
# names. With these old-style option specifications GetOptions stores
# the values in the globals $opt_details and $opt_maxComputations.
my $optionsOk = GetOptions("details", "maxComputations=i");

# Show the usage message if an option could not be parsed or if no
# input file name is left. The check is made only after the options
# have been processed so that a command line consisting of nothing but
# options is caught here too, instead of failing later while trying to
# open a non-existent file name.
if (!$optionsOk || $#ARGV == -1) {
  print "Usage: perl standAloneMatcher.pl [--details] [--maxComputations N] <filename> <module 1> <module 2> ...\n";
  print "  Input file must have the two strings each on a line of its own.\n";
  print "  The second string will be aligned to the first one.\n";
  # A plain request for usage exits normally; a bad option is an error
  exit($optionsOk ? 0 : 1);
}

# if details has been requested, switch on detailed view!
if (defined $opt_details) { 
  $inputHash{"details"} = 1;
}

# if maxComputations has been specified, set it up
if (defined $opt_maxComputations) {
  $inputHash{"maxComputations"} = $opt_maxComputations;
}

# Extract the two strings from the supplied input file. The file name
# is the first remaining command line argument; the first line of the
# file becomes "firstString" and the second line "secondString", both
# with their trailing newline removed.
my $fileName = shift;
die "No input file specified\n" unless defined $fileName;

# Use the three argument form of open with a lexical file handle so
# that the file name is always taken literally and never interpreted
# as an open mode or a pipe. $! reports why the open failed.
open(my $inputFile, "<", $fileName) || die "Couldn't open $fileName: $!\n";

foreach my $key ("firstString", "secondString") {
  my $line = <$inputFile>;

  # Refuse to continue with an undefined string if the file is too short
  die "$fileName must contain two lines, one string per line\n" unless defined $line;

  chomp $line;
  $inputHash{$key} = $line;
}

close($inputFile);

# Whatever is left on the command line is the list of module names
$inputHash{"modules"} = [@ARGV];

# WordNet is only needed by WordNet based modules, which are assumed
# to have names starting with "wn_"; create the WordNet object if at
# least one such module has been requested
if (grep { /^wn_/ } @{$inputHash{"modules"}}) {
  $inputHash{"wn"} = WordNet::QueryData->new;
}

# Always run the matcher with pruning switched on
$inputHash{"prune"} = 1;

# Run the matcher; its results are read back out of %inputHash below
match(\%inputHash);

# First output line: how many modules were used
print scalar(@{$inputHash{"modules"}}), "\n";

# One line per stage: the number of matches, then the alignment score
foreach my $stage (@{$inputHash{"matchScore"}}) {
  print join(" ", @{$stage}[0, 1]), "\n";
}

# Next the number of chunks and the average number of words per chunk
print join(" ", $inputHash{"numChunks"}, $inputHash{"avgChunkLength"}), "\n";

# Finally the detailed output, but only if --details was given
if (defined $opt_details) {
  print $inputHash{"detailString"};
}