//
// @(#)DislodgedParser.java 1.00 6/2002
//
// Copyright 2002 Zachary DelProposto. All rights reserved.
// Use is subject to license terms.
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
// Or from http://www.gnu.org/
//
package dip.judge.parser;
import dip.world.Phase;
import dip.misc.Log;
import java.io.*;
import java.util.regex.*;
import java.util.*;
/**
 * Parses the "dislodged units" block of judge output text.
 * <p>
 * The block starts at the first line matching {@link #HEADER_REGEX}. Each entry
 * in the block begins with "The" and describes either a unit that may retreat
 * (see {@link #DISLODGED_REGEX}) or a unit that was destroyed because it had no
 * valid retreats (see {@link #DESTROYED_REGEX}). An entry may be wrapped over
 * several input lines; wrapped lines are joined before matching.
 * <p>
 * All text in the block is lower-cased before matching, so the names reported by
 * {@link DislodgedInfo} are derived from lower-case text.
 */
public class DislodgedParser
{
    // CONSTANTS

    /** Shared zero-length array; used as the retreat list of destroyed units. */
    private static final String[] EMPTY = new String[0];

    /** Header text to look for */
    public static final String HEADER_REGEX = "(?i)the following units were dislodged:";

    /** End of header text to look for */
    public static final String HEADER_END_REGEX = "(?i)the next phase of";

    /** Dislodged line start text; all dislodged lines must start with this. Lowercase. NOT a REGEX!. */
    public static final String DISLODGED_LINE_START = "the";

    /**
     * capture groups: 1:power, 2:unit, 3:unit location, 4:retreat list predicate (example: "xxx or yyy or zzz") w/o trailing '.'<br>
     * the location list predicate is split by the DISLODGED_SPLIT_REGEX<br>
     * Lines are trimmed prior to parsing.<br>
     * "in" or "over" used (Wing units are "over" provinces; armies/fleets "in")<br>
     */
    public static final String DISLODGED_REGEX = "(?i)^the\\s+(\\p{Graph}*)\\s+(\\p{Graph}*)\\s*(?:in|over)\\s+(?:the)?\\s*([\\p{Graph}\\p{Blank}]*)\\s*can\\s+retreat\\s+to\\s+((.*))\\.$";

    /** splits the retreat list predicate; must be suitable for String.split() */
    public static final String DISLODGED_SPLIT_REGEX = "\\s+or\\s+";

    /**
     * capture groups: 1:power, 2:unit, 3:unit location (may be multi-word, needs to be trim()'d)<br>
     * NOTE: this may span >1 line; it ends with the period ('.')<br>
     * Lines are trimmed prior to parsing.<br>
     * "in" or "over" used (Wing units are "over" provinces; armies/fleets "in")<br>
     */
    public static final String DESTROYED_REGEX = "(?i)^the\\s+(\\p{Graph}*)\\s+(\\p{Graph}*)\\s*(?:in|over)\\s+(?:the)?\\s*(([\\p{Graph}\\p{Blank}]*))with\\s+no\\s+valid\\s+retreats.*\\.";

    // Compiled once: Pattern instances are immutable and safe to share, so
    // there is no reason to recompile them for every parser instance.
    private static final Pattern HEADER_PATTERN = Pattern.compile(HEADER_REGEX);
    private static final Pattern HEADER_END_PATTERN = Pattern.compile(HEADER_END_REGEX);
    private static final Pattern DISLODGED_PATTERN = Pattern.compile(DISLODGED_REGEX);
    private static final Pattern DESTROYED_PATTERN = Pattern.compile(DESTROYED_REGEX);
    private static final Pattern DISLODGED_SPLIT_PATTERN = Pattern.compile(DISLODGED_SPLIT_REGEX);

    // INSTANCE VARIABLES

    /** Parse result; never null once construction succeeds. */
    private final DislodgedInfo[] dislodgedInfo;
    /** Phase given to the constructor; stored, but not consulted while parsing. */
    private final Phase phase;
    /** Input text given to the constructor; stored as received. */
    private final String inputText;

    /* // TEST PATTERN: DO NOT DELETE
    public static void main(String args[])
    throws IOException
    {
    // this is a GOOD test pattern
    //
    String in =
    "Italy: Fleet Naples SUPPORT Fleet Tunis -> Ionian Sea.\n"+
    "Italy: Fleet Tunis -> Ionian Sea.\n"+
    "Italy: Army Rome SUPPORT Army Venice -> Apulia.\n"+
    "\n"+
    "Russia: Army Smyrna -> Constantinople.\n"+
    "\n"+
    "\n"+
    "The following units were dislodged:\n"+
    "\n"+
    "The 1Austrian Army in Apulia with no valid retreats was destroyed.\n"+
    "The 2Austrian Fleet in the Ionian Sea can retreat to Tyrrhenian Sea or Albania\n"+
    "or Greece or Eastern Mediterranean.\n"+
    "The 3French Army in Paris with no valid retreats was destroyed.\n"+
    "The 4German Fleet in the North Sea can retreat to Norwegian Sea or Skagerrak or\n"+
    "Belgium or London.\n"+
    "The 5Italian Fleet in Spain (south coast) can retreat to Portugal (north coast) or Western\n"+
    "Mediterranean.\n"+
    "The 6Chinese Fleet in the South Atlantic Ocean with no valid retreats was\n"+
    "destroyed.\n"+
    "The 7Russian Army in Constantinople can retreat to Ankara or Smyrna.\n"+
    "The 8Soviet Army in St. Petersburg can retreat to Here or There.\n"+
    "The 9Unlucky Army in Badlands can retreat to St. Elsewhere or Picardy.\n"+
    "The 10Germany Army in AAAAAA can retreat to BBBBBBB.\n"+
    "The 11Germany Army in NewAAAA can retreat to St. BBBBBBB.\n"+
    "The 12Germany Army in ReallyNewAAAA can retreat to St.\n"+
    "BBBBBBB.\n"+
    "\n"+
    "The next phase of 'ferret' will be Retreats for Fall of 1906.\n"+
    "The deadline for orders will be Wed Apr 10 2002 17:54:23 -0500.\n";
    // the constructor takes (Phase, String); the phase is only stored, so null suffices here
    DislodgedParser dp = new DislodgedParser(null, in);
    DislodgedInfo[] di = dp.getDislodgedInfo();
    System.out.println("di = "+di);
    System.out.println("length = "+di.length);
    for(int i=0; i<di.length; i++)
    {
    System.out.println(di[i]);
    System.out.println("");
    }
    }
    */

    /**
     * Creates a DislodgedParser object, which parses the given input for a
     * Dislodged (retreat) information block.
     *
     * @param phase the phase associated with the input; stored but not used for parsing
     * @param input the complete text to search; must not be null
     * @throws IOException if reading the input text fails
     */
    public DislodgedParser(Phase phase, String input)
    throws IOException
    {
        this.phase = phase;
        this.inputText = input;
        this.dislodgedInfo = parseEntries(normalizeBlock(input));
    }// DislodgedParser()

    /**
     * Returns the dislodged units, or a zero-length array if no units were dislodged.
     * <p>
     * The array returned is the parser's own array, not a copy.
     */
    public DislodgedInfo[] getDislodgedInfo()
    {
        return dislodgedInfo;
    }// getDislodgedInfo()

    /**
     * A DislodgedInfo object is created for each dislodged unit.
     * <p>
     * Dislodged units may be destroyed (and have no valid retreat locations) or
     * may have one or more retreat locations.
     */
    public static class DislodgedInfo
    {
        private final String power;
        private final String src;
        private final String unit;
        private final String[] retreatLocs; // zero-length if destroyed

        /**
         * Create a DislodgedInfo object.
         *
         * @param power name of the power owning the unit
         * @param unit type name of the unit
         * @param src location of the unit
         * @param retreatLocs valid retreat locations; null is treated as "none"
         */
        public DislodgedInfo(String power, String unit, String src, String[] retreatLocs)
        {
            this.power = power;
            this.unit = unit;
            this.src = src;
            this.retreatLocs = (retreatLocs == null) ? EMPTY : retreatLocs;
        }// DislodgedInfo()

        /** Name of the Power */
        public String getPowerName() { return power; }

        /** Location of the unit */
        public String getSourceName() { return src; }

        /** Type Name of the unit (e.g., "Fleet") */
        public String getUnitName() { return unit; }

        /** Names of valid retreat locations; zero-length if no valid retreats */
        public String[] getRetreatLocationNames() { return retreatLocs; }

        /** Indicates if unit was destroyed */
        public boolean isDestroyed() { return (retreatLocs.length == 0); }

        /** String output for debugging; may change between versions. */
        public String toString()
        {
            StringBuffer sb = new StringBuffer();
            sb.append("DislodgedInfo[power=");
            sb.append(power);
            sb.append(", src=");
            sb.append(src);
            sb.append(", unit=");
            sb.append(unit);
            sb.append(", locNames=");
            for(int i=0; i<retreatLocs.length; i++)
            {
                sb.append(retreatLocs[i]);
                sb.append(',');
            }
            sb.append(']');
            return sb.toString();
        }// toString()
    }// nested class DislodgedInfo

    /**
     * Locates the dislodged block in the input and normalizes it so that every
     * entry occupies exactly one line.
     * <p>
     * Scanning starts after the first line matching the header. Each following
     * line is trimmed and lower-cased. A line starting with
     * {@link #DISLODGED_LINE_START} begins a new entry; any other non-empty line
     * is joined, with a single space, to the entry before it. The block ends at
     * a line matching {@link #HEADER_END_REGEX}, at the second empty line seen
     * after the header (the first one is skipped), or at the end of the input.
     *
     * @param input the complete text to search
     * @return the entries separated by newlines, or an empty string if no header was found
     * @throws IOException if reading the input text fails
     */
    private static String normalizeBlock(String input)
    throws IOException
    {
        StringBuffer accum = new StringBuffer(2048);
        BufferedReader br = new BufferedReader(new StringReader(input));
        try
        {
            // Skip everything up to (and including) the header line. The header
            // is matched against the raw line, so it must start in column 0.
            String line = br.readLine();
            while(line != null && !HEADER_PATTERN.matcher(line).lookingAt())
            {
                line = br.readLine();
            }

            if(line == null)
            {
                return "";  // no header: there is no dislodged block
            }

            boolean seenBlankLine = false;
            line = br.readLine();
            while(line != null)
            {
                // Locale.ENGLISH keeps the lower-casing independent of the
                // default locale (e.g. Turkish maps 'I' to a dotless 'i',
                // which would corrupt names such as "Ionian Sea").
                line = line.trim().toLowerCase(Locale.ENGLISH);

                if(line.length() == 0)
                {
                    if(seenBlankLine)
                    {
                        break;  // second blank line ends the block
                    }
                    seenBlankLine = true;
                }
                else
                {
                    if(HEADER_END_PATTERN.matcher(line).lookingAt())
                    {
                        break;
                    }

                    // Joining wrapped lines here means the entry regexes never
                    // have to cope with an entry split across lines.
                    accum.append(line.startsWith(DISLODGED_LINE_START) ? '\n' : ' ');
                    accum.append(line);
                }

                line = br.readLine();
            }

            accum.append('\n');  // end-of-text newline
            return accum.toString();
        }
        finally
        {
            br.close();
        }
    }// normalizeBlock()

    /**
     * Converts normalized block text (one entry per line) into DislodgedInfo
     * objects. Lines matching neither {@link #DESTROYED_REGEX} nor
     * {@link #DISLODGED_REGEX} are ignored.
     *
     * @param normalized newline-separated entries, as produced by normalizeBlock()
     * @return one DislodgedInfo per recognized entry, in input order; never null
     */
    private static DislodgedInfo[] parseEntries(String normalized)
    {
        // raw List: this file is written against the pre-generics collections API
        List found = new ArrayList();

        StringTokenizer st = new StringTokenizer(normalized, "\n");
        while(st.hasMoreTokens())
        {
            String entry = st.nextToken();

            // "destroyed" is tested first, exactly as before; an entry is
            // only tried against the retreat pattern if this one fails.
            Matcher m = DESTROYED_PATTERN.matcher(entry);
            if(m.lookingAt())
            {
                found.add(new DislodgedInfo(
                    m.group(1),
                    m.group(2),
                    ParserUtils.filter(m.group(3).trim()),
                    null));
                continue;
            }

            m = DISLODGED_PATTERN.matcher(entry);
            if(m.lookingAt())
            {
                // parse location-list predicate ("xxx or yyy or zzz")
                String[] retreatLocs = DISLODGED_SPLIT_PATTERN.split(m.group(4));
                for(int i=0; i<retreatLocs.length; i++)
                {
                    retreatLocs[i] = ParserUtils.filter(retreatLocs[i]);
                }

                found.add(new DislodgedInfo(
                    m.group(1),
                    m.group(2),
                    ParserUtils.filter(m.group(3).trim()),
                    retreatLocs));
            }
        }

        return (DislodgedInfo[]) found.toArray(new DislodgedInfo[found.size()]);
    }// parseEntries()
}// class DislodgedParser