package edu.harvard.wcfia.yoshikoder.util;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import edu.harvard.wcfia.yoshikoder.concordance.Concordance;
import edu.harvard.wcfia.yoshikoder.concordance.ConcordanceImpl;
import edu.harvard.wcfia.yoshikoder.concordance.ConcordanceLine;
import edu.harvard.wcfia.yoshikoder.concordance.ConcordanceLineImpl;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.Token;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenImpl;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenList;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenListImpl;
/**
 * SAX handler that assembles a {@link Concordance} from an XML document.
 * <p>
 * The handler reacts to three element names:
 * <ul>
 * <li><code>concordance</code> - must carry a <code>style</code> attribute
 *     equal to <code>050805</code>; may carry an integer
 *     <code>windowsize</code> attribute.</li>
 * <li><code>line</code> - starts a new concordance line.</li>
 * <li><code>w</code> - one word, whose text is in the <code>txt</code>
 *     attribute. A word with a <code>target</code> attribute (any value) is
 *     the line's target; words seen before it go to the left hand side and
 *     words seen after it go to the right hand side.</li>
 * </ul>
 * Once parsing has finished, call {@link #getConcordance()} to obtain the
 * result.
 *
 * @author will
 */
public class ConcordanceHandler extends DefaultHandler {

    /** The only value of the <code>style</code> attribute that is accepted. */
    private static final String SUPPORTED_STYLE = "050805"; //$NON-NLS-1$

    /** Marker meaning "no window size given; derive it from the lines". */
    private static final int UNKNOWN_WINDOW_SIZE = -1;

    /** The most recently built concordance (set by {@link #getConcordance()}). */
    private Concordance conc;

    /** Window size read from the document, or {@link #UNKNOWN_WINDOW_SIZE}. */
    private int windowSize;

    /** Completed {@link ConcordanceLine}s, in document order. */
    private List clist;

    /** Word texts (Strings) seen before the target of the current line. */
    private List lhs;

    /** Word texts (Strings) seen after the target of the current line. */
    private List rhs;

    /** True once the target word of the current line has been seen. */
    private boolean inRhs;

    /** Text of the most recently seen target word. */
    private String target;

    /** Creates a handler with an empty list of concordance lines. */
    public ConcordanceHandler(){
        clist = new ArrayList();
    }

    /**
     * Records the start of a <code>concordance</code>, <code>line</code> or
     * <code>w</code> element; any other element is ignored.
     *
     * @throws SAXException if the <code>style</code> attribute of the
     *         <code>concordance</code> element is missing or not the
     *         supported value, or if its <code>windowsize</code> attribute
     *         is present but not an integer
     */
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        if (qName.equals("concordance")){ //$NON-NLS-1$
            String style = attributes.getValue("style"); //$NON-NLS-1$
            // getValue returns null for an absent attribute; comparing from
            // the constant side reports that as a format error, not an NPE.
            if (!SUPPORTED_STYLE.equals(style)) {
                throw new SAXException(Messages.getString("wrongFormat")); //$NON-NLS-1$
            }
            String ws = attributes.getValue("windowsize"); //$NON-NLS-1$
            if (ws == null) {
                windowSize = UNKNOWN_WINDOW_SIZE; // guess it later
            } else {
                try {
                    windowSize = Integer.parseInt(ws);
                } catch (NumberFormatException nfe){
                    windowSize = UNKNOWN_WINDOW_SIZE;
                    throw new SAXException(nfe);
                }
            }
        } else if (qName.equals("line")) { //$NON-NLS-1$
            // NOTE(review): 'target' is not cleared here, so a line without a
            // target word reuses the previous line's target text - confirm
            // whether such input should be rejected instead.
            inRhs = false;
            lhs = new ArrayList();
            rhs = new ArrayList();
        } else if (qName.equals("w")) { //$NON-NLS-1$
            String tar = attributes.getValue("target"); //$NON-NLS-1$
            String txt = attributes.getValue("txt"); //$NON-NLS-1$
            if (tar != null){
                // The target word itself belongs to neither side; everything
                // after it goes to the right hand side.
                target = txt;
                inRhs = true;
            } else if (inRhs) {
                rhs.add(txt);
            } else {
                lhs.add(txt);
            }
        }
    }

    /**
     * On the end of a <code>line</code> element, turns the collected word
     * texts into a {@link ConcordanceLine} and stores it. Other elements are
     * ignored.
     */
    public void endElement(String uri, String localName, String qName){
        if (qName.equals("line")) { //$NON-NLS-1$
            TokenList left = toTokenList(lhs);
            TokenList right = toTokenList(rhs);
            Token targ = new TokenImpl(target, 0, 0);
            ConcordanceLine line = new ConcordanceLineImpl(left, targ, right);
            clist.add(line);
        }
    }

    /**
     * Wraps each String in <code>words</code> in a {@link TokenImpl} (both
     * integer arguments are 0) and returns them, in order, as a token list.
     */
    private TokenList toTokenList(List words){
        TokenList tokens = new TokenListImpl();
        for (Iterator iter = words.iterator(); iter.hasNext();) {
            String text = (String) iter.next();
            tokens.add(new TokenImpl(text, 0, 0));
        }
        return tokens;
    }

    /**
     * Builds a new concordance containing every line parsed so far.
     * <p>
     * If the document gave no window size, the size used is the largest
     * left or right hand side among the parsed lines (0 if there are none).
     * The guess is recomputed on each call and never stored, so it cannot go
     * stale if more lines are parsed afterwards.
     *
     * @return a new concordance holding the parsed lines in document order
     */
    public Concordance getConcordance(){
        int size = windowSize;
        if (size == UNKNOWN_WINDOW_SIZE){
            // not specified - we have to guess it.
            size = 0;
            for (Iterator iter = clist.iterator(); iter.hasNext();) {
                ConcordanceLine line = (ConcordanceLine) iter.next();
                int widest = Math.max(line.getLeftHandSide().size(),
                        line.getRightHandSide().size());
                if (widest > size) {
                    size = widest;
                }
            }
        }
        conc = new ConcordanceImpl(size);
        for (Iterator iter = clist.iterator(); iter.hasNext();) {
            conc.addLine((ConcordanceLine) iter.next());
        }
        return conc;
    }

    /**
     * Manual smoke test: imports a concordance file and prints the result.
     *
     * @param args optional; <code>args[0]</code> is the file to import. With
     *        no arguments the original developer path is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0)
                ? args[0]
                : "/home/will/fooconc.ykc"; //$NON-NLS-1$
        try {
            System.out.println(
                    ImportUtil.importConcordance(new File(path)) );
        } catch (Exception e){
            e.printStackTrace();
        }
    }
}