package edu.harvard.wcfia.yoshikoder.dictionary;
import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import javax.swing.tree.DefaultTreeModel;
import javax.swing.tree.TreeNode;
import javax.swing.tree.TreePath;
import edu.harvard.wcfia.yoshikoder.concordance.Concordance;
import edu.harvard.wcfia.yoshikoder.concordance.ConcordanceImpl;
import edu.harvard.wcfia.yoshikoder.concordance.ConcordanceLine;
import edu.harvard.wcfia.yoshikoder.concordance.ConcordanceLineImpl;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.Token;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenImpl;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenList;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenListImpl;
import edu.harvard.wcfia.yoshikoder.util.Messages;
/**
 * Base implementation of {@link YKDictionary} backed by a Swing
 * {@link DefaultTreeModel}.
 *
 * <p>The tree is made of {@link Node}s: {@link CategoryNode}s, which may have
 * children, and {@link PatternNode}s, which carry a compiled regular
 * expression. Children inserted through this class are kept ordered by name
 * (see {@link #insertNodeInto(Node, Node)}), and sibling names are required
 * to be unique by {@link #add(Node, Node)} and {@link #replace(Node, Node)}.
 */
public abstract class AbstractYKDictionary extends DefaultTreeModel
        implements YKDictionary {

    /** File associated with this dictionary; {@code null} until set. */
    protected File location;

    /** Engine used to turn pattern names into regular expressions. */
    protected PatternEngine patternEngine;

    /**
     * Orders two {@link Node}s by {@code getName()} using
     * {@link String#compareTo(String)}. Both arguments must be {@code Node}s.
     */
    protected Comparator comparator = new Comparator() {
        public int compare(Object o1, Object o2) {
            String firstName = ((Node) o1).getName();
            String secondName = ((Node) o2).getName();
            return firstName.compareTo(secondName);
        }
    };

    /**
     * Creates a dictionary whose root is a new category named
     * {@code dictionaryName}.
     *
     * @param dictionaryName    name given to the root category
     * @param patternEngineType engine type passed to
     *                          {@code PatternEngineFactory.createEngine}
     */
    public AbstractYKDictionary(String dictionaryName, String patternEngineType) {
        super(new CategoryNodeImpl(dictionaryName));
        patternEngine = PatternEngineFactory.createEngine(patternEngineType);
    }

    /** @return the model root, cast to {@link CategoryNode} */
    public CategoryNode getDictionaryRoot() {
        return (CategoryNode) getRoot();
    }

    /** Installs {@code node} as the root of the tree model. */
    public void setDictionaryRoot(CategoryNode node) {
        setRoot(node);
    }

    /** @return the file associated with this dictionary, possibly {@code null} */
    public File getLocation() {
        return location;
    }

    /** Records the file associated with this dictionary. */
    public void setLocation(File f) {
        location = f;
    }

    /** @return the name of the root category */
    public String getName() {
        return getDictionaryRoot().getName();
    }

    /** Renames the root category. */
    public void setName(String n) {
        getDictionaryRoot().setName(n);
    }

    /**
     * Builds the tree path leading from the root down to {@code n}.
     *
     * @param n node whose path is wanted
     * @return path made from {@code getPathToRoot(n)}
     */
    public TreePath getPath(Node n) {
        TreeNode[] nodesFromRoot = getPathToRoot(n);
        return new TreePath(nodesFromRoot);
    }

    /**
     * Reports whether {@code parent} has a direct child called {@code name}.
     * The comparison is made on each child's name, so {@code name} itself
     * may be {@code null} without causing an error.
     */
    private boolean hasChildNamed(Node parent, String name) {
        for (Enumeration siblings = parent.children(); siblings.hasMoreElements();) {
            Node sibling = (Node) siblings.nextElement();
            if (sibling.getName().equals(name)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Inserts {@code child} under {@code parent} at its name-ordered position.
     *
     * @throws DuplicateException if {@code parent} already has a direct child
     *                            with the same name as {@code child}
     */
    protected void add(Node child, Node parent) throws DuplicateException {
        if (hasChildNamed(parent, child.getName())) {
            throw new DuplicateException();
        }
        insertNodeInto(child, parent);
    }

    /**
     * Creates a category from the given values and adds it under
     * {@code parent}.
     *
     * @throws DuplicateException if a sibling with that name already exists
     */
    public void addCategory(String name, Double score, String desc, CategoryNode parent)
            throws DuplicateException {
        CategoryNode category = new CategoryNodeImpl(name, score, desc);
        add(category, parent);
    }

    /**
     * Creates a pattern node whose regular expression is produced by the
     * current pattern engine from {@code name}, and adds it under
     * {@code parent}.
     *
     * @throws PatternSyntaxException declared for failures while building
     *                                the regular expression
     * @throws DuplicateException     if a sibling with that name already exists
     */
    public void addPattern(String name, Double score, CategoryNode parent)
            throws PatternSyntaxException, DuplicateException {
        Pattern regexp = patternEngine.makeRegexp(name);
        PatternNode pattern = new PatternNodeImpl(name, score, regexp);
        add(pattern, parent);
    }

    /**
     * Adds an existing category under {@code parent}.
     *
     * @throws DuplicateException if a sibling with that name already exists
     */
    public void addCategory(CategoryNode cat, CategoryNode parent)
            throws DuplicateException {
        add(cat, parent);
    }

    /**
     * Adds an existing pattern node under {@code parent}.
     *
     * @throws DuplicateException if a sibling with that name already exists
     */
    public void addPattern(PatternNode pattern, CategoryNode parent)
            throws DuplicateException {
        add(pattern, parent);
    }

    /**
     * Detaches {@code node} from its parent. A node without a parent (such
     * as the root) is left untouched.
     */
    public void remove(Node node) {
        if (node.getParent() != null) {
            removeNodeFromParent(node);
        }
    }

    /**
     * Substitutes {@code repl} for {@code cat}: the children of {@code cat}
     * are moved under {@code repl}, {@code cat} is removed, and {@code repl}
     * takes its place (becoming the root when {@code cat} had no parent).
     *
     * @throws DuplicateException if the two names differ and a sibling of
     *                            {@code cat} is already called like
     *                            {@code repl}; nothing is modified in that case
     */
    public void replace(Node cat, Node repl) throws DuplicateException {
        Node parent = (Node) cat.getParent();
        String currentName = cat.getName();
        String replacementName = repl.getName();

        // Identical names cannot clash with a sibling, so only check a rename.
        boolean renamed = !currentName.equals(replacementName);
        if (renamed && parent != null && hasChildNamed(parent, replacementName)) {
            throw new DuplicateException();
        }

        // Snapshot first: re-parenting while enumerating would skip nodes.
        List children = new ArrayList();
        for (Enumeration en = cat.children(); en.hasMoreElements();) {
            children.add(en.nextElement());
        }
        for (Iterator iter = children.iterator(); iter.hasNext();) {
            insertNodeInto((Node) iter.next(), repl);
        }

        remove(cat);
        if (parent == null) {
            // cat was the root, so repl becomes the new root
            setRoot(repl);
        } else {
            insertNodeInto(repl, parent);
        }
    }

    /**
     * Collects the tokens of {@code tl} matched by {@code node}, which is
     * treated as a {@link CategoryNode} if it is one and as a
     * {@link PatternNode} otherwise.
     */
    public TokenList getMatchingTokens(TokenList tl, Node node) {
        if (node instanceof CategoryNode) {
            return getMatchingTokens(tl, (CategoryNode) node);
        }
        return getMatchingTokens(tl, (PatternNode) node);
    }

    /**
     * Concatenates, in child order, the matches of every child of
     * {@code node}. A token matched by several patterns appears once per
     * matching pattern.
     */
    protected TokenList getMatchingTokens(TokenList tl, CategoryNode node) {
        TokenList matches = new TokenListImpl();
        for (Enumeration children = node.children(); children.hasMoreElements();) {
            Node child = (Node) children.nextElement();
            matches.addAll(getMatchingTokens(tl, child));
        }
        return matches;
    }

    /**
     * Returns, in document order, the tokens whose whole text matches the
     * pattern of {@code node}.
     */
    protected TokenList getMatchingTokens(TokenList tl, PatternNode node) {
        TokenList matches = new TokenListImpl();
        // One matcher, re-targeted per token, instead of a new one each time.
        Matcher matcher = node.getPattern().matcher("");
        for (Iterator tokens = tl.iterator(); tokens.hasNext();) {
            Token token = (Token) tokens.next();
            if (matcher.reset(token.getText()).matches()) {
                matches.add(token);
            }
        }
        return matches;
    }

    /** Copies {@code tokens[from..to)} into a new token list. */
    private TokenList window(TokenList tokens, int from, int to) {
        TokenList slice = new TokenListImpl();
        for (int ii = from; ii < to; ii++) {
            slice.add(tokens.get(ii));
        }
        return slice;
    }

    /**
     * Builds a concordance with one line per token whose whole text matches
     * the pattern of {@code pnode}. Each line holds up to {@code wsize}
     * tokens before and after the match, clipped at the ends of
     * {@code tokens}.
     */
    protected Concordance getConcordance(TokenList tokens, PatternNode pnode, int wsize) {
        int tlength = tokens.size();
        Concordance conc = new ConcordanceImpl(wsize);
        // One matcher, re-targeted per token, instead of a new one each time.
        Matcher matcher = pnode.getPattern().matcher("");
        int position = -1;
        for (Iterator iter = tokens.iterator(); iter.hasNext();) {
            Token token = (Token) iter.next();
            position++;
            if (!matcher.reset(token.getText()).matches()) {
                continue;
            }
            int lhsStart = Math.max(position - wsize, 0);
            int rhsStart = Math.min(position + 1, tlength);
            int rhsEnd = Math.min(position + 1 + wsize, tlength);
            TokenList lhs = window(tokens, lhsStart, position);
            TokenList rhs = window(tokens, rhsStart, rhsEnd);
            conc.addLine(new ConcordanceLineImpl(lhs, token, rhs));
        }
        return conc;
    }

    /**
     * Builds the concordance of {@code node} over {@code tl}; {@code node}
     * is treated as a {@link CategoryNode} if it is one and as a
     * {@link PatternNode} otherwise.
     */
    public Concordance getConcordance(TokenList tl, Node node, int wsize) {
        if (node instanceof CategoryNode) {
            return getConcordance(tl, (CategoryNode) node, wsize);
        }
        return getConcordance(tl, (PatternNode) node, wsize);
    }

    /**
     * Merges, in child order, the concordances of every child of
     * {@code cnode} into a single concordance of window size {@code wsize}.
     */
    protected Concordance getConcordance(TokenList tokens, CategoryNode cnode, int wsize) {
        Concordance conc = new ConcordanceImpl(wsize);
        for (Enumeration children = cnode.children(); children.hasMoreElements();) {
            Node child = (Node) children.nextElement();
            conc.addConcordance(getConcordance(tokens, child, wsize));
        }
        return conc;
    }

    // alphabetizing implementation of insertNodeInto

    /**
     * Finds the index at which {@code child} should be inserted under
     * {@code parent} so that children stay ordered by {@link #comparator}.
     * A child that compares equal to existing ones is placed before them.
     */
    protected int findIndexFor(Node child, Node parent) {
        int childCount = parent.getChildCount();
        if (childCount == 0) {
            return 0;
        }
        return findIndexFor(child, parent, 0, childCount - 1); // first and last
    }

    /**
     * Binary search for the insertion index of {@code child} among the
     * children of {@code parent} with indices {@code i1} to {@code i2}
     * inclusive, which are expected to be ordered by {@link #comparator}.
     *
     * @return the smallest index in the range whose child is not less than
     *         {@code child}, or {@code i2 + 1} if there is none
     */
    protected int findIndexFor(Node child, Node parent, int i1, int i2) {
        int low = i1;
        int high = i2;
        while (low < high) {
            int middle = low + (high - low) / 2; // cannot overflow
            if (comparator.compare(child, parent.getChildAt(middle)) <= 0) {
                high = middle;
            } else {
                low = middle + 1;
            }
        }
        return comparator.compare(child, parent.getChildAt(low)) <= 0 ? low : low + 1;
    }

    /**
     * Inserts {@code child} under {@code parent} at the position that keeps
     * the children ordered by name.
     */
    public void insertNodeInto(Node child, Node parent) {
        int index = findIndexFor(child, parent);
        super.insertNodeInto(child, parent, index);
    }

    /**
     * Same as {@link #insertNodeInto(Node, Node)}: the requested index
     * {@code ind} is deliberately ignored so that name ordering is kept.
     */
    public void insertNodeInto(Node child, Node parent, int ind) {
        insertNodeInto(child, parent);
    }

    /** Appends the path of {@code n}, then of all its descendants, one per line. */
    private void recurse(StringBuffer sb, Node n) {
        sb.append(getPath(n).toString()).append("\n");
        for (Enumeration children = n.children(); children.hasMoreElements();) {
            recurse(sb, (Node) children.nextElement());
        }
    }

    /**
     * Lists the tree path of every node, depth first from the root, each
     * followed by a newline.
     */
    public String toString() {
        StringBuffer sb = new StringBuffer();
        recurse(sb, getDictionaryRoot());
        return sb.toString();
    }

    /** @return the engine used to build regular expressions for new patterns */
    public PatternEngine getPatternEngine() {
        return patternEngine;
    }

    /** Replaces the engine used to build regular expressions for new patterns. */
    public void setPatternEngine(PatternEngine pe) {
        patternEngine = pe;
    }

    /**
     * Ad-hoc timing harness for pattern matching. Prints the test pattern,
     * then two elapsed times in milliseconds separated by {@code ", "}.
     * The second figure times nothing: it is a slot left for an alternative
     * implementation to be compared against.
     */
    public static void main(String[] args) {
        //A test to see how fast we can make the matching
        class Dict extends AbstractYKDictionary {
            public Dict(String name, String type) {
                super(name, type);
            }

            public void setPatternEngine(PatternEngine eng) {}

            public PatternEngine getPatternEngine() { return null; }

            public long[] test() {
                TokenList tl = new TokenListImpl();
                for (int ii = 0; ii < 1000; ii++) {
                    tl.add(new TokenImpl("china", 0, 5));
                    tl.add(new TokenImpl("sausage", 0, 5));
                }
                PatternNode p = new PatternNodeImpl("chin*", null, Pattern.compile("chin*"));
                System.out.println(p.getPattern());
                long start1 = new Date().getTime();
                getMatchingTokens(tl, p);
                long end1 = new Date().getTime();
                // an alternative matcher would be timed here
                long end2 = new Date().getTime();
                return new long[]{end1 - start1, end2 - end1};
            }
        };
        Dict d = new Dict("Name", PatternEngine.SUBSTRING);
        long[] tres = d.test();
        System.out.println(tres[0] + ", " + tres[1]);
    }
}