package org.juxtasoftware.diff.util;

import java.io.IOException;
import java.net.URI;
import java.util.List;
import java.util.Set;

import org.juxtasoftware.diff.Token;
import org.juxtasoftware.diff.TokenSource;
import org.juxtasoftware.diff.impl.SimpleToken;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

import eu.interedition.text.Name;
import eu.interedition.text.Range;
import eu.interedition.text.Text;
import eu.interedition.text.mem.SimpleAnnotation;
import eu.interedition.text.mem.SimpleName;
import eu.interedition.text.mem.SimpleText;

/**
 * A {@link TokenSource} that splits the content of an in-memory {@link SimpleText}
 * into tokens, where a token is a maximal run of consecutive letter or digit
 * characters. Every other character (whitespace, punctuation, ...) acts as a
 * separator and is not part of any token.
 *
 * @author <a href="http://gregor.middell.net/" title="Homepage">Gregor Middell</a>
 */
public class SimpleTokenSource implements TokenSource {

    /** Annotation name attached to every token produced by this source. */
    private static final Name TEST_TOKEN_NAME = new SimpleName((URI) null, "testToken");

    /**
     * Tokenizes the full content of {@code text}.
     *
     * <p>Each token is annotated with a range {@code [start, end)}: {@code start} is
     * the offset of the token's first character and {@code end} is the offset just
     * past its last character.
     *
     * @param text   the text to tokenize; must be a {@link SimpleText}
     * @param ranges not consulted by this implementation; the whole text is always tokenized
     * @return the tokens in order of appearance; empty if the text contains no token characters
     * @throws IllegalArgumentException if {@code text} is not a {@link SimpleText}
     * @throws IOException declared by the interface; not thrown by this implementation itself
     */
    @Override
    public List<Token> tokensOf(Text text, Set<Range> ranges) throws IOException {
        Preconditions.checkArgument(text instanceof SimpleText);
        final String textContent = ((SimpleText) text).getContent();
        final List<Token> tokens = Lists.newArrayList();

        // Offset of the first character of the token being scanned, or -1 when
        // the scanner is currently between tokens.
        int start = -1;
        for (int offset = 0; offset < textContent.length(); offset++) {
            if (isTokenChar(textContent.charAt(offset))) {
                if (start == -1) {
                    start = offset;
                }
            } else if (start != -1) {
                // 'offset' points at the separator, i.e. one past the token's last char.
                tokens.add(createToken(text, textContent, start, offset));
                start = -1;
            }
        }

        if (start != -1) {
            // The text ended inside a token. Use the same exclusive-end convention as
            // above: the end is the content length, not length - 1 (which would cut
            // off the last character and yield an empty range for 1-char tokens).
            tokens.add(createToken(text, textContent, start, textContent.length()));
        }
        return tokens;
    }

    /**
     * Builds the token covering {@code content[start, end)} and annotates it on {@code text}.
     */
    private Token createToken(Text text, String content, int start, int end) {
        final Range tokenRange = new Range(start, end);
        final SimpleAnnotation a = new SimpleAnnotation(text, TEST_TOKEN_NAME, tokenRange, null);
        return new SimpleToken(a, content.substring(start, end));
    }

    /**
     * Tells whether {@code c} belongs to a token, i.e. is a letter or a digit.
     * Whitespace and all other characters are separators.
     */
    private boolean isTokenChar(int c) {
        return Character.isLetter(c) || Character.isDigit(c);
    }
}