/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
*/
package org.opensolaris.opengrok.analysis;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
/**
* Tokenizer for paths, filenames and extensions. Input:
*
* <pre>
* /topdir/subdir/filename.ext
* </pre>
*
* Output:
*
* <pre>
* topdir
* subdir
* filename
* .
* ext
* </pre>
*/
public class PathTokenizer extends Tokenizer {
// below should be '/' since we try to convert even windows file separators
// to unix ones
public static final char DEFAULT_DELIMITER = '/';
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private int startPosition = 0;
private final char delimiter;
private int charsRead = 0;
private boolean dot = false;
private static final char cdot = '.';
public PathTokenizer() {
this.delimiter = DEFAULT_DELIMITER;
}
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
if (dot) {
dot = false;
termAtt.setEmpty();
termAtt.append(cdot);
termAtt.setLength(1);
offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + 1));
startPosition++;
return true;
}
char buf[] = new char[64];
int c;
int i = 0;
do {
c = input.read();
charsRead++;
if (c == -1) {
return false;
}
} while (c == delimiter);
do {
if (i >= buf.length) {
buf = Arrays.copyOf(buf, buf.length * 2);
}
buf[i++] = Character.toLowerCase((char) c);
c = input.read();
charsRead++;
} while (c != delimiter && c != cdot && !Character.isWhitespace(c) && c != -1);
if (c == cdot) {
dot = true;
}
termAtt.copyBuffer(buf, 0, i);
termAtt.setLength(i);
offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + i));
startPosition = startPosition + i + 1;
return true;
}
@Override
public final void end() throws IOException {
super.end();
// set final offset
int finalOffset = correctOffset(charsRead);
offsetAtt.setOffset(finalOffset, finalOffset);
}
@Override
public void reset() throws IOException {
super.reset();
dot = false;
charsRead = 0;
startPosition = 0;
}
@Override
public final void close() throws IOException {
super.close();
}
}