package org.basex.util.ft;
import static org.basex.data.DataText.*;
import static org.basex.util.Token.*;
import java.io.IOException;
import org.basex.core.Prop;
import org.basex.data.Data;
import org.basex.data.MemData;
import org.basex.io.IO;
import org.basex.io.IOFile;
import org.basex.io.in.DataInput;
import org.basex.io.out.DataOutput;
import org.basex.util.Util;
import org.basex.util.hash.TokenSet;
/**
* Simple stop words set for full-text requests.
*
* @author BaseX Team 2005-12, BSD License
* @author Christian Gruen
*/
public final class StopWords extends TokenSet {
/**
* Default constructor.
*/
public StopWords() { }
/**
* Constructor, reading stopword list from disk.
* And creating database stopword file.
* @param data data reference
* @param file stopword list file
* @throws IOException I/O exception
*/
public StopWords(final Data data, final String file) throws IOException {
if(!data.meta.prop.get(Prop.STOPWORDS).isEmpty()) read(IO.get(file), false);
final DataOutput out = new DataOutput(data.meta.dbfile(DATASWL));
write(out);
out.close();
}
/**
* Compiles the stop word list.
* @param data data reference
*/
public void comp(final Data data) {
// no data reference, or stop words have already been defined..
if(data == null || size() != 0 || data instanceof MemData) return;
// try to parse the stop words file of the current database
try {
final IOFile file = data.meta.dbfile(DATASWL);
if(!file.exists()) return;
final DataInput in = new DataInput(data.meta.dbfile(DATASWL));
try {
read(in);
} finally {
in.close();
}
} catch(final Exception ex) {
Util.debug(ex);
}
}
/**
* Reads a stop words file.
* @param fl file reference
* @param e except flag
* @return true if everything went alright
*/
public boolean read(final IO fl, final boolean e) {
try {
final byte[] content = norm(fl.read());
final int s = contains(content, ' ') ? ' ' : '\n';
for(final byte[] sl : split(content, s)) {
if(e) delete(sl);
else if(id(sl) == 0) add(sl);
}
return true;
} catch(final IOException ex) {
return false;
}
}
}