package railo.runtime.search.lucene2.docs;
import java.io.IOException;
import java.io.InputStream;
import org.apache.lucene.document.Document;
import org.textmining.text.extraction.WordExtractor;
import railo.commons.io.IOUtil;
import railo.commons.io.res.Resource;
import railo.commons.lang.StringUtil;
import railo.runtime.op.Caster;
/**
 * A utility for making Lucene {@link Document}s from the text of a
 * Microsoft Word file.
 * <p>
 * The class is not instantiable; all functionality is exposed through the
 * static {@code getDocument} methods.
 */
public final class WordDocument {

    /**
     * Length limit passed to {@code StringUtil.max} when building the summary.
     * NOTE(review): presumably a character count - confirm against StringUtil.
     */
    private static final int SUMMARY_SIZE = 20;

    /**
     * Makes a document for a Word file held in a resource.
     * <p>
     * The resource's input stream is opened, buffered, handed to the text
     * extractor and then closed again (also when extraction fails).
     * The resulting document is populated as follows:
     * <ul>
     * <li>the mime type is set to <code>application/msword</code>;</li>
     * <li><code>size</code>--a text field holding the length of the extracted
     * text;</li>
     * <li>the raw text, the content and a summary are set through
     * {@code FieldUtil.setRaw}, {@code FieldUtil.setContent} and
     * {@code FieldUtil.setSummary}.</li>
     * </ul>
     *
     * @param res resource to read the Word file from
     * @return matching document
     * @throws IOException if the resource cannot be read or the text cannot
     *         be extracted
     */
    public static Document getDocument(Resource res) throws IOException {
        // make a new, empty document
        Document doc = new Document();
        InputStream is = null;
        try {
            is = IOUtil.toBufferedInputStream(res.getInputStream());
            addContent(null, doc, is);
        }
        finally {
            // "is" is still null here when opening the stream failed
            IOUtil.closeEL(is);
        }
        return doc;
    }

    /**
     * Makes a document for a Word file read from an already opened stream.
     * <p>
     * The stream is not closed by this method; closing it stays the
     * responsibility of the caller.
     *
     * @param content optional buffer; when not <code>null</code> the extracted
     *        text is appended to it
     * @param is stream to read the Word file from
     * @return matching document
     * @throws IOException if the stream cannot be read or the text cannot be
     *         extracted
     */
    public static Document getDocument(StringBuffer content, InputStream is) throws IOException {
        Document doc = new Document();
        addContent(content, doc, is);
        return doc;
    }

    /**
     * Extracts the text from the given stream and stores it, together with
     * the derived fields, in the given document.
     *
     * @param content optional buffer receiving a copy of the extracted text,
     *        may be <code>null</code>
     * @param doc document to populate
     * @param is stream to read the Word file from
     * @throws IOException if reading or extracting fails; a non-IO failure of
     *         the extractor is wrapped and kept as the cause
     */
    private static void addContent(StringBuffer content, Document doc, InputStream is) throws IOException {
        FieldUtil.setMimeType(doc, "application/msword");
        WordExtractor extractor = new WordExtractor();
        String contents;
        try {
            contents = extractor.extractText(is);
            if (content != null) content.append(contents);
        }
        catch (IOException ioe) {
            throw ioe;
        }
        catch (Exception e) {
            // extractText is declared to throw Exception; translate to IOException
            // but keep the original exception as cause so the stack trace is not lost
            IOException wrapped = new IOException(e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
        doc.add(FieldUtil.Text("size", Caster.toString(contents.length())));
        FieldUtil.setRaw(doc, contents);
        FieldUtil.setContent(doc, contents);
        // NOTE(review): StringUtil.max presumably truncates to SUMMARY_SIZE; the
        // meaning of the trailing "false" flag is not visible here - confirm in FieldUtil
        FieldUtil.setSummary(doc, StringUtil.max(contents, SUMMARY_SIZE), false);
    }

    /** Not instantiable, static utility only. */
    private WordDocument() {}
}