package railo.runtime.search.lucene2;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.Document;
import railo.commons.io.IOUtil;
import railo.commons.io.res.ContentType;
import railo.commons.io.res.ContentTypeImpl;
import railo.commons.io.res.Resource;
import railo.commons.io.res.util.ResourceUtil;
import railo.commons.net.http.HTTPResponse;
import railo.runtime.op.Caster;
import railo.runtime.search.lucene2.docs.FieldUtil;
import railo.runtime.search.lucene2.docs.FileDocument;
import railo.runtime.search.lucene2.docs.HTMLDocument;
import railo.runtime.search.lucene2.docs.PDFDocument;
import railo.runtime.search.lucene2.docs.WordDocument;
/**
 * Creates the Lucene {@link Document} matching a given HTTP response or file resource.
 * The concrete document factory (HTML, PDF, Word or plain text) is selected from the
 * content's mime type or, for files, from the file extension.
 */
public final class DocumentUtil {

    /** Charset used to decode HTTP content when the response does not declare one. */
    private static final String DEFAULT_HTTP_CHARSET = "iso-8859-1";

    /**
     * Translates the body of an HTTP response to a Document object.
     *
     * @param content buffer handed through unchanged to the selected document factory
     * @param root not used by this method
     * @param url address the response was loaded from; stored in the "url", "key" and "path" fields
     * @param method HTTP response to read status, content type, length and body from
     * @return the created document, or <code>null</code> when the status code is not 200,
     *         the content length exceeds the free memory reported by the runtime,
     *         or the mime type is missing or not one of html, pdf, msword or plain text
     * @throws IOException when reading or parsing the response body fails
     */
    public static Document toDocument(StringBuffer content, String root, URL url, HTTPResponse method) throws IOException {
        if (method.getStatusCode() != 200) return null;

        ContentType ct = method.getContentType();
        long len = method.getContentLength();

        // heuristic guard: skip bodies larger than the currently free heap, retrying once after a GC hint
        Runtime rt = Runtime.getRuntime();
        if (len > rt.freeMemory()) {
            rt.gc();
            if (len > rt.freeMemory()) return null;
        }

        String mimeType = ct == null ? null : ct.getMimeType();
        if (mimeType == null) return null;

        // fall back to the default when the response carries a mime type but no charset
        String charset = ct.getCharset();
        if (charset == null || charset.trim().length() == 0) charset = DEFAULT_HTTP_CHARSET;

        Document doc = null;
        // HTML
        if (mimeType.indexOf("text/html") != -1) {
            Reader r = null;
            try {
                r = IOUtil.getReader(method.getContentAsStream(), charset);
                doc = HTMLDocument.getDocument(content, r);
            }
            finally {
                IOUtil.closeEL(r);
            }
        }
        // PDF
        else if (mimeType.indexOf("application/pdf") != -1) {
            InputStream is = null;
            try {
                is = IOUtil.toBufferedInputStream(method.getContentAsStream());
                doc = PDFDocument.getDocument(content, is);
            }
            finally {
                IOUtil.closeEL(is);
            }
        }
        // DOC
        else if (mimeType.equals("application/msword")) {
            InputStream is = null;
            try {
                is = IOUtil.toBufferedInputStream(method.getContentAsStream());
                doc = WordDocument.getDocument(content, is);
            }
            finally {
                IOUtil.closeEL(is);
            }
        }
        // Plain
        else if (mimeType.indexOf("text/plain") != -1) {
            Reader r = null;
            try {
                r = IOUtil.toBufferedReader(IOUtil.getReader(method.getContentAsStream(), charset));
                doc = FileDocument.getDocument(content, r);
            }
            finally {
                IOUtil.closeEL(r);
            }
        }

        if (doc != null) {
            // no "size"/"modified" fields here: unlike a file, the response offers neither to this method
            String strPath = url.toExternalForm();
            doc.add(FieldUtil.UnIndexed("url", strPath));
            doc.add(FieldUtil.UnIndexed("key", strPath));
            doc.add(FieldUtil.UnIndexed("path", strPath));
        }
        return doc;
    }

    /**
     * Translates a file to a Document object.
     * The document type is chosen by file extension; when the file has no extension the
     * content type detected by {@link ResourceUtil#getContentType(Resource)} is used instead.
     * Anything not recognized as HTML, PDF or Word is indexed through {@link FileDocument}.
     *
     * @param file resource to translate
     * @param url not used by this method
     * @param charset charset used to read text content; replaced by the detected charset
     *        when the file has no extension and content type detection yields one
     * @return the created document including the fields "url", "key", "path", "size" and "modified"
     * @throws IOException when reading or parsing the file fails
     */
    public static Document toDocument(Resource file, String url, String charset) throws IOException {
        String ext = ResourceUtil.getExtension(file, null);
        Document doc = null;

        if (ext != null) {
            ext = ext.toLowerCase();
            // HTML
            if (ext.equals("htm") || ext.equals("html") || ext.equals("cfm") || ext.equals("cfml") || ext.equals("php") || ext.equals("asp") || ext.equals("aspx")) {
                doc = HTMLDocument.getDocument(file, charset);
            }
            // PDF
            else if (ext.equals("pdf")) {
                doc = PDFDocument.getDocument(file);
            }
            // DOC
            else if (ext.equals("doc")) {
                doc = WordDocument.getDocument(file);
            }
        }
        else {
            // only the ContentType interface is needed; no cast to the implementation class,
            // and a missing content type simply falls through to the plain file document
            ContentType ct = ResourceUtil.getContentType(file);
            if (ct != null) {
                String type = ct.getMimeType();
                String c = ct.getCharset();
                if (c != null) charset = c;

                if (type == null) {}
                // HTML
                else if (type.equals("text/html")) {
                    doc = HTMLDocument.getDocument(file, charset);
                }
                // PDF
                else if (type.equals("application/pdf")) {
                    doc = PDFDocument.getDocument(file);
                }
                // DOC
                else if (type.equals("application/msword")) {
                    doc = WordDocument.getDocument(file);
                }
            }
        }

        if (doc == null) doc = FileDocument.getDocument(file, charset);

        String strPath = file.getPath().replace('\\', '/');
        // "url" is the last path segment including its leading slash;
        // a path without any slash is used as a whole instead of failing in substring(-1)
        int slash = strPath.lastIndexOf('/');
        String strName = slash == -1 ? strPath : strPath.substring(slash);

        doc.add(FieldUtil.UnIndexed("url", strName));
        doc.add(FieldUtil.UnIndexed("key", strPath));
        doc.add(FieldUtil.UnIndexed("path", file.getPath()));
        doc.add(FieldUtil.UnIndexed("size", Caster.toString(file.length())));
        doc.add(FieldUtil.UnIndexed("modified", DateField.timeToString(file.lastModified())));
        return doc;
    }
}