/**
* Copyright (c) 2009 Juwi MacMillan Group GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.juwimm.cms.search.res;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.swing.text.BadLocationException;
import javax.swing.text.DefaultStyledDocument;
import javax.swing.text.rtf.RTFEditorKit;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.springframework.beans.factory.annotation.Autowired;
import org.tizzit.util.XercesHelper;
import de.juwimm.cms.model.DocumentHbmDao;
/**
 * Builds Lucene {@link Document}s from RTF content so that it can be added to the search index.
 * <p>
 * The RTF markup is parsed with Swing's {@link RTFEditorKit}; the resulting plain text is stored
 * in the {@code contents} field and a shortened version of it in the {@code summary} field.
 *
 * @author <a href="mailto:carsten.schalm@juwimm.com">Carsten Schalm</a>
 * company Juwi|MacMillan Group Gmbh, Walsrode, Germany
 * @version $Id$
 */
public class RTFDocumentLocator {
	private static final Logger log = Logger.getLogger(RTFDocumentLocator.class);

	/** MIME type handled by this locator. */
	public static final String MIME_TYPE = "application/rtf";

	/** Maximum number of characters of the extracted text that are used for the summary field. */
	private static final int SUMMARY_MAX_LENGTH = 500;

	/** DAO used to load the binary content of a stored document. */
	@Autowired
	DocumentHbmDao documentHbmDao;

	/**
	 * Creates a Lucene document for a stored RTF document.
	 * <p>
	 * Fields are added in this order: {@code contents}, the common metadata fields
	 * (see {@link #addDocumentFields}), {@code summary}. Unlike {@link #getResource} no
	 * {@code siteId} field is added.
	 *
	 * @param document the stored document whose content is loaded through {@link #documentHbmDao}
	 * @return the populated Lucene document
	 * @throws IOException if the content cannot be read or parsed as RTF
	 */
	public Document getDocument(de.juwimm.cms.model.DocumentHbm document) throws IOException {
		Document doc = new Document();
		InputStream bis = new ByteArrayInputStream(documentHbmDao.getDocumentContent(document.getDocumentId()));
		String contents = extractText(bis);
		doc.add(new Field("contents", contents, Field.Store.YES, Field.Index.ANALYZED));
		addDocumentFields(doc, document);
		doc.add(new Field("summary", buildSummary(contents), Field.Store.YES, Field.Index.NO));
		return doc;
	}

	/**
	 * Creates a Lucene document for a stored RTF document, including the id of the site it belongs to.
	 * <p>
	 * Fields are added in this order: {@code contents}, {@code summary}, the common metadata fields
	 * (see {@link #addDocumentFields}), {@code siteId}.
	 *
	 * @param document the stored document whose content is loaded through {@link #documentHbmDao}
	 * @return the populated Lucene document
	 * @throws IOException if the content cannot be read or parsed as RTF
	 */
	public Document getResource(de.juwimm.cms.model.DocumentHbm document) throws IOException {
		Document resource = new Document();
		InputStream bis = new ByteArrayInputStream(documentHbmDao.getDocumentContent(document.getDocumentId()));
		resource = getContent(resource, bis);
		addDocumentFields(resource, document);
		resource.add(new Field("siteId", document.getUnit().getSite().getSiteId().toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
		return resource;
	}

	/**
	 * Creates a Lucene document for RTF content that is identified by a URL instead of a stored document.
	 * The URL is stored both as {@code url} and as {@code uid}.
	 *
	 * @param url the URL identifying the content
	 * @param in stream delivering the RTF data; it is closed by this method
	 * @return the populated Lucene document
	 * @throws IOException if the content cannot be read or parsed as RTF
	 */
	public Document getExternalResource(String url, InputStream in) throws IOException {
		Document resource = new Document();
		resource.add(new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED));
		resource.add(new Field("uid", url, Field.Store.YES, Field.Index.NOT_ANALYZED));
		resource = getContent(resource, in);
		return resource;
	}

	/**
	 * Parses the RTF data from the stream and adds the {@code contents} and {@code summary}
	 * fields to the given Lucene document.
	 *
	 * @param resource the Lucene document to add the fields to
	 * @param in stream delivering the RTF data; it is closed by this method
	 * @return the same {@code resource} instance
	 * @throws IOException if the content cannot be read or parsed as RTF
	 */
	private Document getContent(Document resource, InputStream in) throws IOException {
		String contents = extractText(in);
		resource.add(new Field("contents", contents, Field.Store.YES, Field.Index.ANALYZED));
		resource.add(new Field("summary", buildSummary(contents), Field.Store.YES, Field.Index.NO));
		return resource;
	}

	/**
	 * Adds the metadata fields shared by {@link #getDocument} and {@link #getResource}:
	 * {@code documentId}, {@code uid}, {@code documentName}, {@code title}, {@code unitId},
	 * {@code unitName}, {@code mimeType} and {@code timeStamp}.
	 * A missing document name is indexed as an empty string.
	 */
	private void addDocumentFields(Document target, de.juwimm.cms.model.DocumentHbm document) {
		String documentId = document.getDocumentId().toString();
		target.add(new Field("documentId", documentId, Field.Store.YES, Field.Index.NOT_ANALYZED));
		target.add(new Field("uid", documentId, Field.Store.YES, Field.Index.NOT_ANALYZED));
		String docName = document.getDocumentName();
		if (docName == null) {
			docName = "";
		}
		target.add(new Field("documentName", docName, Field.Store.YES, Field.Index.ANALYZED));
		target.add(new Field("title", docName, Field.Store.YES, Field.Index.ANALYZED));
		target.add(new Field("unitId", document.getUnit().getUnitId().toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
		target.add(new Field("unitName", document.getUnit().getName(), Field.Store.YES, Field.Index.ANALYZED));
		target.add(new Field("mimeType", document.getMimeType(), Field.Store.YES, Field.Index.NOT_ANALYZED));
		target.add(new Field("timeStamp", document.getTimeStamp().toString(), Field.Store.YES, Field.Index.NO));
	}

	/**
	 * Reads RTF data from the stream and returns its plain text.
	 * The stream is always closed, also when parsing fails.
	 *
	 * @throws IOException if reading fails or the parsed document cannot be accessed;
	 *         in the latter case the {@link BadLocationException} is attached as cause
	 */
	private String extractText(InputStream in) throws IOException {
		DefaultStyledDocument styledDoc = new DefaultStyledDocument();
		try {
			new RTFEditorKit().read(in, styledDoc, 0);
			return styledDoc.getText(0, styledDoc.getLength());
		} catch (BadLocationException e) {
			log.warn("Error parsing rtf-doc: " + e.getMessage(), e);
			// initCause keeps the original stack trace without requiring the
			// IOException(String, Throwable) constructor introduced in Java 6.
			IOException ioe = new IOException("Error parsing rtf-doc: " + e.getMessage());
			ioe.initCause(e);
			throw ioe;
		} finally {
			closeQuietly(in);
		}
	}

	/**
	 * Builds the summary from the first {@link #SUMMARY_MAX_LENGTH} characters of the text.
	 * A non-empty summary is passed through {@code XercesHelper.html2nodeUTF8}; if that call
	 * fails the unconverted text is used, if it returns {@code null} an empty string is used.
	 * NOTE(review): html2nodeUTF8 presumably escapes/encodes the text for later output - confirm.
	 */
	private String buildSummary(String contents) {
		String summary = contents.substring(0, Math.min(contents.length(), SUMMARY_MAX_LENGTH));
		if (summary.length() > 0) {
			try {
				summary = XercesHelper.html2nodeUTF8(summary);
			} catch (Exception e) {
				// Best effort only: keep the raw text, but leave a trace for diagnosis.
				if (log.isDebugEnabled()) {
					log.debug("Could not convert summary, using raw text: " + e.getMessage(), e);
				}
			}
		}
		return summary == null ? "" : summary;
	}

	/** Closes the stream; a failure is only logged because the text has already been read (or parsing already failed). */
	private void closeQuietly(InputStream in) {
		try {
			in.close();
		} catch (IOException e) {
			log.warn("Error closing rtf-doc stream: " + e.getMessage(), e);
		}
	}
}