/**
* Copyright (c) 2009 Juwi MacMillan Group GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.juwimm.cms.search.res;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.springframework.beans.factory.annotation.Autowired;
import de.juwimm.cms.model.DocumentHbmDao;
import de.juwimm.cms.model.UnitHbm;
import de.juwimm.cms.search.res.pdf.LucenePDFDocument;
/**
 * Builds Lucene {@link Document}s for PDF files. The PDF is either read from the
 * CMS document store through the {@link DocumentHbmDao} or supplied by the
 * caller as a stream fetched from an external URL.
 *
 * @author <a href="mailto:carsten.schalm@juwimm.com">Carsten Schalm</a>
 * company Juwi|MacMillan Group Gmbh, Walsrode, Germany
 * @version $Id$
 */
public class PDFDocumentLocator {
    public static final String MIME_TYPE = "application/pdf";
    /** Maximum number of characters of the extracted text stored in the "summary" field. */
    private static final int SUMMARY_MAX_LENGTH = 500;
    private static final Logger log = Logger.getLogger(PDFDocumentLocator.class);
    @Autowired
    private DocumentHbmDao documentHbmDao;

    /**
     * Creates a Lucene document from the stored PDF via
     * {@link LucenePDFDocument#getDocument(InputStream)} and adds the CMS
     * metadata fields (ids, names, unit, mime type, time stamp) to it.
     *
     * @param document the CMS document whose binary content is loaded through the DAO
     * @return the populated Lucene document, or <code>null</code> if reading the PDF
     *         failed with an {@link IOException}
     */
    public Document getDocument(de.juwimm.cms.model.DocumentHbm document) {
        Document doc = null;
        InputStream bis = new ByteArrayInputStream(documentHbmDao.getDocumentContent(document.getDocumentId()));
        try {
            doc = LucenePDFDocument.getDocument(bis);
        } catch (IOException e) {
            logDocumentFailure(document, e);
            return null;
        }
        addMetadataFields(doc, document, resolveUnit(document));
        return doc;
    }

    /**
     * Extracts the text of the stored PDF (using the document's password when one
     * is set) and builds a Lucene document containing the text, a summary, the CMS
     * metadata fields and the id of the owning site.
     *
     * @param document the CMS document whose binary content is loaded through the DAO
     * @return the populated Lucene document, or <code>null</code> if no text could be
     *         extracted or reading the PDF failed with an {@link IOException}
     */
    public Document getResource(de.juwimm.cms.model.DocumentHbm document) {
        Document resource = new Document();
        InputStream bis = new ByteArrayInputStream(documentHbmDao.getDocumentContent(document.getDocumentId()));
        try {
            String content;
            if (document.getPassword() != null) {
                content = LucenePDFDocument.getPdfContent(bis, document.getPassword());
            } else {
                content = LucenePDFDocument.getPdfContent(bis);
            }
            if (content == null) {
                return null;
            }
            addContentFields(resource, content);
        } catch (IOException e) {
            logDocumentFailure(document, e);
            return null;
        }
        // Resolve the unit BEFORE it is dereferenced: a document without a directly
        // assigned unit falls back to the unit of its view component.
        UnitHbm unitHbm = resolveUnit(document);
        addMetadataFields(resource, document, unitHbm);
        resource.add(new Field("siteId", unitHbm.getSite().getSiteId().toString(), Field.Store.YES, Field.Index.ANALYZED));
        return resource;
    }

    /**
     * Builds a Lucene document for a PDF that lives outside the CMS. The stream is
     * always closed by this method, whether extraction succeeds or not.
     *
     * @param url the address of the PDF; stored as both "url" and "uid"
     * @param in the PDF content; closed before this method returns
     * @return a document holding url, uid and - when text could be extracted -
     *         contents and summary; <code>null</code> if reading the PDF failed with
     *         an {@link IOException}
     */
    public Document getExternalResource(String url, InputStream in) {
        Document resource = new Document();
        resource.add(new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED));
        resource.add(new Field("uid", url, Field.Store.YES, Field.Index.NOT_ANALYZED));
        try {
            String content = LucenePDFDocument.getPdfContent(in);
            if (content == null) {
                return resource;
            }
            addContentFields(resource, content);
        } catch (IOException e) {
            if (log.isInfoEnabled()) log.info("Error indexing url " + url + " document may be password-protected: " + e.getMessage());
            if (log.isDebugEnabled()) log.debug(e.getMessage(), e);
            return null;
        } finally {
            // Also release the stream on the early-return and error paths.
            closeQuietly(in);
        }
        return resource;
    }

    /** Returns the document's own unit, or the unit assigned to its view component when it has none. */
    private UnitHbm resolveUnit(de.juwimm.cms.model.DocumentHbm document) {
        UnitHbm unitHbm = document.getUnit();
        if (unitHbm == null) {
            unitHbm = document.getViewComponent().getViewComponentUnit().getAssignedUnit();
        }
        return unitHbm;
    }

    /** Adds the extracted text as "contents" and its first {@link #SUMMARY_MAX_LENGTH} characters as "summary". */
    private void addContentFields(Document target, String content) {
        target.add(new Field("contents", content, Field.Store.YES, Field.Index.ANALYZED));
        int summarySize = Math.min(content.length(), SUMMARY_MAX_LENGTH);
        target.add(new Field("summary", content.substring(0, summarySize), Field.Store.YES, Field.Index.NO));
    }

    /** Adds the id, name, unit, mime type and time stamp fields shared by all CMS-stored PDFs. */
    private void addMetadataFields(Document target, de.juwimm.cms.model.DocumentHbm document, UnitHbm unitHbm) {
        String documentId = document.getDocumentId().toString();
        target.add(new Field("documentId", documentId, Field.Store.YES, Field.Index.NOT_ANALYZED));
        target.add(new Field("uid", documentId, Field.Store.YES, Field.Index.NOT_ANALYZED));
        String docName = document.getDocumentName();
        if (docName == null) {
            docName = "";
        }
        target.add(new Field("documentName", docName, Field.Store.YES, Field.Index.ANALYZED));
        target.add(new Field("title", docName, Field.Store.YES, Field.Index.ANALYZED));
        target.add(new Field("unitId", unitHbm.getUnitId().toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        target.add(new Field("unitName", unitHbm.getName(), Field.Store.YES, Field.Index.ANALYZED));
        target.add(new Field("mimeType", document.getMimeType(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        target.add(new Field("timeStamp", document.getTimeStamp().toString(), Field.Store.YES, Field.Index.NO));
    }

    /** Logs a failed PDF read at info level (message only) and debug level (with stack trace). */
    private void logDocumentFailure(de.juwimm.cms.model.DocumentHbm document, IOException e) {
        if (log.isInfoEnabled()) log.info("Error indexing document " + document.getDocumentId() + " (" + document.getDocumentName() + ")" + " document may be password-protected: " + e.getMessage());
        if (log.isDebugEnabled()) log.debug(e.getMessage(), e);
    }

    /** Closes the stream, ignoring a null argument; a failure to close is only logged at debug level. */
    private void closeQuietly(InputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ignored) {
            // The text has already been read (or the read already failed); a close
            // failure must not change the result.
            if (log.isDebugEnabled()) log.debug(ignored.getMessage(), ignored);
        }
    }
}