/*
 * Copyright 2000-2013 Enonic AS
 * http://www.enonic.com/license
 */
package com.enonic.cms.core.content;

import java.io.IOException;
import java.io.InputStream;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import com.enonic.cms.framework.blob.BlobRecord;
import com.enonic.cms.framework.util.MimeTypeResolver;

import com.enonic.cms.api.plugin.ext.TextExtractor;
import com.enonic.cms.core.content.binary.BinaryDataEntity;
import com.enonic.cms.core.content.binary.ContentBinaryDataEntity;
import com.enonic.cms.core.content.index.BigText;
import com.enonic.cms.core.plugin.ext.TextExtractorExtensions;
import com.enonic.cms.store.dao.BinaryDataDao;

/**
 * Extracts full text from the binary data attached to the main version of a content.
 * <p>
 * The MIME type of a binary is resolved from its name, and the {@link TextExtractor}
 * registered for that MIME type (if any) is used to read the text out of the stored blob.
 */
@Component
public class BinaryDataExtractor
{
    private static final Logger LOG = LoggerFactory.getLogger( BinaryDataExtractor.class );

    /**
     * Encoding name handed to the text extractor.
     */
    private static final String TEXT_ENCODING = "UTF-8";

    @Autowired
    TextExtractorExtensions extensions;

    @Autowired
    BinaryDataDao binaryDataDao;

    @Autowired
    MimeTypeResolver mimeTypeResolver;

    /**
     * Extracts text from the binary data of the main version of the given content.
     * <p>
     * Binaries are visited in the iteration order of the set returned by the main version.
     * The result of the first binary whose extraction completes without throwing is returned,
     * even when that result is {@code null}. A binary whose extraction throws is logged as a
     * warning and the next binary is tried.
     *
     * @param content the content whose main-version binaries are inspected; must not be {@code null}
     * @return the extracted text, or {@code null} if there are no binaries, if extraction failed for
     *         every binary, or if the first successfully processed binary yielded no text
     */
    public BigText extractBinaryData( ContentEntity content )
    {
        final Set<ContentBinaryDataEntity> binaryDataRefs = content.getMainVersion().getContentBinaryData();

        for ( ContentBinaryDataEntity binaryDataRef : binaryDataRefs )
        {
            final BinaryDataEntity binaryData = binaryDataRef.getBinaryData();
            try
            {
                // NOTE(review): a null result (e.g. no extractor for the MIME type) ends the loop without
                // trying the remaining binaries. Kept as-is for backward compatibility - confirm intent.
                return extractText( binaryData );
            }
            catch ( Throwable e )
            {
                // Throwable is caught on purpose so that a failure for one binary is only logged and
                // never propagates to the caller.
                LOG.warn( buildFailureMessage( content, binaryData, e ), e );
            }
        }

        return null;
    }

    /**
     * Extracts the text of a single binary using the extractor registered for its MIME type.
     * The blob stream is always closed before this method returns.
     *
     * @param binaryData the binary to extract text from; must not be {@code null}
     * @return the extracted text, or {@code null} if no extractor is registered for the MIME type
     *         or the extractor returned {@code null}
     * @throws IOException declared for I/O failures raised while reading the blob or extracting text
     */
    BigText extractText( BinaryDataEntity binaryData )
        throws IOException
    {
        final String mimeType = mimeTypeResolver.getMimeType( binaryData.getName() );
        final TextExtractor textExtractor = this.extensions.getByMimeType( mimeType );
        if ( textExtractor == null )
        {
            return null;
        }

        final BlobRecord blob = binaryDataDao.getBlob( binaryData );
        final InputStream stream = blob.getStream();
        final String fullText;
        try
        {
            fullText = textExtractor.extractText( mimeType, stream, TEXT_ENCODING );
        }
        finally
        {
            closeQuietly( stream );
        }

        return fullText != null ? new BigText( fullText ) : null;
    }

    /**
     * Builds the warning message for a failed extraction. Every lookup is null-safe so that
     * reporting a failure can never itself throw and hide the original cause.
     */
    private static String buildFailureMessage( ContentEntity content, BinaryDataEntity binaryData, Throwable cause )
    {
        final Object binaryKey = binaryData != null ? binaryData.getKey() : null;
        final Object binaryName = binaryData != null ? binaryData.getName() : null;
        final Object contentTypeName = content.getContentType() != null ? content.getContentType().getName() : null;
        final Object categoryName = content.getCategory() != null ? content.getCategory().getName() : null;

        final StringBuilder sb = new StringBuilder();
        sb.append( "Failed to extract full text from binary data" );
        sb.append( "(key: " ).append( binaryKey ).append( ", name: " ).append( binaryName ).append( ") from content" );
        sb.append( "(key: " ).append( content.getKey() ).append( ", type: " ).append( contentTypeName );
        sb.append( ", category: " ).append( categoryName ).append( "): " ).append( cause.getMessage() );
        return sb.toString();
    }

    /**
     * Closes the stream, logging (rather than throwing) a close failure so that it cannot mask
     * the extraction result or an exception already in flight.
     */
    private static void closeQuietly( InputStream stream )
    {
        if ( stream == null )
        {
            return;
        }
        try
        {
            stream.close();
        }
        catch ( IOException e )
        {
            LOG.debug( "Failed to close binary data stream", e );
        }
    }
}