/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.search.extractors;

import org.opencms.util.CmsStringUtil;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;

/**
 * The result of a document text extraction.<p>
 *
 * This data structure contains the extracted text as well as (optional)
 * meta information extracted from the document.<p>
 *
 * All extracted values are kept in a single map of content items; the main
 * content is the entry stored under the <code>ITEM_CONTENT</code> key.<p>
 *
 * @since 6.0.0
 */
public class CmsExtractionResult implements I_CmsExtractionResult, Serializable {

    /** UID required for safe serialization. */
    private static final long serialVersionUID = 1465447302192195154L;

    /** The extracted individual content items. */
    private Map<String, String> m_contentItems;

    /**
     * The serialized version of this object, cached by {@link #getBytes()} and
     * {@link #fromBytes(byte[])}.<p>
     *
     * The cache is only reset by {@link #release()}; it is not invalidated when
     * the content item map is modified afterwards.<p>
     */
    private byte[] m_serializedVersion;

    /**
     * Creates a new extraction result without meta information and without additional fields.<p>
     *
     * The content is stored under the <code>ITEM_CONTENT</code> key only if it is not
     * empty or whitespace only, but it is always stored under the <code>ITEM_RAW</code> key,
     * even if it is <code>null</code>.<p>
     *
     * @param content the extracted content
     */
    public CmsExtractionResult(String content) {

        this(content, null);
        m_contentItems.put(ITEM_RAW, content);
    }

    /**
     * Creates a new extraction result.<p>
     *
     * The given map is stored by reference, not copied: if the content is not empty
     * or whitespace only, it is added to that very map under the
     * <code>ITEM_CONTENT</code> key. If no map is given, a new empty one is created.<p>
     *
     * @param content the extracted content
     * @param contentItems the individual extracted content items, may be <code>null</code>
     */
    public CmsExtractionResult(String content, Map<String, String> contentItems) {

        if (contentItems != null) {
            m_contentItems = contentItems;
        } else {
            m_contentItems = new HashMap<String, String>();
        }
        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(content)) {
            m_contentItems.put(ITEM_CONTENT, content);
        }
    }

    /**
     * Creates an extraction result from a serialized byte array.<p>
     *
     * The given byte array is kept as the cached serialized version of the returned
     * object, so a later call to {@link #getBytes()} returns the same array.<p>
     *
     * NOTE(review): this uses native Java deserialization; the bytes are assumed to
     * come from a trusted source (for example a previous {@link #getBytes()} call) -
     * confirm against the callers.<p>
     *
     * @param bytes the serialized version of the extraction result
     *
     * @return extraction result created from the serialized byte array, or <code>null</code>
     *      if the array is <code>null</code>, can not be deserialized, or does not contain
     *      a <code>CmsExtractionResult</code>
     */
    public static final CmsExtractionResult fromBytes(byte[] bytes) {

        Object obj = null;
        if (bytes != null) {
            // create an object out of the byte array
            try {
                ByteArrayInputStream in = new ByteArrayInputStream(bytes);
                ObjectInputStream oin = new ObjectInputStream(in);
                obj = oin.readObject();
                oin.close();
            } catch (Exception e) {
                // ignore, null is not an instance of CmsExtractionResult
            }
            if (obj instanceof CmsExtractionResult) {
                CmsExtractionResult result = (CmsExtractionResult)obj;
                // remember the source bytes so getBytes() does not have to serialize again
                result.m_serializedVersion = bytes;
                return result;
            }
        }
        return null;
    }

    /**
     * Returns the serialized version of this object, creating and caching it on first use.<p>
     *
     * @return the serialized bytes, or <code>null</code> if the serialization failed
     *
     * @see org.opencms.search.extractors.I_CmsExtractionResult#getBytes()
     */
    public byte[] getBytes() {

        // check if we have a cached version of the serialized object available
        if (m_serializedVersion != null) {
            return m_serializedVersion;
        }
        try {
            // serialize this object and return
            ByteArrayOutputStream out = new ByteArrayOutputStream(512);
            ObjectOutputStream oout = new ObjectOutputStream(out);
            oout.writeObject(this);
            oout.close();
            m_serializedVersion = out.toByteArray();
        } catch (Exception e) {
            // ignore, serialized version will be null
        }
        return m_serializedVersion;
    }

    /**
     * Returns the content item stored under the <code>ITEM_CONTENT</code> key.<p>
     *
     * Must not be called after {@link #release()}, since the content item map is
     * <code>null</code> then.<p>
     *
     * @return the extracted content, or <code>null</code> if no such item is stored
     *
     * @see org.opencms.search.extractors.I_CmsExtractionResult#getContent()
     */
    public String getContent() {

        return m_contentItems.get(ITEM_CONTENT);
    }

    /**
     * Returns the internal content item map itself, not a copy.<p>
     *
     * @return the content items, or <code>null</code> after {@link #release()} has been called
     *
     * @see org.opencms.search.extractors.I_CmsExtractionResult#getContentItems()
     */
    public Map<String, String> getContentItems() {

        return m_contentItems;
    }

    /**
     * Clears the content item map and drops the references to the map and to the
     * cached serialized version.<p>
     *
     * Calling this method more than once is safe.<p>
     *
     * @see org.opencms.search.extractors.I_CmsExtractionResult#release()
     */
    public void release() {

        // guard against repeated calls, the map is null after the first release
        if (m_contentItems != null) {
            if (!m_contentItems.isEmpty()) {
                m_contentItems.clear();
            }
            m_contentItems = null;
        }
        m_serializedVersion = null;
    }
}