/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.search.galleries;

import org.opencms.file.CmsFile;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsPropertyDefinition;
import org.opencms.file.CmsResource;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.search.CmsIndexException;
import org.opencms.search.CmsSearchIndex;
import org.opencms.search.documents.CmsDocumentXmlContent;
import org.opencms.search.documents.Messages;
import org.opencms.search.extractors.CmsExtractionResult;
import org.opencms.search.extractors.I_CmsExtractionResult;
import org.opencms.search.fields.CmsSearchField;
import org.opencms.util.CmsStringUtil;
import org.opencms.xml.A_CmsXmlDocument;
import org.opencms.xml.content.CmsXmlContentFactory;
import org.opencms.xml.content.I_CmsXmlContentHandler;
import org.opencms.xml.types.I_CmsXmlContentValue;

import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.lucene.document.Document;

/**
 * Special document text extraction factory for the gallery index that creates multiple fields for the content
 * in all the languages available in an XML content.<p>
 *
 * @since 8.0.0
 */
public class CmsGalleryDocumentXmlContent extends CmsDocumentXmlContent {

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsGalleryDocumentXmlContent.class);

    /**
     * Creates a new instance of this Lucene document factory.<p>
     *
     * @param name name of the document type
     */
    public CmsGalleryDocumentXmlContent(String name) {

        super(name);
    }

    /**
     * Returns the search index field name a content handler mapping should be written to.<p>
     *
     * Only property mappings that target the "Title" or the "Description" property are of interest
     * for the gallery index. Search index field names and property names are different
     * ["Title" vs. "title"], so the property name is translated here.<p>
     *
     * @param mapping the mapping as returned by the XML content handler
     *
     * @return the search field name for the mapping, or <code>null</code> if the mapping is not
     *      a property mapping to the title or description property
     */
    private static String getFieldNameForMapping(String mapping) {

        if (!mapping.startsWith(I_CmsXmlContentHandler.MAPTO_PROPERTY)) {
            // not a property mapping
            return null;
        }
        // the property name is everything after the last ':' of the mapping
        String propertyName = mapping.substring(mapping.lastIndexOf(':') + 1);
        if (CmsPropertyDefinition.PROPERTY_TITLE.equals(propertyName)) {
            return CmsSearchField.FIELD_TITLE_UNSTORED;
        }
        if (CmsPropertyDefinition.PROPERTY_DESCRIPTION.equals(propertyName)) {
            return CmsSearchField.FIELD_DESCRIPTION;
        }
        return null;
    }

    /**
     * Generates a new lucene document instance from contents of the given resource for the provided index.<p>
     *
     * For gallery document generators, we never check for {@link CmsSearchIndex#isExtractingContent()} since
     * all these classes are assumed to be written with optimizations special to gallery search indexing anyway.<p>
     *
     * If the text extraction fails, the error is logged and the document is created with
     * a <code>null</code> extraction result.<p>
     *
     * @param cms the OpenCms user context used for reading and extracting the resource
     * @param resource the resource to create the document for
     * @param index the search index the document is created for
     *
     * @return the document created by the field configuration of the given index
     *
     * @throws CmsException if the document creation by the field configuration fails
     *
     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#createDocument(CmsObject, CmsResource, CmsSearchIndex, I_CmsExtractionResult)
     * @see org.opencms.search.documents.I_CmsDocumentFactory#createDocument(CmsObject, CmsResource, CmsSearchIndex)
     */
    @Override
    public Document createDocument(CmsObject cms, CmsResource resource, CmsSearchIndex index) throws CmsException {

        // extract the content from the resource
        I_CmsExtractionResult content = null;
        // extraction result has not been attached to the resource
        try {
            content = extractContent(cms, resource, index);
        } catch (Exception e) {
            // text extraction failed for document - continue indexing meta information only
            LOG.error(Messages.get().getBundle().key(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()), e);
        }
        // create the Lucene document according to the index field configuration
        return index.getFieldConfiguration().createDocument(cms, resource, index, content);
    }

    /**
     * Returns the raw text content of a given VFS resource of type <code>CmsResourceTypeXmlContent</code>.<p>
     *
     * All XML nodes from the content for all locales will be stored separately in the item map
     * which you can access using {@link CmsExtractionResult#getContentItems()}. The XML elements will be
     * accessible using their xpath. The xpath will start with the locale and have the form like for example
     * <code>de/Text[1]</code> or <code>en/Nested[1]/Text[1]</code>.<p>
     *
     * @param cms the OpenCms user context used for reading and extracting the resource
     * @param resource the resource to extract the content from
     * @param index the search index the content is extracted for
     *
     * @return the extraction result, created with a <code>null</code> content and the collected item map
     *
     * @throws CmsException a {@link CmsIndexException} wrapping any exception raised during the extraction
     *
     * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
     */
    @Override
    public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
    throws CmsException {

        logContentExtraction(resource, index);
        try {
            CmsFile file = readFile(cms, resource);
            A_CmsXmlDocument xmlContent = CmsXmlContentFactory.unmarshal(cms, file);

            Map<String, String> items = new HashMap<String, String>();
            StringBuilder locales = new StringBuilder();
            for (Locale locale : xmlContent.getLocales()) {
                locales.append(locale.toString());
                locales.append(' ');
                StringBuilder content = new StringBuilder();
                for (String xpath : xmlContent.getNames(locale)) {
                    I_CmsXmlContentValue value = xmlContent.getValue(xpath, locale);
                    if (value.getContentDefinition().getContentHandler().isSearchable(value)) {
                        // create the content value for the locale by adding all String values in the XML nodes
                        String extracted = value.getPlainText(cms);
                        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
                            content.append(extracted);
                            content.append('\n');
                        }
                    }
                    List<String> mappings = xmlContent.getHandler().getMappings(value.getPath());
                    if ((mappings == null) || mappings.isEmpty()) {
                        // no mappings defined for this value
                        continue;
                    }
                    // mappings are defined, lets check if we have mappings that interest us
                    for (String mapping : mappings) {
                        String fieldName = getFieldNameForMapping(mapping);
                        if (fieldName == null) {
                            // neither a title nor a description property mapping
                            continue;
                        }
                        String extracted = value.getPlainText(cms);
                        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
                            // append language individual property field
                            items.put(
                                CmsGallerySearchFieldConfiguration.getLocaleExtendedName(fieldName, locale),
                                extracted);
                        }
                    }
                }
                if (content.length() > 0) {
                    // append language individual content field
                    items.put(
                        CmsGallerySearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale),
                        content.toString());
                }
            }
            if (locales.length() > 0) {
                // store the locales once all of them have been collected; nothing is stored
                // if the XML content does not contain any locale
                items.put(CmsGallerySearchFieldMapping.FIELD_RESOURCE_LOCALES, locales.toString());
            }
            // return the extraction result
            return new CmsExtractionResult(null, items);
        } catch (Exception e) {
            throw new CmsIndexException(
                Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
                e);
        }
    }

    /**
     * Gallery index content is stored in multiple languages, so the result is NOT locale dependent.<p>
     *
     * @return always <code>false</code>
     *
     * @see org.opencms.search.documents.CmsDocumentXmlContent#isLocaleDependend()
     */
    @Override
    public boolean isLocaleDependend() {

        return false;
    }
}