/** * Copyright (c) Cohesive Integrations, LLC * Copyright (c) Codice Foundation * * This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either version 3 of the License, or any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. A copy of the GNU Lesser General Public License is distributed along with this program and can be found at * <http://www.gnu.org/licenses/lgpl.html>. * **/ package net.di2e.ecdr.libs.result.relevance; import org.apache.commons.lang.StringUtils; import org.codehaus.stax2.XMLInputFactory2; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import java.io.StringReader; public final class TextParser { private static final Logger LOGGER = LoggerFactory.getLogger( TextParser.class ); private static XMLInputFactory xmlInputFactory = null; static { ClassLoader tccl = Thread.currentThread().getContextClassLoader(); try { Thread.currentThread().setContextClassLoader( TextParser.class.getClassLoader()); xmlInputFactory = XMLInputFactory2.newInstance(); xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.FALSE); xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE); xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.FALSE); xmlInputFactory.setProperty( XMLInputFactory.IS_VALIDATING, Boolean.FALSE); } finally { Thread.currentThread().setContextClassLoader(tccl); } } private TextParser() { } /** * Given xml as a string, this method will parse out element text and CDATA text. It separates * each by one space character. * * @param xmlData * XML as a {@code String} * @return parsed CDATA and element text */ protected static String parseTextFrom(String xmlData) { StringBuilder builder = new StringBuilder(); XMLStreamReader xmlStreamReader; try { // xml parser does not handle leading whitespace xmlStreamReader = xmlInputFactory .createXMLStreamReader(new StringReader(xmlData)); while (xmlStreamReader.hasNext()) { int event = xmlStreamReader.next(); if (event == XMLStreamConstants.CHARACTERS || event == XMLStreamConstants.CDATA) { String text = xmlStreamReader.getText(); if ( StringUtils.isNotBlank( text )) { builder.append(" " + text.trim()); } } if (event == XMLStreamConstants.START_ELEMENT) { for (int i = 0; i < xmlStreamReader.getAttributeCount(); i++) { String text = xmlStreamReader.getAttributeValue(i); if (StringUtils.isNotBlank(text)) { builder.append(" " + text.trim()); } } } } } catch (XMLStreamException e1) { LOGGER.warn( "Failure occurred in parsing the xml data (" + xmlData + "). No data has been stored or indexed.", e1); } return builder.toString(); } }