/* * This program is free software; you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software * Foundation. * * You should have received a copy of the GNU Lesser General Public License along with this * program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html * or from the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Lesser General Public License for more details. * * Copyright (c) 2001 - 2013 Object Refinery Ltd, Pentaho Corporation and Contributors.. All rights reserved. */ package org.pentaho.reporting.engine.classic.core.modules.output.table.html; import org.pentaho.reporting.engine.classic.core.AttributeNames; import org.pentaho.reporting.engine.classic.core.InvalidReportStateException; import org.pentaho.reporting.engine.classic.core.ReportAttributeMap; import org.pentaho.reporting.engine.classic.core.layout.model.BlockRenderBox; import org.pentaho.reporting.engine.classic.core.layout.model.CanvasRenderBox; import org.pentaho.reporting.engine.classic.core.layout.model.InlineRenderBox; import org.pentaho.reporting.engine.classic.core.layout.model.LayoutNodeTypes; import org.pentaho.reporting.engine.classic.core.layout.model.ParagraphRenderBox; import org.pentaho.reporting.engine.classic.core.layout.model.RenderBox; import org.pentaho.reporting.engine.classic.core.layout.model.RenderNode; import org.pentaho.reporting.engine.classic.core.layout.model.RenderableComplexText; import org.pentaho.reporting.engine.classic.core.layout.model.RenderableReplacedContent; import org.pentaho.reporting.engine.classic.core.layout.model.RenderableReplacedContentBox; import org.pentaho.reporting.engine.classic.core.layout.model.RenderableText; import org.pentaho.reporting.engine.classic.core.layout.model.SpacerRenderNode; import org.pentaho.reporting.engine.classic.core.layout.model.context.BoxDefinition; import org.pentaho.reporting.engine.classic.core.layout.output.OutputProcessorMetaData; import org.pentaho.reporting.engine.classic.core.layout.process.text.RichTextSpec; import org.pentaho.reporting.engine.classic.core.layout.text.GlyphList; import org.pentaho.reporting.engine.classic.core.modules.output.table.base.DefaultTextExtractor; import org.pentaho.reporting.engine.classic.core.modules.output.table.html.helper.HtmlOutputProcessingException; import org.pentaho.reporting.engine.classic.core.modules.output.table.html.helper.HtmlTagHelper; import org.pentaho.reporting.engine.classic.core.modules.output.table.html.helper.HtmlTextExtractorHelper; import org.pentaho.reporting.engine.classic.core.modules.output.table.html.helper.HtmlTextExtractorState; import org.pentaho.reporting.engine.classic.core.modules.output.table.html.helper.StyleBuilder; import org.pentaho.reporting.engine.classic.core.style.ElementStyleKeys; import org.pentaho.reporting.engine.classic.core.style.StyleSheet; import org.pentaho.reporting.engine.classic.core.util.InstanceID; import org.pentaho.reporting.libraries.repository.ContentIOException; import org.pentaho.reporting.libraries.xmlns.common.AttributeList; import org.pentaho.reporting.libraries.xmlns.writer.CharacterEntityParser; import org.pentaho.reporting.libraries.xmlns.writer.HtmlCharacterEntities; import org.pentaho.reporting.libraries.xmlns.writer.XmlWriter; import org.pentaho.reporting.libraries.xmlns.writer.XmlWriterSupport; import java.io.IOException; /** * Creation-Date: 02.11.2007, 15:58:29 * * @author Thomas Morgner */ public class HtmlTextExtractor extends DefaultTextExtractor { private static final String DIV_TAG = "div"; private static final String BR_TAG = "br"; private XmlWriter xmlWriter; private StyleBuilder styleBuilder; private CharacterEntityParser characterEntityParser; private boolean result; private HtmlTextExtractorState processStack; private HtmlTextExtractorHelper textExtractorHelper; public HtmlTextExtractor( final OutputProcessorMetaData metaData, final XmlWriter xmlWriter, final HtmlContentGenerator contentGenerator, final HtmlTagHelper tagHelper ) { super( metaData ); if ( xmlWriter == null ) { throw new NullPointerException(); } if ( contentGenerator == null ) { throw new NullPointerException(); } this.xmlWriter = xmlWriter; this.styleBuilder = tagHelper.getStyleBuilder(); this.characterEntityParser = HtmlCharacterEntities.getEntityParser(); this.textExtractorHelper = new HtmlTextExtractorHelper( tagHelper, xmlWriter, metaData, contentGenerator ); } public boolean performOutput( final RenderBox content, final StyleBuilder.StyleCarrier[] cellStyle ) throws IOException { styleBuilder.clear(); clearText(); setRawResult( null ); result = false; processStack = new HtmlTextExtractorState( null, false, cellStyle ); textExtractorHelper.setFirstElement( content.getInstanceId(), processStack ); try { final int nodeType = content.getNodeType(); if ( nodeType == LayoutNodeTypes.TYPE_BOX_PARAGRAPH ) { processInitialBox( (ParagraphRenderBox) content ); } else if ( nodeType == LayoutNodeTypes.TYPE_BOX_CONTENT ) { processRenderableContent( (RenderableReplacedContentBox) content ); } else { processBoxChilds( content ); } } finally { processStack = null; } return result; } /** * Prints the contents of a canvas box. This can happen only once per cell, as every canvas box creates its own cell * at some point. If for some strange reason a canvas box appears in the middle of a box-structure, your layouter is * probably a mess and this method will treat the box as a generic content container. * * @param box * the canvas box * @return true, if the child content will be processed, false otherwise. */ protected boolean startCanvasBox( final CanvasRenderBox box ) { if ( box.getStaticBoxLayoutProperties().isVisible() == false ) { return false; } return textExtractorHelper.startBox( box.getInstanceId(), box.getAttributes(), box.getStyleSheet(), box .getBoxDefinition(), false ); } protected void finishCanvasBox( final CanvasRenderBox box ) { if ( box.getStaticBoxLayoutProperties().isVisible() == false ) { return; } textExtractorHelper.finishBox( box.getInstanceId(), box.getAttributes() ); } /** * Prints a paragraph cell. This is a special entry point used by the processContent method and is never called from * elsewhere. This method assumes that the attributes of the paragraph have been processed as part of the table-cell * processing. * * @param box * the paragraph box * @throws IOException * if an IO error occured. */ protected void processInitialBox( final ParagraphRenderBox box ) throws IOException { if ( box.getStaticBoxLayoutProperties().isVisible() == false ) { return; } final StyleSheet styleSheet = box.getStyleSheet(); final String target = (String) styleSheet.getStyleProperty( ElementStyleKeys.HREF_TARGET ); if ( target != null ) { textExtractorHelper.handleLinkOnElement( styleSheet, target ); processStack = new HtmlTextExtractorState( processStack, true ); } else { processStack = new HtmlTextExtractorState( processStack, false ); } if ( Boolean.TRUE.equals( box.getAttributes().getAttribute( AttributeNames.Html.NAMESPACE, AttributeNames.Html.SUPPRESS_CONTENT ) ) == false ) { processParagraphChilds( box ); } if ( processStack.isWrittenTag() ) { xmlWriter.writeCloseTag(); } processStack = processStack.getParent(); } protected void addEmptyBreak() { try { xmlWriter.writeText( " " ); } catch ( final IOException e ) { throw new HtmlOutputProcessingException( "Failed to perform IO", e ); } } protected void addSoftBreak() { try { xmlWriter.writeText( " " ); } catch ( final IOException e ) { throw new HtmlOutputProcessingException( "Failed to perform IO", e ); } } protected void addLinebreak() { try { result = true; xmlWriter.writeTag( HtmlPrinter.XHTML_NAMESPACE, BR_TAG, XmlWriterSupport.CLOSE ); } catch ( final IOException e ) { throw new HtmlOutputProcessingException( "Failed to perform IO", e ); } } protected boolean startBlockBox( final BlockRenderBox box ) { if ( box.getStaticBoxLayoutProperties().isVisible() == false ) { return false; } return textExtractorHelper.startBox( box.getInstanceId(), box.getAttributes(), box.getStyleSheet(), box .getBoxDefinition(), true ); } protected void finishBlockBox( final BlockRenderBox box ) { if ( box.getStaticBoxLayoutProperties().isVisible() == false ) { return; } textExtractorHelper.finishBox( box.getInstanceId(), box.getAttributes() ); } /** * Like a canvas box, a row-box should be split into several cells already. Therefore we treat it as a generic content * container instead. */ protected boolean startRowBox( final RenderBox box ) { if ( box.getStaticBoxLayoutProperties().isVisible() == false ) { return false; } return textExtractorHelper.startBox( box.getInstanceId(), box.getAttributes(), box.getStyleSheet(), box .getBoxDefinition(), true ); } protected void finishRowBox( final RenderBox box ) { if ( box.getStaticBoxLayoutProperties().isVisible() == false ) { return; } textExtractorHelper.finishBox( box.getInstanceId(), box.getAttributes() ); } protected boolean startInlineBox( final InlineRenderBox box ) { if ( box.getStaticBoxLayoutProperties().isVisible() == false ) { return false; } return textExtractorHelper.startInlineBox( box.getInstanceId(), box.getAttributes(), box.getStyleSheet(), box .getBoxDefinition() ); } protected void finishInlineBox( final InlineRenderBox box ) { if ( box.getStaticBoxLayoutProperties().isVisible() == false ) { return; } textExtractorHelper.finishBox( box.getInstanceId(), box.getAttributes() ); } protected void processOtherNode( final RenderNode node ) { try { final int nodeType = node.getNodeType(); if ( nodeType == LayoutNodeTypes.TYPE_NODE_TEXT || nodeType == LayoutNodeTypes.TYPE_NODE_COMPLEX_TEXT ) { super.processOtherNode( node ); return; } if ( node.isVirtualNode() ) { return; } if ( nodeType == LayoutNodeTypes.TYPE_NODE_SPACER ) { final SpacerRenderNode spacer = (SpacerRenderNode) node; final int count = Math.max( 1, spacer.getSpaceCount() ); for ( int i = 0; i < count; i++ ) { xmlWriter.writeText( " " ); } } } catch ( final IOException e ) { throw new RuntimeException( "Failed", e ); } } protected void processRenderableContent( final RenderableReplacedContentBox node ) { try { final ReportAttributeMap map = node.getAttributes(); final AttributeList attrs = new AttributeList(); HtmlTagHelper.applyHtmlAttributes( map, attrs ); if ( attrs.isEmpty() == false ) { xmlWriter.writeTag( HtmlPrinter.XHTML_NAMESPACE, DIV_TAG, attrs, XmlWriterSupport.OPEN ); } textExtractorHelper.writeLocalAnchor( node.getStyleSheet() ); final StyleSheet styleSheet = node.getStyleSheet(); final String target = (String) styleSheet.getStyleProperty( ElementStyleKeys.HREF_TARGET ); if ( target != null ) { textExtractorHelper.handleLinkOnElement( styleSheet, target ); } processReplacedContent( node ); if ( target != null ) { xmlWriter.writeCloseTag(); } if ( attrs.isEmpty() == false ) { xmlWriter.writeCloseTag(); } } catch ( final IOException e ) { throw new RuntimeException( "Failed", e ); } catch ( final ContentIOException e ) { throw new RuntimeException( "Failed", e ); } } /** * @noinspection StringConcatenation */ private void processReplacedContent( final RenderableReplacedContentBox node ) throws IOException, ContentIOException { final RenderableReplacedContent rc = node.getContent(); final ReportAttributeMap attrs = node.getAttributes(); final long width = node.getWidth(); final long height = node.getHeight(); final long contentWidth = rc.getContentWidth(); final long contentHeight = rc.getContentHeight(); final StyleSheet styleSheet = node.getStyleSheet(); final Object rawObject = rc.getRawObject(); // We have to do three things here. First, we have to check what kind // of content we deal with. if ( textExtractorHelper.processRenderableReplacedContent( attrs, styleSheet, width, height, contentWidth, contentHeight, rawObject ) ) { result = true; } } protected void drawText( final RenderableText renderableText, final long contentX2 ) { try { if ( renderableText.getLength() == 0 ) { // This text is empty. return; } if ( renderableText.isNodeVisible( getParagraphBounds(), isOverflowX(), isOverflowY() ) == false ) { return; } final String text; final GlyphList gs = renderableText.getGlyphs(); final int maxLength = renderableText.computeMaximumTextSize( contentX2 ); text = gs.getText( renderableText.getOffset(), maxLength, getCodePointBuffer() ); if ( text.length() > 0 ) { xmlWriter.writeText( characterEntityParser.encodeEntities( text ) ); if ( text.trim().length() > 0 ) { result = true; } clearText(); } } catch ( final IOException ioe ) { throw new InvalidReportStateException( "Failed to write text", ioe ); } } protected void drawComplexText( final RenderableComplexText renderableComplexText ) { try { if ( renderableComplexText.getRichText().isEmpty() ) { // This text is empty. return; } if ( renderableComplexText.isNodeVisible( getParagraphBounds(), isOverflowX(), isOverflowY() ) == false ) { return; } // iterate through all inline elements for ( final RichTextSpec.StyledChunk styledChunk : renderableComplexText.getRichText().getStyleChunks() ) { RenderNode node = styledChunk.getOriginatingTextNode(); InstanceID dummy = node.getInstanceId(); textExtractorHelper.startInlineBox( dummy, styledChunk.getOriginalAttributes(), styledChunk.getStyleSheet(), BoxDefinition.EMPTY ); if ( node instanceof RenderableReplacedContentBox ) { processRenderableContent( (RenderableReplacedContentBox) node ); result = true; } else { String text = styledChunk.getText(); xmlWriter.writeText( characterEntityParser.encodeEntities( text ) ); if ( text.trim().length() > 0 ) { result = true; } } textExtractorHelper.finishBox( dummy, styledChunk.getOriginalAttributes() ); clearText(); } } catch ( final IOException ioe ) { throw new InvalidReportStateException( "Failed to write text", ioe ); } } }