/* * Copyright 2006-2012 ICEsoft Technologies Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the * License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.icepdf.core.pobjects.graphics.text; import java.awt.geom.AffineTransform; import java.util.ArrayList; /** * Page text represents the root element of a page's text hierarchy which * looks something like this. * <ul> * PageText -> LineText* -> WordText* -> GlyphText* * </ul> * The hierarchy elements are build by the content parser when text extraction * is enabled. It is build to seperate the huristics used to calculate * word and line detection which is used for text extraction/search, * search highlighting and text highlighting. * <p/> * It very important to note that all coordinates system represented in this * hierarchy of object has been normalized to the page space. This allows for * object to be sorted and drawn. Also this structure is not used for page * layout and painting. It is is used for painting text selectin via UI input * or search. The seperation is needed so that the text represented in Page * text can be padded and sorted to aid in text extraction readability. * * @since 4.0 */ public class PageText implements TextSelect { // pointer to current line during document parse, no other use. private LineText currentLine; private ArrayList<LineText> pageLines; public PageText() { pageLines = new ArrayList<LineText>(50); } public void newLine() { // make sure we don't insert a new line if the previous has no words. if (currentLine != null && currentLine.getWords().size() == 0){ return; } currentLine = new LineText(); pageLines.add(currentLine); } public void addGlyph(GlyphText sprite) { if (currentLine == null) { newLine(); } currentLine.addText(sprite); } public ArrayList<LineText> getPageLines() { return pageLines; } /** * Utility method to normalize text created in a Xform content stream * and is only called from the contentParser when parsing 'Do' token. * * @param transform do matrix tranform */ public void applyXObjectTransform(AffineTransform transform) { for (LineText lineText : pageLines) { lineText.clearBounds(); for (WordText wordText : lineText.getWords()) { wordText.clearBounds(); for (GlyphText glyph : wordText.getGlyphs()) { glyph.normalizeToUserSpace(transform); } } } } public void clearSelected(){ for (LineText lineText : pageLines) { lineText.clearSelected(); } } public void clearHighlighted(){ for (LineText lineText : pageLines) { lineText.clearHighlighted(); } } public StringBuilder getSelected() { StringBuilder selectedText = new StringBuilder(); for (LineText lineText : pageLines) { selectedText.append(lineText.getSelected()); } return selectedText; } public void selectAll() { for (LineText lineText : pageLines) { lineText.selectAll(); } } public void deselectAll() { for (LineText lineText : pageLines) { lineText.clearSelected(); } } public void dispose() { if (pageLines != null) { pageLines.clear(); pageLines.trimToSize(); } } public String toString() { StringBuilder extractedText = new StringBuilder(); for (LineText lineText : pageLines) { for (WordText wordText : lineText.getWords()) { extractedText.append(wordText.getText()); } extractedText.append('\n'); } return extractedText.toString(); } }