/************************************************************************************************** * Copyright (c) 2010 Mihail Atanassov and others. All rights reserved. This program and the * accompanying materials are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * <p/> * Contributors: <br/> * Mihail Atanassov - initial API and implementation <br/> * Fabian Steeg - Refactored for PdfBox *************************************************************************************************/ package de.uni_koeln.ub.drc.reader; /** * Represents a chunk of text, it's location points and the character space * width. */ public final class ExtractedWord { /** the text of the chunk */ private String text; /** the starting location of the chunk */ private Point startLocation; /** the ending location of the chunk */ private Point endLocation; private boolean isParagraphStart; private Float fontSize; Double pageWidth; Double pageHeight; /** * @param text * The extracted text chunk of the PDF document * @param startLocation * {@link Point} start location of the text chunk * @param endLocation * {@link Point} end location of the text chunk * @param isParagraphStart * boolean * @param fontSize * The font size * @param pageWidth * The page width * @param pageHeight * The page height */ public ExtractedWord(final String text, final Point startLocation, final Point endLocation, final boolean isParagraphStart, final Float fontSize, final Double pageWidth, final Double pageHeight) { setText(text); this.startLocation = startLocation; this.endLocation = endLocation; setParagraphStart(isParagraphStart); setFontSize(fontSize); this.pageHeight = pageHeight; this.pageWidth = pageWidth; } /** * @return The end point of the chunk */ public Point getEndPoint() { return endLocation; } /** * @return The font size */ public Float getFontSize() { return fontSize; } /** * @param height * The height of the JPG image * @return The scaled font size of the line */ public int getFontSizeScaled(final int height) { return (int) ((height * getFontSize()) / pageHeight); } /** * @return The start point of the chunk */ public Point getStartPoint() { return startLocation; } /** * @param width * The width of the JPG image * @param height * The height of the JPG image * @return The scaled starting point of the line */ public Point getStartPointScaled(final int width, final int height) { return scaled(width, height, getStartPoint()); } /** * @param width * The width of the JPG image * @param height * The height of the JPG image * @return The scaled end point of the line */ public Point getEndPointScaled(final int width, final int height) { return scaled(width, height, getEndPoint()); } private Point scaled(final int width, final int height, final Point boxPoint) { int x = getX(boxPoint.getX(), width); int y = getY(boxPoint.getY(), height); Point scaledPoint = new Point(x, y); return scaledPoint; } /** * @return the text of the text chunk */ public String getText() { return text; } /** * @return {@code true} if it's the first text chunk of an {@link Paragraph} * otherwise {@code false} */ public boolean isParagraphStart() { return isParagraphStart; } @Override public String toString() { return "'" + text + "' | " + getStartPoint() + " | " + getEndPoint();//$NON-NLS-1$//$NON-NLS-2$//$NON-NLS-3$ } private int getX(final double xStart, final int width) { return (int) ((((xStart) * width)) / pageWidth); } private int getY(final double yStart, final int height) { return (int) ((((pageHeight - yStart) * height)) / pageHeight); } private void setFontSize(final Float fontSize) { this.fontSize = fontSize; } private void setParagraphStart(final boolean isParagraphStart) { this.isParagraphStart = isParagraphStart; } private void setText(final String text) { this.text = text; } }