/*
* Copyright 2010-2011 Øyvind Berg (elacin@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.elacin.pdfextract.datasource.graphics;
import org.apache.log4j.Logger;
import org.elacin.pdfextract.content.GraphicContent;
import org.elacin.pdfextract.geom.Rectangle;
import org.elacin.pdfextract.geom.Sorting;
import org.jetbrains.annotations.NotNull;
import java.awt.*;
import java.awt.geom.AffineTransform;
import java.awt.geom.GeneralPath;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
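/**
* {@link DrawingSurface} implementation which records where images and vector paths are drawn.
* Images are stored as bounding boxes, vector paths are grouped with the paths they touch, and
* {@link #getGraphicContents()} turns both into a combined list of {@link GraphicContent}.
*/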
public class DrawingSurfaceImpl implements DrawingSurface {
// ------------------------------ FIELDS ------------------------------
private static final Logger log = Logger.getLogger(DrawingSurfaceImpl.class);
/**
* These lists will hold the contents while we are drawing it. This is grouped based on physical
* properties only
*/
@NotNull
final List<GeneralPath> figurePaths = new ArrayList<GeneralPath>();
@NotNull
final List<GraphicContent> pictures = new ArrayList<GraphicContent>();
@NotNull
List<GraphicContent> combined = new ArrayList<GraphicContent>();
// ------------------------ INTERFACE METHODS ------------------------
// --------------------- Interface DrawingSurface ---------------------
public void clearSurface() {
figurePaths.clear();
pictures.clear();
combined = new ArrayList<GraphicContent>();
}
@SuppressWarnings({ "NumericCastThatLosesPrecision" })
public void drawImage(@NotNull final Image image, @NotNull final AffineTransform at,
@NotNull final Shape clippingPath) {
/* transform the coordinates by using the affinetransform. */
Point2D upperLeft = at.transform(new Point2D.Float(0.0F, 0.0F), null);
Point2D dim = new Point2D.Float((float) image.getWidth(null),
(float) image.getHeight(null));
Point2D lowerRight = at.transform(dim, null);
/* this is necessary because the image might be rotated */
float x = (float) Math.min(upperLeft.getX(), lowerRight.getX());
float endX = (float) Math.max(upperLeft.getX(), lowerRight.getX());
float y = (float) Math.min(upperLeft.getY(), lowerRight.getY());
float endY = (float) Math.max(upperLeft.getY(), lowerRight.getY());
/* respect the bound if set */
final Rectangle2D bounds = clippingPath.getBounds2D();
x = (float) Math.max(bounds.getMinX(), x);
y = (float) Math.max(bounds.getMinY(), y);
if (bounds.getMaxX() > 0.0) {
endX = (float) Math.min(bounds.getMaxX(), endX);
}
if (bounds.getMaxY() > 0.0) {
endY = (float) Math.min(bounds.getMaxY(), endY);
}
/* build the finished position - this will also do some sanity checking */
org.elacin.pdfextract.geom.Rectangle pos;
try {
pos = new org.elacin.pdfextract.geom.Rectangle(x, y, endX - x, endY - y);
} catch (Exception e) {
log.warn("LOG00590:Error while adding graphics: " + e.getMessage());
return;
}
pictures.add(new GraphicContent(pos, true, Color.BLACK));
}
public void fill(@NotNull final GeneralPath originalPath, @NotNull final Color color,
Shape currentClippingPath) {
addVectorPath(originalPath, color, currentClippingPath);
}
@NotNull
public List<GraphicContent> getGraphicContents() {
if (!combined.isEmpty()) {
return combined;
}
for (GeneralPath figurePath : figurePaths) {
try {
final org.elacin.pdfextract.geom.Rectangle pos = convertRectangle(
figurePath.getBounds());
final GraphicContent newFigure = new GraphicContent(pos, false, Color.BLACK);
/*
* some times bounding boxes around text might be drawn twice, in white and in another colour
* .
* take advantage of the fact that figures with equal positions are deemed equal for the set,
* find an existing one with same position, and combine them. Prefer to keep that which
* stands
* out from the background, as that is more useful :)
*/
if (newFigure.getColor().equals(Color.WHITE)) {
continue;
}
combined.add(newFigure);
} catch (Exception e) {
log.warn("LOG00580:Error while filling path " + figurePath + ": ", e);
}
}
combineGraphics(combined);
if (pictures.isEmpty()) {
if (log.isDebugEnabled()) {
log.debug("LOG01210:no pictures to combine");
}
} else {
combineGraphics(pictures);
}
combined.addAll(pictures);
return combined;
}
public void strokePath(@NotNull final GeneralPath originalPath, @NotNull final Color color,
Shape currentClippingPath) {
addVectorPath(originalPath, color, currentClippingPath);
}
// -------------------------- STATIC METHODS --------------------------
private static void combineGraphics(@NotNull final List<GraphicContent> list) {
/**
* Segment images
*
* We segment figures and pictures separately.
*
* The segmentation is done by first finding a list of graphical content which contains
* a certain amount of text which is then excluded from segmentation (because we later on
* use these graphics to separate text, so that information is most probably useful).
*
* Then we try to identify clusters of graphics, and combine them
*
*/
final long t0 = System.currentTimeMillis();
final int originalSize = list.size();
for (Iterator<GraphicContent> iterator = list.iterator(); iterator.hasNext(); ) {
final GraphicContent content = iterator.next();
if (content.isFigure() && content.isBackgroundColor()) {
iterator.remove();
}
}
Collections.sort(list, Sorting.sortByLowerYThenLowerX);
for (int i = 0; i < list.size(); i++) {
final GraphicContent current = list.get(i);
/*
* for every current - check the rest of the graphics in the list
* to see if its possible to combine
*/
for (int j = i + 1; j < list.size(); j++) {
float minX = current.getPos().x;
float minY = current.getPos().y;
float maxX = current.getPos().endX;
float maxY = current.getPos().endY;
Color c = current.getColor();
final int firstCombinable = j;
/* since we sorted the elements there might be several in a row - combine them all */
while ((j < list.size()) && current.canBeCombinedWith(list.get(j))) {
minX = Math.min(minX, list.get(j).getPos().x);
minY = Math.min(minY, list.get(j).getPos().y);
maxX = Math.max(maxX, list.get(j).getPos().endX);
maxY = Math.max(maxY, list.get(j).getPos().endY);
if (!Color.WHITE.equals(c)) {
c = list.get(j).getColor();
}
j++;
}
/**
* combine if we found some
*/
/*
* i = 0
* firstCombinable = 2
* j = 3
* --
* combine 0 and 2 only, j is one too high.
*/
if (firstCombinable != j) {
/* first remove */
final int numToCombine = j - firstCombinable;
for (int u = 0; u < numToCombine; u++) {
list.remove(firstCombinable); // removing elements from the first one
}
list.remove(i);
/* then add the new graphic */
list.add(new GraphicContent(new Rectangle(minX, minY, maxX - minX, maxY - minY),
current.isPicture(), c));
i = -1; // start over
break;
}
}
}
if (log.isInfoEnabled() && (originalSize != list.size())) {
log.info("LOG01310:Combined " + originalSize + " graphical elements into " + list.size()
+ " in " + (System.currentTimeMillis() - t0) + "ms");
}
}
@NotNull
private static Rectangle convertRectangle(@NotNull final java.awt.Rectangle bounds) {
return new Rectangle((float) bounds.x, (float) bounds.y, (float) bounds.width,
(float) bounds.height);
}
// -------------------------- OTHER METHODS --------------------------
private void addVectorPath(@NotNull GeneralPath originalPath, @NotNull Color color,
Shape clippingPath) {
if (color.equals(Color.WHITE)) {
return;
}
// if (!clippingPath.contains(originalPath.getBounds())) {
// return;
// }
List<GeneralPath> paths = PathSplitter.splitPath(originalPath);
for (GeneralPath path : paths) {
boolean addedPath = false;
for (GeneralPath figurePath : figurePaths) {
if (figurePath.intersects(path.getBounds())) {
figurePath.append(path, true);
addedPath = true;
break;
}
}
if (!addedPath) {
GeneralPath newPath = new GeneralPath(path);
figurePaths.add(newPath);
}
}
}
}