package edu.isi.bmkeg.lapdf.model.ordering; import java.util.Comparator; import edu.isi.bmkeg.lapdf.features.ChunkFeatures; import edu.isi.bmkeg.lapdf.model.Block; import edu.isi.bmkeg.lapdf.model.ChunkBlock; import edu.isi.bmkeg.lapdf.model.PageBlock; import edu.isi.bmkeg.lapdf.model.spatial.SpatialEntity; public class SpatialOrdering implements Comparator<SpatialEntity> { public static final String HORIZONTAL_MODE = "horizontal"; public static final String VERTICAL_MODE = "vertical"; public static final String MIXED_MODE = "mixed"; public static final String MIXED_MODE_ABSOLUTE="mixedAbs"; public static final String COLUMN_AWARE_MIXED_MODE = "camd"; public static final String PAGE_COLUMN_AWARE_MIXED_MODE = "pcamd"; public String mode; public SpatialOrdering(String mode) { this.mode = mode; } @Override public int compare(SpatialEntity o1, SpatialEntity o2) { if (HORIZONTAL_MODE.equalsIgnoreCase(mode)) { return horizontalOrdering(o1, o2); } else if (VERTICAL_MODE.equalsIgnoreCase(mode)) { return verticalOrdering(o1, o2); } else if (MIXED_MODE.equalsIgnoreCase(mode)) { return mixedOrdering(o1, o2); }else if (MIXED_MODE_ABSOLUTE.equalsIgnoreCase(mode)) { return mixedOrderingAbsolute(o1, o2); } else if (COLUMN_AWARE_MIXED_MODE.equalsIgnoreCase(mode)) { return camdOrdering(o1, o2); } else if (PAGE_COLUMN_AWARE_MIXED_MODE.equalsIgnoreCase(mode)) { return pcamdOrdering(o1, o2); } else { return 0; } } private int camdOrdering(SpatialEntity o1, SpatialEntity o2) { String o1Allignment = ((Block) o1).readLeftRightMedLine(); String o2Allignment = ((Block) o2).readLeftRightMedLine(); int pageNumber = ((PageBlock) ((Block) o1).getContainer()) .getPageNumber(); int o1y1 = o1.getY1(); int o2y1 = o2.getY1(); int pageHeight = ((PageBlock) ((Block) o1).getContainer()) .getPageBoxHeight(); if (pageNumber == 1 && Math.abs(o1y1 - o2y1) >= .5 * pageHeight) { return mixedOrdering(o1, o2); } else if (executeHeaderFooterCheck((ChunkBlock) o1, (ChunkBlock) o2)) { return mixedOrdering(o1, o2); } if (o1Allignment.equals(o2Allignment)) { return mixedOrdering(o1, o2); } else if (Block.MIDLINE.equalsIgnoreCase(o1Allignment) || Block.MIDLINE.equalsIgnoreCase(o2Allignment)) { return mixedOrdering(o1, o2); } else if (Block.LEFT.equalsIgnoreCase(o1Allignment)) { return -1; } else if (Block.RIGHT.equalsIgnoreCase(o1Allignment)) { return 1; } return 0; } private int pcamdOrdering(SpatialEntity o1, SpatialEntity o2) { int pageNumbero1 = ((PageBlock) ((Block) o1).getContainer()) .getPageNumber(); int pageNumbero2 = ((PageBlock) ((Block) o2).getContainer()) .getPageNumber(); return (pageNumbero1 == pageNumbero2) ? camdOrdering(o1, o2) : pageNumbero1 - pageNumbero2; } private int horizontalOrdering(SpatialEntity o1, SpatialEntity o2) { int x1Diff = o1.getX1() - o2.getX1(); if (x1Diff == 0) return o1.getX2() - o2.getX2(); return x1Diff; } private int verticalOrdering(SpatialEntity o1, SpatialEntity o2) { Block block = (Block) o1; PageBlock page = (PageBlock) ((block.getContainer() instanceof PageBlock) ? block .getContainer() : block.getContainer().getContainer()); int y1Diff = o1.getY1() - o2.getY1(); if (Math.abs(y1Diff) < page.getMostPopularWordHeightPage()/2) return (Math.abs(o1.getY2() - o2.getY2()) < page .getMostPopularWordHeightPage()) ? 0 : o1.getY2() - o2.getY2(); return y1Diff; /*int y1Diff = o1.getY1() - o2.getY1(); if (y1Diff==0) return o1.getY2() - o2.getY2(); return y1Diff;*/ } private int verticalOrderingAbsolute(SpatialEntity o1, SpatialEntity o2) { int y1Diff = o1.getY1() - o2.getY1(); if (y1Diff==0) return o1.getY2() - o2.getY2(); return y1Diff; } /** * Note: in order to implement the fix for the superscript subscript induced bug (fix implemented by Abhishek November 10th) * replace the return mixedOrderingAbsolute with other commented code. * @param o1 * @param o2 * @return */ private int mixedOrdering(SpatialEntity o1, SpatialEntity o2) { //return mixedOrderingAbsolute(o1, o2); int y = verticalOrdering(o1, o2); if (y == 0) { return horizontalOrdering(o1, o2); } else { return y; } } private int mixedOrderingAbsolute(SpatialEntity o1, SpatialEntity o2) { int y = verticalOrderingAbsolute(o1, o2); if (y == 0) { return horizontalOrdering(o1, o2); } else { return y; } } // GULLY TODO: THIS IS VERY IMPORTANT private boolean executeHeaderFooterCheck(ChunkBlock o1, ChunkBlock o2) { if (o1.isHeaderOrFooter() != null && o1.isHeaderOrFooter()) return true; if (o2.isHeaderOrFooter() != null && o2.isHeaderOrFooter()) return true; if ((o1.isHeaderOrFooter() != null && !o1.isHeaderOrFooter()) && (o2.isHeaderOrFooter() != null && !o2.isHeaderOrFooter())) return false; boolean containsFirstLine1 = false; boolean containsFirstLine2 = false; boolean numberOfLineOne1 = false; boolean numberofLineOne2 = false; boolean containsLastLine1 = false; boolean containsLastLine2 = false; PageBlock p1 = (PageBlock) o1.getContainer(); PageBlock p2 = (PageBlock) o1.getContainer(); numberOfLineOne1 = o1.readNumberOfLine() == 1; numberofLineOne2 = o2.readNumberOfLine() == 1; if (!numberOfLineOne1 && !numberofLineOne2) { o1.setHeaderOrFooter(false); o2.setHeaderOrFooter(false); return false; } if (numberOfLineOne1) { containsFirstLine1 = Math.abs(o1.getY1() - p1.getMargin()[1]) < p1 .getDocument().readMostPopularWordHeight(); if (containsFirstLine1) { o1.setHeaderOrFooter(true); return true; } containsLastLine1 = Math.abs(o1.getY2() - p1.getMargin()[3]) < p1 .getDocument().readMostPopularWordHeight(); if (containsLastLine1) { o1.setHeaderOrFooter(true); return true; } o1.setHeaderOrFooter(false); } if (numberofLineOne2) { containsFirstLine2 = Math.abs(o2.getY1() - p2.getMargin()[1]) < p2 .getDocument().readMostPopularWordHeight(); if (containsFirstLine2) { o2.setHeaderOrFooter(true); return true; } containsLastLine2 = Math.abs(o2.getY2() - p2.getMargin()[3]) < p2 .getDocument().readMostPopularWordHeight(); if (containsLastLine2) { o2.setHeaderOrFooter(true); return true; } o2.setHeaderOrFooter(false); } return false; } public static void main(String argsd[]) { Boolean value = null; if (value != null) System.out.println("bklusd"); } }