package com.zilbo.flamingSailor.TE.model;
import java.io.PrintStream;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
/*
* Copyright 2012 Zilbo.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * A line of text represented as a {@link Component} whose text is assembled
 * from its {@link TextPiece} children.
 *
 * <p>Besides the geometric/text accessors, the class computes
 * {@link #getLineIsRegularProbability() a score} describing how "regular"
 * the line looks compared with the font and size statistics supplied by the
 * caller.
 */
public class TextLine extends Component {

    /** Matches every character that is not a regex word character. */
    private static final Pattern NON_WORD = Pattern.compile("[^\\w]");

    /** Matches a single ASCII letter anywhere in the input. */
    private static final Pattern ASCII_LETTER = Pattern.compile("[A-Za-z]");

    /** Minimum width of the label column written by {@link #dumpChildren}. */
    private static final int DUMP_LABEL_WIDTH = 20;

    /** Number of spaces used to indent continuation lines in {@link #dumpChildren}. */
    private static final int DUMP_CONTINUATION_INDENT = 43;

    /** Maximum number of characters of text written by {@link #dumpChildren}. */
    private static final int DUMP_MAX_TEXT = 256;

    /**
     * Score produced by {@link #calcLineHeightProbability} and adjusted by
     * {@link #categorizeLine}; 0.0 until one of them has run.
     */
    double lineIsRegularProbability = 0.0;

    /**
     * Creates a line with the given id and registers {@code tp} as its first child.
     *
     * @param id identifier passed to the superclass constructor
     * @param tp component added as a child of this line
     */
    public TextLine(long id, Component tp) {
        super(id);
        addChild(tp);
    }

    /**
     * Returns the size of {@code pieces}.
     *
     * <p>NOTE(review): {@code pieces} is not declared in this class; presumably it is
     * inherited from {@link Component} — confirm.
     *
     * @return {@code pieces.size()}
     */
    public int size() {
        return pieces.size();
    }

    /** Returns the same string as {@link #getText()}. */
    @Override
    public String toString() {
        return this.getText();
    }

    /**
     * Concatenates the text of all {@link TextPiece} children in child order.
     * A single space is inserted between two consecutive pieces whenever
     * {@code previous.isNextTo(current)} is false. Children that are not
     * {@code TextPiece} instances are skipped.
     *
     * @return the assembled text; empty if there are no {@code TextPiece} children
     */
    public String getText() {
        StringBuilder sb = new StringBuilder();
        TextPiece prev = null;
        for (Component component : this.getChildren()) {
            if (!(component instanceof TextPiece)) {
                continue;
            }
            TextPiece piece = (TextPiece) component;
            if (prev != null && !prev.isNextTo(piece)) {
                sb.append(' ');
            }
            sb.append(piece.getText());
            prev = piece;
        }
        return sb.toString();
    }

    /**
     * Number of word characters ({@code \w}) in the line's text divided by the
     * line's width.
     *
     * @return the ratio, or 1.0 when {@code width()} is 0
     */
    @Override
    public double density() {
        if (width() == 0) {
            return 1.0;
        }
        String wordChars = NON_WORD.matcher(this.getText()).replaceAll("");
        return (1.0) * (wordChars.length()) / this.width();
    }

    /**
     * Average child height, weighted by the length of each child's text.
     *
     * @return the weighted average, or {@code super.height()} when the children
     *         carry no text at all
     */
    @Override
    public double height() {
        double weightedHeight = 0.0;
        double totalLength = 0.0;
        for (Component c : getChildren()) {
            int len = c.getText().length();
            weightedHeight += c.height() * len;
            totalLength += len;
        }
        if (totalLength == 0.0) {
            return super.height();
        }
        return weightedHeight / totalLength;
    }

    /**
     * Computes {@link #lineIsRegularProbability} from the supplied statistics.
     *
     * <p>For every {@link TextPiece} child the font value looked up in
     * {@code normalizedFonts} and the size value looked up in
     * {@code normalizedSizes} (keyed by the rounded piece height) are multiplied
     * together; if only one of the two is present that one is used alone, and if
     * neither is present the piece contributes nothing. Each piece's value is
     * weighted by the length of its text, and the sum is divided by
     * {@code textLength}. The result is then scaled by
     * {@code 1 - |height() - highestFreqSize| / (maxFontSize - minFontSize)}.
     *
     * <p>When {@code maxFontSize == minFontSize} the scaling factor is taken to be
     * 1.0 (there is no size variation to measure against) rather than dividing by
     * zero. Note that the scaling factor, and therefore the stored score, can be
     * negative when the height difference exceeds the font size range.
     *
     * @param highestFreqSize      size the line height is compared against
     * @param minFontSize          lower end of the font size range
     * @param maxFontSize          upper end of the font size range
     * @param normalizedFontCounts currently unused by this method
     * @param normalizedFonts      value per font name
     * @param normalizedSizes      value per rounded size
     * @param textLength           divisor for the weighted sum; when 0 the score
     *                             is set to 0
     */
    protected void calcLineHeightProbability(
            double highestFreqSize,
            double minFontSize,
            double maxFontSize,
            Map<String, Map<Integer, Double>> normalizedFontCounts,
            Map<String, Double> normalizedFonts,
            Map<Integer, Double> normalizedSizes,
            double textLength) {
        if (textLength == 0) {
            lineIsRegularProbability = 0;
            return;
        }

        // Primitive accumulator: avoids re-boxing a Double on every iteration.
        double weighted = 0.0;
        for (Component c : getChildren()) {
            if (!(c instanceof TextPiece)) {
                continue;
            }
            TextPiece tp = (TextPiece) c;
            Double fontProb = normalizedFonts.get(tp.getFontName());
            Double sizeProb = normalizedSizes.get((int) Math.round(tp.getHeight()));

            final double pieceProb;
            if (fontProb != null && sizeProb != null) {
                pieceProb = fontProb * sizeProb;
            } else if (sizeProb != null) {
                pieceProb = sizeProb;
            } else if (fontProb != null) {
                pieceProb = fontProb;
            } else {
                // Neither the font nor the size is known: no contribution.
                continue;
            }
            weighted += pieceProb * tp.getText().length();
        }
        double probability = weighted / textLength;

        double fontRange = maxFontSize - minFontSize;
        double heightDiff;
        if (fontRange == 0) {
            // A zero range would yield NaN or -Infinity; apply no height penalty instead.
            heightDiff = 1.0;
        } else {
            heightDiff = 1 - Math.abs((this.height() - highestFreqSize) / fontRange);
        }
        lineIsRegularProbability = probability * heightDiff;
    }

    /**
     * Computes the line's regularity score via {@link #calcLineHeightProbability}
     * (using the length of {@link #getText()} as the text length) and then
     * applies two penalties:
     * <ul>
     *   <li>multiplied by 0.90 when the text is unchanged by {@code toUpperCase()};</li>
     *   <li>multiplied by 0.8 when the text contains no ASCII letter.</li>
     * </ul>
     *
     * <p>The letter test searches the whole text, including text that spans
     * several lines.
     *
     * @param highestFreqSize      forwarded to {@code calcLineHeightProbability}
     * @param minFontSize          forwarded to {@code calcLineHeightProbability}
     * @param maxFontSize          forwarded to {@code calcLineHeightProbability}
     * @param normalizedFontCounts forwarded to {@code calcLineHeightProbability}
     * @param normalizedFonts      forwarded to {@code calcLineHeightProbability}
     * @param normalizedSizes      forwarded to {@code calcLineHeightProbability}
     * @param avgLeft              currently unused by this method
     * @param avgRight             currently unused by this method
     * @param avgWidth             currently unused by this method
     * @param charDensity          currently unused by this method
     * @param linesPerPage         currently unused by this method
     */
    public void categorizeLine(double highestFreqSize,
                               double minFontSize,
                               double maxFontSize,
                               Map<String, Map<Integer, Double>> normalizedFontCounts,
                               Map<String, Double> normalizedFonts,
                               Map<Integer, Double> normalizedSizes,
                               double avgLeft,
                               double avgRight,
                               double avgWidth,
                               double charDensity,
                               double linesPerPage) {
        String text = getText();
        calcLineHeightProbability(
                highestFreqSize,
                minFontSize, maxFontSize,
                normalizedFontCounts,
                normalizedFonts,
                normalizedSizes,
                text.length());

        if (text.toUpperCase().equals(text)) {
            lineIsRegularProbability *= 0.90;
        }
        // find() instead of matches(".*[A-Za-z].*"): '.' does not match line
        // terminators, so the old form missed letters in multi-line text.
        if (!ASCII_LETTER.matcher(text).find()) {
            // headings need some text in there.
            lineIsRegularProbability *= 0.8;
        }
        // for regular lines we would be expecting a p() of over 40%
        // NOTE: no heading flag is set here; a score/height based heading rule
        // previously sketched at this point is disabled.
    }

    /**
     * Returns the score last computed by {@link #categorizeLine} or
     * {@link #calcLineHeightProbability}.
     *
     * @return the current score; 0.0 if none has been computed
     */
    public double getLineIsRegularProbability() {
        return lineIsRegularProbability;
    }

    /**
     * Writes a one-record debug description of this line to {@code out}:
     * an indented class label (with {@code " (H) "} when {@code isHeading()}),
     * padded to at least 20 characters, the rectangle debug string, the
     * normalised histogram string, height/width/density/score, and finally the
     * line's text. Newlines in the text are followed by a 43-space indent, and
     * text longer than 256 characters is cut to 252 characters plus {@code " ..."}.
     *
     * <p>Children are not dumped.
     *
     * @param out   stream to write to
     * @param level nesting depth; each level adds {@code ".."} to the label prefix
     */
    @Override
    public void dumpChildren(PrintStream out, int level) {
        StringBuilder sb = new StringBuilder();
        sb.append(StringUtils.repeat("..", level));
        sb.append(getClass().getSimpleName());
        if (isHeading()) {
            sb.append(" (H) ");
        }
        if (sb.length() < DUMP_LABEL_WIDTH) {
            sb.append(StringUtils.repeat(' ', DUMP_LABEL_WIDTH - sb.length()));
        }
        sb.append('\t');
        sb.append(getRectangleDebug()).append("\t");
        out.print(sb.toString() + " " + normHistoGramToString() +
                String.format(" H:%5.1f W:%6.1f D:%4.2f P:%4.2f",
                        height(), width(), density(), getLineIsRegularProbability()) +
                "\t");

        String text = getText().replace("\n", "\n" + StringUtils.repeat(' ', DUMP_CONTINUATION_INDENT));
        if (text.length() > DUMP_MAX_TEXT) {
            text = text.substring(0, DUMP_MAX_TEXT - 4) + " ...";
        }
        out.println(text);
    }
}