/*
* Copyright 2010-2011 Øyvind Berg (elacin@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.elacin.pdfextract.physical.word;
import org.apache.log4j.Logger;
import org.apache.log4j.MDC;
import org.elacin.pdfextract.content.PhysicalText;
import org.elacin.pdfextract.geom.Sorting;
import org.elacin.pdfextract.style.Style;
import org.elacin.pdfextract.util.FileWalker;
import org.testng.annotations.Test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
import static org.testng.Assert.assertTrue;
/**
 * Regression test for word segmentation driven by recorded character spacings.
 *
 * <p>Every {@code .spacing} file below {@code target/test-classes/spacings} is read as a sequence
 * of four-line records. For each record a line of single-character {@link PhysicalText}s is
 * rebuilt from the recorded distances and handed to
 * {@code WordSegmentatorImpl.createWordsInLine}; the record counts as correct when the number of
 * words found matches the number of words in the expected answer. The test passes while the
 * number of wrong records stays below a small fraction of all records.
 *
 * <p>Created 01.12.10 by elacin.
 */
public class TestSpacing2 {
    // ------------------------------ FIELDS ------------------------------
    private static final Logger log = Logger.getLogger(TestSpacing2.class);

    /** Number of consecutive lines in a spacing file that make up one record. */
    private static final int LINES_PER_RECORD = 4;

    /**
     * Highest tolerated share of wrong records. NOTE: this is a fraction of the total (0.004 of
     * all records), even though the summary log line prints it followed by a '%' sign.
     */
    private static final float ERROR_PERCENT = 0.004f;

    // -------------------------- STATIC METHODS --------------------------

    /**
     * Parses a textual list of distances such as {@code "[1.0, 2.5, 0.3]"}.
     *
     * <p>The text between the first '[' and the last ']' is split on commas and spaces. Leading and
     * trailing characters outside the brackets are ignored, so the result does not depend on how
     * much whitespace surrounds the list. If no bracket pair is found the whole trimmed string is
     * tokenized.
     *
     * @param s         the line holding the distances
     * @param distances list the parsed values are appended to
     * @return the content of {@code distances} (including anything it held before) as an array
     * @throws NumberFormatException if a token is not a valid float
     */
    private static float[] parseDistancesString(final String s, final List<Float> distances) {
        final String trimmed = s.trim();
        final int open = trimmed.indexOf('[');
        final int close = trimmed.lastIndexOf(']');

        final String distancesString;
        if (open >= 0 && close > open) {
            distancesString = trimmed.substring(open + 1, close);
        } else {
            distancesString = trimmed;
        }

        final StringTokenizer tokenizer = new StringTokenizer(distancesString, ", ");
        while (tokenizer.hasMoreTokens()) {
            distances.add(Float.valueOf(tokenizer.nextToken()));
        }

        final float[] distancesArray = new float[distances.size()];
        for (int i = 0; i < distancesArray.length; i++) {
            distancesArray[i] = distances.get(i);
        }
        return distancesArray;
    }

    // -------------------------- PUBLIC METHODS --------------------------

    /**
     * Runs all spacing records and fails when the number of wrongly segmented records reaches
     * {@code total * ERROR_PERCENT}.
     *
     * @throws IOException if a spacing file cannot be listed or read
     */
    @Test
    public void testSpacings() throws IOException {
        final List<File> files = FileWalker.getFileListing(new File("target/test-classes/spacings"),
                                                           ".spacing");
        int correct = 0;
        int total = 0;

        for (File file : files) {
            MDC.put("testInfo", file.getName());
            try {
                final int[] counts = processFile(file);
                log.info("LOG00820:File " + file + ": got " + counts[0] + " of " + counts[1]);
                correct += counts[0];
                total += counts[1];
            } finally {
                // always clear the diagnostic context, also when a file fails to parse
                MDC.remove("testInfo");
            }
        }

        final int errors = total - correct;
        final float errorLimit = (float) total * ERROR_PERCENT;

        log.warn("###############################");
        log.warn("TOTAL: = " + total);
        log.warn("ERRORS: = " + errors);
        log.warn("ERRORLIMIT: = " + errorLimit + " (" + ERROR_PERCENT + "%)");
        log.warn("###############################");

        // NOTE(review): the short pause presumably lets the logger flush before the assertion
        // output is printed -- confirm whether it is still needed.
        try {
            Thread.sleep(200);
        } catch (InterruptedException e) {
            // keep the interrupt visible to the test runner instead of swallowing it
            Thread.currentThread().interrupt();
            log.warn("interrupted while waiting before the final assertion", e);
        }

        assertTrue(errors < errorLimit,
                   "too many wrongly segmented records: " + errors + " of " + total
                   + " (limit " + errorLimit + ")");
    }

    // -------------------------- OTHER METHODS --------------------------

    // Disabled earlier variant of the check that used CharSpacingFinder directly; kept for
    // reference.
    //
    //private String findResult(final String base,
    // final float fontSize,
    // final List<Float> distances,
    // final float[] distancesArray) {
    // final float charspace = CharSpacingFinder.calculateCharspacingForDistances(distances,
    // fontSize);
    //
    // StringBuilder sb = new StringBuilder();
    // sb.append(base.charAt(0));
    // int strIndex = 1;
    // for (int i = 0; i < distancesArray.length; i++) {
    // float distance = distancesArray[i];
    //
    // if (distance > charspace) {
    // sb.append(" ");
    // }
    //
    // final char c = base.charAt(strIndex++);
    // sb.append(c);
    // }
    //
    // return sb.toString();
    //}

    /**
     * Reads one spacing file and evaluates each complete record in it.
     *
     * <p>Lines are collected in groups of {@link #LINES_PER_RECORD}; a trailing incomplete group
     * is ignored. The reader is closed even if evaluating a record throws.
     *
     * @param file the spacing file to read
     * @return a two-element array: {@code [0]} the number of correct records, {@code [1]} the
     *         number of records evaluated
     * @throws IOException if the file cannot be opened or read
     */
    private int[] processFile(final File file) throws IOException {
        int fileCorrect = 0;
        int fileTotal = 0;

        final BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            final String[] input = new String[LINES_PER_RECORD];
            int filled = 0;
            String s;
            // noinspection NestedAssignment
            while ((s = reader.readLine()) != null) {
                input[filled++] = s;
                if (filled == LINES_PER_RECORD) {
                    if (processInput(input)) {
                        fileCorrect++;
                    }
                    fileTotal++;
                    filled = 0;
                }
            }
        } finally {
            reader.close();
        }
        return new int[] { fileCorrect, fileTotal };
    }

    /**
     * Evaluates a single record.
     *
     * <p>{@code input[1]} is the expected text with spaces between words, {@code input[2]} the
     * font size and {@code input[3]} the list of distances between consecutive characters;
     * {@code input[0]} is not used here. A record whose number of distances does not equal the
     * number of non-space characters minus one is logged as bad input and counted as correct.
     *
     * @param input the four lines of the record
     * @return {@code true} if the segmenter produced as many words as the expected answer
     *         contains (or the record was skipped as bad input), {@code false} otherwise
     */
    private boolean processInput(final String[] input) {
        final String answer = input[1];
        final String base = answer.replaceAll(" ", "");
        // font sizes below 8 are raised to 8; the fractional part is dropped
        final int fontSize = (int) Math.max(8.0f, Float.parseFloat(input[2]));

        final List<Float> distances = new ArrayList<Float>();
        final float[] distancesArray = parseDistancesString(input[3], distances);
        if (distancesArray.length != base.length() - 1) {
            log.info("bad input = " + Arrays.toString(input));
            return true;
        }

        // every character is laid out as a box as wide as the font size, one unit high
        final float width = fontSize;
        final Style style = new Style("font", "", fontSize, fontSize, "fontid", false, false,
                                      false);
        final List<PhysicalText> line = new ArrayList<PhysicalText>();
        float currentX = 0.0f;
        for (int i = 0; i < base.length(); i++) {
            if (i > 0) {
                currentX += distancesArray[i - 1];
            }
            line.add(new PhysicalText(String.valueOf(base.charAt(i)), style, currentX, 0.0f, width,
                                      1.0f, 0.0f));
            currentX += width;
        }
        Collections.sort(line, Sorting.sortByLowerX);

        final Collection<PhysicalText> ret = WordSegmentatorImpl.createWordsInLine(line);

        // only the number of word breaks is compared: spaces in the answer vs. words found - 1
        final boolean equals = ret.size() - 1 == answer.length() - base.length();

        final StringBuilder sb = new StringBuilder();
        for (PhysicalText physicalText : ret) {
            sb.append(physicalText.getText()).append(" ");
        }
        if (sb.length() > 0) {
            // drop the trailing separator; guarded so an empty result cannot throw
            sb.setLength(sb.length() - 1);
        }
        final String result = sb.toString();

        // final String result = findResult(base, fontSize, distances, distancesArray);
        //
        // final boolean equals = answer.equals(result);
        if (!equals) {
            log.warn("wrong result: got '" + result + "', expected: '" + answer + "'");
            // findResult(base, fontSize, distances, distancesArray);
        }
        return equals;
    }
}