package mj.ocraptor;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import mj.ocraptor.configuration.Config;
import mj.ocraptor.database.dao.FileEntry;
import mj.ocraptor.database.search.TextProcessing;
import mj.ocraptor.file_handler.TextExtractorSub;
public class ParserTest {
public static void test(String test) {
test = "3";
}
/**
* Let's test some shit!
*
* @throws IOException
*/
public static void main(String[] args) throws Exception {
String configFilePath = "src/test/resources/default.properties";
// ------------------------------------------------ //
String res = "src/test/resources/test-files/";
String fileToParse = null;
// fileToParse = res + "huckleberry.gif";
// fileToParse = res + "miscellaneous/russian_text.txt";
fileToParse = res + "huckleberry.pdf";
// fileToParse = "/home/foo/a/crypt/documents/allgemeine_infos.pdf";
// ------------------------------------------------ //
// ------------------------------------------------ //
Config.init(
false, false, true, false, false, false,
configFilePath, null, null, null);
// ------------------------------------------------ //
// ------------------------------------------------ //
final TextExtractorSub extractor = new TextExtractorSub();
FileEntry result = extractor.extractTextTika(new File(fileToParse));
if (result == null) {
System.out.println("result is null");
System.exit(0);
}
String original = result.getFullTextString();
String stripped = TextProcessing.postProcess(original);
// ------------------------------------------------ //
System.out.println("=== Stripped text " + StringUtils.repeat("=", 72));
System.out.println(stripped);
System.out.println(StringUtils.repeat("=", 90));
final String encodedXml = TextProcessing.encodePagePositions(original);
// Map<Integer, Integer> positions = TextProcessing.decodePagePositions(encodedXml);
// String stringToSearchFor = "know about me without you have";
// stringToSearchFor = "see no advantage in going";
// stringToSearchFor = "persons attempting to find";
// stringToSearchFor = "application";
// stringToSearchFor = "date";
// int index = TextProcessing.getPage(positions, stripped.indexOf(stringToSearchFor));
// System.out.println(index);
}
}