/* * Copyright 2010-2011 Øyvind Berg (elacin@gmail.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.elacin.pdfextract.formula; import org.elacin.pdfextract.content.StyledText; import org.elacin.pdfextract.geom.HasPosition; import org.elacin.pdfextract.style.TextUtils; import org.jetbrains.annotations.NotNull; import java.util.Collection; import static org.elacin.pdfextract.style.Style.GRAPHIC_MATH_BAR; /** * Created by IntelliJ IDEA. User: elacin Date: 29.11.10 Time: 01.30 To change this template use * File | Settings | File Templates. */ public class Formulas { // -------------------------- PUBLIC STATIC METHODS -------------------------- public static boolean stringContainsMath(@NotNull final String text1) { for (int i = 0; i < text1.length(); i++) { if (Character.getType(text1.codePointAt(i)) == Character.MATH_SYMBOL) { return true; } } return false; } public static boolean textContainsMath(@NotNull StyledText text) { if (text.getStyle().isMathFont()) { return true; } return stringContainsMath(text.getText()); } public static boolean textSeemsToBeFormula(@NotNull Collection<? extends HasPosition> contents) { if (contents.size() < 4) { return false; } if (!TextUtils.listContainsStyledText(contents)) { return false; } int looksLikeMath = 0; int wordCount = 0; int containedGraphics = 0; for (HasPosition content : contents) { if (!(content instanceof StyledText)) { continue; } StyledText word = (StyledText) content; if (word.getStyle().equals(GRAPHIC_MATH_BAR)) { containedGraphics++; continue; } wordCount += word.getText().length(); /* first check whether the whole word seems to be formatted in a math font */ if (word.getStyle().isMathFont()) { looksLikeMath += 3 * word.getText().length(); continue; } for (int i = 0; i < word.getText().length(); i++) { final char c = word.getText().charAt(i); if (Character.getType(c) == (int) Character.MATH_SYMBOL) { looksLikeMath += 3; } else if (Character.isDigit(c)) { looksLikeMath += 1; } } } looksLikeMath += containedGraphics * looksLikeMath * 0.1f; return looksLikeMath > wordCount; } }