package com.formulasearchengine.mathosphere.mlp.pojos;

import com.google.common.collect.Multiset;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import com.formulasearchengine.mathosphere.mlp.cli.BaseConfig;
import com.formulasearchengine.mathosphere.mlp.text.WikiTextUtils.MathMarkUpType;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;

import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;

import static com.formulasearchengine.mathosphere.mlp.text.MathMLUtils.extractIdentifiers;
import static com.formulasearchengine.mathosphere.mlp.text.MathMLUtils.extractIdentifiersFromMathML;

/**
 * A math element found in a document: its position, its raw content and its markup type.
 *
 * <p>Each tag can be represented by a placeholder of the form {@code FORMULA_<hash>}, where
 * the hash is derived from the tag content (see {@link #placeholder()}).
 *
 * <p>Equality and hash code are computed from {@code position}, {@code content} and
 * {@code markUpType} only; the lazily computed identifier cache is deliberately ignored so
 * that the hash code stays stable over the lifetime of an instance.
 */
public class MathTag {

  /**
   * Matches a placeholder as produced by {@link #placeholder()}: the literal prefix
   * {@code FORMULA_} followed by one or more lowercase hexadecimal digits.
   */
  public final static Pattern FORMULA_PATTERN = Pattern.compile("FORMULA_[0-9a-f]+");

  // NOTE: MD5 is kept on purpose; switching the function would change every placeholder value.
  private static final HashFunction HASHER = Hashing.md5();

  /** Opening math tag including any attributes (reluctant, does not span line breaks). */
  private static final Pattern OPENING_MATH_TAG = Pattern.compile("<math.*?>");

  /** Closing math tag. */
  private static final Pattern CLOSING_MATH_TAG = Pattern.compile("</math>");

  /**
   * Instance fields that only cache derived data and therefore must not take part in
   * {@link #equals(Object)} / {@link #hashCode()}.
   */
  private static final String[] DERIVED_FIELDS = {"indentifiers"};

  private final int position;
  private final String content;
  private final MathMarkUpType markUpType;

  // Lazily filled by getIdentifiers(BaseConfig). The (misspelled) name is kept unchanged.
  private Multiset<String> indentifiers = null;

  /**
   * Creates a math tag.
   *
   * @param position the position associated with this tag
   * @param content  the raw content of the tag
   * @param markUp   the markup type of the content
   */
  public MathTag(int position, String content, MathMarkUpType markUp) {
    this.position = position;
    this.content = content;
    this.markUpType = markUp;
  }

  /**
   * @return the position passed to the constructor
   */
  public int getPosition() {
    return position;
  }

  /**
   * @return the raw content passed to the constructor
   */
  public String getContent() {
    return content;
  }

  /**
   * Returns the content with every opening {@code <math ...>} tag and every closing
   * {@code </math>} tag removed.
   *
   * @return the content without the surrounding math tags
   */
  public String getTagContent() {
    String withoutOpeningTags = OPENING_MATH_TAG.matcher(content).replaceAll("");
    return CLOSING_MATH_TAG.matcher(withoutOpeningTags).replaceAll("");
  }

  /**
   * @return the MD5 hash of the UTF-8 encoded content, rendered as a string
   */
  public String getContentHash() {
    return HASHER.hashString(content, StandardCharsets.UTF_8).toString();
  }

  /**
   * @return the placeholder for this tag: {@code "FORMULA_"} followed by
   *     {@link #getContentHash()}
   */
  public String placeholder() {
    return "FORMULA_" + getContentHash();
  }

  /**
   * @return the markup type passed to the constructor
   */
  public MathMarkUpType getMarkUpType() {
    return markUpType;
  }

  /**
   * Extracts the identifiers from the content, treating it as MathML. The result is not cached.
   *
   * @param useTeX       forwarded to the MathML identifier extraction
   * @param useBlacklist forwarded to the MathML identifier extraction
   * @return the extracted identifiers
   * @deprecated presumably superseded by {@link #getIdentifiers(BaseConfig)}, the
   *     non-deprecated alternative in this class.
   */
  @Deprecated
  public Multiset<String> getIdentifier(boolean useTeX, boolean useBlacklist) {
    return extractIdentifiersFromMathML(getContent(), useTeX, useBlacklist);
  }

  @Override
  public String toString() {
    return "MathTag [position=" + position + ", content=" + content + "]";
  }

  /**
   * Compares position, content and markup type reflectively. The identifier cache is
   * excluded, so two tags are equal regardless of whether their identifiers have been
   * computed yet.
   */
  @Override
  public boolean equals(Object obj) {
    return EqualsBuilder.reflectionEquals(this, obj, DERIVED_FIELDS);
  }

  /**
   * Hashes position, content and markup type reflectively. The identifier cache is excluded
   * so that the hash code does not change once {@link #getIdentifiers(BaseConfig)} has been
   * called (which would break hash-based collections).
   */
  @Override
  public int hashCode() {
    return HashCodeBuilder.reflectionHashCode(this, DERIVED_FIELDS);
  }

  /**
   * Returns the identifiers of this tag, extracting them on first use and caching the result.
   * While the cached result is {@code null} or empty, every call runs the extraction again
   * with the given configuration.
   *
   * @param config supplies the TeX-identifier flag and the texvcinfo URL for the extraction
   * @return the (possibly cached) identifiers
   */
  public Multiset<String> getIdentifiers(BaseConfig config) {
    if (indentifiers == null || indentifiers.isEmpty()) {
      indentifiers = extractIdentifiers(this, config.getUseTeXIdentifiers(), config.getTexvcinfoUrl());
    }
    return indentifiers;
  }

  /**
   * @return the key of this tag, which is its {@link #placeholder()}
   */
  public String getKey() {
    return placeholder();
  }
}