package com.formulasearchengine.mathosphere.mathpd.pojos;
import com.formulasearchengine.mathosphere.mathpd.Distances;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.HashMap;
/**
* Represents a document with only those features and data we are interested in our later analysis pipeline.
* <p>
* Created by felix on 07.12.16.
*/
public class ExtractedMathPDDocument implements Comparable<ExtractedMathPDDocument>, Serializable {
private static final String ID_SEPARATOR = "/";
public String title;
public String text;
public String name;
private String page;
private HashMap<String, Double> histogramCn = new HashMap<>();
private HashMap<String, Double> histogramCsymbol = new HashMap<>();
private HashMap<String, Double> histogramCi = new HashMap<>();
private HashMap<String, Double> histogramBvar = new HashMap<>();
public ExtractedMathPDDocument() {
}
public ExtractedMathPDDocument(String title, String text) {
this.title = title;
this.text = text;
}
public static String getNameFromId(String id) {
return id.split(ID_SEPARATOR)[0];
}
public static String getPageFromId(String id) {
return id.split(ID_SEPARATOR)[1];
}
public HashMap<String, Double> getHistogramBvar() {
return histogramBvar;
}
public void setHistogramBvar(HashMap<String, Double> histogramBvar) {
this.histogramBvar = histogramBvar;
}
public HashMap<String, Double> getHistogramCn() {
return histogramCn;
}
public void setHistogramCn(HashMap<String, Double> histogramCn) {
this.histogramCn = histogramCn;
}
public HashMap<String, Double> getHistogramCsymbol() {
return histogramCsymbol;
}
public void setHistogramCsymbol(HashMap<String, Double> histogramCsymbol) {
this.histogramCsymbol = histogramCsymbol;
}
public HashMap<String, Double> getHistogramCi() {
return histogramCi;
}
public void setHistogramCi(HashMap<String, Double> histogramCi) {
this.histogramCi = histogramCi;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getText() {
return text;
}
public void setText(String text) {
this.text = text;
}
public String getId() {
return this.getName() + ID_SEPARATOR + this.getPage();
}
@Override
public String toString() {
return "[title=" + title + ", name=" + name + ", page=" + page + ", text=" + StringUtils.abbreviate(text, 100) + "]";
}
@Override
public int compareTo(ExtractedMathPDDocument o) {
return this.getText().compareTo(o.getText());
}
@Override
public int hashCode() {
return this.getTitle().hashCode() + this.getText().hashCode();
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getPage() {
//return "none";
return page;
}
public void setPage(String page) {
this.page = page;
}
public void mergeOtherIntoThis(ExtractedMathPDDocument other) {
if (!this.name.equals(other.name)) {
throw new RuntimeException("name is not equal : " + name + " vs " + other.name);
}
this.histogramBvar = Distances.histogramsPlus(this.histogramBvar, other.histogramBvar);
this.histogramCi = Distances.histogramsPlus(this.histogramCi, other.histogramCi);
this.histogramCn = Distances.histogramsPlus(this.histogramCn, other.histogramCn);
this.histogramCsymbol = Distances.histogramsPlus(this.histogramCsymbol, other.histogramCsymbol);
}
}