package ecologylab.bigsemantics.html.documentstructure;
import java.util.ArrayList;
import java.util.TreeMap;
import org.w3c.dom.Node;
import ecologylab.bigsemantics.html.DOMParserInterface;
import ecologylab.bigsemantics.html.ImgElement;
import ecologylab.bigsemantics.html.ParagraphText;
import ecologylab.net.ParsedURL;
/**
* Generates surrogates for documents that have been recognized as content pages.
*
* @author eunyee
*
*/
public class ContentPage extends RecognizedDocumentStructure
{
	/**
	 * Creates the content-page structure for a document.
	 *
	 * @param purl location of the document; handed unchanged to the superclass constructor
	 */
	public ContentPage(ParsedURL purl)
	{
		super(purl);
	}

	/**
	 * Builds surrogates for a page that is a content page or an index-content page.
	 * <p>
	 * Two kinds of surrogate are produced:
	 * <ol>
	 * <li>image+text surrogates from inside the main article body;</li>
	 * <li>surrogates for other pages, from the images that remain afterwards (typically those
	 * at the bottom or side of the page).</li>
	 * </ol>
	 * The calls below share {@code imgNodes} and must stay in this order: the image+text step
	 * removes the images it uses, and the final step works only on what is left.
	 *
	 * @param contentBody  node of the main content body; must not be {@code null}, since its
	 *                     parent node is looked up here
	 * @param imgNodes     image elements for the page; modified while surrogates are built
	 * @param totalTxtLeng total text length, forwarded to the other-pages image step
	 * @param paraTexts    paragraph texts, forwarded to the image+text association step
	 * @param htmlType     parser callback interface; {@code setContent()} is invoked on it
	 *                     between the two surrogate-building steps
	 */
	@Override
	public void generateSurrogates(Node contentBody, ArrayList<ImgElement> imgNodes, int totalTxtLeng,
			TreeMap<Integer, ParagraphText> paraTexts, DOMParserInterface htmlType)
	{
		// The image search starts one level above the content body, not at the body itself.
		// NOTE(review): getParentNode() can be null for a parentless node; whether the helper
		// tolerates that is not visible from this class.
		Node imgSearchRoot = contentBody.getParentNode();
		findImgsInContentBodySubTree(imgSearchRoot, imgNodes);

		// Pairs images with text for the content body; removes the used images from imgNodes.
		associateImageTextSurrogates(htmlType, contentBody, paraTexts);

		htmlType.setContent();

		// Acts only on the image nodes left over after the association step.
		constructImgSurrogatesForOtherPages(imgNodes, totalTxtLeng, htmlType);
	}
}