package ecologylab.bigsemantics.html;
import java.util.ArrayList;
import org.w3c.dom.Node;
import ecologylab.net.ParsedURL;
import ecologylab.serialization.XMLTools;
/**
 * DOM walker that collects the images and body text found in an HTML fragment,
 * together with the URL named by the first non-empty {@code container} attribute
 * encountered during the walk.
 *
 * <p>Results are accumulated in instance fields as {@link #printTag(Node)} is invoked
 * for each visited node, and are read back through the getters afterwards.
 * The "DND" in the member names presumably stands for drag-and-drop -- TODO confirm.
 */
public class DOMFragmentInformationTagger extends DOMWalkInformationTagger
{
	/** One entry per {@code img} node seen by {@link #printTag(Node)}, in visit order. */
	ArrayList<ImgElement>	dndImages			= new ArrayList<ImgElement>();

	/** Buffer handed to {@code getTextInSubTree} when a {@code body} node is visited. */
	StringBuilder			dndText				= new StringBuilder();

	/** URL taken from the first non-empty {@code container} attribute; {@code null} until one is found. */
	ParsedURL				containerPurl		= null;

	/**
	 * Creates a tagger; both arguments are passed straight to the superclass constructor.
	 *
	 * @param purl          location associated with the fragment being walked
	 * @param tidyInterface parser interface forwarded to the superclass
	 */
	public DOMFragmentInformationTagger(ParsedURL purl, DOMParserInterface tidyInterface)
	{
		super(purl, tidyInterface);
	}

	/**
	 * Inspects a single node of the fragment and records what is relevant:
	 * <ul>
	 * <li>while no container URL is known, looks for a {@code container} attribute on the node;</li>
	 * <li>an {@code img} node is wrapped in an {@link ImgElement} and appended to the image list;</li>
	 * <li>a {@code body} node has the text of its subtree appended to the text buffer.</li>
	 * </ul>
	 *
	 * <p>The superclass implementation is deliberately not invoked. Original author's note
	 * (EUNYEE): the link to the file-writing part of the superclass still needs to be removed,
	 * which is why the former call
	 * {@code super.printTag(lexer, fout, mode, indent, node)} stays disabled.
	 *
	 * @param node the node currently being visited
	 */
	@Override
	public void printTag(Node node)
	{
		if (containerPurl == null)
		{
			captureContainerPurl(node);
		}

		String tagName = node.getNodeName();
		if ("img".equals(tagName))
		{
			// purl is not declared in this class; presumably inherited from the superclass.
			dndImages.add(new ImgElement(node, purl));
		}
		else if ("body".equals(tagName))
		{
			// getTextInSubTree is not declared in this class; presumably inherited. It receives
			// dndText as the buffer to fill -- the meaning of the boolean flags is not visible here.
			getTextInSubTree(node, true, dndText, true, true);
		}
	}

	/**
	 * Reads the {@code container} attribute of the given node, if it has one, and stores the
	 * unescaped value as {@link #containerPurl}.
	 *
	 * <p>{@link Node#getAttributes()} returns {@code null} for every node that is not an element
	 * (text, comment, ...), so the attribute map is checked before it is dereferenced.
	 */
	private void captureContainerPurl(Node node)
	{
		org.w3c.dom.NamedNodeMap attributes = node.getAttributes();
		if (attributes == null)
		{
			return; // not an element: it cannot carry a container attribute
		}

		Node container = attributes.getNamedItem("container");
		if (container == null)
		{
			return;
		}

		String containerValue = container.getNodeValue();
		if (containerValue != null && containerValue.length() > 0)
		{
			// The attribute value is XML-escaped in the fragment; unescape it before parsing.
			containerValue = XMLTools.unescapeXML(containerValue);
			containerPurl = ParsedURL.getAbsolute(containerValue);
		}
	}

	/**
	 * @return the live list of images collected so far (not a copy)
	 */
	public ArrayList<ImgElement> getDNDImages()
	{
		return dndImages;
	}

	/**
	 * @return a snapshot of the text collected so far
	 */
	public String getDNDText()
	{
		return dndText.toString();
	}

	/**
	 * @return the URL obtained from the {@code container} attribute, or {@code null} if none
	 *         has been found
	 */
	public ParsedURL getContainerPurl()
	{
		return containerPurl;
	}
}