package com.google.sites.liberation.parsers;

import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.sites.liberation.parsers.ParserUtils.hasClass;
import static com.google.sites.liberation.util.EntryType.isPage;

import com.google.common.collect.Lists;
import com.google.gdata.data.PlainTextConstruct;
import com.google.gdata.data.sites.BaseContentEntry;
import com.google.inject.Inject;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Implements PageParser to parse an html element for any contained entries.
 *
 * @author bsimon@google.com (Benjamin Simon)
 */
final class PageParserImpl implements PageParser {

  private static final Logger LOGGER = Logger.getLogger(
      PageParserImpl.class.getCanonicalName());

  private final DocumentProvider documentProvider;
  private final EntryParser entryParser;

  /**
   * Creates a new PageParserImpl with the given dependencies.
   *
   * @param documentProvider used to turn a file into a DOM document; must not
   *     be null
   * @param entryParser used to convert a "hentry" element into an entry; must
   *     not be null
   */
  @Inject
  PageParserImpl(DocumentProvider documentProvider, EntryParser entryParser) {
    this.documentProvider = checkNotNull(documentProvider);
    this.entryParser = checkNotNull(entryParser);
  }

  /**
   * Parses the given File, returning a list of all the entries within.
   *
   * <p>Entries are collected in document order. Any page entry that comes back
   * from the entry parser without a title is given the text of the document's
   * {@code <title>} element (the last one, if there are several).
   *
   * @param file the file to parse
   * @return the entries found in the file (possibly empty), or {@code null} if
   *     the document provider threw an {@link IOException} while reading it
   */
  @Override
  public List<BaseContentEntry<?>> parsePage(File file) {
    Document document;
    try {
      document = documentProvider.getDocument(file);
    } catch (IOException e) {
      // Pass the exception along so the cause and stack trace are not lost.
      LOGGER.log(Level.WARNING, "Error parsing file: " + file, e);
      // NOTE(review): null is kept as the failure signal because callers
      // outside this file may test for it.
      return null;
    }

    List<BaseContentEntry<?>> entries = Lists.newLinkedList();
    Element root = document.getDocumentElement();
    if (root == null) {
      // A document without a root element contains no entries.
      return entries;
    }
    parseElement(root, entries);

    // The document title is looked up at most once, and only if some page
    // entry actually needs it.
    String documentTitle = null;
    boolean titleLookedUp = false;
    for (BaseContentEntry<?> entry : entries) {
      if (isPage(entry) && entry.getTitle() == null) {
        if (!titleLookedUp) {
          documentTitle = findDocumentTitle(document);
          titleLookedUp = true;
        }
        if (documentTitle != null) {
          entry.setTitle(new PlainTextConstruct(documentTitle));
        }
      }
    }
    return entries;
  }

  /**
   * Returns the text content of the last {@code <title>} element in the given
   * document, or {@code null} if the document has no such element.
   */
  private static String findDocumentTitle(Document document) {
    NodeList titles = document.getElementsByTagName("title");
    int count = titles.getLength();
    if (count == 0) {
      return null;
    }
    // When several title elements exist, the last one in document order wins.
    return titles.item(count - 1).getTextContent();
  }

  /**
   * Recursively walks the element children of {@code element}, appending an
   * entry to {@code entries} for every descendant that has the "hentry" class.
   *
   * <p>{@code q} and {@code blockquote} elements are skipped together with
   * their entire subtrees (presumably because quoted markup is not part of
   * the page's own content - confirm against the exporter).
   *
   * @param element the element whose descendants are examined
   * @param entries the list that receives parsed entries, in document order
   */
  private void parseElement(Element element, List<BaseContentEntry<?>> entries) {
    NodeList nodeList = element.getChildNodes();
    for (int i = 0; i < nodeList.getLength(); i++) {
      Node node = nodeList.item(i);
      if (node.getNodeType() != Node.ELEMENT_NODE) {
        continue;
      }
      Element child = (Element) node;
      String tagName = child.getTagName();
      if (tagName.equals("q") || tagName.equals("blockquote")) {
        continue;
      }
      if (hasClass(child, "hentry")) {
        entries.add(entryParser.parseEntry(child));
      }
      // Descend even into hentry elements so nested entries are found too.
      parseElement(child, entries);
    }
  }
}