package org.talend.esb.examples.ebook.parser;
import java.net.URI;
import java.net.URISyntaxException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.talend.esb.examples.ebook.model.Book;
import org.talend.esb.examples.ebook.model.Format;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
/**
 * Extracts a {@link Book} and its {@link Format} entries from an RDF/XML DOM
 * document by evaluating namespace-aware XPath expressions.
 *
 * <p>NOTE(review): a single {@link XPath} instance is shared by all calls on this
 * parser; the JAXP documentation does not promise thread-safety for it, so
 * presumably one parser instance should not be used from several threads at
 * once. Confirm against callers.
 */
public class BookParser {

    /** XPath evaluator configured with the namespace prefixes used by the expressions below. */
    private final XPath xpath;

    /**
     * Creates a parser and registers the namespace prefixes referenced by its
     * XPath expressions.
     *
     * @throws XPathExpressionException declared for compatibility with existing callers
     */
    public BookParser() throws XPathExpressionException {
        NamespaceMap namespaces = new NamespaceMap();
        namespaces.add("dcterms", "http://purl.org/dc/terms/");
        namespaces.add("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
        namespaces.add("pgterms", "http://www.gutenberg.org/2009/pgterms/");
        namespaces.add("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
        namespaces.add("marcrel", "http://id.loc.gov/vocabulary/relators/");
        namespaces.add("dcam", "http://purl.org/dc/dcam/");
        namespaces.add("cc", "http://web.resource.org/cc/");

        this.xpath = XPathFactory.newInstance().newXPath();
        this.xpath.setNamespaceContext(namespaces);
    }

    /**
     * Builds a {@link Book} from the given document.
     *
     * <p>The id is read from {@code //pgterms:ebook/@rdf:about}, the title from
     * {@code //dcterms:title} and the creator from
     * {@code //dcterms:creator//pgterms:name}. Every {@code pgterms:file} element
     * is converted to a {@link Format} and appended to the book's formats.
     *
     * @param doc the DOM document to read
     * @return the populated book
     * @throws XPathExpressionException if an XPath expression cannot be evaluated
     * @throws URISyntaxException declared for compatibility with existing callers
     * @throws IllegalArgumentException if a file entry carries an invalid URI or a
     *         non-numeric extent (see {@code parseFormat})
     */
    public Book parse(Document doc) throws XPathExpressionException, URISyntaxException {
        Book book = new Book();
        book.setId(xpath.evaluate("//pgterms:ebook/@rdf:about", doc));
        book.setTitle(xpath.evaluate("//dcterms:title/text()", doc));
        book.setCreator(xpath.evaluate("//dcterms:creator//pgterms:name/text()", doc));

        NodeList files = (NodeList) xpath.evaluate("//pgterms:file", doc, XPathConstants.NODESET);
        for (int index = 0; index < files.getLength(); index++) {
            Format format = parseFormat((Element) files.item(index));
            // Any format whose media type starts with "image" becomes the cover;
            // when several match, the last one in document order wins.
            if (format.getMediaType().startsWith("image")) {
                book.setCover(format.getFile());
            }
            book.getFormats().add(format);
        }

        // Subjects are currently not extracted. The expression previously sketched
        // for them was "//dcterms:subject/rdf:Description/rdf:value".
        return book;
    }

    /**
     * Converts a single {@code pgterms:file} element into a {@link Format}.
     *
     * @param file the element describing one downloadable file
     * @return the format with file URI, extent, media type and modification stamp set
     * @throws XPathExpressionException if an XPath expression cannot be evaluated
     * @throws IllegalArgumentException if {@code @rdf:about} is not a valid URI
     * @throws NumberFormatException if {@code dcterms:extent} is missing or not an integer
     */
    private Format parseFormat(Element file) throws XPathExpressionException {
        Format format = new Format();

        String about = xpath.evaluate("@rdf:about", file);
        try {
            format.setFile(new URI(about));
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }

        // Integer.valueOf replaces the deprecated Integer(String) constructor; the
        // text is trimmed so surrounding whitespace in the XML does not break parsing.
        String extentText = xpath.evaluate("dcterms:extent", file);
        format.setExtent(Integer.valueOf(extentText.trim()));

        format.setMediaType(xpath.evaluate("dcterms:format/rdf:Description/rdf:value", file));
        format.setModified(xpath.evaluate("dcterms:modified", file));
        return format;
    }
}