package org.talend.esb.examples.ebook.parser;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.junit.Assert;
import org.junit.Test;
import org.talend.esb.examples.ebook.model.Book;
import org.talend.esb.examples.ebook.model.Format;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
/**
 * Verifies that {@link BookParser} turns an RDF catalog document loaded from the
 * test classpath into a {@link Book} with the expected fields.
 */
public class ParseTest {

    /**
     * Parses the {@code pg50180.rdf} fixture and checks the resulting book's string form,
     * id, title, creator, cover URI, number of formats, and the file and media type of the
     * first format.
     *
     * @throws Exception if the fixture cannot be read or parsed
     */
    @Test
    public void testParse() throws Exception {
        Document doc = readDocument("pg50180.rdf");
        BookParser parser = new BookParser();
        Book book = parser.parse(doc);

        Assert.assertEquals("Maugham, W. Somerset (William Somerset) - Penelope\nA Comedy in Three Acts", book.toString());
        Assert.assertEquals("ebooks/50180", book.getId());
        Assert.assertEquals("Penelope\nA Comedy in Three Acts", book.getTitle());
        Assert.assertEquals("Maugham, W. Somerset (William Somerset)", book.getCreator());

        // NOTE(review): the subject assertions below are disabled; the reason is not visible
        // in this file. Re-enable or remove once the expected subjects are confirmed.
        //Assert.assertEquals(4, book.getSubjects().size());
        //Assert.assertEquals("Justice -- Early works to 1800", book.getSubjects().get(0).getSubject());

        Assert.assertEquals("http://www.gutenberg.org/cache/epub/50180/pg50180.cover.medium.jpg", book.getCover().toString());
        Assert.assertEquals(11, book.getFormats().size());

        Format format = book.getFormats().get(0);
        Assert.assertEquals("http://www.gutenberg.org/ebooks/50180.kindle.images", format.getFile().toString());
        Assert.assertEquals("application/x-mobipocket-ebook", format.getMediaType().toString());
    }

    /**
     * Loads a classpath resource and parses it into a namespace-aware DOM document.
     *
     * @param path resource name, resolved through this class's class loader
     * @return the parsed document
     * @throws FileNotFoundException if no resource exists at {@code path}
     * @throws ParserConfigurationException if a document builder cannot be created
     * @throws SAXException if the resource is not well-formed XML
     * @throws IOException if reading or closing the resource fails
     */
    private Document readDocument(String path) throws ParserConfigurationException, SAXException, IOException {
        // try-with-resources closes the stream on every exit path; a null resource is
        // permitted by the construct and simply skipped on close.
        try (InputStream is = this.getClass().getClassLoader().getResourceAsStream(path)) {
            if (is == null) {
                throw new FileNotFoundException(path);
            }
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            dbf.setNamespaceAware(true);
            return dbf.newDocumentBuilder().parse(is);
        }
    }
}