/**
* Copyright 2008 - 2009 Pro-Netics S.P.A.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.pronetics.madstore.crawler;
import static org.custommonkey.xmlunit.XMLAssert.assertXpathEvaluatesTo;
import it.pronetics.madstore.common.AtomConstants;
import it.pronetics.madstore.common.dom.DomHelper;
import it.pronetics.madstore.repository.CollectionRepository;
import it.pronetics.madstore.repository.EntryRepository;
import it.pronetics.madstore.repository.util.PagingList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.custommonkey.xmlunit.SimpleNamespaceContext;
import org.custommonkey.xmlunit.XMLUnit;
import org.springframework.test.AbstractDependencyInjectionSpringContextTests;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
public class CrawlerTest extends AbstractDependencyInjectionSpringContextTests {
private static final String FEED_KEY_MALFORMED = "feedkeymalformed";
private static final String FEED_KEY_TEST = "feedkeytest";
private static final String FEED_KEY_TEST2 = "feedkeytest2";
private EntryRepository entryRepository;
private CollectionRepository collectionRepository;
private static final int SLEEP_TIME = 15000;
private MadStoreCrawler madStoreCrawler;
static {
Map<String, String> ns = new HashMap<String, String>();
ns.put("atom", AtomConstants.ATOM_NS);
XMLUnit.setXpathNamespaceContext(new SimpleNamespaceContext(ns));
}
public CrawlerTest() {
setAutowireMode(AUTOWIRE_BY_TYPE);
}
@Override
protected void onTearDown() throws Exception {
List<Element> collections = collectionRepository.readCollections();
for (Element collection : collections) {
String collectionKey = collection.getAttribute(AtomConstants.ATOM_KEY);
List<Element> elements = entryRepository.readEntries(collectionKey);
for (Element element : elements) {
String entryKey = element.getAttribute(AtomConstants.ATOM_KEY);
entryRepository.delete(collectionKey, entryKey);
}
collectionRepository.delete(collectionKey);
}
}
public void testAllCrawledPage() throws Exception {
madStoreCrawler.start();
Thread.sleep(SLEEP_TIME);
Element entry1 = entryRepository.read(FEED_KEY_TEST, "entrykeytest1");
assertXpathEvaluatesTo("Test title 1", "/atom:entry/atom:title", getInputSourceFromDom(entry1));
Element entry2 = entryRepository.read(FEED_KEY_TEST, "entrykeytest2");
assertXpathEvaluatesTo("Test title 2", "/atom:entry/atom:title", getInputSourceFromDom(entry2));
Element entry3 = entryRepository.read(FEED_KEY_TEST, "entrykeytest3");
assertXpathEvaluatesTo("Test title 3", "/atom:entry/atom:title", getInputSourceFromDom(entry3));
Element entry4 = entryRepository.read(FEED_KEY_TEST2, "entrykeytest4");
assertXpathEvaluatesTo("Test title 4", "/atom:entry/atom:title", getInputSourceFromDom(entry4));
Element entry5 = entryRepository.read(FEED_KEY_TEST2, "entrykeytest5");
assertXpathEvaluatesTo("Test title 5", "/atom:entry/atom:title", getInputSourceFromDom(entry5));
Element entry6 = entryRepository.read(FEED_KEY_TEST2, "entrykeytest6");
assertXpathEvaluatesTo("Test title 6", "/atom:entry/atom:title", getInputSourceFromDom(entry6));
Element entryMalFormed = entryRepository.read(FEED_KEY_MALFORMED, "entrykeymalformed1");
assertXpathEvaluatesTo("test entry entryKeyMalFormed", "/atom:entry/atom:title", getInputSourceFromDom(entryMalFormed));
}
public void testFindInCrawledPage() throws Exception {
madStoreCrawler.start();
Thread.sleep(SLEEP_TIME);
PagingList<Element> elements = entryRepository.findEntries(FEED_KEY_TEST, Arrays.asList("uniquecode"), 0, 1);
assertEquals(1, elements.size());
assertEquals(1, elements.getTotal());
assertEquals(1, elements.getMax());
assertEquals(0, elements.getOffset());
assertXpathEvaluatesTo("uniquecode", "/atom:entry/atom:summary", getInputSourceFromDom(elements.iterator().next()));
}
public void testLimitedCrawledPages() throws Exception {
madStoreCrawler.getCrawlerConfigurations().get(0).setMaxVisitedLinks(3);
madStoreCrawler.start();
Thread.sleep(SLEEP_TIME);
List<Element> collections = collectionRepository.readCollections();
assertNotNull(collections);
assertEquals(3, collections.size());
}
private InputSource getInputSourceFromDom(Element element) throws Exception {
return new InputSource(DomHelper.getStreamFromDomElement(element));
}
public void setEntryRepository(EntryRepository entryRepository) {
this.entryRepository = entryRepository;
}
public void setCollectionRepository(CollectionRepository collectionRepository) {
this.collectionRepository = collectionRepository;
}
public void setMadStoreCrawler(MadStoreCrawler madStoreCrawler) {
this.madStoreCrawler = madStoreCrawler;
}
@Override
protected String[] getConfigLocations() {
return new String[] { "classpath:repositoryApplicationContext.xml", "classpath:crawlerApplicationContext.xml" };
}
}