package com.fpcms.service.article_crawl; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.util.Arrays; import java.util.Date; import java.util.List; import java.util.ListIterator; import org.apache.commons.lang.RandomStringUtils; import org.apache.commons.lang.math.RandomUtils; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; import org.springframework.context.ApplicationContext; import org.springframework.context.support.ClassPathXmlApplicationContext; import com.github.rapid.common.util.DateRange; import com.github.rapid.common.util.page.Page; import com.github.rapid.common.util.page.PageQuery; import com.github.rapid.common.util.page.Paginator; import com.fpcms.model.CmsContent; import com.fpcms.service.CmsContentService; import com.fpcms.service.CmsKeyValueService; import com.fpcms.service.article_crawl.ArticleCrawlService.GoogleTranslateTransformer; import com.fpcms.service.article_crawl.ArticleCrawlService.NaipanTransformer; public class ArticleCrawlServiceTest extends Mockito{ private ArticleCrawlService articleCrawlService = new ArticleCrawlService(); ; private CmsContentService cmsContentService = mock(CmsContentService.class); private CmsKeyValueService cmsKeyValueService = mock(CmsKeyValueService.class); private ApplicationContext applicationContext; @Before public void setUp() throws Exception { applicationContext = new ClassPathXmlApplicationContext("classpath:/spring/applicationContext-crawler.xml"); articleCrawlService.setApplicationContext(applicationContext); articleCrawlService.setCmsContentService(cmsContentService); articleCrawlService.setCmsContentService(cmsContentService); articleCrawlService.setCmsKeyValueService(cmsKeyValueService); articleCrawlService.afterPropertiesSet(); } @Test public void test_hasFilterKeyword() { assertTrue(ArticleCrawlService.hasFilterKeyword("开贵州发票")); assertTrue(ArticleCrawlService.hasFilterKeyword("代开贵州发票")); } @Test public void test_return_countBySourceUrl_1() { when(cmsContentService.countBySourceUrl((Date)any(), (Date)any(), (String)any())).thenReturn(1); articleCrawlService.crawlAllSite(); verify(cmsContentService,atLeastOnce()).countBySourceUrl((Date)any(), (Date)any(), (String)any()); verify(cmsContentService,never()).create((CmsContent)any()); } @Test public void test_return_countBySourceUrl_0() { when(cmsContentService.countBySourceUrl((Date)any(), (Date)any(), (String)any())).thenReturn(0); articleCrawlService.crawlAllSite(); verify(cmsContentService,atLeastOnce()).countBySourceUrl((Date)any(), (Date)any(), (String)any()); verify(cmsContentService,atLeastOnce()).create((CmsContent)any()); } @Test public void getShoudVisitAnchorList() { List<String> list = articleCrawlService.getInvalidUrlList(); removeIgnoreSite(list); assertTrue(list.toString(),list.isEmpty()); } @Test public void mergeSmallArticle() { List<CmsContent> list = Arrays.asList(newCmsContent(300),newCmsContent(1400),newCmsContent(800),newCmsContent(1000),newCmsContent(300)); Page<CmsContent> page = new Page<CmsContent>(list,new Paginator(1, 100, 1000)); when(cmsContentService.findPage((PageQuery)any(), (String)any(), (String)any(), (DateRange)any())).thenReturn(page); articleCrawlService.mergeSmallArticle(); } @Test public void crawlFapiaoKeyword() { List<CmsContent> list = articleCrawlService.crawlKeyword("发票"); assertAndPrint(list); } @Test public void crawl_by_java_replace_invoice() { String[] keywords = {"java","phone","iphone","cameras","printer","notebook","refrigerator","mobile","car","game","novel","cartoon","movie","music","animation","suv","food","pet","travel","stock","money","fund"}; for(String keyword : keywords) { List<CmsContent> list = articleCrawlService.crawlByKeyword("en_fapiao,"+keyword,"en",keyword, "invoice", "en"); assertAndPrint(list); } } @Test public void replaceWithCaseInsentisive() { assertEquals("Jitterz invoice咖啡厅入室盗窃的风行照相机",articleCrawlService.replaceWithCaseInsentisive("Jitterz Java咖啡厅入室盗窃的风行照相机", "java", "invoice")); } private void assertAndPrint(List<CmsContent> list) { assertFalse(list.isEmpty()); for(CmsContent c : list) { System.out.println("----------------"+c.getTitle()+"---------------------"); System.out.println(c.getContent()); } } private CmsContent newCmsContent(int i) { String title = ""+RandomUtils.nextInt(); String content = RandomStringUtils.randomAlphanumeric(i); CmsContent c = new CmsContent(); c.setTitle(title); c.setContent(content); c.setId((long)i); return c; } private void removeIgnoreSite(List<String> list) { for(ListIterator<String> it = list.listIterator(); it.hasNext();) { String url = it.next(); if(url.contains("rfi.fr")) { it.remove(); } } } @Test public void test_GoogleTranslateTransformer() { GoogleTranslateTransformer t = new GoogleTranslateTransformer(); String str = t.transform("en", "Samsung Galaxy S4 Twice As Fast As iPhone 5, Even Galaxy S3 Proves Faster Than Apple’s Latest [Report]"); assertEquals("三星Galaxy S4快两倍, iPhone 5 ,即使是银河S3阅兵超苹果[举报]",str); for(int i = 0; i < 50; i++) { assertEquals("苹果的新“为什么发票”运动分析", new NaipanTransformer().transform("auto","Bridger Management's new invoice picks")); } } }