package com.fpcms.scheduled.job; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import org.springframework.util.Assert; import com.fpcms.common.blog_post.AccountBlogPosterDecorator; import com.fpcms.common.blog_post.Blog; import com.fpcms.common.blog_post.BlogPoster; import com.fpcms.common.blog_post.impl.ChinaUnixBlogPoster; import com.fpcms.common.blog_post.impl.CnblogBlogPoster; import com.fpcms.common.blog_post.impl.MetaWeblogBlogPoster; import com.fpcms.common.blog_post.impl.OschinaBlogPoster; import com.fpcms.common.cache.Cache; import com.fpcms.common.cache.CacheManager; import com.fpcms.common.random_gen_article.NaipanArticleGeneratorUtil; import com.fpcms.common.util.HtmlFormatUtil; import com.fpcms.common.util.RandomUtil; import com.fpcms.common.util.ThreadUtil; import com.fpcms.common.webcrawler.htmlparser.HtmlPage; import com.fpcms.common.webcrawler.htmlparser.HtmlPageCrawler; import com.fpcms.common.webcrawler.htmlparser.SinglePageCrawler; import com.fpcms.common.webcrawler.htmlparser.HtmlPage.Anchor; import com.fpcms.model.CmsDomain; import com.fpcms.model.CmsKeyValue; import com.fpcms.model.CmsSite; import com.fpcms.service.CmsDomainService; import com.fpcms.service.CmsKeyValueService; import com.fpcms.service.CmsSiteService; /** * 自动针对外网发布外链BLOG * * @author badqiu * */ @Service public class AutoPublishOuterBlogJob extends BaseCronJob{ private static Logger logger = LoggerFactory.getLogger(AutoPublishOuterBlogJob.class); private List<BlogPoster> posterList = new ArrayList<BlogPoster>(); private CmsDomainService cmsDomainService; private CmsSiteService cmsSiteService; private CmsKeyValueService cmsKeyValueService; public void setCmsDomainService(CmsDomainService cmsDomainService) { this.cmsDomainService = cmsDomainService; } public void setCmsSiteService(CmsSiteService cmsSiteService) { this.cmsSiteService = cmsSiteService; } public void setCmsKeyValueService(CmsKeyValueService cmsKeyValueService) { this.cmsKeyValueService = cmsKeyValueService; } public AutoPublishOuterBlogJob() { super("1 30 2,5 * * *"); posterList.add(new AccountBlogPosterDecorator(new CnblogBlogPoster(),"fpqqchao","abc123")); posterList.add(new AccountBlogPosterDecorator(new ChinaUnixBlogPoster(),"fpqqchao","abc123")); posterList.add(new AccountBlogPosterDecorator(new ChinaUnixBlogPoster(),"blogtg123","abc123")); posterList.add(new AccountBlogPosterDecorator(new ChinaUnixBlogPoster(),"bbstg123","abc123")); // posterList.add(new AccountBlogPosterDecorator(new OschinaBlogPoster(),"fpqqchao@gmail.com","6367c48dd193d56ea7b0baad25b19455e529f5ee")); posterList.add(new MetaWeblogBlogPoster("http://sh292did.blog.163.com/","fpqqchao@gmail.com","asdf@1234")); posterList.add(new MetaWeblogBlogPoster("http://blog.sina.com.cn/u/3099457992","fpqqchao@gmail.com","asdf@1234")); posterList.add(new MetaWeblogBlogPoster("http://blog.sina.com.cn/u/3225400392","blogtg123@gmail.com","asdf@1234")); posterList.add(new MetaWeblogBlogPoster("http://blog.sina.com.cn/u/3225401060","bbstg123@gmail.com","asdf@1234")); posterList.add(new MetaWeblogBlogPoster("http://blogtg123.blog.com/","blogtg123@gmail.com","abc123")); MetaWeblogBlogPoster cto51 = new MetaWeblogBlogPoster("http://51ctoblog.blog.51cto.com","fpqqchao@gmail.com","abc123"); cto51.setCategories("【创作类型:原创】","开发技术"); posterList.add(cto51); } @Override public synchronized void executeInternal() { final List<HtmlPage> pageList = cralwerForPageList(); postAllBlog(pageList); } void postAllBlog(final List<HtmlPage> pageList) { RandomLinkPrecessor randomLinkPrecessor = new RandomLinkPrecessor(); for(BlogPoster poster : posterList) { try { HtmlPage page = getRandomValidPage(pageList); if(page == null) { break; } String transformTitle = NaipanArticleGeneratorUtil.transformArticle(page.getTitle()); String content = randomLinkPrecessor.execute(NaipanArticleGeneratorUtil.transformArticle(page.getContent())); Assert.notNull(content,"content must be not null"); Assert.isTrue(content.length() > 300,"post blog content must great 300,title:"+transformTitle); Blog blog = new Blog(transformTitle,content); poster.postBlog(blog); }catch(RuntimeException e) { logger.error("postBlog error",e); } } } HtmlPage getRandomValidPage(final List<HtmlPage> pageList) { while(true) { HtmlPage page = RandomUtil.randomRemove(pageList); if(page == null) { return null; } if(StringUtils.length(StringUtils.remove(StringUtils.trim(page.getContent())," ")) < 300) { continue; } CmsKeyValue cmsKeyValue = newOuterBlogCmsKeyValue(page.getAnchor()); if(cmsKeyValueService.exist(cmsKeyValue)) { continue; } cmsKeyValueService.create(cmsKeyValue); return page; } } private static CmsKeyValue newOuterBlogCmsKeyValue(Anchor anchor) { CmsKeyValue cmsKeyValue = new CmsKeyValue("outerBlog", anchor.getHref()); return cmsKeyValue; } public class RandomLinkPrecessor { Set<String> useedLink = new HashSet(); private String execute(String input) { Assert.hasText(input); Assert.isTrue(input.length() > 200); String transformArticle = NaipanArticleGeneratorUtil.transformArticle(input); StringBuilder content = new StringBuilder(transformArticle); content.insert(Math.min(content.length(),200), selectRandomDomain()); content.append(selectRandomDomain()); return "<pre>"+content.toString()+"</pre>"; //TODO 该段文本如果没有<pre>格式,会存在问题, } private String selectRandomDomain() { CmsDomain domain = null; for(int i = 0; i < 10; i++) { domain = cmsDomainService.randomSelectDomain(); Assert.notNull(domain,"not found any random CmsDomain"); // String link = "http://www."+domain.getDomain(); String link = domain.getYesterdayOuterLinked(); if(useedLink.contains(link)) { continue; } useedLink.add(link); return " "+link+" "; } return domain.getYesterdayOuterLinked(); } private String selectRandomSite() { List<CmsSite> siteList = cmsSiteService.findAll(); CmsSite site = RandomUtil.randomSelect(siteList); Assert.notNull(site,"not found any random CmsDomain"); String link = site.getYesterdayOuterLinked(); return link; } } private List<HtmlPage> cralwerForPageList() { SinglePageCrawler cralwer = new SinglePageCrawler(); cralwer.setSourceLang("zh-CN"); cralwer.setAcceptUrlRegexList("http://www.oschina.net/code/snippet_.*","http://\\w+.blog.51cto.com/\\d+/\\d+","http://blog.csdn.net/.*/article/details/\\d+"); cralwer.setUrlList("http://www.oschina.net/code/list/1/java", "http://blog.csdn.net/code/index.html","http://blog.csdn.net/www/index.html", "http://blog.csdn.net/web/index.html","http://blog.csdn.net/database/index.html", "http://blog.51cto.com/original/","http://blog.51cto.com/original.php?cid=0&page=2"); final List<HtmlPage> pageList = new ArrayList<HtmlPage>(); cralwer.setHtmlPageCrawler(new HtmlPageCrawler() { @Override public void visit(HtmlPage page) { ThreadUtil.sleep(1000); pageList.add(page); } @Override public boolean shoudVisitPage(Anchor a) { CmsKeyValue keyValue = newOuterBlogCmsKeyValue(a); if(cmsKeyValueService.exist(keyValue)) { return false; } return true; } }); cralwer.execute(); return pageList; } @Override public String getJobRemark() { return "发送BLOG至其它网站"; } @Override public void afterPropertiesSet() throws Exception { super.afterPropertiesSet(); Assert.notNull(cmsKeyValueService,"cmsKeyValueService must be not null"); Assert.notNull(cmsSiteService,"cmsSiteService must be not null"); Assert.notNull(cmsDomainService,"cmsDomainService must be not null"); } }