package cn.ictgu.redis.task; import cn.ictgu.dto.VideoDTO; import cn.ictgu.redis.RedisSourceManager; import cn.ictgu.tools.JsoupUtils; import lombok.extern.log4j.Log4j; import lombok.extern.log4j.Log4j2; import org.apache.commons.lang.StringUtils; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Component; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * 乐视信息爬虫 * Created by Silence on 2017/2/12. */ @Component @Log4j2 public class LetvCrawler { private static final String HOME_PAGE_PC = "http://www.le.com/"; private static final String HOME_PAGE_PHONE_TV = "http://m.le.com/tv/"; private static final String HOME_PAGE_PHONE_MOVIE = "http://m.le.com/movie/"; private static final String HOME_PAGE_PHONE_CARTOON = "http://m.le.com/comic/"; private static final String HOME_PAGE_PHONE_RECOMMEND = "http://m.le.com/zongyi/"; private static final String HOME_PAGE_PHONE_TV_HOT = "http://m.le.com/top/tv"; private static final String TAG = "LETV"; @Autowired private RedisSourceManager redisSourceManager; @Scheduled(fixedRate = 60 * 60 * 1000) public void start(){ Document pcDocument = JsoupUtils.getDocWithPC(HOME_PAGE_PC); Document phoneTVDocument = JsoupUtils.getDocWithPhone(HOME_PAGE_PHONE_TV); Document phoneMovieDocument = JsoupUtils.getDocWithPhone(HOME_PAGE_PHONE_MOVIE); Document phoneCartoonDocument = JsoupUtils.getDocWithPhone(HOME_PAGE_PHONE_CARTOON); Document phoneZongyiDocument = JsoupUtils.getDocWithPhone(HOME_PAGE_PHONE_RECOMMEND); Document phoneTvHotDocument = JsoupUtils.getDocWithPhone(HOME_PAGE_PHONE_TV_HOT); saveCarouselsToRedis(pcDocument); saveRecommendsToRedis(phoneZongyiDocument); saveTVsToRedis(phoneTVDocument); saveMoviesToRedis(phoneMovieDocument); saveCartoonsToRedis(phoneCartoonDocument); saveTVHotsToRedis(phoneTvHotDocument); } private void saveCarouselsToRedis(Document document){ List<VideoDTO> carouselVideos = new ArrayList<>(); Elements carousels = document.select("div.chart-info ul.slides li"); for (Element carousel : carousels){ VideoDTO videoDTO = new VideoDTO(); String title = carousel.select("a").attr("title"); String image = carousel.select("img").attr("data-src"); String url = carousel.select("a").attr("href"); if (url.contains("le.com")){ videoDTO.setAvailable(true); videoDTO.setTitle(title); if(StringUtils.isEmpty(image)){ image = carousel.select("img").attr("img-src"); } videoDTO.setImage(image); if(!url.contains("ptv/vplay")){ Document realDocument = JsoupUtils.getDocWithPC(url); Matcher matcher = Pattern.compile("vid:\"(.*?)\"").matcher(realDocument.html()); if (matcher.find()) url = String.format("http://www.le.com/ptv/vplay/%s.html", matcher.group(1)); } videoDTO.setValue(url); log.info("title:"+title+",image:"+image+",url:"+url); carouselVideos.add(videoDTO); } } String key = redisSourceManager.VIDEO_PREFIX_HOME_CAROUSEL_KEY + "_" + TAG; redisSourceManager.saveVideos(key, carouselVideos); } private void saveRecommendsToRedis(Document document){ String key = redisSourceManager.VIDEO_PREFIX_HOME_RECOMMEND_KEY + "_" + TAG; redisSourceManager.saveVideos(key, getVideosFromPhoneDocument(document)); } private void saveTVsToRedis(Document document){ String key = redisSourceManager.VIDEO_PREFIX_HOME_TV_KEY + "_" + TAG; redisSourceManager.saveVideos(key, getVideosFromPhoneDocument(document)); } private void saveTVHotsToRedis(Document document){ String key = redisSourceManager.VIDEO_PREFIX_HOME_TV_HOT_KEY + "_" + TAG; redisSourceManager.saveVideos(key, getHostsFromPhoneDocument(document, 8)); } private void saveMoviesToRedis(Document document){ String key = redisSourceManager.VIDEO_PREFIX_HOME_MOVIE_KEY + "_" + TAG; redisSourceManager.saveVideos(key, getVideosFromPhoneDocument(document)); } private void saveCartoonsToRedis(Document document){ String key = redisSourceManager.VIDEO_PREFIX_HOME_CARTOON_KEY + "_" + TAG; redisSourceManager.saveVideos(key, getVideosFromPhoneDocument(document)); } private List<VideoDTO> getVideosFromPhoneDocument(Document document){ List<VideoDTO> videos = new ArrayList<>(); Elements videoElements = document.select("div.column_body div a"); for (Element element : videoElements) { VideoDTO videoDTO = new VideoDTO(); String title = element.attr("title"); String image = element.select("span.a_img i").attr("style").replace("background-image:url('","").replace("')",""); if (StringUtils.isEmpty(image)){ image = element.select("span.a_img i").attr("data-src"); } String url = String.format("http://www.le.com/ptv/vplay/%s.html", element.attr("data-vid")); videoDTO.setAvailable(true); videoDTO.setTitle(title); videoDTO.setImage(image); videoDTO.setValue(url); log.info("title:"+title+",image:"+image+",url:"+url); videos.add(videoDTO); } return videos; } private List<VideoDTO> getHostsFromPhoneDocument(Document document, int size){ List<VideoDTO> videos = new ArrayList<>(); Elements videoElements = document.select("div.column.tab_cnt a"); for (int i = 0; i < size; i++) { Element element = videoElements.get(i); VideoDTO videoDTO = new VideoDTO(); String title = element.select("i.i1").text(); String image = element.select("span.a_img i").attr("data-src"); String url = String.format("http://www.le.com/ptv/vplay/%s.html", element.attr("href").replace("/vplay_", "")); videoDTO.setAvailable(true); videoDTO.setTitle(title); videoDTO.setImage(image); videoDTO.setValue(url); log.info("title:"+title+",image:"+image+",url:"+url); videos.add(videoDTO); } return videos; } }