package io.codetail.client.mover;
import android.os.Bundle;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import io.codetail.client.IParser;
import io.codetail.client.models.Channel;
import io.codetail.client.models.Comment;
import io.codetail.client.models.Video;
public abstract class MoverParser<T extends Mover> implements IParser<T> {
/** Russian locale handed to the date formatter in {@code parseRussianFormat}. */
final Locale sLocaleRU = new Locale("ru", "RU");
/** Date pattern used for the channel registration date (day, month name, year). */
final static String PROFILE_FORMAT = "d MMMM yyyy";
/** Date pattern used for comment timestamps (day, month name, year, 24-hour time). */
final static String COMMENT_FORMAT = "d MMMM yyyy, HH:mm";
/**
 * Collects every video entry located beneath the given element.
 *
 * @param element root element; its descendants matching {@code div.video}
 *                are treated as video entries
 *
 * @return videos parsed from the matching descendants
 */
public ArrayList<Video> findVideos(Element element){
    Elements videoBlocks = element.select("div.video");
    return findVideos(videoBlocks);
}
/**
 * Parses video entries into {@link io.codetail.client.models.Video} objects,
 * including the owner channel (username only) of every entry.
 *
 * @param elements video entry elements; callers in this file pass
 *                 {@code div.video} nodes
 *
 * @return one video per element, in iteration order
 */
public ArrayList<Video> findVideos(Elements elements){
    ArrayList<Video> videos = new ArrayList<>(elements.size());
    for(Element element : elements){
        videos.add(parseVideo(element, true));
    }
    return videos;
}

/**
 * Parses video entries exactly like {@link #findVideos(Elements)} but does not
 * read or set the owner channel.
 *
 * @param elements video entry elements
 *
 * @return one video per element, in iteration order
 */
public ArrayList<Video> findVideosInChannel(Elements elements){
    ArrayList<Video> videos = new ArrayList<>(elements.size());
    for(Element element : elements){
        videos.add(parseVideo(element, false));
    }
    return videos;
}

/**
 * Builds a single video from one entry element. Shared by both public
 * finders so the field extraction lives in one place.
 *
 * NOTE(review): the {@code a.image}, {@code div.info}, {@code span.length}
 * and (when requested) {@code p.owner a} nodes are dereferenced without a
 * null check, so an entry lacking any of them raises a NullPointerException,
 * as in the original code.
 *
 * @param element   the video entry element
 * @param withOwner whether to read the owner username from {@code p.owner a}
 *
 * @return the populated video
 */
private Video parseVideo(Element element, boolean withOwner){
    Element link = element.select("a.image").first();
    Element info = element.select("div.info").first();

    // FIXME (carried over from the original, reason not recorded there):
    // a Mover.MoverVideo is instantiated where a plain Video was used before.
    Video video = new Mover.MoverVideo();
    video.setId(getVideoId(link.attr("href")));
    video.setTitle(link.attr("title"));
    video.setViewsCount(getViewCount(element, info));
    video.setDuration(link.select("span.length").first().text());

    if(withOwner){
        String user = info.select("p.owner a").first().text();
        Channel channel = new Channel();
        channel.setUsername(user);
        video.setOwner(channel);
    }
    return video;
}
/**
 * Reads the number shown on the last pagination link.
 *
 * @param document parsed page
 *
 * @return the integer formed by the digits of the last link matching
 *         {@code div.pagination .digits .ut a}, or -1 when no link matches
 */
public int getLastNavigationPage(Document document){
    Elements pageLinks = document.select("div.pagination .digits .ut a");
    if(pageLinks.size() <= 0){
        return -1;
    }
    Element lastLink = pageLinks.last();
    return internalGetIntegers(lastLink.text());
}
/**
 * Collects every comment located beneath the given element.
 *
 * @param element root element; its descendants matching
 *                {@code ul#listComment li} are treated as comment entries
 *
 * @return comments parsed from the matching descendants
 */
public ArrayList<Comment> findComments(Element element){
    Elements commentItems = element.select("ul#listComment li");
    return findComments(commentItems);
}
/**
 * Parses comment entries into {@link io.codetail.client.models.Comment} objects.
 *
 * For every entry the author's picture ({@code a.userpic img}, {@code src}
 * attribute), the author's username ({@code a.author}), the timestamp
 * ({@code span.date}, parsed with {@code COMMENT_FORMAT}) and the text
 * (all {@code p} descendants) are read.
 *
 * @param elements comment entry elements; {@link #findComments(Element)}
 *                 passes {@code ul#listComment li} nodes
 *
 * @return one comment per element, in iteration order
 */
public ArrayList<Comment> findComments(Elements elements){
    ArrayList<Comment> commentList = new ArrayList<>(elements.size());

    for(Element element : elements){
        Channel channel = new Channel();

        // first() yields null when the entry has no avatar image; in that
        // case the picture is simply left unset instead of failing the
        // whole comment list with a NullPointerException.
        Element avatar = element.select("a.userpic img").first();
        if(avatar != null){
            channel.setPicture(avatar.attr("src"));
        }
        channel.setUsername(element.select("a.author").text());

        Comment comment = new Comment();
        comment.setUser(channel);
        comment.setTime(parseRussianFormat(COMMENT_FORMAT, element.select("span.date").text()));
        comment.setComment(element.select("p").text());

        commentList.add(comment);
    }
    return commentList;
}
/**
 * Reads the expanded channel information into a freshly created channel.
 *
 * @param element element containing the {@code div#channel-box} block
 *
 * @return a new channel populated by
 *         {@link #getChannelExpandedInfo(Element, Channel)}
 */
public Channel getChannelExpandedInfo(Element element){
    return getChannelExpandedInfo(element, new Channel());
}
/**
 * Fills the given channel with the data found in {@code div#channel-box}:
 * picture, display name, videos count, registration date and profile views
 * count.
 *
 * NOTE(review): every lookup is dereferenced without a null/bounds check, so
 * a page that lacks one of the expected nodes fails with an exception.
 *
 * @param element element containing the {@code div#channel-box} block
 * @param channel channel to populate
 *
 * @return the same {@code channel} instance
 */
public Channel getChannelExpandedInfo(Element element, Channel channel){
    Elements box = element.select("div#channel-box");

    // Source link of the user picture
    channel.setPicture(box.select("a.userpic img").first().attr("src"));

    // Channel display name
    channel.setDisplayName(box.select("div.info div.user").first().text());

    // Videos count: only the digits of the text are kept
    channel.setVideosCount(
            internalGetIntegers(box.select("div.info div.videos").first().text()));

    // Direct text nodes of div.data; the code relies on their position:
    // index 1 holds the registration date, index 2 the profile views count.
    List<TextNode> textParts = box.select("div.data").first().textNodes();

    String registered = textParts.get(1).text();
    registered = registered.replace("Регистрация:", "").trim();
    channel.setRegistrationDate(parseRussianFormat(PROFILE_FORMAT, registered));

    channel.setProfileViewsCount(internalGetIntegers(textParts.get(2).text()));

    return channel;
}
/**
 * Parses a date written with Russian month names into epoch milliseconds.
 *
 * @param pattern {@link SimpleDateFormat} pattern describing {@code object}
 * @param object  text to parse
 *
 * @return the parsed instant as returned by {@link Date#getTime()}, or -1
 *         when the text does not match the pattern (the stack trace of the
 *         {@link ParseException} is printed in that case)
 */
long parseRussianFormat(String pattern, String object){
    SimpleDateFormat formatter = new SimpleDateFormat(pattern, sLocaleRU);
    Date parsed;
    try {
        parsed = formatter.parse(object);
    } catch (ParseException e) {
        e.printStackTrace();
        return -1;
    }
    return parsed.getTime();
}
/**
 * Strips everything except the ASCII digits {@code 0-9} from the text and
 * parses the remaining digits as one decimal integer
 * (e.g. {@code "1 234 views"} becomes {@code 1234}).
 *
 * @param object text to extract the digits from; may be {@code null}
 *
 * @return the parsed number, or 0 when the text is {@code null} or contains
 *         no digit at all (previously this case threw an exception)
 *
 * @throws NumberFormatException if the collected digits do not fit into an
 *         {@code int}
 */
protected int internalGetIntegers(String object){
    if(object == null){
        return 0;
    }
    String digits = object.replaceAll("[^0-9]", "");
    // Integer.parseInt("") throws; a text without digits carries no number.
    if(digits.isEmpty()){
        return 0;
    }
    return Integer.parseInt(digits);
}
/** Compiled once: a run of at least six word characters. */
private static final Pattern VIDEO_ID_PATTERN = Pattern.compile("([\\w\\d]+){6,}");

/**
 * Extracts the video id from a url: the first run of six or more word
 * characters (letters, digits, underscore) found in the text.
 *
 * @param text url to search; may be {@code null}
 *
 * @return the first matching run, or {@code null} when the text is
 *         {@code null} or contains no such run
 */
public static String getVideoId(String text){
    if(text == null){
        return null;
    }
    Matcher matcher = VIDEO_ID_PATTERN.matcher(text);
    return matcher.find() ? matcher.group() : null;
}
/**
 * Extracts the views count of a video entry.
 *
 * When the entry carries the {@code main} class, the number is taken from
 * the string form of the second child node of {@code p.owner}; otherwise
 * it is taken from the text of {@code p.views}.
 *
 * @param element the video entry element
 * @param info    the entry's {@code div.info} element
 *
 * @return the digits found in the selected text, as an integer
 */
protected int getViewCount(Element element, Element info){
    boolean isMainEntry = element.classNames().contains("main");
    String rawCount = isMainEntry
            ? info.select("p.owner").first().childNodes().get(1).toString()
            : info.select("p.views").first().text();
    return internalGetIntegers(rawCount);
}
/**
 * Reports whether the argument is a url string this parser accepts.
 *
 * NOTE(review): this is not a conventional {@code equals}: it returns
 * {@code false} for every non-String argument (including this parser
 * itself), and no matching {@code hashCode} is visible in this file.
 * Presumably it exists so a parser can be matched against a url; verify
 * against the callers before changing it.
 *
 * @param o candidate object
 *
 * @return {@code true} only when {@code o} is a String for which
 *         {@code canParse} returns {@code true}
 */
@Override
public boolean equals(Object o) {
    if (!(o instanceof String)) {
        return false;
    }
    return canParse((String) o);
}
public static class DetailParserForMoverParser extends MoverParser<Mover> {

    /**
     * Parses a video detail ("watch") page.
     *
     * The description and views count (and, for authenticated users, likes
     * and dislikes) are extracted into a video object, but that object is
     * not attached to anything yet: the method returns {@code page} as
     * received.
     *
     * @param page   page object handed back to the caller
     * @param source html source of the page
     *
     * @return the {@code page} argument, unchanged
     */
    @Override
    public Mover parse(Mover page, String source) {
        Document document = Jsoup.parse(source);

        // FIXME (carried over from the original, reason not recorded there):
        // a Mover.MoverVideo is instantiated where a plain Video was used before.
        Video detail = new Mover.MoverVideo();

        // Video description: only the class-less paragraphs of div.desc
        Elements description = document.select("div.desc p:not([class])");
        detail.setDescription(description.outerHtml());

        detail.setViewsCount(internalGetIntegers(document.select(".fr.views strong").first().text()));

        // Likes/dislikes are read only when the user is authenticated
        if(isUserAuthenticated()) {
            Elements ratings = document.select("table.r-desc");
            detail.setLikes(internalGetIntegers(ratings.select("td.like").text()));
            detail.setDislikes(internalGetIntegers(ratings.select("td.dislike").text()));
        }

        // TODO: the result is not published yet. The original draft stored
        // findComments(document) under COMMENTS and the detail under DETAIL
        // in a Bundle.
        return page;
    }

    /**
     * @return always {@code false} in this implementation
     */
    boolean isUserAuthenticated(){
        return false;
    }

    /**
     * Accepts urls whose path is {@code /watch/<id>}.
     *
     * @param url url to test; may be {@code null}
     *
     * @return {@code true} when the path matches; {@code false} otherwise,
     *         including for a {@code null} url, a url without a path
     *         component, or a syntactically invalid url
     */
    @Override
    public boolean canParse(String url) {
        if(url == null){
            return false;
        }
        try {
            // getPath() is null for opaque URIs such as "mailto:a@b"
            String path = new URI(url).getPath();
            return path != null && path.matches("/watch/([\\w\\d]+)");
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        return false;
    }
}
public static class ProfileParserForMoverParser extends MoverParser<Mover> {

    /**
     * Parses a channel (profile) page.
     *
     * Username, display name and videos count are read from
     * {@code #channel-box} into a channel object, but that object is not
     * attached to anything yet: the method returns {@code page} as received.
     *
     * @param page   page object handed back to the caller
     * @param source html source of the page
     *
     * @return the {@code page} argument, unchanged
     */
    @Override
    public Mover parse(Mover page, String source) {
        Document document = Jsoup.parse(source);

        Channel channelInfo = new Channel();
        Elements channelBox = document.select("#channel-box");
        channelInfo.setUsername(channelBox.select("h4").first().text());
        channelInfo.setDisplayName(channelBox.select("div.user").first().text());
        channelInfo.setVideosCount(internalGetIntegers(channelBox.select("div.videos")
                .first().text()));

        // TODO: Need to solve problem with ViewCount and Registration Date
        // TODO: Check for empty
        // TODO: the result is not published yet. The original draft planned to
        //  - set the picture from MoverService.HEAD_URL + the "a.userpic img" src,
        //  - store findVideosInChannel("div.video-recommended div.video")
        //    under RECOMMENDATIONS when non-empty,
        //  - store findVideos of the "div.video" nodes in the last
        //    ".video-list.vertical" under LATEST when present,
        //  - store getLastNavigationPage(document) under PAGINATION and the
        //    channel under INFO,
        // and to return that Bundle instead of the page.
        return page;
    }

    /**
     * Accepts urls whose path is {@code /channel/<name>} and that carry no
     * query string.
     *
     * @param url url to test; may be {@code null}
     *
     * @return {@code true} when the url matches; {@code false} otherwise,
     *         including for a {@code null} url, a url without a path
     *         component, or a syntactically invalid url
     */
    @Override
    public boolean canParse(String url) {
        if(url == null){
            return false;
        }
        try {
            URI uri = new URI(url);
            // getPath() is null for opaque URIs such as "mailto:a@b"
            String path = uri.getPath();
            return path != null
                    && path.matches("/channel/([\\w\\d]+)")
                    && uri.getQuery() == null;
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        return false;
    }
}
public static class PagesParser extends MoverParser<Mover.PaginatedPage> {

    /**
     * Parses a paginated listing: the videos inside
     * {@code .video-list.vertical} and the last page number.
     *
     * @param page   page object to populate
     * @param source html source of the page
     *
     * @return the same {@code page} instance with videos and pages count set
     */
    @Override
    public Mover.PaginatedPage parse(Mover.PaginatedPage page, String source) {
        Document document = Jsoup.parse(source);

        Elements entries = document.select(".video-list.vertical div.video");
        page.setVideos(findVideos(entries));
        page.setPagesCount(getLastNavigationPage(document));

        return page;
    }

    /**
     * Accepts urls that have a path containing {@code /} and a query string.
     *
     * @param url url to test; may be {@code null}
     *
     * @return {@code true} when the url matches; {@code false} otherwise,
     *         including for a {@code null} url, a url without a path
     *         component, or a syntactically invalid url
     */
    @Override
    public boolean canParse(String url) {
        if(url == null){
            return false;
        }
        try {
            URI uri = new URI(url);
            // getPath() is null for opaque URIs such as "mailto:a@b"
            String path = uri.getPath();
            return path != null && path.contains("/") && uri.getQuery() != null;
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        return false;
    }
}
public static class CategoryParser extends MoverParser<Mover.CategoryPage> {

    /** Selects which selector is used for the recommended videos in {@code parse}. */
    boolean homePage;

    /**
     * @param homePage {@code true} to read recommendations from
     *                 {@code div#home-recommended}, {@code false} to read
     *                 them from {@code div.video-recommended}
     */
    public CategoryParser(boolean homePage) {
        this.homePage = homePage;
    }

    /**
     * Parses a category (or home) page: recommended videos, popular videos,
     * the videos of the last {@code .video-list.vertical} block and the last
     * page number.
     *
     * @param response page object to populate
     * @param source   html source of the page
     *
     * @return the same {@code response} instance, populated
     */
    @Override
    public Mover.CategoryPage parse(Mover.CategoryPage response, String source) {
        Document document = Jsoup.parse(source);

        String recommendedSelector = homePage
                ? "div#home-recommended div.video"
                : "div.video-recommended div.video";
        response.setRecommends(findVideos(document.select(recommendedSelector)));

        response.setPopular(findVideos(document.select("#place_top_video div.video")));

        // last() yields null when the page has no ".video-list.vertical"
        // block; treat that like the other sections and report an empty list
        // instead of failing with a NullPointerException.
        Element latestList = document.select(".video-list.vertical").last();
        Elements latest = latestList == null
                ? new Elements()
                : latestList.select("div.video");
        response.setVideos(findVideos(latest));

        response.setPagesCount(getLastNavigationPage(document));

        return response;
    }

    /**
     * Accepts urls whose path is {@code /} or {@code /video/<name>} and that
     * carry no query string.
     *
     * @param url url to test; may be {@code null}
     *
     * @return {@code true} when the url matches; {@code false} otherwise,
     *         including for a {@code null} url, a url without a path
     *         component, or a syntactically invalid url
     */
    @Override
    public boolean canParse(String url) {
        if(url == null){
            return false;
        }
        try {
            URI uri = new URI(url);
            // getPath() is null for opaque URIs such as "mailto:a@b"
            String path = uri.getPath();
            if(path == null){
                return false;
            }
            return (path.equals("/") || path.matches("/video/([\\w\\d]+)"))
                    && (uri.getQuery() == null);
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        return false;
    }
}
}