package org.koroed.lepra.content.parser;
import java.util.ArrayList;
import org.apache.commons.lang3.StringUtils;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.koroed.lepra.content.LepraPost;
/**
* Author: Nikita Koroed
* E-mail: nikita@koroed.org
* Date: 26.05.2014
* Time: 19:30
*/
/**
* Author: hexonxons.
* E-mail: killgamesh666@gmail.com
* Data: 27.08.2014
*/
public class LepraPostListParser extends LepraContentParser<ArrayList<LepraPost>>
{
public static final String TAG = "LepraPostListParser";
private static String POST_SEPARATOR = "\\n\\n\\t\\t\\n\\t\\t\\t\\t";
private static LepraPostListParser sInstance = null;
private static Object sLock = new Object();
public static LepraPostListParser getInstance()
{
synchronized (sLock)
{
if (sInstance == null)
{
sInstance = new LepraPostListParser();
}
return sInstance;
}
}
private LepraPostListParser(){}
@Override
protected synchronized ArrayList<LepraPost> parseContent(String content)
{
ArrayList<LepraPost> posts = new ArrayList<>();
if(!content.contains("{"))
{
System.out.println(content);
return posts;
}
content = content.substring(content.indexOf("{"));
Integer newOffset = null;
String template = null;
try
{
JSONObject obj = new JSONObject(content);
template = obj.getString("template");
newOffset = obj.getInt("offset");
}
catch (JSONException e)
{
e.printStackTrace();
}
if(StringUtils.isBlank(template))
{
return null;
}
// if(template.indexOf(POST_SEPARATOR) < 0)
// {
// return Collections.emptyList();
// }
String[] rawPostArray = template.split(POST_SEPARATOR, -1);
for(String rawPost : rawPostArray)
{
if(StringUtils.isNotBlank(rawPost))
{
posts.add(postParser(rawPost));
}
}
return posts;
}
private LepraPost postParser(String rawPost)
{
Document doc = Jsoup.parse(rawPost);
String postId = doc.select(".post").first().attr("id");
String postLink = null;
boolean isGold = !doc.select(".golden").isEmpty();
String userLogin = doc.select(".c_user").first().text();
String userTitle = doc.select(".ddi").first().html();
if(StringUtils.isNotBlank(userTitle))
{
userTitle = userTitle.trim().substring(8, userTitle.indexOf("<a")).replaceAll("\\s", " ").trim();
}
String userGender = doc.select(".ddi").first().textNodes().get(0).text().trim().compareTo("Написал") == 0 ? "male" : "female";
long postDate = Long.valueOf(doc.select(".js-date").first().attr("data-epoch_date"));
String totalCommentsCnt = null;
String newCommentsCnt = null;
Element commentsCounts = doc.select(".b-post_comments_links").first();
if(commentsCounts != null)
{
Elements cnts = commentsCounts.getElementsByTag("a");
if(!cnts.isEmpty())
{
totalCommentsCnt = cnts.first().text();
postLink = cnts.first().attr("href");
if(cnts.size() > 1)
{
newCommentsCnt = cnts.get(1).text();
}
}
}
int postRating = Integer.parseInt(doc.select(".vote_result").first().text());
String postContent = doc.select(".dti").first().html();
int postIdInt = Integer.valueOf(postId.substring(1, postId.length()));
LepraPost post = new LepraPost();
post.id = postIdInt;
post.link = postLink;
post.isGold = isGold;
post.userLogin = userLogin;
post.userTitle = userTitle;
post.userGender = userGender;
post.date = postDate * 1000;
post.totalCommentCnt = totalCommentsCnt;
post.newCommentCnt = newCommentsCnt;
post.rating = postRating;
post.content = postContent;
return post;
}
}