package me.devsaki.hentoid.parsers;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONTokener;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import me.devsaki.hentoid.database.domains.Attribute;
import me.devsaki.hentoid.database.domains.Content;
import me.devsaki.hentoid.enums.AttributeType;
import me.devsaki.hentoid.enums.StatusContent;
import me.devsaki.hentoid.util.AttributeMap;
import me.devsaki.hentoid.util.LogHelper;
import static me.devsaki.hentoid.enums.Site.HENTAICAFE;
/**
* Created by avluis on 07/26/2016.
* Handles parsing of content from Hentai Cafe
*/
public class HentaiCafeParser {
private static final String TAG = LogHelper.makeLogTag(HentaiCafeParser.class);
private static final int TIMEOUT = 5000; // 5 seconds
public static Content parseContent(String urlString) throws IOException {
Document doc = Jsoup.connect(urlString).timeout(TIMEOUT).get();
Elements content = doc.select("div.entry-content.content");
if (urlString.contains(HENTAICAFE.getUrl() + "/78-2/") || // ignore tags page
urlString.contains(HENTAICAFE.getUrl() + "/artists/")) { // ignore artist page
return null;
}
if (content.size() > 0) {
String url = doc.select("div.x-main.full")
.select("article")
.attr("id")
.replace("post-", "/?p=");
String coverUrl = doc.select("div.x-column.x-sm.x-1-2")
.select("img")
.attr("src");
String title = doc.select("div.x-column.x-sm.x-1-2.last")
.select("h3")
.first()
.text();
AttributeMap attributes = new AttributeMap();
String info = content.select("div.x-column.x-sm.x-1-2.last")
.select("p").html();
String tags = info.substring(0, info.indexOf("<br>")).replace(HENTAICAFE.getUrl(), "");
String artists = info.substring(info.indexOf("Artists: "));
artists = artists.substring(0, artists.indexOf("<br>")).replace(HENTAICAFE.getUrl(), "");
Elements tagElements = Jsoup.parse(tags).select("a");
Elements artistElements = Jsoup.parse(artists).select("a");
parseAttributes(attributes, AttributeType.TAG, tagElements);
parseAttributes(attributes, AttributeType.ARTIST, artistElements);
return new Content()
.setTitle(title)
.setUrl(url)
.setCoverImageUrl(coverUrl)
.setAttributes(attributes)
.setQtyPages(-1)
.setStatus(StatusContent.SAVED)
.setSite(HENTAICAFE);
}
return null;
}
private static void parseAttributes(AttributeMap map, AttributeType type, Elements elements) {
for (Element a : elements) {
map.add(new Attribute()
.setType(type)
.setUrl(a.attr("href"))
.setName(a.text()));
}
}
public static List<String> parseImageList(Content content) {
String galleryUrl = content.getReaderUrl();
List<String> imgUrls = new ArrayList<>();
LogHelper.d(TAG, "Gallery URL: " + galleryUrl);
Document readerDoc = null;
Elements links = null;
try {
readerDoc = Jsoup.connect(galleryUrl).timeout(TIMEOUT).get();
} catch (IOException e) {
LogHelper.e(TAG, e, "Error parsing content page");
}
if (readerDoc != null) {
links = readerDoc.select("a.x-btn");
if (links.size() > 1) {
LogHelper.d(TAG, "Multiple chapters found!");
}
}
Document doc;
Elements contents;
Element js;
int pages = 0;
if (links != null) {
for (int i = 0; i < links.size(); i++) {
LogHelper.d(TAG, "Chapter Links: " + links.get(i).attr("href"));
try {
doc = Jsoup.connect(links.get(i).attr("href")).timeout(TIMEOUT).get();
contents = doc.select("article#content");
js = contents.select("script").last();
if (contents.size() > 0) {
pages += Integer.parseInt(
doc.select("div.text").first().text().replace(" ⤵", ""));
LogHelper.d(TAG, "Pages: " + pages);
JSONArray array = getJSONArrayFromString(js.toString());
if (array != null) {
for (int j = 0; j < array.length(); j++) {
try {
//LogHelper.d(TAG, "JSONObject: " + j + ":"
// + array.get(j).toString());
imgUrls.add(array.getJSONObject(j).getString("url"));
} catch (JSONException e) {
LogHelper.e(TAG, e, "Error while reading from array");
}
}
}
}
} catch (IOException e) {
LogHelper.e(TAG, e, "JSOUP Error");
}
}
LogHelper.d(TAG, "Total Pages: " + pages);
content.setQtyPages(pages);
}
LogHelper.d(TAG, imgUrls);
return imgUrls;
}
private static JSONArray getJSONArrayFromString(String s) {
Pattern pattern = Pattern.compile(".*\\[\\{ *(.*) *\\}\\].*");
Matcher matcher = pattern.matcher(s);
LogHelper.d(TAG, "Match found? " + matcher.find());
String results = matcher.group(1);
results = "[{" + results + "}]";
try {
return (JSONArray) new JSONTokener(results).nextValue();
} catch (JSONException e) {
LogHelper.e(TAG, e, "Couldn't build JSONArray from the provided string");
}
return null;
}
}