/*
* Copyright 2016 Hippo Seven
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hippo.ehviewer.client.parser;
import android.support.annotation.NonNull;
import android.support.annotation.Nullable;
import android.util.Log;
import com.hippo.ehviewer.client.EhUtils;
import com.hippo.ehviewer.client.data.GalleryInfo;
import com.hippo.ehviewer.client.exception.ParseException;
import com.hippo.util.JsoupUtils;
import com.hippo.yorozuya.NumberUtils;
import com.hippo.yorozuya.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class GalleryListParser {
private static final String TAG = GalleryListParser.class.getSimpleName();
private static final Pattern PATTERN_RATING = Pattern.compile("\\d+px");
private static final Pattern PATTERN_THUMB_SIZE = Pattern.compile("height:(\\d+)px; width:(\\d+)px");
public static class Result {
public int pages;
public List<GalleryInfo> galleryInfoList;
}
private static int parsePages(Document d, String body) throws ParseException {
try {
Elements es = d.getElementsByClass("ptt").first().child(0).child(0).children();
return Integer.parseInt(es.get(es.size() - 2).text().trim());
} catch (Exception e) {
throw new ParseException("Can't parse gallery list pages", body);
}
}
private static String parseRating(String ratingStyle) {
Matcher m = PATTERN_RATING.matcher(ratingStyle);
int num1;
int num2;
int rate = 5;
String re;
if (m.find()) {
num1 = ParserUtils.parseInt(m.group().replace("px", ""));
} else {
return null;
}
if (m.find()) {
num2 = ParserUtils.parseInt(m.group().replace("px", ""));
} else {
return null;
}
rate = rate - num1 / 16;
if (num2 == 21) {
rate--;
re = Integer.toString(rate);
re = re + ".5";
} else
re = Integer.toString(rate);
return re;
}
@Nullable
private static GalleryInfo parseGalleryInfo(Element e) {
GalleryInfo gi = new GalleryInfo();
// Get category
Element ic = JsoupUtils.getElementByClass(e, "ic");
if (null != ic) {
gi.category = EhUtils.getCategory(ic.attr("alt").trim());
} else {
Log.w(TAG, "Can't parse gallery info category");
gi.category = EhUtils.UNKNOWN;
}
// Posted
Element itd = JsoupUtils.getElementByClass(e, "itd");
if (null != itd) {
gi.posted = itd.text().trim();
} else {
Log.w(TAG, "Can't parse gallery info posted");
gi.posted = "";
}
// Thumb
Element it2 = JsoupUtils.getElementByClass(e, "it2");
if (null != it2) {
// Thumb size
Matcher m = PATTERN_THUMB_SIZE.matcher(it2.attr("style"));
if (m.find()) {
gi.thumbWidth = NumberUtils.parseIntSafely(m.group(2), 0);
gi.thumbHeight = NumberUtils.parseIntSafely(m.group(1), 0);
} else {
Log.w(TAG, "Can't parse gallery info thumb size");
gi.thumbWidth = 0;
gi.thumbHeight = 0;
}
// Thumb url
Elements es = it2.children();
if (null != es && es.size() >= 1) {
gi.thumb = EhUtils.handleThumbUrlResolution(es.get(0).attr("src"));
} else {
String html = it2.html();
int index1 = html.indexOf('~');
int index2 = StringUtils.ordinalIndexOf(html, '~', 2);
if (index1 < index2) {
gi.thumb = EhUtils.handleThumbUrlResolution(
"http://" +StringUtils.replace(html.substring(index1 + 1, index2), "~", "/"));
} else {
Log.w(TAG, "Can't parse gallery info thumb url");
gi.thumb = "";
}
}
} else {
Log.w(TAG, "Can't parse gallery info thumb");
gi.thumbWidth = 0;
gi.thumbHeight = 0;
gi.thumb = "";
}
// Title (required)
Element it5 = JsoupUtils.getElementByClass(e, "it5");
if (null == it5) {
Log.e(TAG, "Can't parse gallery info title, step 1");
return null;
}
Elements es = it5.children();
if (null == es || es.size() <= 0) {
Log.e(TAG, "Can't parse gallery info title, step 2");
return null;
}
Element a = es.get(0);
GalleryDetailUrlParser.Result result = GalleryDetailUrlParser.parse(a.attr("href"));
if (null == result) {
Log.e(TAG, "Can't parse gallery info title, step 3");
return null;
}
gi.gid = result.gid;
gi.token = result.token;
gi.title = a.text().trim();
// Rating
Element it4r = JsoupUtils.getElementByClass(e, "it4r");
if (null != it4r) {
gi.rating = NumberUtils.parseFloatSafely(parseRating(it4r.attr("style")), -1.0f);
} else {
Log.w(TAG, "Can't parse gallery info rating");
gi.rating = -1.0f;
}
// Uploader
Element itu = JsoupUtils.getElementByClass(e, "itu");
if (null != itu) {
gi.uploader = itu.text().trim();
} else {
Log.w(TAG, "Can't parse gallery info uploader");
gi.uploader = "";
}
gi.generateSLang();
return gi;
}
public static Result parse(@NonNull String body) throws Exception {
Result result = new Result();
Document d = Jsoup.parse(body);
try {
result.pages = parsePages(d, body);
} catch (ParseException e) {
if (body.contains("No hits found</p>")) {
result.pages = 0;
//noinspection unchecked
result.galleryInfoList = Collections.EMPTY_LIST;
return result;
} else {
throw e;
}
}
try {
Elements es = d.getElementsByClass("itg").first().child(0).children();
List<GalleryInfo> list = new ArrayList<>(es.size() - 1);
for (int i = 1; i < es.size(); i++) { // First one is table header, skip it
GalleryInfo gi = parseGalleryInfo(es.get(i));
if (null != gi) {
list.add(gi);
}
}
result.galleryInfoList = list;
} catch (Exception e) {
throw new ParseException("Can't parse gallery list", body);
}
return result;
}
}