/*
* Overchan Android (Meta Imageboard Client)
* Copyright (C) 2014-2016 miku-nyan <https://github.com/miku-nyan>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package nya.miku.wishmaster.chans.sevenchan;
import java.io.IOException;
import java.io.InputStream;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringEscapeUtils;

import nya.miku.wishmaster.api.models.AttachmentModel;
import nya.miku.wishmaster.api.util.CryptoUtils;
import nya.miku.wishmaster.api.util.RegexUtils;
import nya.miku.wishmaster.api.util.WakabaReader;
/**
 * Streaming HTML parser for 7chan pages, layered on top of {@link WakabaReader}.
 *
 * <p>{@link #customFilters(int)} is fed one character per call and runs it through a set of
 * literal marker matchers (post number, subject, attachment info, comment, date, staff capcodes).
 * As soon as a marker has been seen completely, the text following it is read directly from the
 * underlying stream and stored in the post that is currently being built.</p>
 *
 * <p>An instance keeps mutable matcher state and is therefore not meant to be shared between
 * threads; separate instances do not share any mutable state.</p>
 */
public class SevenchanReader extends WakabaReader {

    private static final char[] P_OPEN = "<p>".toCharArray();
    private static final char[] P_CLOSE = "</p>".toCharArray();
    private static final char[] SPAN_CLOSE = "</span>".toCharArray();
    private static final char[] QUOTE = "\"".toCharArray();

    /**
     * Date format templates. {@link SimpleDateFormat} is not thread-safe, so these shared
     * instances are never used for parsing directly; every reader clones them instead.
     */
    private static final DateFormat DATE_FORMAT, DATE_FORMAT_ALT;
    static {
        DATE_FORMAT = new SimpleDateFormat("yy/MM/dd(EEE)HH:mm", Locale.US);
        DATE_FORMAT.setTimeZone(TimeZone.getTimeZone(SevenchanModule.TIMEZONE));
        DATE_FORMAT_ALT = new SimpleDateFormat("yy/MM/dd HH:mm:ss", Locale.US);
        DATE_FORMAT_ALT.setTimeZone(TimeZone.getTimeZone(SevenchanModule.TIMEZONE));
    }

    /** Difference between a fullwidth character (e.g. U+FF10) and its ASCII counterpart (U+0030). */
    private static final int FULLWIDTH_OFFSET = 65248;
    /** Highest character index that the fullwidth date fallback reads. */
    private static final int FULLWIDTH_DATE_LAST_INDEX = 22;

    /** Captures the last whitespace-free token (optionally preceded by a token ending in ") ") before the reflink span. */
    private static final Pattern DATE_PATTERN = Pattern.compile("((?:[^\\s]+\\) )?[^\\s]+)\\s*<span class=\"reflink\">$");
    private static final Pattern ATTACHMENT_SIZE_PATTERN =
            Pattern.compile("([\\.\\d]+) ?([km])?b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
    /** Width and height separated by either a Latin letter x (U+0078) or a multiplication sign (U+00D7). */
    private static final Pattern ATTACHMENT_PX_SIZE_PATTERN = Pattern.compile("(\\d+)[x×](\\d+)");
    private static final Pattern ATTACHMENT_ORIGINAL_NAME_PATTERN = Pattern.compile("\\s*,?([^<\\)]*)");
    private static final Pattern EMBEDDED_PATTERN =
            Pattern.compile("<a href=\"javascript:;\" onmousedown=\"if\\(document.getElementById\\('(.*?)'\\)(?:[^\"]*?)\">");
    private static final String EMBEDDED_REPLACEMENT = "<a href=\"http://youtube\\.com/watch?v=$1\">";

    private static final String SRC_ATTRIBUTE_START = "src=\"";

    private static final char[] NUMBER_FILTER = "<input type=\"checkbox\" name=\"post[]\" value=\"".toCharArray();
    private static final char[] SUBJECT_FILTER = "<span class=\"subject\">".toCharArray();
    private static final char[] ATTACHMENT_FILTER = "<p class=\"file_size\">".toCharArray();
    private static final char[] ATTACHMENT_MULTI_FIRST_FILTER = "<span class=\"multithumbfirst\">".toCharArray();
    private static final char[] ATTACHMENT_MULTI_FILTER = "<span class=\"multithumb\">".toCharArray();
    private static final char[] COMMENT_FILTER = "<p class=\"message\">".toCharArray();
    private static final char[] DATE_START_FILTER = "<input type=\"checkbox\" name=\"post[]\"".toCharArray();
    private static final char[] DATE_END_FILTER = "<span class=\"reflink\">".toCharArray();
    private static final char[] ADMIN_FILTER = "<span title=\"7chan administrator\" class=\"capcode\">".toCharArray();
    private static final char[] MOD_FILTER = "<span title=\"7chan moderator\" class=\"capcode\">".toCharArray();

    /** Per-instance copy of {@link #DATE_FORMAT_ALT}, used by the fullwidth date fallback. */
    private final DateFormat altDateFormat = (DateFormat) DATE_FORMAT_ALT.clone();

    private final StringBuilder commentBuffer = new StringBuilder();
    /** Raw page text collected between the date start marker and the date end marker. */
    private final StringBuilder dateBuffer = new StringBuilder();
    private boolean inDate = false;

    /** Thumbnail URL seen most recently, waiting to be attached to the next parsed attachment. */
    private String lastThumbnail = null;
    /** Staff capcode texts seen for the current post; prepended to the tripcode when the post is completed. */
    private String lastAdminMark = null;
    private String lastModMark = null;

    /* Number of characters of each marker matched so far. */
    private int curNumberPos = 0;
    private int curSubjectPos = 0;
    private int curAttachmentPos = 0;
    private int curAttachmentMultiFirstPos = 0;
    private int curAttachmentMultiPos = 0;
    private int curCommentPos = 0;
    private int curDateStartPos = 0;
    private int curDateEndPos = 0;
    private int curAdminPos = 0;
    private int curModPos = 0;

    /**
     * Creates a reader over the given page stream.
     *
     * @param in stream with the HTML of a 7chan page
     */
    public SevenchanReader(InputStream in) {
        // Hand the superclass a private copy: SimpleDateFormat must not be shared between
        // readers that might be parsing at the same time.
        super(in, (DateFormat) DATE_FORMAT.clone());
    }

    /**
     * Advances a literal marker matcher by one character.
     *
     * @param filter the marker being searched for
     * @param pos    number of marker characters matched before this character
     * @param ch     the character just read
     * @return the number of marker characters matched after this character;
     *         equal to {@code filter.length} when the marker is complete
     */
    private static int matchStep(char[] filter, int pos, int ch) {
        if (ch == filter[pos]) {
            return pos + 1;
        }
        // Mismatch: restart, but let the current character begin a new match attempt.
        return (pos != 0 && ch == filter[0]) ? 1 : 0;
    }

    /**
     * Runs one character of the page through every 7chan-specific marker matcher.
     *
     * <p>All matchers see the same character, in a fixed order, even if an earlier matcher
     * completed and consumed further input from the stream.</p>
     *
     * @param ch the character just read from the page
     * @throws IOException if reading the text that follows a completed marker fails
     */
    @Override
    protected void customFilters(int ch) throws IOException {
        if (inDate) {
            dateBuffer.append((char) ch);
        }

        curNumberPos = matchStep(NUMBER_FILTER, curNumberPos, ch);
        if (curNumberPos == NUMBER_FILTER.length) {
            currentPost.number = readUntilSequence(QUOTE);
            curNumberPos = 0;
        }

        curSubjectPos = matchStep(SUBJECT_FILTER, curSubjectPos, ch);
        if (curSubjectPos == SUBJECT_FILTER.length) {
            String rawSubject = readUntilSequence(SPAN_CLOSE);
            currentPost.subject = CryptoUtils.fixCloudflareEmails(StringEscapeUtils.unescapeHtml4(rawSubject).trim());
            curSubjectPos = 0;
        }

        curAttachmentPos = matchStep(ATTACHMENT_FILTER, curAttachmentPos, ch);
        if (curAttachmentPos == ATTACHMENT_FILTER.length) {
            parseAttachment(readUntilSequence(P_CLOSE));
            curAttachmentPos = 0;
        }

        curAttachmentMultiFirstPos = matchStep(ATTACHMENT_MULTI_FIRST_FILTER, curAttachmentMultiFirstPos, ch);
        if (curAttachmentMultiFirstPos == ATTACHMENT_MULTI_FIRST_FILTER.length) {
            parseAttachment(readUntilSequence(SPAN_CLOSE));
            curAttachmentMultiFirstPos = 0;
        }

        curAttachmentMultiPos = matchStep(ATTACHMENT_MULTI_FILTER, curAttachmentMultiPos, ch);
        if (curAttachmentMultiPos == ATTACHMENT_MULTI_FILTER.length) {
            parseAttachment(readUntilSequence(SPAN_CLOSE));
            curAttachmentMultiPos = 0;
        }

        curCommentPos = matchStep(COMMENT_FILTER, curCommentPos, ch);
        if (curCommentPos == COMMENT_FILTER.length) {
            // The comment is the last piece of a post: read it, apply pending capcodes, finish the post.
            currentPost.comment = readPostComment();
            if (lastAdminMark != null) {
                currentPost.trip = lastAdminMark + (currentPost.trip == null ? "" : currentPost.trip);
                lastAdminMark = null;
            }
            if (lastModMark != null) {
                currentPost.trip = lastModMark + (currentPost.trip == null ? "" : currentPost.trip);
                lastModMark = null;
            }
            finalizePost();
            curCommentPos = 0;
        }

        curDateStartPos = matchStep(DATE_START_FILTER, curDateStartPos, ch);
        if (curDateStartPos == DATE_START_FILTER.length) {
            inDate = true;
            dateBuffer.setLength(0);
            curDateStartPos = 0;
        }

        curDateEndPos = matchStep(DATE_END_FILTER, curDateEndPos, ch);
        if (curDateEndPos == DATE_END_FILTER.length) {
            Matcher dateMatcher = DATE_PATTERN.matcher(dateBuffer.toString().trim());
            if (dateMatcher.find()) {
                String date = dateMatcher.group(1);
                parseDate(date);
                if (currentPost.timestamp == 0) {
                    // The regular format did not produce a timestamp; try the fullwidth variant.
                    applyFullwidthDateFallback(date);
                }
            }
            inDate = false;
            dateBuffer.setLength(0);
            curDateEndPos = 0;
        }

        curAdminPos = matchStep(ADMIN_FILTER, curAdminPos, ch);
        if (curAdminPos == ADMIN_FILTER.length) {
            lastAdminMark = StringEscapeUtils.unescapeHtml4(readUntilSequence(SPAN_CLOSE)).trim();
            curAdminPos = 0;
        }

        curModPos = matchStep(MOD_FILTER, curModPos, ch);
        if (curModPos == MOD_FILTER.length) {
            lastModMark = StringEscapeUtils.unescapeHtml4(readUntilSequence(SPAN_CLOSE)).trim();
            curModPos = 0;
        }
    }

    /**
     * Best-effort parsing of a date written with fullwidth characters.
     *
     * <p>After HTML-unescaping, the characters at indexes 2-3, 5-6, 8-9, 15-16, 18-19 and 21-22
     * are shifted down to ASCII and assembled into a {@code yy/MM/dd HH:mm:ss} string. On any
     * failure the timestamp of the current post is left untouched.</p>
     *
     * @param rawDate the date text captured from the page, possibly containing HTML entities
     */
    private void applyFullwidthDateFallback(String rawDate) {
        String date = StringEscapeUtils.unescapeHtml4(rawDate);
        if (date.length() <= FULLWIDTH_DATE_LAST_INDEX) {
            return; // too short to contain all the expected fields
        }
        String ascii = new StringBuilder(17)
                .append(halfwidthAt(date, 2)).append(halfwidthAt(date, 3)).append('/')
                .append(halfwidthAt(date, 5)).append(halfwidthAt(date, 6)).append('/')
                .append(halfwidthAt(date, 8)).append(halfwidthAt(date, 9)).append(' ')
                .append(halfwidthAt(date, 15)).append(halfwidthAt(date, 16)).append(':')
                .append(halfwidthAt(date, 18)).append(halfwidthAt(date, 19)).append(':')
                .append(halfwidthAt(date, 21)).append(halfwidthAt(date, 22))
                .toString();
        try {
            currentPost.timestamp = altDateFormat.parse(ascii).getTime();
        } catch (ParseException ignored) {
            // Not a fullwidth date either: deliberately leave the timestamp unset.
        }
    }

    /** Returns the character at {@code index} shifted from the fullwidth block down to ASCII. */
    private static char halfwidthAt(String text, int index) {
        return (char) (text.charAt(index) - FULLWIDTH_OFFSET);
    }

    /**
     * Parses attachment info via the superclass and, if that added an attachment, assigns it the
     * thumbnail URL remembered from the most recent plain thumbnail image.
     *
     * @param html the HTML fragment describing the attachment
     */
    @Override
    protected void parseAttachment(String html) {
        int countBefore = currentAttachments.size();
        super.parseAttachment(html);
        int countAfter = currentAttachments.size();
        if (countAfter > countBefore) {
            currentAttachments.get(countAfter - 1).thumbnail = lastThumbnail;
            lastThumbnail = null;
        }
    }

    /**
     * Interprets an image tag found on the page.
     *
     * <ul>
     * <li>multi-thumbnail images update the most recently added attachment (if there is one);</li>
     * <li>the lock and sticky icons set the corresponding flags of the current thread;</li>
     * <li>for any other image the {@code src} value is remembered as the pending thumbnail.</li>
     * </ul>
     *
     * @param imgTag the text of the image tag
     */
    @Override
    protected void parseThumbnail(String imgTag) {
        if (imgTag.contains("class=\"multithumbfirst\"") || imgTag.contains("class=\"multithumb\"")) {
            if (currentAttachments.size() > 0) {
                fillMultiThumbAttachment(currentAttachments.get(currentAttachments.size() - 1), imgTag);
            }
        } else if (imgTag.contains("/css/locked.gif")) {
            currentThread.isClosed = true;
        } else if (imgTag.contains("/css/sticky.gif")) {
            currentThread.isSticky = true;
        } else {
            String src = srcAttributeOf(imgTag);
            if (src != null) {
                lastThumbnail = src;
            }
        }
    }

    /**
     * Returns the value of the first {@code src="..."} attribute in the tag,
     * or {@code null} if the attribute is missing or its closing quote is absent.
     */
    private static String srcAttributeOf(String imgTag) {
        int start = imgTag.indexOf(SRC_ATTRIBUTE_START);
        if (start == -1) {
            return null;
        }
        start += SRC_ATTRIBUTE_START.length();
        int end = imgTag.indexOf('"', start);
        return end == -1 ? null : imgTag.substring(start, end);
    }

    /**
     * Fills thumbnail URL, byte size, pixel size and original file name of an attachment from the
     * text of a multi-thumbnail image tag. Fields for which nothing parseable is found keep
     * their previous values; when a pattern occurs several times, the last parseable match wins.
     */
    private static void fillMultiThumbAttachment(AttachmentModel attachment, String imgTag) {
        String src = srcAttributeOf(imgTag);
        if (src != null) {
            attachment.thumbnail = src;
        }

        Matcher byteSizeMatcher = ATTACHMENT_SIZE_PATTERN.matcher(imgTag);
        while (byteSizeMatcher.find()) {
            try {
                String digits = byteSizeMatcher.group(1);
                String prefix = byteSizeMatcher.group(2);
                int multiplier = 1;
                if (prefix != null) {
                    if (prefix.equalsIgnoreCase("k")) {
                        multiplier = 1024;
                    } else if (prefix.equalsIgnoreCase("m")) {
                        multiplier = 1024 * 1024;
                    }
                }
                // Dividing the byte count by 1024 yields the size in kilobytes.
                attachment.size = Math.round(Float.parseFloat(digits) / 1024 * multiplier);
            } catch (NumberFormatException ignored) {
                // The pattern also accepts non-numbers such as a lone dot; skip those matches.
            }
        }

        Matcher pxSizeMatcher = ATTACHMENT_PX_SIZE_PATTERN.matcher(imgTag);
        int pxSizeEnd = -1;
        while (pxSizeMatcher.find()) {
            try {
                // Parse both numbers before assigning so that a failure leaves both fields untouched.
                int width = Integer.parseInt(pxSizeMatcher.group(1));
                int height = Integer.parseInt(pxSizeMatcher.group(2));
                attachment.width = width;
                attachment.height = height;
                pxSizeEnd = pxSizeMatcher.end();
            } catch (NumberFormatException ignored) {
                // Digit run too long for an int; skip this match.
            }
        }

        if (pxSizeEnd != -1) {
            // The original file name is looked for right after the pixel size.
            Matcher originalNameMatcher = ATTACHMENT_ORIGINAL_NAME_PATTERN.matcher(imgTag);
            if (originalNameMatcher.find(pxSizeEnd)) {
                String originalName = originalNameMatcher.group(1).trim();
                if (originalName.length() > 0) {
                    attachment.originalName = StringEscapeUtils.unescapeHtml4(originalName);
                }
            }
        }
    }

    /**
     * Reads the post comment from the stream up to the {@code </p>} that closes the message
     * paragraph, keeping track of nested {@code <p>} ... {@code </p>} pairs inside the comment.
     *
     * <p>The closing tag itself is not part of the result. If the stream ends before the closing
     * tag is found, everything read so far is returned without cutting anything off.</p>
     *
     * @return the comment HTML with Cloudflare-protected e-mails fixed and embedded video links
     *         rewritten, or an empty string if the comment is empty
     * @throws IOException if reading from the stream fails
     */
    @Override
    protected String readPostComment() throws IOException {
        commentBuffer.setLength(0);
        int openPos = 0;
        int closePos = 0;
        int depth = 1; // the message paragraph itself is already open
        boolean closed = false;
        int curChar;
        while ((curChar = _in.read()) != -1) {
            commentBuffer.append((char) curChar);

            openPos = matchStep(P_OPEN, openPos, curChar);
            if (openPos == P_OPEN.length) {
                ++depth;
                openPos = 0;
            }

            closePos = matchStep(P_CLOSE, closePos, curChar);
            if (closePos == P_CLOSE.length) {
                --depth;
                if (depth == 0) {
                    closed = true;
                    break;
                }
                closePos = 0;
            }
        }

        if (closed) {
            // Only strip the closing tag when it was actually read; at end of stream the tail is real content.
            commentBuffer.setLength(commentBuffer.length() - P_CLOSE.length);
        }
        if (commentBuffer.length() == 0) {
            return "";
        }
        return RegexUtils.replaceAll(CryptoUtils.fixCloudflareEmails(commentBuffer.toString()),
                EMBEDDED_PATTERN, EMBEDDED_REPLACEMENT);
    }
}