package nya.miku.wishmaster.chans.arhivach;
import android.annotation.SuppressLint;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.text.DateFormat;
import java.text.DateFormatSymbols;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nya.miku.wishmaster.api.models.AttachmentModel;
import nya.miku.wishmaster.api.models.BadgeIconModel;
import nya.miku.wishmaster.api.models.PostModel;
import nya.miku.wishmaster.api.models.ThreadModel;
import nya.miku.wishmaster.common.Logger;
/**
* Created by Kalaver <Kalaver@users.noreply.github.com> on 03.07.2015.
*/
@SuppressLint("SimpleDateFormat")
public class ArhivachThreadReader implements Closeable {
private static final String TAG = "ArhivachThreadReader";
private static final DateFormat CHAN_DATEFORMAT;
static {
DateFormatSymbols chanSymbols = new DateFormatSymbols();
chanSymbols.setShortWeekdays(new String[] { "", "Вск", "Пнд", "Втр", "Срд", "Чтв", "Птн", "Суб" });
CHAN_DATEFORMAT = new SimpleDateFormat("dd/MM/yy EEE HH:mm:ss", chanSymbols);
CHAN_DATEFORMAT.setTimeZone(TimeZone.getTimeZone("GMT+3"));
}
private static final Pattern URL_PATTERN =
Pattern.compile("((https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])");
private static final char[] DATA_START = "class=\"thread_inner\"".toCharArray();
private static final int FILTER_THREAD_END = 0;
// private static final int FILTER_POST_HEAD = 1;
private static final int FILTER_ATTACHMENT = 1;
private static final int FILTER_ATTACHMENT_ORIGINAL = 2;
private static final int FILTER_ATTACHMENT_THUMBNAIL = 3;
private static final int FILTER_START_COMMENT_BODY = 4;
private static final int FILTER_START_COMMENT = 5;
private static final int FILTER_END_COMMENT = 6;
private static final int FILTER_SAGE = 7;
private static final int FILTER_NAME = 8;
private static final int FILTER_TIME = 9;
private static final int FILTER_ID_START = 10;
private static final int FILTER_SUBJECT = 11;
private static final int FILTER_ID = 12;
private static final int FILTER_MAIL = 13;
private static final int FILTER_OP = 14;
public static final char[][] FILTERS_OPEN = {
"</html>".toCharArray(),
//"class=\"post_head\"".toCharArray(),
"<div class=\"post_image_block\"".toCharArray(),
"<a".toCharArray(),
"<img".toCharArray(),
"class=\"post_comment_body\"".toCharArray(),
"class=\"post_comment\"".toCharArray(),
"</div>".toCharArray(),
"class=\"poster_sage\"".toCharArray(),
"class=\"poster_name\">".toCharArray(),
"class=\"post_time\">".toCharArray(),
"class=\"post_id\"".toCharArray(),
"class=\"post_subject\">".toCharArray(),
"id=\"".toCharArray(),
"href=\"mailto:".toCharArray(),
"label-success\">OP".toCharArray(),
};
private static final char[][] FILTERS_CLOSE = {
null,
// "</div>".toCharArray(),
">".toCharArray(),
">".toCharArray(),
">".toCharArray(),
">".toCharArray(),
">".toCharArray(),
null,
">".toCharArray(),
"</span>".toCharArray(),
"</span>".toCharArray(),
"</span>".toCharArray(),
"</".toCharArray(),
"\"".toCharArray(),
">".toCharArray(),
"</span>".toCharArray(),
};
private final Reader _in;
private StringBuilder readBuffer = new StringBuilder();
private List<ThreadModel> threads;
private ThreadModel currentThread;
private List<PostModel> postsBuf;
private PostModel currentPost;
private List<AttachmentModel> currentAttachments;
public ArhivachThreadReader(Reader reader) {
_in = reader;
}
public ArhivachThreadReader(InputStream in) {
this(new BufferedReader(new InputStreamReader(in)));
}
public ThreadModel[] readPage() throws IOException {
threads = new ArrayList<ThreadModel>();
initThreadModel();
initPostModel();
skipUntilSequence(DATA_START);
readData();
return threads.toArray(new ThreadModel[threads.size()]);
}
private void readData() throws IOException {
int filtersCount = FILTERS_OPEN.length;
int[] pos = new int[filtersCount];
int[] len = new int[filtersCount];
for (int i=0; i<filtersCount; ++i) len[i] = FILTERS_OPEN[i].length;
int curChar;
while ((curChar = _in.read()) != -1) {
for (int i=0; i<filtersCount; ++i) {
if (curChar == FILTERS_OPEN[i][pos[i]]) {
++pos[i];
if (pos[i] == len[i]) {
handleFilter(i);
pos[i] = 0;
}
} else {
if (pos[i] != 0) pos[i] = curChar == FILTERS_OPEN[i][0] ? 1 : 0;
}
}
}
finalizeThread();
}
private void initThreadModel() {
currentThread = new ThreadModel();
currentThread.postsCount = 0;
currentThread.attachmentsCount = 0;
postsBuf = new ArrayList<PostModel>();
}
private void initPostModel() {
currentPost = new PostModel();
currentPost.number = "unknown";
currentPost.trip = "";
currentAttachments = new ArrayList<AttachmentModel>();
}
private void finalizeThread() {
if (postsBuf.size() > 0) {
currentThread.posts = postsBuf.toArray(new PostModel[postsBuf.size()]);
currentThread.threadNumber = currentThread.posts[0].number;
for (PostModel post : currentThread.posts) post.parentThread = currentThread.threadNumber;
threads.add(currentThread);
initThreadModel();
}
}
private void finalizePost() {
if (currentPost.number != null && currentPost.number.length() > 0) {
++currentThread.postsCount;
currentPost.attachments = currentAttachments.toArray(new AttachmentModel[currentAttachments.size()]);
if (currentPost.name == null) currentPost.name = "";
if (currentPost.subject == null) currentPost.subject = "";
if (currentPost.comment == null) currentPost.comment = "";
if (currentPost.email == null) currentPost.email = "";
if (currentPost.trip == null) currentPost.trip = "";
postsBuf.add(currentPost);
}
initPostModel();
}
private void handleFilter(int filterIndex) throws IOException {
switch (filterIndex) {
case FILTER_THREAD_END:
finalizeThread();
break;
case FILTER_ATTACHMENT:
skipUntilSequence(FILTERS_CLOSE[filterIndex]);
parseAttachment();
break;
case FILTER_START_COMMENT_BODY:
skipUntilSequence(FILTERS_CLOSE[filterIndex]);
readPost();
finalizePost();
break;
case FILTER_SAGE:
skipUntilSequence(FILTERS_CLOSE[FILTER_START_COMMENT]);
currentPost.sage=true;
break;
case FILTER_NAME:
parseName(readUntilSequence(FILTERS_CLOSE[filterIndex]));
break;
case FILTER_TIME:
parseDate(readUntilSequence(FILTERS_CLOSE[filterIndex]));
break;
case FILTER_ID_START:
skipUntilSequence(FILTERS_OPEN[FILTER_ID]);
currentPost.number=readUntilSequence(FILTERS_CLOSE[FILTER_ID]);
skipUntilSequence(FILTERS_CLOSE[filterIndex]);
break;
case FILTER_SUBJECT:
currentPost.subject=readUntilSequence(FILTERS_CLOSE[filterIndex]);
break;
case FILTER_MAIL:
parseEmail(readUntilSequence(FILTERS_CLOSE[filterIndex]));
break;
case FILTER_OP:
skipUntilSequence(FILTERS_CLOSE[filterIndex]);
currentPost.op=true;
break;
}
}
protected void parseName(String s) {
int index = s.indexOf("<");
if (index>0) {
currentPost.name = s.substring(0, index);
Matcher matcher = Pattern.compile("src=\"([^\"]*)\"",Pattern.MULTILINE).matcher(s);
ArrayList<BadgeIconModel> icons=new ArrayList<BadgeIconModel>();
while (matcher.find()) {
BadgeIconModel icon = new BadgeIconModel();
icon.source=matcher.group(1);
icon.description=null;
icons.add(icon);
}
if (icons.size()>0)
currentPost.icons = icons.toArray(new BadgeIconModel[icons.size()]);
} else
currentPost.name=s;
}
protected void parseEmail(String s) {
if (s.contains("post_mail")) {
currentPost.email=s.substring(0,s.indexOf("\""));
}
}
protected void readPost() throws IOException {
String commentData = readUntilSequence(FILTERS_OPEN[FILTER_END_COMMENT]);
currentPost.comment = commentData;
}
private void parseAttachment() throws IOException {
skipUntilSequence(FILTERS_OPEN[FILTER_ATTACHMENT_ORIGINAL]);
String attachment = readUntilSequence(FILTERS_CLOSE[FILTER_ATTACHMENT_ORIGINAL]);
String thumbnail="";
String original="";
Matcher matcher = URL_PATTERN.matcher(attachment);
if (matcher.find()) original = matcher.group(1);
skipUntilSequence(FILTERS_OPEN[FILTER_ATTACHMENT_THUMBNAIL]);
attachment = readUntilSequence(FILTERS_CLOSE[FILTER_ATTACHMENT_THUMBNAIL]);
matcher = URL_PATTERN.matcher(attachment);
if (matcher.find()) thumbnail = matcher.group(1);
if ((original.length()>0)) {
AttachmentModel model = new AttachmentModel();
model.type = AttachmentModel.TYPE_OTHER_FILE;
model.size = -1;
model.width = -1;
model.height = -1;
model.path = original;
if (thumbnail.length()>0)
model.thumbnail = thumbnail;
else
model.thumbnail = original;
String ext = model.path.substring(model.path.lastIndexOf('.') + 1).toLowerCase(Locale.US);
if (ext.equals("png") || ext.equals("jpg") || ext.equals("jpeg")) model.type = AttachmentModel.TYPE_IMAGE_STATIC;
else if (ext.equals("gif")) model.type = AttachmentModel.TYPE_IMAGE_GIF;
else if (ext.equals("webm")) model.type = AttachmentModel.TYPE_VIDEO;
else if (ext.equals("mp3") || ext.equals("ogg")) model.type = AttachmentModel.TYPE_VIDEO;
++currentThread.attachmentsCount;
currentAttachments.add(model);
}
}
protected void parseDate(String date) {
if (date.length() > 0) {
try {
currentPost.timestamp = CHAN_DATEFORMAT.parse(date).getTime();
} catch (Exception e) {
Logger.e(TAG, "cannot parse date; make sure you choose the right DateFormat for this chan", e);
}
}
}
private void skipUntilSequence(char[] sequence) throws IOException {
int len = sequence.length;
if (len == 0) return;
int pos = 0;
int curChar;
while ((curChar = _in.read()) != -1) {
if (curChar == sequence[pos]) {
++pos;
if (pos == len) break;
} else {
if (pos != 0) pos = curChar == sequence[0] ? 1 : 0;
}
}
}
private String readUntilSequence(char[] sequence) throws IOException {
int len = sequence.length;
if (len == 0) return "";
readBuffer.setLength(0);
int pos = 0;
int curChar;
while ((curChar = _in.read()) != -1) {
readBuffer.append((char) curChar);
if (curChar == sequence[pos]) {
++pos;
if (pos == len) break;
} else {
if (pos != 0) pos = curChar == sequence[0] ? 1 : 0;
}
}
int buflen = readBuffer.length();
if (buflen >= len) {
readBuffer.setLength(buflen - len);
return readBuffer.toString();
} else {
return "";
}
}
@Override
public void close() throws IOException {
_in.close();
}
}