package im.actor.runtime.markdown;
import im.actor.runtime.Log;
import im.actor.runtime.regexp.MatcherCompat;
import java.util.ArrayList;
public class MarkdownParser {
/**
 * Full mode: code blocks, bold/italic spans, titled links ({@code [title](url)})
 * and bare URLs are all recognised.
 */
public static final int MODE_FULL = 0;
/**
 * Lite mode: code blocks, bold/italic spans and bare URLs are recognised;
 * titled links are left as plain text.
 */
public static final int MODE_LITE = 1;
/**
 * Links-only mode: code blocks and bold/italic spans are not parsed;
 * only titled links and bare URLs are recognised.
 */
public static final int MODE_ONLY_LINKS = 2;
/** Fence that opens and closes a code block. */
private static final String CODE_BLOCK = "```";
/** Parsing mode, one of the {@code MODE_*} constants. */
private final int mode;
/**
 * Creates a parser working in the given mode.
 * <p>
 * The value is not validated; a value other than the three {@code MODE_*}
 * constants passes the same mode checks as {@link #MODE_LITE}.
 *
 * @param mode one of {@link #MODE_FULL}, {@link #MODE_LITE}, {@link #MODE_ONLY_LINKS}
 */
public MarkdownParser(int mode) {
this.mode = mode;
}
/**
 * Parses markdown text into a document made of sections.
 *
 * @param text markdown text
 * @return document holding the parsed sections in the order they occur in the text
 */
public MDDocument processDocument(String text) {
    final TextCursor cursor = new TextCursor(text);
    final ArrayList<MDSection> sections = new ArrayList<MDSection>();

    // Every successful pass consumes one code block (and the text in front of it).
    // The last, unsuccessful pass flushes whatever text is left.
    boolean codeBlockConsumed;
    do {
        codeBlockConsumed = handleCodeBlock(cursor, sections);
    } while (codeBlockConsumed);

    MDSection[] result = new MDSection[sections.size()];
    sections.toArray(result);
    return new MDDocument(result);
}
/**
 * Outer parsing step: consumes the next code block, if there is one.
 * <p>
 * When a complete code block is found, the text between the cursor and the block
 * is emitted as a text section, the block itself is emitted as a code section and
 * the cursor is moved right behind the closing fence. Otherwise (or when the mode
 * is {@link #MODE_ONLY_LINKS}) all remaining text is emitted as a single text section.
 *
 * @param cursor     text cursor
 * @param paragraphs sections collected so far; new sections are appended
 * @return {@code true} if a code block was consumed and parsing should continue,
 * {@code false} once the remaining text has been flushed
 */
private boolean handleCodeBlock(TextCursor cursor, ArrayList<MDSection> paragraphs) {
    if (mode != MODE_ONLY_LINKS) {
        int blockStart = findCodeBlockStart(cursor);
        if (blockStart >= 0) {
            int blockEnd = findCodeBlockEnd(cursor, blockStart);
            if (blockEnd >= 0) {
                // Emit the text in front of the code block first
                if (cursor.currentOffset < blockStart) {
                    handleTextBlock(cursor, blockStart, paragraphs);
                }

                // The cursor now sits on blockStart; blockEnd points right behind the closing fence.
                int fenceLength = CODE_BLOCK.length();
                // TODO: trim() removes ALL surrounding whitespace, including the indentation of
                // the first code line. Ideally only one leading and one trailing line break
                // should be removed.
                String codeContent = cursor.text
                        .substring(blockStart + fenceLength, blockEnd - fenceLength)
                        .trim();

                cursor.currentOffset = blockEnd;
                paragraphs.add(new MDSection(new MDCode(codeContent)));
                return true;
            }
        }
    }

    // No (further) code block: everything that is left is ordinary text
    if (cursor.currentOffset < cursor.text.length()) {
        handleTextBlock(cursor, cursor.text.length(), paragraphs);
    }
    return false;
}
/**
 * Turns the text between the cursor and {@code blockEnd} into one text section.
 * The cursor is placed on {@code blockEnd} afterwards.
 *
 * @param cursor     text cursor
 * @param blockEnd   end of the text block (exclusive)
 * @param paragraphs sections collected so far; the new section is appended
 */
private void handleTextBlock(TextCursor cursor, int blockEnd, ArrayList<MDSection> paragraphs) {
    final MDSection section = new MDSection(handleSpans(cursor, blockEnd));
    paragraphs.add(section);
    cursor.currentOffset = blockEnd;
}
/**
 * Splits the text between the cursor and {@code blockEnd} into text elements
 * by calling {@code handleSpan} until it reports that nothing more was found.
 *
 * @param cursor   text cursor
 * @param blockEnd search limit (exclusive)
 * @return the elements that were built, in text order
 */
private MDText[] handleSpans(TextCursor cursor, int blockEnd) {
    final ArrayList<MDText> collected = new ArrayList<MDText>();

    boolean spanFound = true;
    while (spanFound) {
        spanFound = handleSpan(cursor, blockEnd, collected);
    }

    MDText[] result = new MDText[collected.size()];
    return collected.toArray(result);
}
/**
 * Consumes the next formatting span ({@code *bold*} or {@code _italic_}), if any.
 * <p>
 * When a complete span is found, the text in front of it is emitted (with url
 * detection), the span content is parsed recursively and the cursor is moved right
 * behind the closing marker. When no complete span is found, or the mode is
 * {@link #MODE_ONLY_LINKS}, the text up to {@code blockEnd} is emitted instead.
 * <p>
 * NOTE(review): only the first span start candidate is examined. If it has no
 * matching end, the whole remainder up to {@code blockEnd} is emitted without
 * looking for later spans.
 *
 * @param cursor   text cursor
 * @param blockEnd span search limit (exclusive)
 * @param elements elements collected so far; new elements are appended
 * @return {@code true} if a span was consumed and more may follow,
 * {@code false} if the text up to {@code blockEnd} has been flushed
 */
private boolean handleSpan(TextCursor cursor, int blockEnd, ArrayList<MDText> elements) {
    int spanStart = -1;
    int spanEnd = -1;
    char marker = 0;

    if (mode != MODE_ONLY_LINKS) {
        spanStart = findSpanStart(cursor, blockEnd);
        if (spanStart >= 0) {
            marker = cursor.text.charAt(spanStart);
            spanEnd = findSpanEnd(cursor, spanStart, blockEnd, marker);
        }
    }

    // No complete span: everything up to the limit is url/plain text
    if (spanEnd < 0) {
        handleUrls(cursor, blockEnd, elements);
        return false;
    }

    // Text in front of the span; this leaves the cursor on the opening marker
    handleUrls(cursor, spanStart, elements);

    // Step over the opening marker and parse the span content,
    // stopping in front of the closing marker
    cursor.currentOffset++;
    MDText[] children = handleSpans(cursor, spanEnd - 1);

    // Continue right behind the closing marker
    cursor.currentOffset = spanEnd;
    elements.add(new MDSpan(
            marker == '*' ? MDSpan.TYPE_BOLD : MDSpan.TYPE_ITALIC,
            children));
    return true;
}
/**
 * Emits the text between the cursor and {@code limit}, extracting titled links
 * ({@code [title](url)}) when the mode is {@link #MODE_FULL} or
 * {@link #MODE_ONLY_LINKS}. Text between and after the titled links is passed
 * on to {@code handleRawText}.
 *
 * @param cursor   text cursor
 * @param limit    end of the processed text (exclusive)
 * @param elements elements collected so far; new elements are appended
 */
private void handleUrls(TextCursor cursor, int limit, ArrayList<MDText> elements) {
    final boolean titledLinksEnabled = mode == MODE_FULL || mode == MODE_ONLY_LINKS;

    if (titledLinksEnabled) {
        for (TitledUrl link = findFormattedUrl(cursor, limit);
             link != null;
             link = findFormattedUrl(cursor, limit)) {

            final int open = link.getStart();
            final int separator = link.getMiddle();
            final int close = link.getEnd();

            // Text in front of the link
            handleRawText(cursor, open, elements);

            // "[title](url)": title sits between '[' and ']', url between '(' and ')'
            String title = cursor.text.substring(open + 1, separator);
            String target = cursor.text.substring(separator + 2, close);
            elements.add(new MDUrl(title, target));

            // Continue behind the closing ')'
            cursor.currentOffset = close + 1;
        }
    }

    // Whatever is left up to the limit
    handleRawText(cursor, limit, elements);
}
/**
 * Emits the text between the cursor and {@code limit}, turning every bare url
 * reported by {@code findUrl} into a link whose title is the url itself.
 *
 * @param cursor   text cursor
 * @param limit    end of the processed text (exclusive)
 * @param elements elements collected so far; new elements are appended
 */
private void handleRawText(TextCursor cursor, int limit, ArrayList<MDText> elements) {
    BasicUrl found = findUrl(cursor, limit);
    while (found != null) {
        final int from = found.getStart();
        final int to = found.getEnd();
        String link = cursor.text.substring(from, to);

        // Plain text in front of the url, then the url itself
        addText(cursor, from, elements);
        elements.add(new MDUrl(link, link));

        // Resume the search behind the url
        cursor.currentOffset = to;
        found = findUrl(cursor, limit);
    }

    // No more urls: the rest is plain text
    addText(cursor, limit, elements);
}
/**
 * Appends the text between the cursor and {@code limit} as a raw text element
 * and moves the cursor to {@code limit}. Does nothing when that range is empty.
 *
 * @param cursor   text cursor
 * @param limit    end of the text (exclusive)
 * @param elements elements collected so far; the new element is appended
 */
private void addText(TextCursor cursor, int limit, ArrayList<MDText> elements) {
    final int from = cursor.currentOffset;
    if (from >= limit) {
        return;
    }
    String raw = cursor.text.substring(from, limit);
    elements.add(new MDRawText(raw));
    cursor.currentOffset = limit;
}
/**
 * Looks for the first code fence at or after the cursor whose preceding
 * character is a good anchor (see {@code isGoodAnchor}).
 *
 * @param cursor text cursor
 * @return index of the opening fence, -1 if there is none
 */
private int findCodeBlockStart(TextCursor cursor) {
    final String text = cursor.text;

    int candidate = text.indexOf(CODE_BLOCK, cursor.currentOffset);
    while (candidate >= 0 && !isGoodAnchor(text, candidate - 1)) {
        // Badly anchored fence: skip it completely and look for the next one
        candidate = text.indexOf(CODE_BLOCK, candidate + CODE_BLOCK.length());
    }
    return candidate;
}
/**
 * Looks for the closing fence of a code block: the first fence behind the
 * opening one whose following character is a good anchor (see {@code isGoodAnchor}).
 *
 * @param cursor     text cursor
 * @param blockStart index of the opening fence
 * @return index right behind the closing fence, -1 if there is none
 */
private int findCodeBlockEnd(TextCursor cursor, int blockStart) {
    final String text = cursor.text;
    final int fenceLength = CODE_BLOCK.length();

    // A rejected fence is retried one character further on, so overlapping
    // fences inside a longer run of backticks are considered as well.
    for (int fence = text.indexOf(CODE_BLOCK, blockStart + fenceLength);
         fence >= 0;
         fence = text.indexOf(CODE_BLOCK, fence + 1)) {
        int afterFence = fence + fenceLength;
        if (isGoodAnchor(text, afterFence)) {
            return afterFence;
        }
    }
    return -1;
}
/**
 * Looks for the first opening span marker ({@code *} or {@code _}) between the
 * cursor and {@code limit}. A marker only counts when the character in front of
 * it is a good anchor and the character behind it is not the same marker.
 *
 * @param cursor text cursor
 * @param limit  search limit (exclusive)
 * @return index of the opening marker, -1 if there is none
 */
private int findSpanStart(TextCursor cursor, int limit) {
    final String text = cursor.text;

    int pos = cursor.currentOffset;
    while (pos < limit) {
        char candidate = text.charAt(pos);
        boolean isMarker = candidate == '*' || candidate == '_';
        if (isMarker
                && isGoodAnchor(text, pos - 1)
                && isNotSymbol(text, pos + 1, candidate)) {
            return pos;
        }
        pos++;
    }
    return -1;
}
/**
 * Looks for the closing marker of a span. A marker only counts when the
 * character behind it is a good anchor and the character in front of it is not
 * the same marker.
 *
 * @param cursor    text cursor
 * @param spanStart index of the opening marker
 * @param limit     search limit (exclusive)
 * @param span      marker character of the span
 * @return index right behind the closing marker, -1 if there is none
 */
private int findSpanEnd(TextCursor cursor, int spanStart, int limit, char span) {
    final String text = cursor.text;

    int pos = spanStart + 1;
    while (pos < limit) {
        boolean closes = text.charAt(pos) == span
                && isGoodAnchor(text, pos + 1)
                && isNotSymbol(text, pos - 1, span);
        if (closes) {
            return pos + 1;
        }
        pos++;
    }
    return -1;
}
/**
 * Searches for the first titled link of the form {@code [title](url)} between
 * the cursor and {@code limit}.
 * <p>
 * The opening {@code '['} must be preceded by a good anchor (see
 * {@code isGoodAnchor}). For that bracket the first following {@code "]("} and
 * then the first following {@code ')'} are used; all three must lie in front of
 * {@code limit}.
 *
 * @param cursor current cursor
 * @param limit  search limit (exclusive)
 * @return found url (positions of {@code '['}, {@code ']'} and {@code ')'}),
 * null if not found
 */
private TitledUrl findFormattedUrl(TextCursor cursor, int limit) {
    final String text = cursor.text;

    for (int start = cursor.currentOffset; start < limit; start++) {
        // Beginning of the link: a well-anchored '['
        if (text.charAt(start) != '[' || !isGoodAnchor(text, start - 1)) {
            continue;
        }

        // Middle of the link: the first "](" behind the bracket.
        // Every later '[' could only pair with the same or a later "](", so when
        // none lies in front of the limit the whole search has failed.
        int middle = text.indexOf("](", start + 1);
        if (middle < 0 || middle >= limit - 1) {
            return null;
        }

        // End of the link: the first ')' behind "](".
        // Any other candidate would need a ')' even further right, so a miss
        // here is final as well.
        int end = text.indexOf(')', middle + 2);
        if (end < 0 || end >= limit) {
            return null;
        }

        return new TitledUrl(start, middle, end);
    }
    return null;
}
/**
 * Searches for a bare (non-formatted) url between the cursor and {@code limit}.
 * <p>
 * Every position whose preceding character is a good anchor is tried in turn:
 * the text from that position up to {@code limit} is tested against
 * {@code Patterns.WEB_URL_START}, and the first match wins.
 * <p>
 * NOTE(review): a new substring is created for every candidate position.
 *
 * @param cursor current text cursor
 * @param limit  search limit (exclusive)
 * @return found url, null if not found
 */
private BasicUrl findUrl(TextCursor cursor, int limit) {
    final String text = cursor.text;

    for (int pos = cursor.currentOffset; pos < limit; pos++) {
        if (isGoodAnchor(text, pos - 1)) {
            String candidate = text.substring(pos, limit);
            MatcherCompat matcher = Patterns.WEB_URL_START.matcher(candidate);
            if (matcher.hasMatch()) {
                String matched = matcher.group();
                // Matcher positions are relative to the candidate substring
                int urlStart = pos + matcher.start();
                int urlEnd = urlStart + matched.length();
                return new BasicUrl(urlStart, urlEnd);
            }
        }
    }
    return null;
}
/**
 * Characters that may directly surround a markdown construct: whitespace
 * (including both parts of a CRLF line break) and basic punctuation.
 */
private static final String ANCHOR_CHARS = " .,:!?\t\n\r";

/**
 * Tests whether the position next to a markdown construct is a valid boundary.
 * <p>
 * A position is a good anchor when it lies outside of the text, or when the
 * character at that position is whitespace (space, tab, line feed, carriage
 * return) or one of the punctuation marks {@code . , : ! ?}.
 *
 * @param text  text
 * @param index index of the char to test; may be out of bounds
 * @return is good anchor
 */
private boolean isGoodAnchor(String text, int index) {
    // Start and end of the text always count as a boundary
    if (index < 0 || index >= text.length()) {
        return true;
    }
    return ANCHOR_CHARS.indexOf(text.charAt(index)) >= 0;
}
/**
 * Checks that the character at {@code index} differs from {@code c}.
 * Positions outside of the text count as "different".
 *
 * @param text  text
 * @param index index of the char to test; may be out of bounds
 * @param c     character to compare with
 * @return {@code false} only if {@code index} is inside the text and the
 * character there equals {@code c}
 */
private boolean isNotSymbol(String text, int index, char c) {
    boolean outOfBounds = index < 0 || index >= text.length();
    return outOfBounds || text.charAt(index) != c;
}
/**
 * Position of a url found in the parsed text.
 */
private static abstract class Url {
/**
 * @return index in the text where the url construct starts
 */
public abstract int getStart();
/**
 * @return index in the text where the url construct ends; whether it is
 * inclusive depends on the subclass
 */
public abstract int getEnd();
}
/**
 * Position of a bare url: the url text spans {@code [start, end)}.
 */
private static class BasicUrl extends Url {

    /** Index of the first url character. */
    private final int start;
    /** Index right behind the last url character. */
    private final int end;

    public BasicUrl(int start, int end) {
        this.start = start;
        this.end = end;
    }

    @Override
    public int getStart() {
        return this.start;
    }

    @Override
    public int getEnd() {
        return this.end;
    }
}
/**
 * Position of a titled link {@code [title](url)}, described by the indexes of
 * its three delimiters.
 */
private static class TitledUrl extends Url {

    /** Index of the opening {@code '['}. */
    private final int start;
    /** Index of the {@code ']'} that is followed by {@code '('}. */
    private final int middle;
    /** Index of the closing {@code ')'}. */
    private final int end;

    public TitledUrl(int start, int middle, int end) {
        this.start = start;
        this.middle = middle;
        this.end = end;
    }

    @Override
    public int getStart() {
        return this.start;
    }

    public int getMiddle() {
        return this.middle;
    }

    @Override
    public int getEnd() {
        return this.end;
    }
}
/**
 * Parsing state: the text being parsed and the offset of the next character
 * that has not been consumed yet. The parser reads and updates the offset directly.
 */
private static class TextCursor {

    /** Complete text being parsed. */
    private final String text;
    /** Offset of the next unconsumed character; starts at the beginning of the text. */
    private int currentOffset = 0;

    public TextCursor(String text) {
        this.text = text;
    }
}
}