package org.quantumbadger.redreader.test.markdown; import org.junit.Test; import org.quantumbadger.redreader.reddit.prepared.markdown.CharArrSubstring; import org.quantumbadger.redreader.reddit.prepared.markdown.IntArrayLengthPair; import org.quantumbadger.redreader.reddit.prepared.markdown.MarkdownTokenizer; import static org.junit.Assert.assertEquals; public class MarkdownTokenizerTest { private static CharArrSubstring toCAS(final String s) { return CharArrSubstring.generate(s.toCharArray()); } private static IntArrayLengthPair toIALP(final int[] data) { final IntArrayLengthPair result = new IntArrayLengthPair(data.length); result.append(data); return result; } private static IntArrayLengthPair naiveTokenize(final String markdown) { final IntArrayLengthPair in = new IntArrayLengthPair(markdown.length()); final IntArrayLengthPair out = new IntArrayLengthPair(markdown.length()); in.append(markdown.toCharArray()); MarkdownTokenizer.naiveTokenize(in, out); return out; } private static void assertIAEquals(final int[] expected, final IntArrayLengthPair actual) { assertEquals(expected.length, actual.pos); for(int i = 0; i < expected.length; i++) { assertEquals(expected[i], actual.data[i]); } } @Test public void testTokenizeItalic1() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a *b*")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_ASTERISK, 'b', MarkdownTokenizer.TOKEN_ASTERISK }; assertIAEquals(expected, out); } @Test public void testTokenizeItalic2() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a* *b*")); final int[] expected = new int[] { 'a', '*', ' ', MarkdownTokenizer.TOKEN_ASTERISK, 'b', MarkdownTokenizer.TOKEN_ASTERISK }; assertIAEquals(expected, out); } @Test public void testTokenizeBold1() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a **b**")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_ASTERISK_DOUBLE, 'b', MarkdownTokenizer.TOKEN_ASTERISK_DOUBLE }; assertIAEquals(expected, out); } @Test public void testTokenizeBold2() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a** **b**")); final int[] expected = new int[] { 'a', '*', '*', ' ', MarkdownTokenizer.TOKEN_ASTERISK_DOUBLE, 'b', MarkdownTokenizer.TOKEN_ASTERISK_DOUBLE }; assertIAEquals(expected, out); } @Test public void testTokenizeLink1() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a [b](c) d")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, 'b', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, 'c', MarkdownTokenizer.TOKEN_PAREN_CLOSE, ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeLink2() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a [b]c) d")); final int[] expected = new int[] { 'a', ' ', '[', 'b', ']', 'c', ')', ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeLink3() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a [b] (c) d")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, 'b', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, 'c', MarkdownTokenizer.TOKEN_PAREN_CLOSE, ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeLink4() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a [b] (c) d")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, 'b', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, 'c', MarkdownTokenizer.TOKEN_PAREN_CLOSE, ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeLink5() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a [[b]](c) d")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, '[', 'b', ']', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, 'c', MarkdownTokenizer.TOKEN_PAREN_CLOSE, ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeLink6() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a [[[b]]] (c) d")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, '[', '[', 'b', ']', ']', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, 'c', MarkdownTokenizer.TOKEN_PAREN_CLOSE, ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeLink7() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a [[b](c) d")); final int[] expected = new int[] { 'a', ' ', '[', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, 'b', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, 'c', MarkdownTokenizer.TOKEN_PAREN_CLOSE, ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeLink8() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a [[[ *b* **b**]]] (c) d")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, '[', '[', ' ', MarkdownTokenizer.TOKEN_ASTERISK, 'b', MarkdownTokenizer.TOKEN_ASTERISK, ' ', MarkdownTokenizer.TOKEN_ASTERISK_DOUBLE, 'b', MarkdownTokenizer.TOKEN_ASTERISK_DOUBLE, ']', ']', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, 'c', MarkdownTokenizer.TOKEN_PAREN_CLOSE, ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeUnderscore1() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a_b_c_d")); final int[] expected = new int[] { 'a', '_', 'b', '_', 'c', '_', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeUnderscore2() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("_abcd_")); final int[] expected = new int[] { MarkdownTokenizer.TOKEN_UNDERSCORE, 'a', 'b', 'c', 'd', MarkdownTokenizer.TOKEN_UNDERSCORE }; assertIAEquals(expected, out); } @Test public void testTokenizeUnderscore3() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("_a_b cd_")); final int[] expected = new int[] { MarkdownTokenizer.TOKEN_UNDERSCORE, 'a', '_', 'b', ' ', 'c', 'd', MarkdownTokenizer.TOKEN_UNDERSCORE }; assertIAEquals(expected, out); } @Test public void testTokenizeUnderscore4() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("ab _abcd_ ab")); final int[] expected = new int[] { 'a', 'b', ' ', MarkdownTokenizer.TOKEN_UNDERSCORE, 'a', 'b', 'c', 'd', MarkdownTokenizer.TOKEN_UNDERSCORE, ' ', 'a', 'b' }; assertIAEquals(expected, out); } @Test public void testTokenizeSuperscript1() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("^^^All ^^^of ^^^this ^^^should ^^^be ^^^superscripted")); final int[] expected = new int[] { MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, 'A', 'l', 'l', ' ',MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, 'o', 'f', ' ', MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, 't', 'h', 'i', 's', ' ', MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, 's', 'h', 'o', 'u', 'l', 'd', ' ', MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, 'b', 'e', ' ', MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, MarkdownTokenizer.TOKEN_CARET, 's', 'u', 'p', 'e', 'r', 's', 'c', 'r', 'i', 'p', 't', 'e', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeRedditLink1() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a /r/abc d")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, '/', 'r', '/', 'a', 'b', 'c', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, '/', 'r', '/', 'a', 'b', 'c', MarkdownTokenizer.TOKEN_PAREN_CLOSE, ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeRedditLink2() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a /u/abc d")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, '/', 'u', '/', 'a', 'b', 'c', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, '/', 'u', '/', 'a', 'b', 'c', MarkdownTokenizer.TOKEN_PAREN_CLOSE, ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeRedditLink3() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a r/abc d")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, 'r', '/', 'a', 'b', 'c', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, 'r', '/', 'a', 'b', 'c', MarkdownTokenizer.TOKEN_PAREN_CLOSE, ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testTokenizeRedditLink4() { final IntArrayLengthPair out = MarkdownTokenizer.tokenize(toCAS("a u/abc d")); final int[] expected = new int[] { 'a', ' ', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, 'u', '/', 'a', 'b', 'c', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, 'u', '/', 'a', 'b', 'c', MarkdownTokenizer.TOKEN_PAREN_CLOSE, ' ', 'd' }; assertIAEquals(expected, out); } @Test public void testNaiveTokenizeLink1() { final IntArrayLengthPair out = naiveTokenize("[[a]](b)"); final int[] expected = new int[] { MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, 'a', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, 'b', MarkdownTokenizer.TOKEN_PAREN_CLOSE }; assertIAEquals(expected, out); } @Test public void testCleanLink1() { final IntArrayLengthPair in = naiveTokenize("[[a]](b)"); final IntArrayLengthPair out = new IntArrayLengthPair(128); MarkdownTokenizer.clean(in, out); final int[] expected = new int[] { MarkdownTokenizer.TOKEN_BRACKET_SQUARE_OPEN, '[', 'a', ']', MarkdownTokenizer.TOKEN_BRACKET_SQUARE_CLOSE, MarkdownTokenizer.TOKEN_PAREN_OPEN, 'b', MarkdownTokenizer.TOKEN_PAREN_CLOSE }; assertIAEquals(expected, out); } @Test public void testFindCloseWellBracketed1() { assertEquals(MarkdownTokenizer.findCloseWellBracketed( new int[] {'(', ')'}, '(', ')', 0, 2 ), 1); } @Test public void testFindCloseWellBracketed2() { assertEquals(MarkdownTokenizer.findCloseWellBracketed( new int[] {'(', '(', ')', ')'}, '(', ')', 0, 4 ), 3); } @Test public void testFindCloseWellBracketed3() { assertEquals(MarkdownTokenizer.findCloseWellBracketed( new int[] {'(', '(', ')'}, '(', ')', 0, 3 ), -1); } }