/* * Copyright (C) 2011 Laurent Caillette * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.novelang.treemangling; import org.junit.Ignore; import org.junit.Test; import static org.novelang.parser.NodeKind.*; import static org.novelang.parser.antlr.TreeFixture.tree; import org.novelang.common.SyntacticTree; import org.novelang.common.tree.Treepath; import org.novelang.logger.Logger; import org.novelang.logger.LoggerFactory; import org.novelang.parser.antlr.TreeFixture; /** * Tests for {@link LevelMangler}. * * @author Laurent Caillette */ public class UrlManglerTest { @Test public void doNothingWhenNothingToDo() { final SyntacticTree tree = tree( NOVELLA, tree( PARAGRAPH_REGULAR ), tree( PARAGRAPH_REGULAR ) ); verifyFixNamedUrls( tree, tree ) ; } @Test public void namedUrlAtStartOfAPart() { verifyFixNamedUrls( tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( _URL, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( URL_LITERAL, "http://foo.com" ) ) ) ), tree( NOVELLA, tree( WHITESPACE_, " " ), tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ) ; } @Test public void detectNoNamingWanted() { verifyFixNamedUrls( tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( BLOCK_OF_LITERAL_INSIDE_GRAVE_ACCENTS, " " ), tree( _URL, tree( URL_LITERAL, "http://foo.com" ) ) ), tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( BLOCK_OF_LITERAL_INSIDE_GRAVE_ACCENTS, " " ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ; } @Test public void namedUrlWithSquareBrackets() { verifyFixNamedUrls( tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( _URL, tree( BLOCK_INSIDE_SQUARE_BRACKETS, tree( WORD_, "name" ) ), tree( URL_LITERAL, "http://foo.com" ) ) ) ), tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_SQUARE_BRACKETS, tree( WORD_, "name" ) ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ) ; } /** * This test reproduces a bug. It stresses detection of paragraph exit. */ @Test public void fixNamedUrlAtStartOfParagraphAfterLevelIntroducer() { verifyFixNamedUrls( tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( WORD_, "p" ) ), tree( LEVEL_INTRODUCER_, tree( LEVEL_INTRODUCER_INDENT_, "==" ), tree( LEVEL_TITLE, tree( WORD_, "t" ) ) ), tree( PARAGRAPH_REGULAR, tree( _URL, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( URL_LITERAL, "http://foo.com" ) ) ) ), tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( WORD_, "p" ) ), tree( LINE_BREAK_ ), tree( LINE_BREAK_ ), tree( LEVEL_INTRODUCER_, tree( LEVEL_INTRODUCER_INDENT_, "==" ), tree( LEVEL_TITLE, tree( WORD_, "t" ) ) ), tree( LINE_BREAK_ ), tree( LINE_BREAK_ ), tree( WHITESPACE_, " " ), tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( WHITESPACE_, " " ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com") ) ) ) ; } @Test public void fixUnnamedUrlAtStartOfAPart() { verifyFixNamedUrls( tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( _URL, tree( URL_LITERAL, "http://foo.com" ) ) ) ), tree( NOVELLA, tree( WHITESPACE_, " " ), tree( PARAGRAPH_REGULAR, tree( URL_LITERAL, "http://foo.com" ) ) ) ) ; } @Test @Ignore( "Awaiting fix") public void namedUrlInsideSquareBrackets() { verifyFixNamedUrls( tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_SQUARE_BRACKETS, tree( _URL, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( URL_LITERAL, "http://foo.com" ) ) ) ) ), tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_SQUARE_BRACKETS, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ) ) ; } @Test public void namedUrlInsideParenthesis() { verifyFixNamedUrls( tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_PARENTHESIS, tree( _URL, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( URL_LITERAL, "http://foo.com" ) ) ) ) ), tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_PARENTHESIS, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ) ) ; } @Test public void fixNamedUrlInsideAParagraphWithLineBreakBeforeQuotes() { verifyFixNamedUrls( tree( PARAGRAPH_REGULAR, tree( WORD_, "w" ), tree( _URL, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( URL_LITERAL, "http://foo.com" ) ) ), tree( PARAGRAPH_REGULAR, tree( WORD_, "w"), tree( LINE_BREAK_ ), tree( WHITESPACE_, " " ), tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ; } @Test public void fixNamedUrlInsideAParagraphNoLineBreakBeforeQuotes() { verifyFixNamedUrls( tree( PARAGRAPH_REGULAR, tree( WORD_, "w" ), tree( _URL, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( URL_LITERAL, "http://foo.com" ) ) ), tree( PARAGRAPH_REGULAR, tree( WORD_, "w"), tree( WHITESPACE_, " " ), tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ; } @Test public void namedUrlAtTheStartOfARegularParagraph() { verifyFixNamedUrls( tree( PARAGRAPH_REGULAR, tree( _URL, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( URL_LITERAL, "http://foo.com" ) ) ), tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ; } @Test public void namedUrlAtTheStartOfParagraphAsListItemWithTripleHyphen() { verifyFixNamedUrls( tree( PARAGRAPH_AS_LIST_ITEM_WITH_TRIPLE_HYPHEN_, tree( _URL, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( URL_LITERAL, "http://foo.com" ) ) ), tree( PARAGRAPH_AS_LIST_ITEM_WITH_TRIPLE_HYPHEN_, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ; } @Test public void namedUrlAtTheStartOfParagraphAsListItemWithDoubleHyphenAnNumberSign() { verifyFixNamedUrls( tree( PARAGRAPH_AS_LIST_ITEM_WITH_DOUBLE_HYPHEN_AND_NUMBER_SIGN, tree( _URL, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( URL_LITERAL, "http://foo.com" ) ) ), tree( PARAGRAPH_AS_LIST_ITEM_WITH_DOUBLE_HYPHEN_AND_NUMBER_SIGN, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "name" ) ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ; } @Test public void fixUrlWithoutNameAtStartOfAParagraph() { verifyFixNamedUrls( tree( PARAGRAPH_REGULAR, tree( _URL, tree( URL_LITERAL, "http://foo.com" ) ) ), tree( PARAGRAPH_REGULAR, tree( URL_LITERAL, "http://foo.com" ) ) ) ; } @Test public void fixUrlWithoutNameInsideAParagraph() { verifyFixNamedUrls( tree( PARAGRAPH_REGULAR, tree( WORD_, "w" ), tree( _URL, tree( URL_LITERAL, "http://foo.com" ) ) ), tree( PARAGRAPH_REGULAR, tree( WORD_, "w" ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ; } @Test public void dontGetFooledByPreviousParagraphsInsideAngledBracketPairs() { verifyFixNamedUrls( tree( NOVELLA, tree( PARAGRAPHS_INSIDE_ANGLED_BRACKET_PAIRS, tree( PARAGRAPH_REGULAR, tree( WORD_, "w" ) ) ), tree( PARAGRAPH_REGULAR, tree( _URL, tree( BLOCK_INSIDE_SQUARE_BRACKETS, tree( WORD_, "name" ) ), tree( URL_LITERAL, "http://foo.com" ) ) ) ), tree( NOVELLA, tree( PARAGRAPHS_INSIDE_ANGLED_BRACKET_PAIRS, tree( PARAGRAPH_REGULAR, tree( WORD_, "w" ) ) ), tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_SQUARE_BRACKETS, tree( WORD_, "name" ) ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.com" ) ) ) ) ; } /** * Was a bug, breaking an ArrayIndexOutOfBoundsException. */ @Test public void detectNotSameParagraph() { verifyFixNamedUrls( tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "q" ) ) ), tree( PARAGRAPH_REGULAR, tree( _URL, tree( URL_LITERAL, "http://foo.net" ) ) ) ), tree( NOVELLA, tree( PARAGRAPH_REGULAR, tree( BLOCK_INSIDE_DOUBLE_QUOTES, tree( WORD_, "q" ) ) ), tree( PARAGRAPH_REGULAR, tree( LINE_BREAK_ ), tree( LINE_BREAK_ ), tree( URL_LITERAL, "http://foo.net" ) ) ) ) ; } // ======= // Fixture // ======= private static final Logger LOGGER = LoggerFactory.getLogger( UrlManglerTest.class ) ; private static void verifyFixNamedUrls( final SyntacticTree expectedTree, final SyntacticTree rawTree ) { LOGGER.info( "Expected tree: ", TreeFixture.asString( expectedTree ) ) ; final Treepath< SyntacticTree > expectedTreepath = Treepath.create( expectedTree ) ; final Treepath< SyntacticTree > rehierarchized = fixNamedUrls( rawTree ) ; TreeFixture.assertEqualsNoSeparators( expectedTreepath.getTreeAtEnd(), rehierarchized.getTreeAtEnd() ) ; } private static Treepath< SyntacticTree > fixNamedUrls( final SyntacticTree rawTree ) { LOGGER.info( "Raw tree: ", TreeFixture.asString( rawTree ) ) ; final Treepath< SyntacticTree > mangledTreepath = UrlMangler.fixNamedUrls( Treepath.create( rawTree ) ) ; SyntacticTree mangledTree = mangledTreepath.getTreeAtEnd() ; LOGGER.info( "Mangled tree: ", TreeFixture.asString( mangledTree ) ) ; mangledTree = SeparatorsMangler.removeSeparators( mangledTree ) ; LOGGER.info( " No separators: ", TreeFixture.asString( mangledTree ) ) ; return mangledTreepath ; } }