/*
 * Copyright (C) 2011 Laurent Caillette
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.novelang.treemangling;

import java.util.Set;

import com.google.common.collect.ImmutableSet;
import static org.novelang.parser.NodeKind.URL_LITERAL;
import static org.novelang.parser.NodeKind._URL;
import org.novelang.common.SimpleTree;
import org.novelang.common.SyntacticTree;
import org.novelang.common.tree.Traversal;
import org.novelang.common.tree.Treepath;
import org.novelang.common.tree.TreepathTools;
import org.novelang.parser.NodeKind;

/**
 * Wraps {@link NodeKind#URL_LITERAL} nodes into {@link NodeKind#_URL} ones, adding
 * preceding {@link NodeKind#BLOCK_INSIDE_DOUBLE_QUOTES}.
 *
 * <pre>
 * "external link name"
 * http://url.towards.somewhe.re
 * </pre>
 *
 * @author Laurent Caillette
 */
public class UrlMangler {

  /** Static utility class: not instantiable. */
  private UrlMangler() { }

  /**
   * Walks the tree and replaces every {@link NodeKind#URL_LITERAL} found inside a paragraph
   * by a {@link NodeKind#_URL} node. When a candidate name node precedes the URL literal
   * (with only separator nodes in between), the name is removed from its original position
   * and becomes the first child of the new {@link NodeKind#_URL} node.
   * <p>
   * The walk is driven by a small state machine, see {@link State}.
   *
   * @param treepath where the traversal starts. A {@code null} value is not supported:
   *     the loop is skipped and the final dereference fails.
   * @return the start of the last treepath reached by the traversal, which reflects
   *     every replacement made.
   * @throws IllegalStateException if the state machine reaches an inconsistent state.
   */
  public static Treepath< SyntacticTree > fixNamedUrls(
      final Treepath< SyntacticTree > treepath
  ) {
    State state = State.OUTSIDE_PARAGRAPH ;
    Treepath< SyntacticTree > treepathToName = null ;
    Treepath< SyntacticTree > paragraph = null ;
    Treepath< SyntacticTree > current = treepath ;
    Treepath< SyntacticTree > result = current ;

    while( current != null ) {
      // Captured before any replacement: the skip test below must look at the original node.
      final SyntacticTree tree = current.getTreeAtEnd() ;

      switch ( state ) {
        case OUTSIDE_PARAGRAPH:
          state = evaluate(
              tree,
              TreeManglingConstants.PARAGRAPH_NODEKINDS_CONTAINING_URL,
              State.INSIDE_PARAGRAPH,
              State.OUTSIDE_PARAGRAPH
          ) ;
          if( State.INSIDE_PARAGRAPH == state ) {
            paragraph = current ;
          }
          break ;

        case INSIDE_PARAGRAPH :
          state = evaluate(
              tree,
              TreeManglingConstants.CANDIDATE_URL_NAME_NODEKINDS,
              State.CANDIDATE_URL_NAME,
              evaluate( tree, URL_LITERAL, State.URL )
          ) ;
          if( state == State.CANDIDATE_URL_NAME ) {
            treepathToName = current ;
          }
          break ;

        case CANDIDATE_URL_NAME :
          state = evaluate(
              tree,
              TreeManglingConstants.SEPARATOR_NODEKINDS, // Loop on this state
              State.CANDIDATE_URL_NAME,                  // if separators.
              evaluate( tree, URL_LITERAL, State.URL )
          ) ;
          if( state != State.CANDIDATE_URL_NAME && state != State.URL ) {
            // Something else than a separator or an URL followed the candidate: forget it.
            treepathToName = null ;
          }
          break ;

        case URL :
          break ;

        default :
          throw new IllegalStateException( "Unsupported: " + state ) ;
      }

      if( State.URL == state ) {
        if( null != treepathToName ) {
          // The name moves under the _URL node, so drop it from its original position first.
          current = TreepathTools.removeSubtree( current, treepathToName ) ;
        }
        current = replaceByExternalLink( current, treepathToName ) ;
        treepathToName = null ;
        state = State.INSIDE_PARAGRAPH ;
      }

      result = current ;

      // Don't descend into a candidate name, a freshly created _URL node, or a skipped node.
      // (A former additional condition based on hasOnlyUrlLiteralChild is disabled here.)
      if( State.CANDIDATE_URL_NAME == state
          || current.getTreeAtEnd().isOneOf( _URL )
          || tree.isOneOf( TreeManglingConstants.SKIPPED_NODEKINDS_FOR_URLMANGLER )
      ) {
        current = Traversal.Preorder.nextUp( current ) ;
      } else {
        current = PREORDER.next( current ) ;
      }

      if( paragraph == null && state == State.INSIDE_PARAGRAPH ) {
        // Internal invariant violation; same exception type as the switch default above.
        throw new IllegalStateException( "Code inconsistency" ) ;
      }

      if( current != null && paragraph != null ) {
        final boolean leftParagraph ;
        if( paragraph.getLength() <= current.getLength() ) {
          leftParagraph = ! TreepathTools.hasSameStartingIndicesAs( paragraph, current ) ;
        } else {
          // We test in both ways because paragraphs inside angled brackets
          // have a greater path length due to nesting.
          leftParagraph = ! TreepathTools.hasSameStartingIndicesAs( current, paragraph ) ;
        }
        if( leftParagraph ) {
          state = State.OUTSIDE_PARAGRAPH ;
          paragraph = null ;
          treepathToName = null ;
        }
      }
    }

    return result.getStart() ;
  }

  /** Shared preorder traversal used to step to the next node. */
  private static final Traversal.Preorder< SyntacticTree > PREORDER =
      Traversal.Preorder.create() ;

  /**
   * Replaces the {@link NodeKind#URL_LITERAL} node at the end of the treepath by a
   * {@link NodeKind#_URL} node.
   *
   * @param treepathToUrlLiteral treepath to the URL node.
   * @param treepathToName treepath to the name node, which must be
   *     of {@link NodeKind#BLOCK_INSIDE_DOUBLE_QUOTES} type, or {@code null} if the URL
   *     has no name. When present, the name becomes the first child of the new node.
   * @return the new treepath.
   */
  private static Treepath< SyntacticTree > replaceByExternalLink(
      final Treepath< SyntacticTree > treepathToUrlLiteral,
      final Treepath< SyntacticTree > treepathToName
  ) {
    final SyntacticTree urlLiteral = treepathToUrlLiteral.getTreeAtEnd() ;
    final SyntacticTree[] children ;
    if( null == treepathToName ) {
      children = new SyntacticTree[] { urlLiteral } ;
    } else {
      children = new SyntacticTree[] { treepathToName.getTreeAtEnd(), urlLiteral } ;
    }
    final SyntacticTree urlTree = new SimpleTree( NodeKind._URL, children ) ;
    return TreepathTools.replaceTreepathEnd( treepathToUrlLiteral, urlTree ) ;
  }

  /**
   * Single-kind shortcut for {@link #evaluate(SyntacticTree, Set, State, State)} with
   * {@link State#INSIDE_PARAGRAPH} as the negative outcome.
   *
   * @param tree the tree to test.
   * @param nodeKind the node kind to look for.
   * @param positive the state returned if {@code tree} is of the given kind.
   * @return {@code positive} on a match, {@link State#INSIDE_PARAGRAPH} otherwise.
   */
  private static State evaluate(
      final SyntacticTree tree,
      final NodeKind nodeKind,
      final State positive
  ) {
    return evaluate( tree, ImmutableSet.of( nodeKind ), positive, State.INSIDE_PARAGRAPH ) ;
  }

  /**
   * Chooses between two states depending on the kind of the given tree.
   *
   * @param tree the tree to test.
   * @param nodeKinds the accepted node kinds.
   * @param positive the state returned if {@code tree} is one of {@code nodeKinds}.
   * @param negative the state returned otherwise.
   * @return {@code positive} or {@code negative}.
   */
  private static State evaluate(
      final SyntacticTree tree,
      final Set< NodeKind > nodeKinds,
      final State positive,
      final State negative
  ) {
    return tree.isOneOf( nodeKinds ) ? positive : negative ;
  }

  /**
   * Tells if the children of the given tree are exactly one {@link NodeKind#URL_LITERAL}
   * plus any number of nodes whose kind is in
   * {@link TreeManglingConstants#SEPARATOR_NODEKINDS}.
   * <p>
   * Not called at the moment: its only use in {@link #fixNamedUrls(Treepath)} is disabled.
   *
   * @param tree the tree whose children are inspected.
   * @return {@code true} if there is one single URL literal child and every other child
   *     is a separator, {@code false} otherwise (including when there is no child).
   */
  private static boolean hasOnlyUrlLiteralChild( final SyntacticTree tree ) {
    boolean foundOnlyOneUrl = false ;
    for( final SyntacticTree child : tree.getChildren() ) {
      if( child.isOneOf( NodeKind.URL_LITERAL ) ) {
        if( foundOnlyOneUrl ) {
          // Second URL literal.
          return false ;
        } else {
          foundOnlyOneUrl = true ;
        }
      } else if( ! TreeManglingConstants.SEPARATOR_NODEKINDS.contains( child.getNodeKind() ) ) {
        return false ;
      }
    }
    return foundOnlyOneUrl ;
  }

  /** States of the traversal performed by {@link #fixNamedUrls(Treepath)}. */
  private enum State {
    /** No enclosing paragraph that may contain an URL has been entered. */
    OUTSIDE_PARAGRAPH,
    /** Inside such a paragraph, with no pending candidate name. */
    INSIDE_PARAGRAPH,
    /** A candidate name was seen; only separators may come before the URL literal. */
    CANDIDATE_URL_NAME,
    /** The current node is an URL literal that must be wrapped. */
    URL
  }

}