/* * Copyright (C) 2011 Laurent Caillette * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.novelang.treemangling; import com.google.common.base.Preconditions; import static org.novelang.parser.NodeKind.*; import org.novelang.common.SimpleTree; import org.novelang.common.SyntacticTree; import org.novelang.common.tree.Traversal; import org.novelang.common.tree.Treepath; import org.novelang.common.tree.TreepathTools; import org.novelang.parser.NodeKind; /** * Rehiererachizes embedded lists, wrapping them in * {@link org.novelang.parser.NodeKind#_EMBEDDED_LIST_ITEM}. * * @author Laurent Caillette */ public class EmbeddedListMangler { private EmbeddedListMangler() { } /** * Rehierarchize embedded list items. */ public static Treepath< SyntacticTree > rehierarchizeEmbeddedLists( final Treepath< SyntacticTree > treepathToRehierarchize ) { Treepath< SyntacticTree > current = treepathToRehierarchize ; while( true ) { final Treepath< SyntacticTree > next ; final NodeKind parsedToken = extractParsedToken( current ) ; if( parsedToken != null ) { final NodeKind syntheticToken = getSyntheticToken( parsedToken ) ; current = insertPlaceholder( current ) ; // Now currents refers to a new _PLACEHOLDER_ child right before first raw item. // The gobbler is a treepath because it may be used as a stack. final Treepath< SyntacticTree > gobbler = createGobbler( syntheticToken ) ; final int indentation = getIndentSize( current ) ; // Hitting placeholder, but no problem. final GobbleResult result = gobbleThisIndentOrGreater( gobbler, current, indentation, parsedToken, syntheticToken ) ; // So we have gobbled every items in sequence, just leaving the placeholder. current = result.gobbled ; current = TreepathTools.replaceTreepathEnd( current, result.gobbler.getTreeAtStart() ) ; next = PREORDER.nextUp( current ) ; } else { next = PREORDER.next( current ) ; } if( null == next ) { return current.getStart() ; } else { current = next ; } } } private static final Traversal.Preorder< SyntacticTree > PREORDER = Traversal.Preorder.create() ; private static Treepath< SyntacticTree > createGobbler( final NodeKind syntheticToken ) { return Treepath.create( ( SyntacticTree ) new SimpleTree( syntheticToken ) ) ; } /** * @return {@code null} if given {@link Treepath} if no no interest, the parsed token otherwise. */ private static NodeKind extractParsedToken( final Treepath< SyntacticTree > current ) { final NodeKind parsedToken = current.getTreeAtEnd().getNodeKind() ; if( getSyntheticToken( parsedToken ) != null ) { return parsedToken ; } else { return null ; } } @SuppressWarnings( { "EnumSwitchStatementWhichMissesCases" } ) private static NodeKind getSyntheticToken( final NodeKind parsedToken ) { if( parsedToken == null ) { return null ; } else { switch( parsedToken ) { case EMBEDDED_LIST_ITEM_WITH_HYPHEN_ : return _EMBEDDED_LIST_WITH_HYPHEN ; case EMBEDDED_LIST_ITEM_NUMBERED_ : return _EMBEDDED_LIST_WITH_NUMBER_SIGN ; default : return null ; } } } private static Treepath< SyntacticTree > insertPlaceholder( final Treepath< SyntacticTree > current ) { return TreepathTools.addChildAt( current.getPrevious(), new SimpleTree( _PLACEHOLDER_ ), current.getIndexInPrevious() ) ; } /** * Gobbles all consecutive nodes which have given parsed token, and which are * of the same indent or are of a greater indent. * * @param gobbler a tree which solely holds the new embedded list structure. * @param gobbleStart where the gobbling starts in the document. Nodes of interest are removed. * @param firstIndent indent found by the caller, must be 0 or more. * @param parsedToken * @param syntheticToken * @return a non-null {@code GobbleResult} with a {@code gobbler} value never null, but with * a {@code gobbled} value that can be null when nodes of interest have all been gobbled. */ private static GobbleResult gobbleThisIndentOrGreater( Treepath< SyntacticTree > gobbler, Treepath< SyntacticTree > gobbleStart, final int firstIndent, final NodeKind parsedToken, final NodeKind syntheticToken ) { Preconditions.checkArgument( gobbleStart.getTreeAtEnd().isOneOf( _PLACEHOLDER_ ) ) ; Preconditions.checkArgument( firstIndent >= 0 ) ; do { final Gobbling gobbling = gobble( gobbleStart, firstIndent, parsedToken ) ; if( gobbling.success ) { if( firstIndent == gobbling.indentation ) { // Gobble at same indentation gobbler = TreepathTools.addChildLast( gobbler, gobbling.gobbledTree ).getPrevious() ; gobbleStart = gobbling.treepathMinusGobbled ; } else if( firstIndent < gobbling.indentation ) { // Gobble at greater indentation gobbler = TreepathTools.addChildLast( gobbler, new SimpleTree( syntheticToken ) ) ; final GobbleResult result = gobbleThisIndentOrGreater( gobbler, gobbleStart, gobbling.indentation, parsedToken, syntheticToken ) ; if( result.mayContinue ) { final Gobbling gobblingLookahead = gobble( result.gobbled, -1, parsedToken ) ; if( gobblingLookahead.indentation > firstIndent ) { // If the indentation is greater than current after gobbling all nodes of same // indent or greater, this means there is some "inbetween" indent. throw new IllegalArgumentException( "Inconsistent indentation" ) ; } gobbler = result.gobbler.getPrevious() ; gobbleStart = result.gobbled ; } else { return new GobbleResult( result.gobbler.getPrevious(), result.gobbled, false ) ; } } else { // Let caller handle smaller indentation return new GobbleResult( gobbler, gobbleStart, true ) ; } } else { return new GobbleResult( gobbler, gobbleStart, false ) ; } } while( true ) ; } /** * Gobbles one item node if possible. * <p> * Given parameter {@code gobbleStart} is the path to a node of {@code _PLACEHOLDER} kind. * The {@code _PLACEHOLDER} avoids to make the parent become childless when all children * have been gobbled (this would discard useful information about where to insert the new * rehierarchized list). * <p> * Gobbling removes following siblings of the {@code _PLACEHOLDER}. If one interesting item * (evaluated by {@link #extractParsedToken(org.novelang.common.tree.Treepath}) * is found, or if there is no following sibling, then the method returns. * <p> * If, when looking for next siblings of {@code _PLACEHOLDER_} node, a * {@link org.novelang.parser.NodeKind#WHITESPACE_} is encountered, it sets the value of * {@link Gobbling#indentation}. Otherwise, this value copies {@code knownIndentation} parameter. * <p> * Each call returns a {@code Gobbling} object containing the result of the gobble. * <p> * {@link Gobbling#success} is set to false if there was no following sibling * of interest (no raw item, all separators skipped). * <p> * {@link Gobbling#gobbledTree} is the treepath to the {@code _PLACEHOLDER} node, but whith * following siblings of interest removed. * <p> * {@link Gobbling#gobbledTree} is the gobbled item. * * * @param parsedToken * @param gobbleStart a treepath to the {@code _PLACEHOLDER} node which precedes the sequence * of raw items. * * @return a {@code Gobbling} object containing * the result of the gobble. */ private static Gobbling gobble( final Treepath< SyntacticTree > gobbleStart, int indentation, final NodeKind parsedToken ) { Preconditions.checkArgument( gobbleStart.getTreeAtEnd().isOneOf( _PLACEHOLDER_ ) ) ; Treepath< SyntacticTree > start = gobbleStart ; do { if( TreepathTools.hasNextSibling( start ) ) { final Treepath< SyntacticTree > next = TreepathTools.getNextSibling( start ) ; if( extractParsedToken( next ) != null ) { final Treepath< SyntacticTree > minusNext = TreepathTools.removeNextSibling( start ) ; return new Gobbling( minusNext, makeEmbeddedListItem( next ), indentation ) ; } else { final SyntacticTree nextTree = next.getTreeAtEnd() ; if( nextTree.isOneOf( WHITESPACE_, LINE_BREAK_ ) ) { start = TreepathTools.removeNextSibling( start ) ; if( nextTree.isOneOf( WHITESPACE_ ) ) { indentation = getWhitespaceLength( nextTree ) ; } else { indentation = 0 ; } continue ; } } } // If no next sibling at all, or no useful next sibling, then return. return new Gobbling( start ) ; } while( true ) ; } private static SyntacticTree makeEmbeddedListItem( final Treepath< SyntacticTree > treepath ) { final Iterable< ? extends SyntacticTree > children = treepath.getTreeAtEnd().getChildren() ; return new SimpleTree( _EMBEDDED_LIST_ITEM, children ) ; } /** * Holds two return values: the treepath minus the gobbled nodes, and the tree of interest. */ private static class Gobbling { private final Treepath< SyntacticTree > treepathMinusGobbled ; private final SyntacticTree gobbledTree ; private final boolean success ; private final int indentation ; private Gobbling( final Treepath< SyntacticTree > treepathMinusGobbled, final SyntacticTree gobbledTree, final int indentation ) { this.treepathMinusGobbled = Preconditions.checkNotNull( treepathMinusGobbled ) ; this.gobbledTree = Preconditions.checkNotNull( gobbledTree ) ; this.success = true ; Preconditions.checkArgument( indentation >= 0 ) ; this.indentation = indentation ; } private Gobbling( final Treepath<SyntacticTree> treepathMinusGobbled ) { this.treepathMinusGobbled = treepathMinusGobbled ; this.gobbledTree = null ; this.indentation = Integer.MIN_VALUE ; this.success = false ; } } private static class GobbleResult { private final Treepath< SyntacticTree > gobbler ; private final Treepath< SyntacticTree > gobbled ; private final boolean mayContinue ; private GobbleResult( final Treepath< SyntacticTree > gobbler, final Treepath< SyntacticTree > gobbled, final boolean mayContinue ) { this.gobbler = Preconditions.checkNotNull( gobbler ) ; this.gobbled = Preconditions.checkNotNull( gobbled ) ; this.mayContinue = mayContinue ; } } /** * Returns the length of immediate left sibling, or length of immediate left sibling of * one ancestor. * @Deprecated */ private static int getIndentSize( final Treepath< SyntacticTree > treepath ) { final Treepath< SyntacticTree > previous = treepath.getPrevious(); if( null == previous ) { return 0 ; } final int indexInPrevious = treepath.getIndexInPrevious() ; if( indexInPrevious > 0 ) { final SyntacticTree leftSiblingInHierarchy = previous.getTreeAtEnd().getChildAt( indexInPrevious - 1 ) ; if( leftSiblingInHierarchy.isOneOf( WHITESPACE_ ) ) { return getWhitespaceLength( leftSiblingInHierarchy ) ; } else { return 0 ; } } else { return getIndentSize( previous ) ; } } private static int getWhitespaceLength( final SyntacticTree tree ) { Preconditions.checkArgument( tree.isOneOf( WHITESPACE_ ) ) ; return tree.getChildCount() > 0 ? tree.getChildAt( 0 ).getText().length() : 0 ; } }