/* * Copyright (C) 2011 Laurent Caillette * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.novelang.rendering; import java.io.ByteArrayOutputStream; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.commons.lang.StringUtils; import org.novelang.common.Nodepath; import org.novelang.common.Problem; import org.novelang.common.Renderable; import org.novelang.common.StylesheetMap; import org.novelang.common.SyntacticTree; import org.novelang.designator.Tag; import org.novelang.outfit.DefaultCharset; import org.novelang.parser.GeneratedLexemes; import org.novelang.parser.NodeKind; import org.novelang.parser.shared.Lexeme; /** * @author Laurent Caillette */ public class RenderingTools { private RenderingTools() { } /** * Produces a text-only version of some {@code SyntacticTree}. */ public static String textualize( final SyntacticTree tree, final Charset charset ) throws UnsupportedEncodingException { return textualize( tree, charset, new PlainTextWriter( charset ) ) ; } private static String textualize( final SyntacticTree tree, final Charset charset, final FragmentWriter fragmentWriter ) throws UnsupportedEncodingException { final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream() ; try { new GenericRenderer( fragmentWriter, false ).render( new RenderableTree( tree, charset ), byteArrayOutputStream, null, null // TODO find something better ) ; } catch ( Exception e ) { throw new RuntimeException( "Should not happen, no IO expected", e ) ; } return new String( byteArrayOutputStream.toByteArray(), charset.name() ) ; } public static ImmutableSet< Tag > toTagSet( final Set< String > tagsAsStrings) { final ImmutableSet.Builder< Tag > tagSet = ImmutableSet.builder() ; for( final String tagAsString : tagsAsStrings ) { final String cleanString = toCleanStringForTag( tagAsString ) ; if( ! StringUtils.isBlank( cleanString ) ) { final Tag tag = new Tag( cleanString ) ; tagSet.add( tag ) ; } } return tagSet.build() ; } public static Set< Tag > toImplicitTagSet( final SyntacticTree tree ) { final String identifier = toImplicitIdentifier( tree ) ; return Tag.toTagSet( identifier.split( "_" ) ) ; } /** * Produces a designator name from some {@code SyntacticTree}. */ public static String toImplicitIdentifier( final SyntacticTree tree ) { // Produce a String replacing delimiters like ()[]"" and punctuation signs by '_'. String s = null; try { s = textualize( tree, DefaultCharset.RENDERING, new PlainTextWriter( DefaultCharset.RENDERING, DELIMITERS ) { @Override public void writeLiteral( final Nodepath kinship, final String word ) throws Exception { write( kinship, asLiteral( word ) ) ; } } ); } catch ( UnsupportedEncodingException e ) { throw new RuntimeException( "Should not happen with default encoding", e ) ; } s = toCleanStringForTag( s ); return s ; } private static String toCleanStringForTag( String s ) { s = s.replaceAll( "[,.;?!:]+", "_" ) ; // Replace diacritics by their "naked" version. s = replaceAll( GeneratedLexemes.getLexemes(), s ) ; // Collapse ' ', '-', '_'. s = s.replaceAll( " +", " " ) ; s = s.replaceAll( "-+", "-" ) ; s = s.replaceAll( "_+", "_" ) ; // Collapse sequences that would fool camel-casing. s = s.replaceAll( " _", "_" ) ; s = s.replaceAll( "_ ", "_" ) ; // Camel-casing: for every word preceded by a ' ' force the 1st letter to upper case. final StringBuffer buffer = new StringBuffer() ; final Matcher matcher = WORD_BUT_FIRST.matcher( s ) ; while( matcher.find() ) { final String word = matcher.group( 1 ) ; matcher.appendReplacement( buffer, firstCharacterToUpperCase( word ) ) ; } matcher.appendTail( buffer ) ; s = buffer.toString() ; // Trim '-' and '_' at the beginning. s = s.replaceAll( "\\A-+", "" ) ; s = s.replaceAll( "\\A_+", "" ) ; // Trim '-' and '_' at the end. s = s.replaceAll( "-+\\z", "" ) ; s = s.replaceAll( "_+\\z", "" ) ; // Collapse ' ', '-', '_' again, could have created some again. s = s.replaceAll( " +", " " ) ; s = s.replaceAll( "-+", "-" ) ; s = s.replaceAll( "_+", "_" ) ; // Collapse remaining sequences. s = s.replaceAll( "-_", "_" ) ; s = s.replaceAll( "_-", "_" ) ; // Discard every character which is not a letter, a digit, a '_' or a '-'. s = s.replaceAll( "[^0-9a-zA-Z\\-\\_]+", "" ) ; return s; } private static String asLiteral( final String s ) { return s.replaceAll( "[^0-9a-zA-Z]+", "-" ) ; } private static String firstCharacterToUpperCase(final String s ) { return Character.toUpperCase( s.charAt( 0 ) ) + s.substring( 1 ) ; } private static final Pattern WORD_BUT_FIRST = Pattern.compile( // v beware of the leading space. " ([0-9a-zA-Z]+(?:-[0-9a-zA-Z]+)*)" ) ; private static String replaceAll( final Map< Character, Lexeme > characterMap, final String s ) { final StringBuilder stringBuilder = new StringBuilder() ; for( final char c : s.toCharArray() ) { final Lexeme lexeme = characterMap.get( c ) ; if( lexeme != null && lexeme.hasDiacriticlessRepresentation() ) { stringBuilder.append( lexeme.getAscii62() ) ; } else { stringBuilder.append( c ) ; } } return stringBuilder.toString() ; } private static final String DELIMITER_REPLACEMENT = "_" ; private static final PlainTextWriter.DelimiterPair TAGGING_PAIR = PlainTextWriter.pair(DELIMITER_REPLACEMENT, DELIMITER_REPLACEMENT) ; private static final Map< NodeKind, PlainTextWriter.DelimiterPair > DELIMITERS = new ImmutableMap.Builder< NodeKind, PlainTextWriter.DelimiterPair>(). put( NodeKind.BLOCK_INSIDE_DOUBLE_QUOTES, TAGGING_PAIR ). put( NodeKind.BLOCK_INSIDE_PARENTHESIS, TAGGING_PAIR ). put( NodeKind.BLOCK_INSIDE_HYPHEN_PAIRS, TAGGING_PAIR ). put( NodeKind.BLOCK_INSIDE_SQUARE_BRACKETS, TAGGING_PAIR ). // put( NodeKind.BLOCK_OF_LITERAL_INSIDE_GRAVE_ACCENT_PAIRS, TAGGING_PAIR ). // put( NodeKind.BLOCK_OF_LITERAL_INSIDE_GRAVE_ACCENTS, TAGGING_PAIR ). build() ; public static class RenderableTree implements Renderable { private final SyntacticTree tree ; private final Charset charset ; public RenderableTree( final SyntacticTree tree, final Charset charset ) { this.tree = tree ; this.charset = charset ; } @Override public Iterable<Problem> getProblems() { return ImmutableList.of() ; } @Override public Charset getRenderingCharset() { return charset; } @Override public boolean hasProblem() { return false ; } @Override public SyntacticTree getDocumentTree() { return tree; } @Override public StylesheetMap getCustomStylesheetMap() { return null ; } } }