/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.annotation.internal.content;
import java.util.HashMap;
import java.util.Map;
import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Singleton;
import org.xwiki.annotation.content.AlteredContent;
import org.xwiki.annotation.content.filter.Filter;
import org.xwiki.component.annotation.Component;
/**
 * Space normalizer content alterer. Will trim all leading and trailing white spaces in the passed sequence along with
 * collapsing all the inner white spaces to a single space. It also replaces all sorts of white spaces such as
 * non-breakable spaces with regular spaces.
 *
 * @version $Id: a3800f6a16dd90774b34b8fd77e9c74e16b6c2f0 $
 * @since 2.3M1
 */
@Component
@Named("space-normalizer")
@Singleton
public class SpaceNormalizerContentAlterer extends AbstractContentAlterer
{
    /**
     * The whitespace filter, to identify all characters which are whitespace. A character is considered whitespace
     * when this filter does <em>not</em> accept it.
     */
    @Inject
    @Named("whitespace")
    private Filter whitespaceFilter;

    /**
     * Normalizes the whitespace of the passed sequence and records how offsets in the original sequence relate to
     * offsets in the normalized one.
     * <ul>
     * <li>Leading and trailing whitespace is dropped.</li>
     * <li>Every inner run of whitespace is replaced by a single plain space.</li>
     * <li>Every original offset of a retained character maps to the offset of that character in the result; a
     * dropped whitespace character maps to the next retained character, or, for trailing whitespace, to the last
     * character of the result.</li>
     * <li>Every offset in the result maps back to the original offset of the character printed there.</li>
     * </ul>
     * If nothing is retained (empty or whitespace-only input), both offset maps are left empty.
     *
     * @param sequence the character sequence to normalize
     * @return the altered content, built from the normalized text, the original length and the two offset maps
     */
    @Override
    public AlteredContent alter(CharSequence sequence)
    {
        // the buffer never leaves this method, so no synchronized StringBuffer is needed
        StringBuilder buffer = new StringBuilder();
        Map<Integer, Integer> initialToAltered = new HashMap<Integer, Integer>();
        Map<Integer, Integer> alteredToInitial = new HashMap<Integer, Integer>();
        // number of characters dropped since the last printed character
        int removedChars = 0;
        // initially assume we're inside a whitespace run, so that all leading whitespace gets dropped
        boolean isInWhitespace = true;

        for (int i = 0; i < sequence.length(); ++i) {
            char c = sequence.charAt(i);
            boolean isWhitespace = !whitespaceFilter.accept(c);
            // print every non-whitespace, and only the first whitespace of a run that follows printed characters
            if (!isWhitespace || !isInWhitespace) {
                isInWhitespace = isWhitespace;
                // any kind of whitespace is printed as a plain space, not as the character itself
                buffer.append(isWhitespace ? ' ' : c);
                int alteredIndex = buffer.length() - 1;
                // this character and all the characters dropped right before it point to the printed position
                for (int t = 0; t <= removedChars; ++t) {
                    initialToAltered.put(i - t, alteredIndex);
                }
                removedChars = 0;
                // map the printed position back to the index in the original sequence
                alteredToInitial.put(alteredIndex, i);
            } else {
                removedChars++;
            }
        }

        // a trailing whitespace run leaves exactly one space at the end of the buffer: drop it, count it as removed
        if (buffer.length() > 0 && buffer.charAt(buffer.length() - 1) == ' ') {
            buffer.deleteCharAt(buffer.length() - 1);
            removedChars++;
            // buffer.length() is now the index the dropped space used to have; that position no longer exists
            alteredToInitial.remove(buffer.length());
        }

        // finally point the trailing dropped characters to the last character which is still in the buffer
        if (buffer.length() > 0) {
            int lastAlteredIndex = buffer.length() - 1;
            for (int t = 0; t < removedChars; ++t) {
                initialToAltered.put(sequence.length() - 1 - t, lastAlteredIndex);
            }
        }

        return new OffsetsMapAlteredContent(buffer.toString(), sequence.length(), initialToAltered, alteredToInitial);
    }
}