/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.officeimporter.internal.filter;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import javax.inject.Named;
import javax.inject.Singleton;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xwiki.component.annotation.Component;
import org.xwiki.xml.html.filter.AbstractHTMLFilter;
import org.xwiki.xml.html.filter.ElementSelector;
/**
 * Replaces {@code <br/>} elements placed in between block elements with
 * {@code <div class="wikimodel-emptyline"/>}.
 *
 * @version $Id: f77175bc400964009507ebb4d48dc7c41f66f708 $
 * @since 1.8M1
 */
@Component
@Named("officeimporter/linebreak")
@Singleton
public class LineBreakFilter extends AbstractHTMLFilter
{
    /**
     * List of block element tag names. The array is sorted by the static initializer below so that
     * {@link #isBlockElement(Node)} can look names up with a binary search.
     */
    private static final String[] BLOCK_ELEMENT_TAGS =
        new String[] {TAG_P, TAG_UL, TAG_OL, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_TABLE};

    // Sort the block element tag names once; Arrays.binarySearch() requires a sorted array.
    static {
        Arrays.sort(BLOCK_ELEMENT_TAGS);
    }

    /**
     * Replaces every {@code <br/>} whose closest significant sibling (on either side) is a block element with a
     * {@code <div>} carrying the {@code wikimodel-emptyline} class.
     *
     * @param document the document to filter; it is modified in place.
     * @param cleaningParams the cleaning parameters; not used by this filter.
     */
    @Override
    public void filter(Document document, Map<String, String> cleaningParams)
    {
        // All line breaks are selected before any of them is replaced.
        List<Element> lineBreaksToReplace =
            filterDescendants(document.getDocumentElement(), new String[] {TAG_BR}, new ElementSelector()
            {
                @Override
                public boolean isSelected(Element element)
                {
                    // isBlockElement() returns false for null, so a line break without any significant sibling is
                    // never selected and no separate null check is needed.
                    return isBlockElement(findPreviousNode(element)) || isBlockElement(findNextNode(element));
                }
            });

        for (Element lineBreak : lineBreaksToReplace) {
            Element emptyLine = document.createElement(TAG_DIV);
            emptyLine.setAttribute(ATTRIBUTE_CLASS, "wikimodel-emptyline");
            // Put the empty-line marker exactly where the line break was.
            lineBreak.getParentNode().replaceChild(emptyLine, lineBreak);
        }
    }

    /**
     * Finds the previous sibling of the given element which is not a {@code <br/>}, an empty text node or a comment
     * node.
     *
     * @param element the element to be analysed.
     * @return previous sibling of the given element which is not a html line-break, an empty text node or a comment
     *         node, or {@code null} if there is no such sibling.
     */
    private Node findPreviousNode(Element element)
    {
        Node prev = element.getPreviousSibling();
        while (prev != null && (isLineBreak(prev) || isEmptyTextNode(prev) || isCommentNode(prev))) {
            prev = prev.getPreviousSibling();
        }
        return prev;
    }

    /**
     * Finds the next sibling of the given element which is not a {@code <br/>}, an empty text node or a comment node.
     *
     * @param element the element to be analysed.
     * @return next sibling of the given element which is not a html line-break, an empty text node or a comment
     *         node, or {@code null} if there is no such sibling.
     */
    private Node findNextNode(Element element)
    {
        Node next = element.getNextSibling();
        while (next != null && (isLineBreak(next) || isEmptyTextNode(next) || isCommentNode(next))) {
            next = next.getNextSibling();
        }
        return next;
    }

    /**
     * Check whether the given node represents a block element, i.e. whether its node name is one of
     * {@link #BLOCK_ELEMENT_TAGS}.
     *
     * @param node the node to be checked, may be {@code null}.
     * @return true if the node represents a block element, false otherwise (including for {@code null}).
     */
    private boolean isBlockElement(Node node)
    {
        // BLOCK_ELEMENT_TAGS is sorted in the static initializer, which makes the binary search valid.
        return null != node && Arrays.binarySearch(BLOCK_ELEMENT_TAGS, node.getNodeName()) >= 0;
    }

    /**
     * Checks if a node represents empty text content (white space).
     *
     * @param node the {@link Node}.
     * @return true if the node is a text node whose trimmed content is empty.
     */
    private boolean isEmptyTextNode(Node node)
    {
        return null != node && node.getNodeType() == Node.TEXT_NODE && node.getTextContent().trim().isEmpty();
    }

    /**
     * Checks if a node represents an html comment.
     *
     * @param node the {@link Node}.
     * @return true if the node is a comment node.
     */
    private boolean isCommentNode(Node node)
    {
        return null != node && node.getNodeType() == Node.COMMENT_NODE;
    }

    /**
     * Checks if a node represents an html line break.
     *
     * @param node the {@link Node}
     * @return true if the node represents a line break.
     */
    private boolean isLineBreak(Node node)
    {
        return null != node && TAG_BR.equals(node.getNodeName());
    }
}