/**
* Copyright 2015 MIR@MU Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package cz.muni.fi.mir.mathmlcanonicalization.modules;
import java.util.List;
import java.util.Set;
import java.util.logging.Logger;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Namespace;
import org.jdom2.filter.Filters;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;
/**
* Removes unary operators, i.e.
* <code>//mo[count(preceding-sibling::*) = 0]</code>.
*
* <p>
* <span class="simpleTagLabel">Input</span>
* <p>
* Well-formed Presentation or Content MathML
* </p>
* <span class="simpleTagLabel">Output</span>
* <p>
* The original code with all unary operators removed
* </p>
* <span class="simpleTagLabel">Example Input</span>
* <pre>
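* &lt;?xml version="1.0" encoding="UTF-8"?&gt;
* &lt;math&gt;
* &lt;mrow&gt;
* <strong>&lt;mo&gt;-&lt;/mo&gt;</strong>
* &lt;mi&gt;E&lt;/mi&gt;
* &lt;mo&gt;=&lt;/mo&gt;
* &lt;mi&gt;m&lt;/mi&gt;
* &lt;msup&gt;
* &lt;mi&gt;c&lt;/mi&gt;
* &lt;mn&gt;2&lt;/mn&gt;
* &lt;/msup&gt;
* &lt;/mrow&gt;
* &lt;/math&gt;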
* </pre>
* <span class="simpleTagLabel">Example Output</span>
* <pre>
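* &lt;?xml version="1.0" encoding="UTF-8"?&gt;
* &lt;math&gt;
* &lt;mrow&gt;
* &lt;mi&gt;E&lt;/mi&gt;
* &lt;mo&gt;=&lt;/mo&gt;
* &lt;mi&gt;m&lt;/mi&gt;
* &lt;msup&gt;
* &lt;mi&gt;c&lt;/mi&gt;
* &lt;mn&gt;2&lt;/mn&gt;
* &lt;/msup&gt;
* &lt;/mrow&gt;
* &lt;/math&gt;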
* </pre>
*
* @author Michal Růžička
*/
public class UnaryOperatorRemover extends AbstractModule implements DOMModule {

    // Logger is named after this class so that its messages can be filtered
    // independently of the other canonicalization modules.
    private static final Logger LOGGER = Logger.getLogger(UnaryOperatorRemover.class.getName());

    // properties key names
    private static final String PM_UNARY_OPERATORS_TO_REMOVE = "pmathremoveunaryoperators";
    private static final String CM_UNARY_OPERATORS_TO_REMOVE = "cmathremoveunaryoperators";

    // MathML namespace bound to the "mathml" prefix used by the XPath expressions below.
    private static final Namespace MATHML_NS
            = Namespace.getNamespace("mathml", "http://www.w3.org/1998/Math/MathML");

    // Presentation MathML: <mo> elements that have no preceding sibling
    // (both namespaced and non-namespaced variants).
    private static final XPathExpression<Element> xpPMUnaryOperators = XPathFactory.instance().compile(
            "//mathml:mo[count(preceding-sibling::*) = 0]|//mo[count(preceding-sibling::*) = 0]",
            Filters.element(), null, MATHML_NS);

    // Presentation MathML: <mo> elements whose immediately preceding sibling is also <mo>.
    private static final XPathExpression<Element> xpPMSecondOperatorInDoubleOperators = XPathFactory.instance().compile(
            "//mathml:mo[preceding-sibling::*[1][self::mathml:mo]]|//mo[preceding-sibling::*[1][self::mo]]",
            Filters.element(), null, MATHML_NS);

    // Content MathML: <apply> elements with exactly two child elements
    // (an operator followed by a single operand).
    private static final XPathExpression<Element> xpCMApplyWithTwoChildrens = XPathFactory.instance().compile(
            "//mathml:apply[count(child::*)=2]|//apply[count(child::*)=2]",
            Filters.element(), null, MATHML_NS);

    /**
     * Creates the module and declares the two properties that list the
     * Presentation and Content MathML operators to be removed.
     */
    public UnaryOperatorRemover() {
        declareProperty(PM_UNARY_OPERATORS_TO_REMOVE);
        declareProperty(CM_UNARY_OPERATORS_TO_REMOVE);
    }

    /**
     * Removes the configured unary operators from the given document in place.
     *
     * @param doc the document to process
     * @throws NullPointerException if {@code doc} is {@code null}
     */
    @Override
    public void execute(final Document doc) {
        if (doc == null) {
            throw new NullPointerException("doc");
        }
        final Element root = doc.getRootElement();
        removeUnaryOperator(root);
    }

    /**
     * Runs the Presentation MathML pass followed by the Content MathML pass
     * on the tree rooted at {@code rootElem}.
     *
     * @param rootElem root of the tree to process; must not be {@code null}
     */
    private void removeUnaryOperator(final Element rootElem) {
        assert rootElem != null;

        /* Presentation MathML */
        final Set<String> pmCharsToRemove = getPropertySet(PM_UNARY_OPERATORS_TO_REMOVE);
        if (!pmCharsToRemove.isEmpty()) {
            // Unary operators
            removePresentationOperators(rootElem, xpPMUnaryOperators, pmCharsToRemove, "element");
            // Second of the double operators; evaluated only after the first
            // pass so that it sees the tree without the leading operators.
            removePresentationOperators(rootElem, xpPMSecondOperatorInDoubleOperators, pmCharsToRemove,
                    "the second element out of double elements");
        }
        LOGGER.finer("RemoveUnaryOperator Presentation MathML finished");

        /* Content MathML */
        final Set<String> cmOperatorsToRemove = getPropertySet(CM_UNARY_OPERATORS_TO_REMOVE);
        // Nothing can match an empty set, so skip the XPath evaluation entirely.
        if (!cmOperatorsToRemove.isEmpty()) {
            removeContentOperators(rootElem, cmOperatorsToRemove);
        }
        LOGGER.finer("RemoveUnaryOperator Content MathML finished");
    }

    /**
     * Detaches every element selected by {@code selector} whose text value is
     * contained in {@code charsToRemove}.
     *
     * @param rootElem context element for the XPath evaluation
     * @param selector XPath expression selecting candidate operator elements
     * @param charsToRemove operator values that should be removed
     * @param description wording used for the candidates in log messages
     */
    private void removePresentationOperators(final Element rootElem,
            final XPathExpression<Element> selector,
            final Set<String> charsToRemove,
            final String description) {
        for (Element candidate : selector.evaluate(rootElem)) {
            final String value = candidate.getValue();
            if (charsToRemove.contains(value)) {
                LOGGER.finest("Removing " + description + " '" + candidate.getQualifiedName() + "' with value '" + value + "'.");
                candidate.detach();
            } else {
                LOGGER.finest("Skipping " + description + " '" + candidate.getQualifiedName() + "' with value '" + value + "'.");
            }
        }
    }

    /**
     * Replaces every two-child {@code apply} element whose first child is
     * named in {@code operatorsToRemove} by its second child (the operand).
     *
     * @param rootElem context element for the XPath evaluation
     * @param operatorsToRemove local names of operator elements to remove
     */
    private void removeContentOperators(final Element rootElem, final Set<String> operatorsToRemove) {
        for (Element applyElem : xpCMApplyWithTwoChildrens.evaluate(rootElem)) {
            final List<Element> children = applyElem.getChildren();
            final Element operator = children.get(0);
            if (!operatorsToRemove.contains(operator.getName())) {
                continue;
            }
            final Element parent = applyElem.getParentElement();
            if (parent == null) {
                // The apply element has no parent element (e.g. it is the
                // document root), so there is no slot to put the operand into.
                LOGGER.finest("Skipping operator '" + operator.getQualifiedName() + "': enclosing element has no parent element.");
                continue;
            }
            final Element operand = children.get(1);
            LOGGER.finest("Removing operator '" + operator.getQualifiedName() + "' for operand '" + operand.getQualifiedName() + "'.");
            final int applyElemIndex = parent.indexOf(applyElem);
            // The operand must be detached from the apply element before it
            // can be attached elsewhere; setContent then swaps it in for the
            // apply element, which thereby leaves the tree.
            operand.detach();
            parent.setContent(applyElemIndex, operand);
        }
    }
}