package org.outerj.daisy.diff.html.dom.helper;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

import org.xml.sax.Attributes;
/**
 * Map that stores DOM tag attribute names and their values.
 * The order of the attributes is not taken into account.
 * @author karol
 */
public class AttributesMap extends HashMap<String, String> {

    /**
     * constant for serialization compatibility check
     */
    private static final long serialVersionUID = -6165499554111988049L;

    /**
     * Name of the "style" attribute. Its value is compared rule by rule so
     * that two elements are considered equal when they only differ in the
     * order (or spacing) of the CSS rules inside the attribute value.
     */
    protected static final String STYLE_ATTR = "style";

    /**
     * Name of the "class" attribute. Its value is compared class by class so
     * that two elements are considered equal when they only differ in the
     * order (or spacing) of the CSS class names inside the attribute value.
     */
    protected static final String CLASS_ATTR = "class";

    /**
     * Single space, used as the replacement for other white space.
     */
    protected static final String SPACE = " ";

    /**
     * Regular expression for the symbols (new line, tab) that do not affect
     * the real attribute value and are therefore replaced by a space.
     */
    protected static final String NL_TAB_REGEXP = "\\n|\\t";

    /** Compiled form of {@link #NL_TAB_REGEXP}. */
    private static final Pattern NL_TAB = Pattern.compile(NL_TAB_REGEXP);

    /** One or more consecutive spaces ("++" is a possessive quantifier). */
    private static final Pattern SPACES = Pattern.compile(SPACE + "++");

    /**
     * Delimiter between style rules: one or more semicolons with any amount
     * of space around them. A run such as " ; ; ;;;; " is ONE delimiter, so
     * empty rules in the middle of a value never produce tokens.
     * Atomic group and possessive quantifiers are used to avoid backtracking.
     */
    private static final Pattern STYLE_RULE_DELIM =
        Pattern.compile(SPACE + "*+(?>;" + SPACE + "*+)++");

    /** A colon together with the spaces around it. */
    private static final Pattern COLON_WITH_SPACES =
        Pattern.compile(SPACE + "*+:" + SPACE + "*+");

    private static final String COLON = ":";

    /** Separator written between rules by {@link #normalizeStyleString}. */
    private static final String RULE_SEPARATOR = "; ";

    /** Multiplier applied to the attribute name hash in {@link #hashCode()}. */
    private static final int NAME_HASH_FACTOR = 19;

    /**
     * Creates an empty map.
     */
    public AttributesMap() {
        super();
    }

    /**
     * Creates a map holding every attribute of the given SAX attribute list,
     * keyed by the lower-cased qualified name.
     *
     * @param attributes the attributes to copy; must not be null
     */
    public AttributesMap(Attributes attributes) {
        super();
        for (int i = 0; i < attributes.getLength(); i++) {
            put(toLowerCase(attributes.getQName(i)), attributes.getValue(i));
        }
    }

    /**
     * Returns true if the given {@link Attributes} contains the same
     * attributes as this map, with the same qualifications for "style" and
     * "class" as specified in {@link AttributesMap#equals(Object)}.
     * Qualified names are lower-cased before they are looked up.
     *
     * @param attributes the attributes to compare with; must not be null
     * @return true if both hold equivalent attributes
     */
    public boolean hasSameAttributes(Attributes attributes) {
        int length = attributes.getLength();
        if (length != size()) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            String qName = toLowerCase(attributes.getQName(i));
            if (!containsKey(qName)) {
                return false;
            }
            if (!equivalentValues(qName, get(qName), attributes.getValue(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if the other object is an <code>AttributesMap</code> with
     * the same set of keys and with equivalent values assigned to these keys.
     * Values are equivalent if they are equal (or both null), or, for the
     * "class" attribute, if they consist of the same class names in any
     * order, or, for the "style" attribute, if they consist of the same
     * rules (css property : value pairs) in any order.
     * Objects that are not an <code>AttributesMap</code> are never equal.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof AttributesMap)) {
            return false;
        }
        AttributesMap other = (AttributesMap) obj;
        if (size() != other.size()) {
            return false;
        }
        for (Map.Entry<String, String> entry : entrySet()) {
            String name = entry.getKey();
            // containsKey distinguishes "absent" from "present with null value",
            // which keeps the comparison symmetric
            if (!other.containsKey(name)) {
                return false;
            }
            if (!equivalentValues(name, entry.getValue(), other.get(name))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: "style" and "class"
     * values are normalized before hashing so that equivalent values give the
     * same hash. Null keys and null values contribute 0.
     */
    @Override
    public int hashCode() {
        int result = 0;
        for (Map.Entry<String, String> entry : entrySet()) {
            String name = entry.getKey();
            String value = entry.getValue();
            if (STYLE_ATTR.equals(name)) {
                value = normalizeStyleString(value);
            } else if (CLASS_ATTR.equals(name)) {
                value = normalizeClassString(value);
            }
            result += (name == null ? 0 : name.hashCode()) * NAME_HASH_FACTOR;
            result += (value == null ? 0 : value.hashCode());
        }
        return result;
    }

    /**
     * Checks if 2 values for the "style" attribute of an HTML tag are
     * equivalent, i.e. contain the same CSS property : value pairs in any
     * order. Pairs are separated by semicolons and any amount of space;
     * names are separated from values by a colon and any amount of space.
     * New lines and tabs count as spaces, and empty rules (leading, trailing
     * or repeated semicolons) are ignored. A rule repeated in one value must
     * be repeated equally often in the other.
     *
     * @param style1 first value, may be null
     * @param style2 second value, may be null
     * @return true if both are null or both describe the same rules
     */
    public static boolean equivalentStyles(String style1, String style2) {
        if (style1 == null || style2 == null) {
            return style1 == null && style2 == null;
        }
        String collapsed1 = collapseWhitespace(style1);
        String collapsed2 = collapseWhitespace(style2);
        // cheap check first: literally equal except for white space layout
        if (collapsed1.equals(collapsed2)) {
            return true;
        }
        return Arrays.equals(sortedStyleRules(collapsed1),
                             sortedStyleRules(collapsed2));
    }

    /**
     * Checks if 2 values for the "class" attribute of an HTML tag are
     * equivalent, i.e. contain the same CSS class names in any order.
     * New lines and tabs count as spaces. Class names are case-sensitive,
     * and a name repeated in one value must be repeated equally often in
     * the other.<br>
     * Example: {@code <p class="styleName1 styleName2">} is equivalent to
     * {@code <p class=" styleName1   styleName2 ">} and to
     * {@code <p class="styleName2 styleName1">}.
     *
     * @param classSet1 first value, may be null
     * @param classSet2 second value, may be null
     * @return true if the values are equivalent (including null values)
     */
    public static boolean sameClassSet(String classSet1, String classSet2) {
        if (classSet1 == null || classSet2 == null) {
            return classSet1 == null && classSet2 == null;
        }
        String trimmed1 = NL_TAB.matcher(classSet1).replaceAll(SPACE).trim();
        String trimmed2 = NL_TAB.matcher(classSet2).replaceAll(SPACE).trim();
        // cheap check first: literally equal except for leading/trailing
        // space and tabs/new lines
        if (trimmed1.equals(trimmed2)) {
            return true;
        }
        String[] names1 = SPACES.split(trimmed1);
        String[] names2 = SPACES.split(trimmed2);
        if (names1.length != names2.length) {
            return false;
        }
        Arrays.sort(names1);
        Arrays.sort(names2);
        return Arrays.equals(names1, names2);
    }

    /**
     * Returns the "style" attribute value in the canonical representation
     * that {@link #equivalentStyles(String, String)} compares: rules with no
     * space around the first colon, sorted, and joined with "; ".
     * Used by {@link #hashCode()} so that it agrees with
     * {@link #equals(Object)}.
     *
     * @param styleVal value of the "style" attribute of an HTML tag
     * @return normalized representation of the provided value; null for
     *         null, and the empty string if the value contains no rules
     */
    public static String normalizeStyleString(String styleVal) {
        if (styleVal == null || styleVal.length() == 0) {
            return styleVal; // nothing to normalize
        }
        String[] rules = sortedStyleRules(collapseWhitespace(styleVal));
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < rules.length; i++) {
            if (i > 0) {
                result.append(RULE_SEPARATOR);
            }
            result.append(rules[i]);
        }
        return result.toString();
    }

    /**
     * Returns the "class" attribute value in the canonical representation
     * that {@link #sameClassSet(String, String)} compares: class names
     * sorted and joined with a single space.
     * Used by {@link #hashCode()} so that it agrees with
     * {@link #equals(Object)}.
     *
     * @param classVal value of the "class" attribute of an HTML tag
     * @return normalized representation of the provided value; null for null
     */
    public static String normalizeClassString(String classVal) {
        if (classVal == null || classVal.length() == 0) {
            return classVal; // nothing to normalize
        }
        String trimmed = NL_TAB.matcher(classVal).replaceAll(SPACE).trim();
        String[] classNames = SPACES.split(trimmed);
        Arrays.sort(classNames);
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < classNames.length; i++) {
            if (i > 0) {
                result.append(SPACE);
            }
            result.append(classNames[i]);
        }
        return result.toString();
    }

    /**
     * Lower-cases an attribute name independently of the default locale
     * (under e.g. a Turkish default locale "TITLE" would become "t\u0131tle").
     */
    private static String toLowerCase(String name) {
        return name.toLowerCase(Locale.ENGLISH);
    }

    /**
     * Compares two values of the attribute with the given name, applying the
     * order-insensitive comparison for "style" and "class".
     */
    private static boolean equivalentValues(String name,
                                            String value1, String value2) {
        if (value1 == null || value2 == null) {
            return value1 == null && value2 == null;
        }
        if (value1.equals(value2)) {
            return true;
        }
        if (STYLE_ATTR.equals(name)) {
            return equivalentStyles(value1, value2);
        }
        if (CLASS_ATTR.equals(name)) {
            return sameClassSet(value1, value2);
        }
        return false;
    }

    /**
     * Replaces new lines and tabs with spaces (so separate tokens are not
     * "jammed" together), collapses runs of spaces and trims the result.
     */
    private static String collapseWhitespace(String value) {
        String spaced = NL_TAB.matcher(value).replaceAll(SPACE);
        return SPACES.matcher(spaced).replaceAll(SPACE).trim();
    }

    /**
     * Splits an already collapsed style value into its non-empty rules,
     * removes the spaces around the first colon of each rule and sorts them.
     */
    private static String[] sortedStyleRules(String collapsedStyle) {
        String[] tokens = STYLE_RULE_DELIM.split(collapsedStyle);
        // a value that starts with a semicolon yields an empty first token
        int count = 0;
        for (int i = 0; i < tokens.length; i++) {
            if (tokens[i].length() > 0) {
                count++;
            }
        }
        String[] rules = new String[count];
        int next = 0;
        for (int i = 0; i < tokens.length; i++) {
            if (tokens[i].length() > 0) {
                // only the first colon separates name and value;
                // later colons (e.g. in url(http://...)) belong to the value
                rules[next++] =
                    COLON_WITH_SPACES.matcher(tokens[i]).replaceFirst(COLON);
            }
        }
        // sort AFTER normalizing: "margin :1" and "margin:1" would otherwise
        // be ordered differently relative to "margin-left:2"
        Arrays.sort(rules);
        return rules;
    }

    //just for a quick test
    public static void main(String[] args) {
        String s1 = "margin-left:50px;font-size:16pt;";
        String s2 = "  font-size  :  16pt  ; ; ;  ; margin-left :   50px  ";
        System.out.println("equal? -" + equivalentStyles(s1, s2));
    }
}