/*
* LimeXMLSchemaFieldExtractor.java
*
* Created on May 1, 2001, 1:23 PM
*/
package com.limegroup.gnutella.xml;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Helper class to extract field names from a schema document.
 * Note: This class is incomplete. It works only for a subset of schemas.
 * Some standard API should be used when available.
 * <p>
 * Some of the many limitations:
 * <ul>
 * <li>Can't use IDREF
 * </li>
 * <li>Might have problems if the same field name is used in two different
 * contexts in the schema document (attribute names are no problem)
 * </li>
 * <li>Will work only if the schema is valid. If the schema is invalid (has
 * errors), the result may be unpredictable
 * </li>
 * <li>Doesn't resolve references to other schemas</li>
 * <li>A simpleType tag shouldn't be defined independently</li>
 * </ul>
 * It's just a 'quick &amp; dirty' approach to extract the field names. Whenever
 * available, a standard parser should be used for parsing schemas. It is
 * beyond the scope of the current project to implement a parser that works
 * with all schemas.
 * @author asingla
 */
class LimeXMLSchemaFieldExtractor
{
/**
 * Maps a name (an element name, or the declared or auto-generated name of
 * a 'complexType') to the list of SchemaFieldInfoPair recorded for it
 */
private final Map _nameSchemaFieldInfoListMap = new HashMap();
/**
 * A dummy name to be used when there's no name for a field
 */
private static final String DUMMY = "DUMMY";
/**
 * A dummy type name, used for an attribute that declares no 'type'
 */
private static final String DUMMY_SIMPLETYPE = "DUMMY_SIMPLETYPE";
/**
 * Set of primitive types (as per XML Schema specifications)
 */
private static final Set PRIMITIVE_TYPES = new HashSet();
/**
 * A counter to generate unique numbers which can be appended to strings
 * to form unique strings
 */
private int _uniqueCount = 1;
/**
 * The last auto-generated name for a 'complexType' element. An anonymous
 * 'complexType' (one without a 'name' attribute) is stored under this name.
 */
private String _lastUniqueComplexTypeName = "";
/**
 * The SchemaFieldInfo created for the most recently processed named child
 * element. Enumeration values found in a 'simpleType' encountered during
 * traversal are recorded on this object. It is null until such an element
 * has been processed.
 */
private SchemaFieldInfo _lastFieldInfoObject = null;
/**
 * The field names that are referenced/used from some other field
 * (i.e. which can not be the root element)
 */
private final Set _referencedNames = new HashSet();
//initialize the static variables
static
{
//fill it with primitive types
PRIMITIVE_TYPES.add("xsi:string");
PRIMITIVE_TYPES.add("string");
PRIMITIVE_TYPES.add("xsi:boolean");
PRIMITIVE_TYPES.add("boolean");
PRIMITIVE_TYPES.add("xsi:float");
PRIMITIVE_TYPES.add("float");
PRIMITIVE_TYPES.add("xsi:double");
PRIMITIVE_TYPES.add("double");
PRIMITIVE_TYPES.add("xsi:decimal");
PRIMITIVE_TYPES.add("decimal");
PRIMITIVE_TYPES.add("xsi:timeDuration");
PRIMITIVE_TYPES.add("timeDuration");
PRIMITIVE_TYPES.add("xsi:recurringDuration");
PRIMITIVE_TYPES.add("recurringDuration");
PRIMITIVE_TYPES.add("xsi:binary");
PRIMITIVE_TYPES.add("binary");
PRIMITIVE_TYPES.add("xsi:uriReference");
PRIMITIVE_TYPES.add("uriReference");
PRIMITIVE_TYPES.add("xsi:ID");
PRIMITIVE_TYPES.add("ID");
PRIMITIVE_TYPES.add("xsi:IDREF");
PRIMITIVE_TYPES.add("IDREF");
PRIMITIVE_TYPES.add("xsi:ENTITY");
PRIMITIVE_TYPES.add("ENTITY");
PRIMITIVE_TYPES.add("xsi:NUMTOKEN");
PRIMITIVE_TYPES.add("NUMTOKEN");
PRIMITIVE_TYPES.add("xsi:Qname");
PRIMITIVE_TYPES.add("Qname");
}
/**
 * Returns a list of fields in the passed document.
 * <p>
 * Every call starts from a clean state, so information gathered from a
 * document passed to an earlier call on the same instance does not leak
 * into the result.
 * @param document The XML Schema document from where to extract fields
 * @requires The document be a valid XML Schema without any errors
 * @return A list (of SchemaFieldInfo) containing all the fields in the
 * passed document. The list is empty if the document has no document
 * element, or if no root element could be determined (i.e. every name
 * gathered from the schema is referenced from somewhere else).
 * @throws <tt>NullPointerException</tt> if the <tt>Document</tt> argument
 * is <tt>null</tt>
 */
public List getFields(Document document) {
    if(document == null) {
        throw new NullPointerException("null document");
    }
    //forget everything gathered by a previous call
    _nameSchemaFieldInfoListMap.clear();
    _referencedNames.clear();
    _uniqueCount = 1;
    _lastUniqueComplexTypeName = "";
    _lastFieldInfoObject = null;
    //create a list to store the field names
    List fieldNames = new LinkedList();
    //traverse the document and gather information
    Element root = document.getDocumentElement();
    if(root == null) {
        //nothing to traverse
        return fieldNames;
    }
    traverse(root);
    //the root element is a gathered name that nobody references; if every
    //gathered name is referenced there is no root to start from
    if(_referencedNames.containsAll(_nameSchemaFieldInfoListMap.keySet())) {
        return fieldNames;
    }
    //now get the root element below <xsd:schema>
    String rootElementName = getRootElementName();
    //fill the list with field names, starting at the root element
    fillWithFieldNames(fieldNames,
        (List)_nameSchemaFieldInfoListMap.get(rootElementName),
        rootElementName);
    //return the list of field names
    return fieldNames;
}
/**
 * Walks the passed fieldInfoList and appends to fieldNames the
 * SchemaFieldInfo of every field whose type has no entry of its own in
 * _nameSchemaFieldInfoListMap, after setting its canonicalized name.
 * Fields whose type does have an entry are expanded recursively.
 * @param fieldNames The list that receives the SchemaFieldInfo objects
 * @param fieldInfoList The list of SchemaFieldInfoPair to walk
 * @param prefix The prefix to be prepended to the new fields
 * being added
 */
private void fillWithFieldNames(List fieldNames,
                                List fieldInfoList,
                                final String prefix) {
    for(Iterator pairs = fieldInfoList.iterator(); pairs.hasNext();) {
        SchemaFieldInfoPair pair = (SchemaFieldInfoPair)pairs.next();
        SchemaFieldInfo info = pair.getSchemaFieldInfo();
        String field = pair.getField();
        //a dummy field contributes nothing to the name; any other field
        //is appended to the prefix, separated by the delimiter
        String qualifiedName = isDummy(field)
            ? prefix
            : prefix + XMLStringUtils.DELIMITER + field;
        //look up the structure recorded for this field's type, if any
        List nested = (List)_nameSchemaFieldInfoListMap.get(info.getType());
        if(nested != null) {
            //the type is defined further in the schema: descend into it
            fillWithFieldNames(fieldNames, nested, qualifiedName);
            continue;
        }
        //the type is not defined elsewhere in the schema (may be because
        //it is a primitive type or so): this is a field of its own
        info.setCanonicalizedFieldName(qualifiedName);
        fieldNames.add(info);
    }
}
/**
 * Tells whether the passed field name, ignoring leading and trailing
 * whitespace, is the DUMMY placeholder name
 * @return true, if dummy, false otherwise
 */
private boolean isDummy(String field)
{
    String trimmed = field.trim();
    return DUMMY.equals(trimmed);
}
/**
 * Returns the root element below <xsd:schema>: a name that has an entry
 * in _nameSchemaFieldInfoListMap but does not appear in _referencedNames.
 * If several names qualify, the one that comes first when iterating over
 * the remaining names is returned.
 */
private String getRootElementName()
{
    //work on a copy, so that the real map is left untouched
    Map candidates = new HashMap(_nameSchemaFieldInfoListMap);
    //a name that is referenced from elsewhere can not be the root
    for(Iterator referenced = _referencedNames.iterator();
        referenced.hasNext();)
    {
        candidates.remove(referenced.next());
    }
    //take the first of whatever is left
    return (String)candidates.keySet().iterator().next();
}
/**
 * Visits the given node and dispatches on its tag name:
 * an 'element' is recorded and its children are traversed, a
 * 'complexType' and a 'simpleType' are handed over to their dedicated
 * processing methods (which deal with the children themselves), and for
 * any other node only the children are traversed.
 * The information gathered ends up in the data structures of this object
 * (_nameSchemaFieldInfoListMap, _referencedNames etc).
 * @param n The node which has to be traversed (along with its children)
 * @modifies this
 */
private void traverse(Node n)
{
    String tag = n.getNodeName();
    if(isComplexTypeTag(tag))
    {
        //complex types collect their own children
        processComplexTypeTag(n);
        return;
    }
    if(isSimpleTypeTag(tag))
    {
        //only enumerations are of interest in a simple type; they are
        //recorded on the most recently created field info object
        processSimpleTypeForEnumeration(n, _lastFieldInfoObject);
        return;
    }
    if(isElementTag(tag))
    {
        //gather the name and type of the element before descending
        processElementTag(n);
    }
    //elements and all remaining nodes: continue with the children
    traverseChildren(n);
}
/**
 * Processes the 'complexType' tag: collects the fields found below it
 * into a new list, stores that list in _nameSchemaFieldInfoListMap and
 * marks the type name as referenced.
 * The type is stored under its 'name' attribute if it has one, otherwise
 * under the value _lastUniqueComplexTypeName had when this method was
 * entered.
 * @param n The node having 'complexType' tag
 */
private void processComplexTypeTag(Node n)
{
    //decide on the name before the children get a chance to change
    //_lastUniqueComplexTypeName
    Node declaredName = n.getAttributes().getNamedItem("name");
    String typeKey = (declaredName == null)
        ? _lastUniqueComplexTypeName
        : declaredName.getNodeValue();
    //gather the members of the type from its children
    List members = new LinkedList();
    NodeList kids = n.getChildNodes();
    for(int idx = 0, count = kids.getLength(); idx < count; idx++)
    {
        processChildOfComplexType(kids.item(idx), members);
    }
    //publish the structure; a type can never be the root element
    _nameSchemaFieldInfoListMap.put(typeKey, members);
    _referencedNames.add(typeKey);
}
/**
 * Processes a node found below a 'complexType' element. 'element' and
 * 'attribute' nodes are recorded in fieldInfoList; for any other node the
 * search continues recursively in its children.
 * @param n The child to be processed
 * @param fieldInfoList The list to which information related to the child
 * is to be put
 * @modifies fieldInfoList
 */
private void processChildOfComplexType(Node n,
                                       List fieldInfoList)
{
    String tag = n.getNodeName();
    if(isElementTag(tag))
    {
        processChildElementTag(n, fieldInfoList);
        return;
    }
    if(isAttributeTag(tag))
    {
        processChildAttributeTag(n, fieldInfoList);
        return;
    }
    //neither element nor attribute: look one level deeper
    NodeList kids = n.getChildNodes();
    for(int idx = 0, count = kids.getLength(); idx < count; idx++)
    {
        processChildOfComplexType(kids.item(idx), fieldInfoList);
    }
}
/**
 * Processes a child that has the 'element' tag.
 * <ul>
 * <li>With a 'name' attribute: a pair of the name and a SchemaFieldInfo
 * for its type is added to fieldInfoList. The type is the 'type'
 * attribute (without namespace part) or, if absent, a newly generated
 * unique complex type name. The children of the node are traversed
 * afterwards.</li>
 * <li>With only a 'ref' attribute: a pair using the ref value as both
 * field name and type is added, and the ref value is marked as
 * referenced.</li>
 * <li>With neither: nothing is done.</li>
 * </ul>
 * @param n child node to be processed
 * @param fieldInfoList The list to which information related to the child
 * is to be put
 * @modifies fieldInfoList
 */
private void processChildElementTag(Node n, List fieldInfoList)
{
    NamedNodeMap attrs = n.getAttributes();
    Node nameAttr = attrs.getNamedItem("name");
    if(nameAttr != null)
    {
        String elementName = nameAttr.getNodeValue();
        Node typeAttr = attrs.getNamedItem("type");
        String typeName;
        if(typeAttr == null)
        {
            //no explicit type: make up a name and remember it, so that
            //a following anonymous 'complexType' can be stored under it
            _lastUniqueComplexTypeName = getUniqueComplexTypeName();
            typeName = _lastUniqueComplexTypeName;
        }
        else
        {
            typeName = typeAttr.getNodeValue();
        }
        SchemaFieldInfo info =
            new SchemaFieldInfo(removeNameSpace(typeName));
        fieldInfoList.add(new SchemaFieldInfoPair(elementName, info));
        //remember the object, enumerations found later are added to it
        _lastFieldInfoObject = info;
        traverseChildren(n);
        return;
    }
    Node refAttr = attrs.getNamedItem("ref");
    if(refAttr == null)
    {
        //neither name nor ref, can't do anything
        return;
    }
    //NOTE(review): unlike a 'type' value, the ref value is used as is,
    //without removing a namespace part -- confirm this is intended
    String refName = refAttr.getNodeValue();
    fieldInfoList.add(
        new SchemaFieldInfoPair(refName, new SchemaFieldInfo(refName)));
    //something that is referred to can not be the root element
    _referencedNames.add(refName);
}
/**
 * Strips the namespace part, i.e. everything up to and including the
 * first ':', from the passed string
 * @param typeName The string whose namespace part is to be removed
 * @return The string after removing the namespace part (if present).
 * For example, if the passed string was "ns:type", the returned value
 * will be "type". A string without ':' is returned unchanged.
 */
private String removeNameSpace(String typeName)
{
    int colon = typeName.indexOf(':');
    return (colon == -1) ? typeName : typeName.substring(colon + 1);
}
/**
 * Processes a child that has the 'attribute' tag. Builds a
 * SchemaFieldInfo from the 'type' attribute (DUMMY_SIMPLETYPE if there
 * is none), applies the optional 'editable', 'hidden', 'defaultViz' and
 * 'width' settings, records enumeration values found below the node, and
 * adds the result to fieldInfoList behind the attributes already there.
 * A node without a 'name' attribute is ignored.
 * @param n The attribute node to be processed
 * @param fieldInfoList The list to which information related to the child
 * is to be put
 * @modifies fieldInfoList
 */
private void processChildAttributeTag(Node n, List fieldInfoList)
{
    NamedNodeMap attrs = n.getAttributes();
    Node nameAttr = attrs.getNamedItem("name");
    if(nameAttr == null)
    {
        //cant do much, return
        return;
    }
    //a trailing DELIMITER marks the field as an attribute (as per
    //convention, @see XMLStringUtils)
    String fieldName = nameAttr.getNodeValue() + XMLStringUtils.DELIMITER;
    Node typeAttr = attrs.getNamedItem("type");
    String typeName = (typeAttr == null)
        ? DUMMY_SIMPLETYPE
        : typeAttr.getNodeValue();
    SchemaFieldInfo info = new SchemaFieldInfo(removeNameSpace(typeName));
    //only an explicit "false" makes the field non-editable
    Node editable = attrs.getNamedItem("editable");
    if(editable != null
        && editable.getNodeValue().equalsIgnoreCase("false"))
    {
        info.setEditable(false);
    }
    //only an explicit "true" hides the field
    Node hidden = attrs.getNamedItem("hidden");
    if(hidden != null && hidden.getNodeValue().equalsIgnoreCase("true"))
    {
        info.setHidden(true);
    }
    //only an explicit "true" switches default visibility on
    Node defaultViz = attrs.getNamedItem("defaultViz");
    if(defaultViz != null
        && defaultViz.getNodeValue().equalsIgnoreCase("true"))
    {
        info.setDefaultVisibility(true);
    }
    Node width = attrs.getNamedItem("width");
    if(width != null)
    {
        try
        {
            info.setDefaultWidth(Integer.parseInt(width.getNodeValue()));
        }
        catch(NumberFormatException ignored)
        {
            //a width that is not a number is simply not applied
        }
    }
    //test for enumeration
    processSimpleTypeForEnumeration(n, info);
    //add the attribute to the fieldInfoList
    addAttributeSchemaFieldInfoPair(
        new SchemaFieldInfoPair(fieldName, info), fieldInfoList);
}
/**
 * Inserts the passed schemaFieldInfoPair (which came from some attribute
 * in the schema) into the passed fieldInfoList, directly behind the run
 * of attribute entries at the beginning of the list.
 * This is done so that the client gets attributes before the other
 * child elements, and also so that attributes remain in order.
 */
private void addAttributeSchemaFieldInfoPair(
    SchemaFieldInfoPair schemaFieldInfoPair,
    List fieldInfoList)
{
    //count the attributes at the head of the list; attributes are placed
    //only in the beginning, so stop at the first entry that is not one
    int insertAt = 0;
    Iterator entries = fieldInfoList.iterator();
    while(entries.hasNext()
        && isAttribute(((SchemaFieldInfoPair)entries.next()).getField()))
    {
        insertAt++;
    }
    //the new attribute goes right after the existing ones
    fieldInfoList.add(insertAt, schemaFieldInfoPair);
}
/**
 * Searches the descendants of the given node for 'enumeration' tags and
 * records every enumeration that has a non-empty 'value' attribute in the
 * passed fieldInfo object. The 'name' attribute is used as the name of
 * the enumeration; if it is missing or empty, the value doubles as name.
 * @param n The node below which to look for enumerations
 * @param fieldInfo The object that receives the enumerations. May be
 * null (no field info object is available yet), in which case there is
 * nothing to record the enumerations on and the method does nothing.
 */
private static void processSimpleTypeForEnumeration(Node n,
                                                   SchemaFieldInfo fieldInfo)
{
    //without a target the enumerations can not be stored anywhere;
    //this method has no other effect, so there is nothing left to do
    if(fieldInfo == null)
        return;
    //iterate over the child nodes to check for enumeration
    NodeList children = n.getChildNodes();
    int numChildren = children.getLength();
    for(int i = 0; i < numChildren; i++)
    {
        Node child = children.item(i);
        //if it isnt an enumeration tag
        if(!isEnumerationTag(child.getNodeName()))
        {
            //process this node (a child of it may be an enumeration
            //element)
            processSimpleTypeForEnumeration(child, fieldInfo);
            continue;
        }
        //only element nodes carry attributes
        NamedNodeMap attributes = child.getAttributes();
        if(attributes == null)
            continue;
        Node nameAttribute = attributes.getNamedItem("name");
        Node valueAttribute = attributes.getNamedItem("value");
        String name = null, value = null;
        if(nameAttribute != null)
            name = nameAttribute.getNodeValue();
        if(valueAttribute != null)
            value = valueAttribute.getNodeValue();
        //an enumeration without a value is ignored
        if(value == null || value.equals(""))
            continue;
        //add the enumeration to fieldInfo
        if(name == null || name.equals(""))
            fieldInfo.addEnumerationNameValue(value, value);
        else
            fieldInfo.addEnumerationNameValue(name, value);
    }
}
/**
 * Calls traverse on every child of the passed node, in document order
 */
private void traverseChildren(Node n)
{
    //walk the chain of siblings, starting at the first child
    for(Node child = n.getFirstChild();
        child != null;
        child = child.getNextSibling())
    {
        traverse(child);
    }
}
/**
 * Tests if the given tag, ignoring surrounding whitespace, is
 * "complexType" or "xsd:complexType"
 * @return true, if is a complex type tag, false otherwise
 */
private boolean isComplexTypeTag(String tag)
{
    String trimmed = tag.trim();
    return trimmed.equals("complexType")
        || trimmed.equals("xsd:complexType");
}
/**
 * Tests if the given tag, ignoring surrounding whitespace, is
 * "simpleType" or "xsd:simpleType"
 * @return true, if is a simple type tag, false otherwise
 */
private boolean isSimpleTypeTag(String tag)
{
    String trimmed = tag.trim();
    return trimmed.equals("simpleType")
        || trimmed.equals("xsd:simpleType");
}
/**
 * Tests if the given tag, ignoring surrounding whitespace, is
 * "attribute" or "xsd:attribute"
 * @return true, if is an attribute tag, false otherwise
 */
private boolean isAttributeTag(String tag)
{
    String trimmed = tag.trim();
    return trimmed.equals("attribute") || trimmed.equals("xsd:attribute");
}
/**
 * Records the name and type of an 'element' node in
 * _nameSchemaFieldInfoListMap. The type is the value of the 'type'
 * attribute or, if there is none, a newly generated unique complex type
 * name (which is also remembered in _lastUniqueComplexTypeName).
 * An element without a 'name' attribute is ignored.
 * @param n The element node that needs to be processed
 * @modifies this
 */
private void processElementTag(Node n)
{
    NamedNodeMap attrs = n.getAttributes();
    Node nameAttr = attrs.getNamedItem("name");
    if(nameAttr == null)
    {
        //nothing to record without a name
        return;
    }
    String elementName = nameAttr.getNodeValue();
    Node typeAttr = attrs.getNamedItem("type");
    String typeName;
    if(typeAttr == null)
    {
        //no type given in the element tag: assign a new unique name and
        //keep it in _lastUniqueComplexTypeName for future use
        typeName = getUniqueComplexTypeName();
        _lastUniqueComplexTypeName = typeName;
    }
    else
    {
        typeName = typeAttr.getNodeValue();
    }
    //add mapping to _nameSchemaFieldInfoListMap
    addToSchemaFieldInfoListMap(elementName, typeName);
}
/**
 * Generates a complex type name of the form "COMPLEXTYPE___" followed by
 * the current value of _uniqueCount, and then increments the counter
 * @modifies _uniqueCount
 */
private String getUniqueComplexTypeName()
{
    String generated = "COMPLEXTYPE___" + _uniqueCount;
    _uniqueCount++;
    return generated;
}
/**
 * Maps the passed field to a new list holding a single DUMMY-named
 * SchemaFieldInfoPair, whose SchemaFieldInfo is initialized with the
 * passed typeName (without its namespace part). The type name (again
 * without namespace part) is also added to _referencedNames.
 */
private void addToSchemaFieldInfoListMap(String field, String typeName)
{
    String localTypeName = removeNameSpace(typeName);
    //the list consists of nothing but a placeholder pointing at the type
    List placeholderList = new LinkedList();
    SchemaFieldInfo typeInfo = new SchemaFieldInfo(localTypeName);
    placeholderList.add(new SchemaFieldInfoPair(DUMMY, typeInfo));
    _nameSchemaFieldInfoListMap.put(field, placeholderList);
    //a type that is used by an element can not be the root element
    _referencedNames.add(localTypeName);
}
/**
 * Tests if the passed tag, ignoring surrounding whitespace, is
 * "element" or "xsd:element"
 * @return true, if element tag, false otherwise
 */
private static boolean isElementTag(String tag)
{
    String trimmed = tag.trim();
    return trimmed.equals("element") || trimmed.equals("xsd:element");
}
/**
 * Tests if the passed tag, ignoring surrounding whitespace, is
 * "enumeration" or "xsd:enumeration"
 * @return true, if enumeration tag, false otherwise
 */
private static boolean isEnumerationTag(String tag)
{
    String trimmed = tag.trim();
    return trimmed.equals("enumeration")
        || trimmed.equals("xsd:enumeration");
}
/**
 * Tests if the passed string represents an attribute as per the
 * canonicalized field conventions, i.e. whether it ends with
 * XMLStringUtils.DELIMITER
 * @return true, if attribute field, false otherwise
 */
public boolean isAttribute(String field)
{
    return field.endsWith(XMLStringUtils.DELIMITER);
}
/**
 * Holds the name of a field together with the SchemaFieldInfo object
 * carrying the information about that field
 */
private static class SchemaFieldInfoPair
{
    /** Name of the field */
    private final String _field;
    /** Information pertaining to this field */
    private final SchemaFieldInfo _fieldInfo;
    /**
     * Creates a new SchemaFieldInfoPair from the passed name and
     * field information
     */
    public SchemaFieldInfoPair(String field, SchemaFieldInfo fieldInfo)
    {
        _field = field;
        _fieldInfo = fieldInfo;
    }
    /** Returns the name of the field */
    public String getField()
    {
        return _field;
    }
    /** Returns the information pertaining to the field */
    public SchemaFieldInfo getSchemaFieldInfo()
    {
        return _fieldInfo;
    }
    /** Returns the pair in the form "[field:fieldInfo]" */
    public String toString()
    {
        StringBuffer text = new StringBuffer("[");
        text.append(_field).append(":").append(_fieldInfo).append("]");
        return text.toString();
    }
}
}//end of class LimeXMLSchemaFieldExtractor