/**
*
*/
package fna.parsing;
import java.util.regex.*;
import java.util.*;
import org.jdom.Element;
/**
* @author Hong Updates
*
*/
public class DistributionParser4FNA extends EnumerativeElementParser {
/**
* @param parent
* @param text
*/
public DistributionParser4FNA(Element parent, String text, String enutag) {
super(parent, text, enutag);
// TODO Auto-generated constructor stub
}
/* (non-Javadoc)
* @see fna.parsing.ElementParser#parse()
*/
@Override
protected Element parse() {
//format text, hide [,;] in parentheses
ArrayList<String> values = new ArrayList<String>();
text = format(text);
//collect values
String[] areas = text.split("[;,]");
for(int i = 0; i<areas.length; i++){
String area = areas[i].trim();
if(area.indexOf("@")>=0){
values.addAll(allValues(area));
}else{
values.add(area);
}
}
//form elements
Iterator<String> it = values.iterator();
while(it.hasNext()){
String area = (String)it.next();
if(area.compareTo("") !=0){
Element enuelement = new Element(enutag);
enuelement.setText(area);
//System.out.println("add "+enutag+": "+area);
parent.addContent(enuelement);
}
}
return parent;
}
/**
* mexican (a@b) =>mexican(a), mexican(b)
* @param area
* @return
*/
private ArrayList<String> allValues(String area) {
ArrayList<String> values = new ArrayList<String>();
Pattern p = Pattern.compile("(.*?)\\(([^)]*?@[^)]*?)\\)(.*)");
Matcher m = p.matcher(area);
if(m.matches()){
String com = m.group(1);
String partstr = m.group(2);
String rest = m.group(3);
String[] parts = partstr.split("\\s*@\\s*");
for(int i = 0; i<parts.length; i++){
values.add(com+"("+parts[i]+")"+rest);
}
}
return values;
}
private String format(String text) {
String formated = "";
Pattern p = Pattern.compile("(.*?)(\\([^)]*,[^)]*\\))(.*)");
Matcher m = p.matcher(text);
while(m.matches()){
formated += m.group(1);
String t = m.group(2);
text = m.group(3);
t = t.replaceAll(",", "@");
formated +=t;
m = p.matcher(text);
}
formated +=text;
return formated;
}
/**
* @param args
*/
public static void main(String[] args) {
DistributionParser4FNA p = new DistributionParser4FNA(null, "mexic(a, b, c, d), asian", "distribution");
p.parse();
}
}