VolumeVerifier.java example

Explorer
phenoscape-nlp-master
- parsing-gui
  - lib
    - elk-distribution-0.3.2-owlapi-library
      - examples
        org
        semanticweb
        elk
        owlapi
        examples
        QueryingUnnamedClassExpressions.java
        QueryingWithNamedClasses.java
        RetrievingInstances.java
        SavingInferredAxioms.java
  - src
    - com
      - swtdesigner
        SWTResourceManager.java
    - fna
- phenoscapeII
  - src
package fna.parsing;

import java.io.File;
import java.io.FileInputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;

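/**
 * Verifies the format of the extracted volume documents (1.xml, 2.xml, ...
 * in the "extracted" folder of the target directory).
 * 
 * File verify: every expected numbered XML file exists and is a regular file.
 * 
 * Style verify: every /treatment/paragraph/style value has a mapping in
 * style-mapping.properties.
 * 
 * Number verify: the first /treatment/paragraph/text of each file starts with
 * the number recorded in the taxon index.
 * 
 * Name verify: the name extracted from the first paragraph of each file is
 * compared with the name recorded in the taxon index.
 * 
 * Change history:
 * Hong. 08/04/09
 *  a) added ./text[@case=""] handling and fixBrokenNames
 *  b) TODO: output a report of potential errors for the user to check manually.
 * 
 * @author chunshui
 */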
@SuppressWarnings({ "unchecked", "unused" })
public class VolumeVerifier extends Thread {

	// Registry.TargetDirectory followed by the platform file separator (set in the constructor).
	private String target;

	// Registry.ConfigurationDirectory; the style mapping and the taxon index are loaded from here.
	private String conf;

	// Receives progress values and informational/error messages produced during verification.
	private ProcessListener listener;

	// Directory holding the extracted files: target + "extracted".
	private String path;

	// Number of entries found in the extracted directory; files are addressed as 1.xml .. total.xml.
	private int total;

	// Directory listing of the extracted folder, filled in by verify().
	private File[] extracted;

	// Taxon index the numbers and names are checked against (or populated into when empty).
	private TaxonIndexer ti;
	
	// "|"-delimited list of lower-cased names already returned by extractName, used to spot repeats.
	private String namelist = "|";
	
	// log4j logger for this class.
	private static final Logger LOGGER = Logger.getLogger(VolumeVerifier.class);
	public VolumeVerifier(ProcessListener listener) {
		this.listener = listener;

		target = Registry.TargetDirectory+System.getProperty("file.separator");
		conf = Registry.ConfigurationDirectory;

		path = target + "extracted";
	}

	/**
	 * Thread entry point: shows the progress bar, runs {@link #verify()} and
	 * hides the progress bar again.
	 * 
	 * The bar is hidden in a finally block because verify() can abort with a
	 * ParsingException; without it the bar would stay visible after a failure.
	 */
	@Override
	public void run () {
		listener.setProgressBarVisible(true);
		try {
			verify();
		} finally {
			listener.setProgressBarVisible(false);
		}
	}
	/**
	 * Runs the whole verification pipeline and reports each stage to the listener:
	 * list the extracted files, build the taxon index, then verify files, styles,
	 * numbers and names in that order. The first failing stage stops the run.
	 * When every stage passes, the (possibly updated) taxon index is saved.
	 * 
	 * If the extracted directory cannot be listed, a failure message is reported
	 * and nothing else is attempted.
	 */
	public void verify() {
		// get the extracted files list
		listener.progress(1);
		File directory = new File(path);
		extracted = directory.listFiles();
		if (extracted == null) {
			// File.listFiles() returns null when the path is not a directory or an
			// I/O error occurs; dereferencing it would end the thread with an NPE.
			total = 0;
			listener.info("File verify failure! Cannot list directory: " + path);
			return;
		}
		total = extracted.length;
		listener.info("To verify files: " + extracted.length);

		listener.progress(10);
		// init the taxon index
		ti = new TaxonIndexer(conf); // TODO: add the taxon index to conf
		ti.build();
		listener.info("Taxon index initialized.");

		listener.progress(30);
		// verify the files
		listener.info("To verify the files");
		if (!verifyFile()) {
			listener.info("File verify failure!");
			return;
		}
		listener.info("File verify success!");

		listener.progress(50);
		// verify the style
		listener.info("To verify the style");
		if (!verifyStyle()) {
			listener.info("Style verify failure!");
			return;
		}
		listener.info("Style verify success!");

		listener.progress(70);
		// verify the number
		listener.info("To verify the number");
		if (!verifyNumber()) {
			listener.info("Number verify failure!");
			return;
		}
		listener.info("Number verify success!");

		listener.progress(90);
		// verify the name
		listener.info("To verify the name");
		if (!verifyName()) {
			listener.info("Name verify failure!");
			return;
		}
		listener.info("Name verify success!");

		listener.info("Volume format verify success!");

		// write the updated TaxonIndexer
		TaxonIndexer.saveUpdated(conf, ti);
		listener.info("Update the TaxonIndexer success!");
		listener.progress(99);
	}

	/**
	 * Checks that each of the expected files 1.xml .. total.xml is present in
	 * the extracted directory and is a regular file. Every problem is reported
	 * to the listener; checking continues after a failure.
	 * 
	 * @return true when no file is missing or of the wrong kind
	 */
	private boolean verifyFile() {
		boolean allValid = true;
		int index = 1;

		while (index <= total) {
			File candidate = new File(path, index + ".xml");

			String problem = null;
			if (!candidate.exists()) {
				problem = "File does not exist!";
			} else if (!candidate.isFile()) {
				problem = "File is not a file!";
			}

			if (problem != null) {
				listener.info("", candidate.getName(), problem);
				allValid = false;
			}
			index++;
		}

		return allValid;
	}

	/**
	 * Verifies the paragraph styles of every extracted file.
	 * 
	 * Loads style-mapping.properties from the configuration directory, then for
	 * each file 1.xml .. total.xml checks that the text of every
	 * /treatment/paragraph/style element has a non-empty mapping. Each unmapped
	 * style is reported to the listener; checking continues after a failure.
	 * 
	 * @return true when every style found has a mapping
	 * @throws ParsingException wrapping any failure while reading the mapping
	 *         or parsing/querying a file
	 */
	private boolean verifyStyle() {
		boolean passed = true;

		try {
			// load style mapping
			Properties props = new Properties();
			FileInputStream mappingStream = new FileInputStream(
					conf + System.getProperty("file.separator")+"style-mapping.properties");
			try {
				props.load(mappingStream);
			} finally {
				// release the file handle whether or not loading succeeded
				mappingStream.close();
			}

			for (int i = 1; i <= total; i++) {
				File file = new File(path, i + ".xml");

				SAXBuilder builder = new SAXBuilder();
				Document doc = builder.build(file);
				Element root = doc.getRootElement();

				// find all <style> tags
				List<Element> styleList = XPath.selectNodes(root,
						"/treatment/paragraph/style");

				for (Element se : styleList) {
					String style = se.getText();
					String mapping = props.getProperty(style);

					// verify if the style has a mapping
					if (mapping == null || mapping.length() == 0) {
						listener.info("", file.getName(), "Invalid style "
								+ style);
						passed = false;
					}
				}
			}
		} catch (Exception e) {
			LOGGER.error("VolumeVerifier : verifyStyle - Exception ", e);
			throw new ParsingException(e);
		}

		return passed;
	}

	/**
	 * Verifies the taxon number of every extracted file against the taxon index.
	 * 
	 * If the index holds no numbers yet, they are collected from the files via
	 * fillInNumbers() and the check is considered passed.
	 * 
	 * Otherwise the first /treatment/paragraph/text of file i.xml must start with
	 * the number stored at index position i - 1. A number of the form "12c1" is
	 * also accepted when the text contains it with an optional dot ("12c.1");
	 * the form found in the text is then added to the index. A file without such
	 * a text element is treated as having empty text, so it is reported as a
	 * mismatch instead of aborting the run.
	 * 
	 * @return passed True if the verify passed
	 * @throws ParsingException wrapping any failure while parsing/querying a file
	 */
	private boolean verifyNumber() {
		boolean passed = true;
		if(ti.emptyNumbers()){
			listener.info("no taxon list number to check against, build taxon numbers. ");
			fillInNumbers();
			listener.info("check passed");
			return passed;
		}
		// splits an index number such as "12c1" into the prefix "12c" and the trailing digits "1"
		Pattern splitNumber = Pattern.compile("(.*?[a-z])(\\d+)");
		try {
			for (int i = 1; i <= total; i++) {
				File file = new File(path, i + ".xml");

				SAXBuilder builder = new SAXBuilder();
				Document doc = builder.build(file);
				Element root = doc.getRootElement();

				Element te = (Element) XPath.selectSingleNode(root,
						"/treatment/paragraph/text");
				String text = (te == null) ? "" : te.getText();

				String number = ti.getNumber(i - 1);
				if (text.startsWith(number)) {
					continue;
				}

				// hong 6/26/08: make 12c1 and 12c.1 match
				// extract number 12c.1 from text and save it in ti
				boolean check = false;
				Matcher parts = splitNumber.matcher(number);
				if (parts.matches()) {
					// the prefix is quoted so characters such as '.' in the index
					// number are matched literally rather than as regex syntax
					Pattern dotted = Pattern.compile(
							Pattern.quote(parts.group(1)) + "\\.?" + parts.group(2));
					Matcher inText = dotted.matcher(text);
					if (inText.find()) {
						ti.addNumber(inText.group()); //add one by one in sequence
						check = true;
					}
				}
				if (!check) {
					listener.info("", file.getName(),
							"Invalid number! Expected: " + number);
					passed = false;
				}
			}
		} catch (Exception e) {
			LOGGER.error("VolumeVerifier : verifyNumber - Exception ", e);
			throw new ParsingException(e);
		}

		return passed;
	}

	/**
	 * Populates the taxon index with one number per extracted file, in file order.
	 * 
	 * The number is the leading part of the first /treatment/paragraph/text,
	 * cut off at the first whitespace that is followed by a letter. If that
	 * part does not start with a digit (or the text element is missing), "0"
	 * is recorded instead. Each addition is echoed to standard output.
	 * 
	 * @throws ParsingException wrapping any failure while parsing/querying a file
	 */
	private void fillInNumbers(){
		try {
			int fileNo = 1;
			while (fileNo <= total) {
				Document doc = new SAXBuilder().build(new File(path, fileNo + ".xml"));
				Element root = doc.getRootElement();
				Element te = (Element) XPath.selectSingleNode(root,
						"/treatment/paragraph/text");

				//TODO: te should not be null, but for 295.xml JDOM screw-up
				String candidate = (te == null)
						? ""
						: te.getText().replaceAll("\\s[a-zA-Z].*", ""); //1. Amstersdfds 12.c Ames

				// fall back to "0" when no leading number could be isolated
				String number = candidate.matches("^\\d.*") ? candidate : "0";
				ti.addNumber(number); //add one by one in sequence
				System.out.println("add numbers :"+number+" for file "+fileNo+".xml");

				fileNo++;
			}
		} catch (Exception e) {
			LOGGER.error("VolumeVerifier : fillInNumbers - Exception ", e);
			e.printStackTrace();
			throw new ParsingException(e);
		}
	}
	/**
	 * Verifies the taxon name of every extracted file against the taxon index.
	 * 
	 * When the index holds no names yet, they are collected from the files via
	 * fillInNames() and the check is considered passed. Otherwise the name
	 * extracted from the first paragraph of file i.xml is compared, using
	 * match(), with the name stored at index position i - 1; every mismatch is
	 * reported to the listener together with the expected number and name.
	 * 
	 * TODO: verify the var name
	 * 
	 * @return pass True if the verify passed
	 * @throws ParsingException wrapping any failure while parsing/querying a file
	 */
	private boolean verifyName() {
		if (ti.emptyNames()) {
			listener.info("no taxon list name to check against, build taxon names. ");
			fillInNames();
			listener.info("check passed");
			return true;
		}

		boolean allMatched = true;
		try {
			for (int idx = 0; idx < total; idx++) {
				String fileName = (idx + 1) + ".xml";
				File file = new File(path, fileName);

				Document doc = new SAXBuilder().build(file);
				Element root = doc.getRootElement();

				String expectedNumber = ti.getNumber(idx);
				String expectedName = ti.getName(idx);

				// the first paragraph is the name paragraph
				Element pe = (Element) XPath.selectSingleNode(root,	"/treatment/paragraph");
				String extractedname = extractName(pe, fileName);

				if (match(expectedName, extractedname)) {//HongCui
					continue;
				}
				allMatched = false;
				// TODO: if the logic is right after merged with hong's code?
				listener.info("", file.getName(),
						"Invalid name. Expected: " + expectedNumber + "." + expectedName);
			}
		} catch (Exception e) {
			e.printStackTrace();
			LOGGER.error("VolumeVerifier : verifyName - Exception ", e);
			throw new ParsingException(e);//HongCui
		}

		return allMatched;
	}
	
	/**
	 * Decides whether an extracted name agrees with the expected (indexed) name.
	 * 
	 * Both strings are lower-cased, non-word characters are replaced by spaces,
	 * and the result is split into words. The names match when every word of one
	 * of them occurs (as a substring) in the other, in either direction, so an
	 * extracted name that carries extra authority text still matches a shorter
	 * index entry and vice versa.
	 * 
	 * The previous implementation split the words out of {@code name} and then
	 * searched for them in {@code name} itself, so it returned true for every
	 * non-null name.
	 * 
	 * @param answer the expected name from the taxon index
	 * @param name the name extracted from the document, may be null
	 * @return true if the two names agree; false if either is null
	 */
	private boolean match (String answer, String name){
		if(name == null || answer == null){
			return false;
		}
		String cleanName = name.toLowerCase().replaceAll("\\W", " ").trim();
		String cleanAnswer = answer.toLowerCase().replaceAll("\\W", " ").trim();

		return containsAllWords(cleanName, cleanAnswer)
				|| containsAllWords(cleanAnswer, cleanName);
	}

	/** Returns true if every whitespace-separated word of {@code words} occurs in {@code container}. */
	private static boolean containsAllWords(String container, String words){
		String[] parts = words.split("\\s+");
		for(int i = 0; i < parts.length; i++){
			if(container.indexOf(parts[i]) < 0){
				return false;
			}
		}
		return true;
	}

	/**
	 * Extracts a taxon name (including its authority) from the name paragraph
	 * element and records it in the "|"-delimited namelist of names seen so far.
	 * 
	 * Steps, in order:
	 * 1. The texts of all ./text children are concatenated, followed once more by
	 *    the texts of those children whose case attribute equals
	 *    Registry.TribeGenusNameCase; whitespace runs are collapsed.
	 * 2. Everything before the first upper-case letter is removed.
	 * 3. fixBrokenNames() and chunkPlaceOfPub() are applied.
	 * 4. If the text contains a rank keyword (subfam, var, subgen, subg, subsp,
	 *    ser, tribe, subsect, or sect), the whole text is returned; a repeat is
	 *    reported to the listener but still returned.
	 * 5. Otherwise (family, genus or species) the whole text is returned when it
	 *    has not been seen before; a repeat is reported and null is returned.
	 * 
	 * @param pe the name paragraph element
	 * @param filename file name used in messages sent to the listener
	 * @return the extracted name; "" when nothing is left after step 3; null for a
	 *         repeated family/genus/species name or when an exception occurred
	 *         (the exception is logged, not rethrown)
	 */
	private String extractName(Element pe, String filename) {//TODO: the case of "�" in v. 19, 516.xml "Agoseris �elata"
		try{
			//concat <text> elements into one string
			StringBuffer buffer=new StringBuffer();

			List<Element> textList = XPath.selectNodes(pe, "./text");
			List<Element> additionalList = XPath.selectNodes(pe, "./text[@case='"+Registry.TribeGenusNameCase+"']");
			textList.addAll(additionalList);
			// note: the loop variable below shadows the TaxonIndexer field named ti
			for (Iterator<Element> ti = textList.iterator(); ti.hasNext();) {
				Element wt = (Element) ti.next();
				buffer.append(wt.getText()).append(" ");
			}
			String text = buffer.toString().replaceAll("\\s+", " ").trim();
			// drop everything in front of the first upper-case letter
			text = text.replaceFirst("^.*?(?=[A-Z])", "").trim();
			//fix broken names: T HYRSOSTACHYS;  va r. subhispida
			text = fixBrokenNames(text);
			text = chunkPlaceOfPub(text, filename); //after this, text should only hold name information
			if(text.length() == 0){return "";} //TODO: shouldn't happen, except for 295.xml
	
			// hong 6/26/08: make 1.Pterostegia drymarioides
			// and 1. Pterostegia drymarioides Fischer ... match.
			// Extract full name string and save it in ti.
			
			//tribe: didn't need smallCaps info.
			Pattern p = Pattern
					.compile("\\b(?:subfam|var|subgen|subg|subsp|ser|tribe|subsect)[\\.\\s]+([-a-z]+)", Pattern.CASE_INSENSITIVE);
			Matcher m = p.matcher(text);
			
			Pattern p1 = Pattern
			.compile("\\bsect[\\.\\s]+([-a-zA-Z]+)"); //Journal names may contain e.g. Sect. IV Hong 08/04/09
			Matcher m1 = p1.matcher(text);
			// check is never read in the active code below
			boolean check = false;
			if (m.find() || m1.find()) {// sub- names
				// the whole text is used as the name; both branches below return it
				//Pattern p4 = Pattern.compile("(.*?\\s+|^)(\\D.*)");
				//Matcher m4 = p4.matcher(text);
				//if(m4.matches()){
					//String aname = m4.group(2);
					String aname = text;
					String laname = aname.toLowerCase();
					if(namelist.indexOf("|"+laname+"|")>=0){
						listener.info("", filename, "Repeated taxon name:"+aname+" [sub rank]");
						return aname;
						//System.out.println("::::::::::duplicate "+aname+" [sub rank]");//should not occur
					}else{
						namelist += laname+"|";
						return aname;
					}
			//	}
			
				// disabled earlier implementation, kept for reference
				/*Pattern p2 = Pattern.compile("\\.\\s*(\\D*?\\bsubg[\\.\\s]+[-a-z]+[\\s\\w\\.]*\\)[\\s\\w]*\\.\\s?[A-Z]+\\s*\\w+)",
						Pattern.CASE_INSENSITIVE); 
				Matcher m2 = p2.matcher(text);
				
				if (m2.find()) {
					String aname = m2.group(1);
					
					
					String laname = aname.toLowerCase();
					if(namelist.indexOf("|"+laname+"|")>=0){
						listener.info("", filename, "Repeated taxon name:"+aname+" [sub rank]");
						return aname;
						//System.out.println("::::::::::duplicate "+aname+" [sub rank]");//should not occur
					}else{
						namelist += laname+"|";
						return aname;
					}
				}
				
				Pattern p3 = Pattern.compile("\\.\\s*(\\D*?\\bsect[\\.\\s]+[-a-z]+[\\s\\w\\.]*\\)[\\s\\w]*\\.\\s?[A-Z]+\\s*\\w+)",
						Pattern.CASE_INSENSITIVE); 
				Matcher m3 = p3.matcher(text);
				
				if (m3.find()) {
					String aname = m3.group(1);
					String laname = aname.toLowerCase();
					if(namelist.indexOf("|"+laname+"|")>=0){
						listener.info("", filename, "Repeated taxon name:"+aname+" [sub rank]");
						return aname;
						//System.out.println("::::::::::duplicate "+aname+" [sub rank]");//should not occur
					}else{
						namelist += laname+"|";
						return aname;
					}
				}
			
				p = Pattern.compile("\\.\\s*(\\D*?\\b(?:subfam|var|subgen|subsp|ser|tribe)[\\.\\s]+[-a-z]+)",
						Pattern.CASE_INSENSITIVE); //Hong 08/04/09
				m = p.matcher(text);				
				if (m.find()) {
					String aname = m.group(1);
					String laname = aname.toLowerCase();
					if(namelist.indexOf("|"+laname+"|")>=0){
						listener.info("", filename, "Repeated taxon name:"+aname+" [sub rank]");
						return aname;
						//System.out.println("::::::::::duplicate "+aname+" [sub rank]");//should not occur
					}else{
						namelist += laname+"|";
						return aname;
					}
				}*/
			} else {// family, genus, species names
				// disabled earlier implementation, kept for reference
				/*
				 * FNA v. 5 and 19
				 *List<Element> textList2 = XPath.selectNodes(pe, "./text");
				int n = nameLine(textList2);
				String namestring = textList2.get(n).getText(); //2nd <text> holds the name
				//exception: <text>1. Artemisia aleutica</text>
				namestring = namestring.replaceAll("^\\d.*?\\s", "");
				Pattern famname = Pattern.compile("\\b([a-z]*?ceae)\\b.*", Pattern.CASE_INSENSITIVE);
				Pattern genname = Pattern.compile("^([A-Z][A-Z].*?)\\b.*"); //NOTHOCALAIS with two dots on top of last I
				m = famname.matcher(namestring);*/
				
				// namestring is only used to classify the rank; the returned name is always the full text
				String namestring = text;
				namestring = namestring.replaceAll("^\\d.*?\\s+", "");
				namestring = namestring.replaceAll("^\\.\\s+", ""); // if there is a . Hong 08/04/09 e.g "4 . XXXX"
				Pattern famname = Pattern.compile("^([a-z]*?ceae)\\b.*", Pattern.CASE_INSENSITIVE);
				Pattern genname = Pattern.compile("^([A-Z][A-Z].*?)\\b.*"); //NOTHOCALAIS with two dots on top of last I
				m = famname.matcher(namestring);
				if(m.find()){
					//String aname = m.group(1);
					String aname = text;
					String laname = aname.toLowerCase();
					if(namelist.indexOf("|"+laname+"|")>=0){
						// a repeat is reported and falls through to the final return null
						listener.info("", filename, "Repeated taxon name:"+aname+" [family rank]");
						//System.out.println("::::::::::duplicate "+aname+" [family rank]"); //should not occur
					}else{
						namelist += laname+"|";
						return aname; //family
					}
				}else{
					m = genname.matcher(namestring);
					if(m.find()){
						//String aname = m.group(1);
						String aname = text;
						String laname = aname.toLowerCase();
						if(namelist.indexOf("|"+laname+"|")>=0){
							// a repeat is reported and falls through to the final return null
							listener.info("", filename, "Repeated taxon name:"+aname+" [genus rank]");
							//System.out.println("::::::::::duplicate "+aname+" [genus rank]"); //should not occur
						}else{
							namelist += laname+"|";
							return aname; //genus
						}
					}
				    else{
				    	//String aname = namestring;//species name
				    	//Hong: 08/04/09 take the first two words as species name
				    	//String[] tokens = namestring.trim().split("\\s+");
				    	//if(tokens.length >= 2){
					    	//String aname = tokens[0]+" "+tokens[1];
							String aname = text;
				    		String laname = aname.toLowerCase();
							if(namelist.indexOf("|"+laname+"|")>=0){
								// a repeat is reported and falls through to the final return null
								listener.info("", filename, "Repeated taxon name:"+aname+" [species rank]");
								//System.out.println("::::::::::duplicate "+aname+" [species rank]"); //could occur
								/*String nextseg = textList2.get(n+1).getText();
								String firstword = nextseg.indexOf(" ")>0? nextseg.substring(0,nextseg.indexOf(" ")) : nextseg;
								aname += " "+firstword;
								laname = aname.toLowerCase();
								if(namelist.indexOf("|"+laname+"|")>=0){
									System.out.println("::::::::::2nd try: duplicate "+aname+" [species rank]"); //should not occur
								}else{
									namelist += laname+"|";
									return aname;
								} Hong 08/04/09*/
							}else{
								namelist += laname+"|";
								return aname;
							}
				    	//}else{
				    	//	System.out.println(namestring+" is not at [species rank]");
				    	//}
				    	
				    }
				}

				/*String fgsname = name.replaceFirst("\\bfam\\b", "").trim();
				if (text.toLowerCase().replaceFirst(
						"^" + number + "\\s*\\.\\s*", "").startsWith(
						fgsname.toLowerCase())) {
					check = true;
					ti.setName(i - 1, fgsname);
				}*/
			}
		}catch(Exception e){
			// failures are logged and swallowed; the caller receives null
			LOGGER.error("VolumeVerifier : extractName - Exception ", e);
			e.printStackTrace();
		}
	
		// reached for repeated family/genus/species names and after an exception
		return null;
	}
	
	/**
	 * Cuts the place-of-publication part off a name line so that only the name
	 * (with authority) remains.
	 * 
	 * Steps, in order:
	 * 1. While the text contains an upper-case letter followed later by a digit,
	 *    the pattern "name, journal digits..." is applied; the part before the
	 *    comma becomes the text and the non-digit part after it the journal.
	 * 2. If a comma is still present (not at position 0) and the text contains
	 *    neither ", ... &" (an authority list) nor a comma inside an open
	 *    parenthesis, the text is cut at the first comma.
	 * 3. The text is cut at the first " in " (not at position 0).
	 * 4. The text is cut at the first occurrence of a separator character
	 *    (not at position 0).
	 *    NOTE(review): both separator literals below are the replacement
	 *    character in this copy of the file; they were presumably two different
	 *    dash characters before an encoding problem - confirm against the
	 *    original source.
	 * 5. If isJournal(journal) is false, "Check taxon name:" is reported to the
	 *    listener.
	 * 
	 * @param text e.g. 14c. Mirabilis linearis (Pursh) Heimerl var. subhispida (Heimerl) Spellenberg, Novon 12: 270. 2002
	 * @param filename file name used in messages sent to the listener
	 * @return e.g. 14c. Mirabilis linearis (Pursh) Heimerl var. subhispida (Heimerl) Spellenberg
	 */
	private String chunkPlaceOfPub(String text, String filename) {
		//search for the comma before a number
		//Pattern p = Pattern.compile("(.*?[A-Z].*?),([^,]+?)\\d.*");
		// textcp is only referenced from commented-out code below
		String textcp = text;
		String journal = null;
		Pattern p = Pattern.compile("(.*?[A-Z].*),\\s+([^\\d]+)\\s+\\d.*"); //(Rydberg) Munz, Man. S. Calif. Bot., 598. 1935
		Matcher m = null;
		//problems 489.xml 6a. Dysphania R. Brown sect. Adenois (Moquin-Tandon) Mosyakin & Clemants, Ukrayins�k. Bot. Zhurn., n. s. 59: 382. 2002
		while(text.matches(".*?[A-Z].*?\\d+.*")){//4a. Echinocereus pectinatus (Scheidweiler) Engelmann var. wenigeri L. D. Benson, Cact. Succ. J. (Los Angeles) 40: 124, <=================>fig. 3. 1968 � Weniger�shedgehog,ashy white ra inbow cactus, langtry rainbow cactus
			m = p.matcher(text);
			if(m.matches()){
				// keep the part before the last qualifying comma; remember the journal text
				text = m.group(1).trim();
				journal = m.group(2).replaceFirst(",\\s*$", "").trim();				
			}else{
				break;
			}
		}
		//post process to deal with some special cases: 
		//Boerhavia line arifolia A. Gray, Amer. J. Sci. Arts
		int in = text.indexOf(","); //suspicious comma left in the name
		if(in > 0){
			p = Pattern.compile(",[^\\d]+&"); //may be authority list: a, b & c
			m = p.matcher(text);
			if(!m.find()){
				p = Pattern.compile("\\([^()]*?,");//may be in ()
				m = p.matcher(text);
				if(!m.find()){// deal with this case
					text = text.substring(0, in).trim();
				}
			}
		}
		
		//in Smith ed. 
		in = text.indexOf(" in ");
		if(in > 0){
			text = text.substring(0, in).trim();
			//String rest = textcp.substring(in).trim();
		}
		
		//PHYTOLACCACEAE R. Brown � Pokeweed Family
		in = text.indexOf("�");
		if(in < 0){
			in = text.indexOf("�");
		}
		if(in > 0){
			text = text.substring(0, in).trim();
			//String rest = textcp.substring(in).trim();
		}
		
		// isJournal() currently returns false for every input, so this message is always reported
		if(!isJournal(journal)){
			listener.info("", filename, "Check taxon name:"+text);
		}

		return text;//text now holds only the name, which should not contain a number
	}
	/**
	 * Checks a journal abbreviation against the journal list.
	 * 
	 * The database lookup is commented out, so this method currently returns
	 * false for every input.
	 * 
	 * NOTE(review): if the lookup below is ever re-enabled, it concatenates
	 * the journal text into the SQL statement, embeds the database credentials in
	 * the URL, and does not close the connection or result set; use a
	 * PreparedStatement, external configuration and proper resource handling.
	 * 
	 * @param journal the journal text to look up, may be null
	 * @return false
	 */
	private boolean isJournal(String journal) {
		/*Connection conn = null;
		try{
			if(conn == null){
				Class.forName("com.mysql.jdbc.Driver");
			    String URL = "jdbc:mysql://localhost/journals?user=root&password=root";
				//String URL = ApplicationUtilities.getProperty("database.url");
				conn = DriverManager.getConnection(URL);
				Statement stmt = conn.createStatement();
				ResultSet rs = stmt.executeQuery("select * from journals where abbreviation like'"+journal+" %'");
				if(rs.next()){
					return true;
				}
				stmt.close();
			}
		}catch(Exception e){
			e.printStackTrace();
		}*/
		return false;
	}

	// A single capital letter split from the rest of its word, at the start or after a space.
	private static final Pattern BROKEN_INITIAL = Pattern.compile("(.*?(?:^| ))([A-Z] )(\\w.*)");

	// A run of capitals split from a following all-capitals remainder, e.g. "T HYRSOSTACHYS".
	private static final Pattern BROKEN_CAPS = Pattern.compile("(.*?(?:^| ))([A-Z]+ )([A-Z][A-Z].*)");

	// A number split into two digit groups, e.g. "3 9 . xxxx" (Hong 08/04/09).
	private static final Pattern BROKEN_DIGITS = Pattern.compile("(.*?(?:^| ))(\\d+ )(\\d+.*)");

	// A rank abbreviation (subfam|var|subgen|subg|subsp|ser|tribe) with spaces inside, e.g. "va r."
	private static final Pattern BROKEN_RANK = Pattern.compile("(.*?)\\b(s ?u ?b ?f ?a ?m|v ?a ?r|s ?u ?b ?g ?e ?n|s ?u ?b ?g|s ?u ?b ?s ?p|s ?e ?r|t ?r ?i ?b ?e)\\b(.*)");

	/**
	 * Repairs words in a name line that were broken apart by stray spaces.
	 * 
	 * Four repairs are applied in order, each at most once per call: a split
	 * leading capital ("T hyrsostachys"), a split all-capitals word
	 * ("T HYRSOSTACHYS"), a split number ("3 9 .") and a rank abbreviation with
	 * embedded spaces ("va r."). The patterns are compiled once rather than on
	 * every call.
	 * 
	 * @param text the name line, may be null
	 * @return the repaired text; null if text is null
	 */
	public static String fixBrokenNames(String text){
		if(text == null){
			return null;
		}
		text = rejoin(BROKEN_INITIAL, text);
		//T HYRSOSTACHYS
		text = rejoin(BROKEN_CAPS, text);
		text = rejoin(BROKEN_DIGITS, text);

		//va r. 
		Matcher m = BROKEN_RANK.matcher(text);
		if(m.matches()){
			// remove every space inside the rank abbreviation
			text = m.group(1)+m.group(2).replaceAll("\\s+", "")+m.group(3);
		}
		return text;
	}

	/** Drops the trailing space of group 2 when the whole text matches; otherwise returns text unchanged. */
	private static String rejoin(Pattern pattern, String text){
		Matcher m = pattern.matcher(text);
		if(m.matches()){
			return m.group(1)+m.group(2).trim()+m.group(3);
		}
		return text;
	}
	/**
	 * Finds the position of the text element that holds the name.
	 * 
	 * The first element whose trimmed text is longer than four characters, or
	 * which neither starts with a digit nor consists solely of non-word
	 * characters, is taken as the name line
	 * (exception handled by the length test: &lt;text&gt;1. Artemisia aleutica&lt;/text&gt;).
	 * 
	 * @param textList2 the text elements of the name paragraph
	 * @return index of the name line, or 0 when none qualifies
	 */
	private int nameLine(List<Element> textList2) {
		final int count = textList2.size();
		int position = 0;
		while (position < count) {
			String line = textList2.get(position).getText().trim();
			boolean qualifies = line.length() > 4
					|| !(line.matches("^\\d.*") || line.matches("\\W+"));
			if (qualifies) {
				return position;
			}
			position++;
		}
		return 0;
	}

	/**
	 * Populates the taxon index with one name per extracted file, in file order.
	 * 
	 * For each file 1.xml .. total.xml the first /treatment/paragraph (the name
	 * paragraph) is passed to extractName() and the result is added to the
	 * index; each addition is echoed to standard output.
	 * 
	 * Sample index lines, as given by the original author:
	 * 42000077#F=43#Caryophyllaceae[fam] #
	 * 50260245#SF=43a#Polycarpoideae[subfam] #
	 * 40035931#G=1#Drymaria[genus]#
	 * 06301172#1#Drymaria cordata[species] #F3 I NM W
	 * 50266761#1a#Drymaria cordata var cordata[variety]#F3 I W
	 * 
	 * @throws ParsingException wrapping any failure while parsing/querying a file
	 */
	private void fillInNames(){
		try {
			int fileNo = 1;
			while (fileNo <= total) {
				String fileName = fileNo + ".xml";
				Document doc = new SAXBuilder().build(new File(path, fileName));
				Element root = doc.getRootElement();

				//get the first paragraph, which is the name paragraph
				Element pe = (Element) XPath.selectSingleNode(root,
						"/treatment/paragraph");

				String taxonname = extractName(pe, fileName);
				ti.addName(taxonname);//add one by one in sequence
				System.out.println("add name :"+taxonname+ " for file "+fileNo+".xml");

				fileNo++;
			}
		} catch (Exception e) {
			LOGGER.error("VolumeVerifier : fillInNames - Exception ", e);
			e.printStackTrace();
			throw new ParsingException(e);
		}
	}
}