/*
* MX Cheminformatics Tools for Java
*
* Copyright (c) 2007, 2008 Metamolecular, LLC
*
* http://metamolecular.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package com.chemhack.jsMolEditor.client.io.daylight;
import com.chemhack.jsMolEditor.client.jre.emulation.java.util.regex.Pattern;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
//import java.util.regex.Matcher;
//import java.util.regex.Pattern;
/**
 * Splits a SMILES string into tokens.
 *
 * <p>The input is first checked against a character blacklist and then broken into tokens
 * with a single alternation regex (element symbols, bracket expressions, bond symbols,
 * ring-closure labels and parentheses). The resulting tokens are handed out one at a time
 * through {@link #hasNextToken()} and {@link #nextToken()}.
 *
 * <p>NOTE(review): the project-local {@code Pattern} class is not visible from this file;
 * the comments below assume {@code match} returns every match of the regex in input order
 * and {@code matches} tests the whole string, as with standard regex engines. Confirm
 * against that class.
 *
 * <p>NOTE(review): the blacklist accepts the digit {@code 0}, but no token alternative
 * matches a bare {@code 0} (ring labels are {@code 1-9} or {@code %10-%99}), so such a
 * character presumably never appears in the token list. Likewise the blacklist rejects
 * characters such as {@code +}, {@code @}, {@code /} and {@code *} even inside square
 * brackets, although the bracket alternative itself would accept them. Verify whether
 * these limitations are intended before widening either expression.
 *
 * @author Richard L. Apodaca
 * @author Duan Lian
 */
public class SMILESTokenizer {
    /** Regex alternation operator used to assemble the token expression. */
    private static final String OR = "|";

    /** One uppercase letter optionally followed by a lowercase letter, or one lowercase letter. */
    private static final String ELEMENT = "[A-Z][a-z]?" + OR + "[a-z]";

    /** A single bond (or dot) symbol: one of {@code - = # : .} */
    private static final String BOND = "[-=#:\\.]";

    /** A ring-closure label: a digit 1-9, or {@code %} followed by a two-digit number 10-99. */
    private static final String RING = "[1-9]" + OR + "\\%[1-9][0-9]";

    /** Everything from an opening square bracket up to the next closing square bracket. */
    private static final String BRACKET = "\\[[^\\]]*\\]";

    /** A single opening or closing parenthesis. */
    private static final String PARENS = "[\\(\\)]";

    /** Complete token expression; alternatives are tried in the order listed. */
    private static final String TOKEN_REGEX =
            ELEMENT + OR + BRACKET + OR + BOND + OR + RING + OR + PARENS;

    /** Compiled token expression, built once and shared by all instances. */
    private static final Pattern TOKEN_PATTERN = new Pattern(TOKEN_REGEX);

    /** Matches any string containing at least one character outside the accepted set. */
    private static final Pattern BLACKLIST =
            new Pattern(".*[^A-Za-z0-9\\(\\)\\[\\]\\-=#:\\.\\%].*");

    /** Cursor over the tokens produced from the string given to the constructor. */
    private final Iterator<String> iterator;

    /**
     * Validates and tokenizes the given SMILES string.
     *
     * @param smiles the SMILES string to tokenize
     * @throws IllegalArgumentException if {@code smiles} contains a blacklisted character
     */
    public SMILESTokenizer(String smiles) {
        assertNoBlacklistedCharacters(smiles);
        List<String> tokens = fragment(smiles);
        this.iterator = tokens.iterator();
    }

    /**
     * Reports whether another token is available.
     *
     * @return {@code true} if {@link #nextToken()} can be called without failing
     */
    public boolean hasNextToken() {
        return iterator.hasNext();
    }

    /**
     * Returns the next token and advances past it.
     *
     * @return the next token
     * @throws java.util.NoSuchElementException if all tokens have already been returned
     */
    public String nextToken() {
        return iterator.next();
    }

    /**
     * Breaks a string into the substrings matched by the token expression.
     *
     * @param string the text to split
     * @return a new, modifiable list of the matched substrings; empty if nothing matched
     */
    public List<String> fragment(String string) {
        String[] foundStrings = TOKEN_PATTERN.match(string);

        // The Pattern implementation is not visible here. A JavaScript-style match
        // yields null rather than an empty array when nothing matches, so guard
        // against it instead of failing with a NullPointerException below.
        if (foundStrings == null) {
            return new ArrayList<String>();
        }

        List<String> result = new ArrayList<String>(foundStrings.length);
        for (String token : foundStrings) {
            result.add(token);
        }
        return result;
    }

    /**
     * Rejects input that the blacklist expression matches.
     *
     * @param smiles the SMILES string to check
     * @throws IllegalArgumentException if the blacklist matches {@code smiles}
     */
    private void assertNoBlacklistedCharacters(String smiles) {
        if (BLACKLIST.matches(smiles)) {
            throw new IllegalArgumentException("Invalid SMILES character in \"" + smiles + "\"");
        }
    }
}