/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.reachables;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.io.BufferedReader;
import java.io.FileReader;

/**
 * Lookup table from SMARTS substructure patterns to the subclass label of
 * the {@link #raw_patterns} row that declares them.
 *
 * <p>SMARTS syntax reference:
 * http://www.daylight.com/dayhtml/doc/theory/theory.smarts.html
 * (short summary: {@code ,} means "or", {@code ;} means "and",
 * {@code R} means "in ring").
 */
public class TargetSelection {
  /** SMARTS pattern string -> subclass label; filled by {@link #init()}. */
  HashMap<String, String> substructs;

  /** Creates an empty pattern map and populates it from {@link #raw_patterns}. */
  TargetSelection() {
    this.substructs = new HashMap<String, String>();
    init();
  }

  /**
   * Copies every row of {@link #raw_patterns} into {@link #substructs},
   * keyed by the row's SMARTS string and valued by its subclass label.
   *
   * <p>If two rows share the same SMARTS string, the later row overwrites
   * the earlier one.
   */
  void init() {
    // The keys are SMARTS, so custom hand-written patterns could possibly be
    // registered here as well, e.g. "O=[!C;R]" -> "regex".
    for (String[] pattern : raw_patterns) {
      // Row layout: { class, SMARTS, subclass, test InChI }.
      String smarts = pattern[1];
      String psubclass = pattern[2];
      // The subclass is stored rather than the class: it is more precise for
      // narrowing when looking at the matches.
      this.substructs.put(smarts, psubclass);
    }
  }

  /**
   * Returns the SMARTS -> subclass map.
   *
   * @return the internal map itself (not a copy)
   */
  HashMap<String, String> getPatterns() {
    return this.substructs;
  }

  /**
   * Logs {@code id} followed by each element of {@code elems}, tab-separated,
   * as a single newline-terminated progress line.
   *
   * <p>The line is assembled first and logged with one call so that the
   * logger's prefix appears once at the start of the line instead of being
   * repeated before every fragment.
   *
   * @param id    label printed at the start of the line
   * @param elems values appended after the label, each preceded by a tab
   */
  private static void dump(String id, Object[] elems) {
    if (!GlobalParams.LOG_PROGRESS) {
      return; // nothing would be printed; skip building the line
    }
    StringBuilder line = new StringBuilder().append(id);
    for (Object elem : elems) {
      line.append('\t').append(elem);
    }
    line.append('\n');
    logProgressNoNL(line.toString());
  }

  /** Prefix placed in front of every progress message. */
  private static String _fileloc = "com.act.reachables.TargetSelection";

  /**
   * Writes {@code msg} to {@code System.err}, prefixed with {@link #_fileloc}
   * and without a trailing newline. Does nothing unless
   * {@code GlobalParams.LOG_PROGRESS} is set.
   *
   * @param msg text to print after the prefix
   */
  private static void logProgressNoNL(String msg) {
    if (!GlobalParams.LOG_PROGRESS) {
      return;
    }
    System.err.print(_fileloc + ": " + msg);
  }

  /**
   * Pattern table. Each row is { class, SMARTS, subclass, test InChI };
   * the fourth column may be empty.
   * NOTE(review): the fourth column is presumably an example molecule
   * expected to match the SMARTS -- it is not read by init(); confirm.
   */
  static final String[][] raw_patterns = new String[][] {
    // Handwritten initial patterns
    new String[] { "amine", "N([H])[H]", "amine", "" },
    new String[] { "hydroxyl", "O[H]", "hydroxyl", "" },
    new String[] { "carboxylic_acid", "C(=O)O[H]", "carboxylic_acid", "" },
    new String[] { "thiol", "S[H]", "thiol", "" },
    new String[] { "aldehyde", "C=O", "aldehyde", "" },
    new String[] { "halogen", "[Cl,Br,I,F,At]", "halogen", "" },

    // This data comes from data/target_selection_patterns_july2014.numbers (data/target_selection_patterns_july2014.formatted)
    new String[] { "glycosides", "C[O,S,N,C]C1([H])([O,S]C([!O;!S;!N])CCCCC1)", "octanoside", "" },
    new String[] { "glycosides", "C[O,S,N,C]C1([H])([O,S]C([!O;!S;!N])CCCC1)", "heptanoside", "" },
    new String[] { "glycosides", "C[O,S,N,C]C1([H])([O,S]C([!O;!S;!N])CCC1)", "pyranoside", "InChI=1S/C14H28O5S/c1-2-3-4-5-6-7-8-20-14-13(18)12(17)11(16)10(9-15)19-14/h10-18H,2-9H2,1H3/t10-,11-,12+,13-,14+/m1/s1" },
    new String[] { "glycosides", "C[O,S,N,C]C1([H])([O,S]C([!O;!S;!N])CC1)", "furanoside",
"InChI=1S/C28H42O8/c1-26-8-5-17(35-25-24(32)23(31)21(13-29)36-25)12-16(26)3-4-20-19(26)6-9-27(2)18(7-10-28(20,27)33)15-11-22(30)34-14-15/h11,16-21,23-25,29,31-33H,3-10,12-14H2,1-2H3/t16-,17+,18-,19+,20-,21+,23+,24-,25-,26+,27-,28?/m1/s1" }, // new String[] { "glycosides", "C[O,S,N,C]C1([H])([O,S]C([!O;!S;!N])C1)", "butanoside", "InChI=1S/C10H16N5O13P3/c11-10-13-7-6(8(17)14-10)12-3-15(7)9-4(1-16)5(26-9)2-25-30(21,22)28-31(23,24)27-29(18,19)20/h3-5,9,16H,1-2H2,(H,21,22)(H,23,24)(H2,18,19,20)(H3,11,13,14,17)" }, new String[] { "glycosides", "COC1([H])(OCCCC1)", "oxy_pyranosides", "InChI=1S/C27H32O14/c1-10-20(32)22(34)24(36)26(37-10)41-25-23(35)21(33)18(9-28)40-27(25)38-13-6-14(30)19-15(31)8-16(39-17(19)7-13)11-2-4-12(29)5-3-11/h2-7,10,16,18,20-30,32-36H,8-9H2,1H3/t10-,16+,18-,20+,21-,22+,23+,24-,25-,26-,27-/m1/s1" }, new String[] { "glycosides", "[O,N,S]([C,c])[C@@H]1O[C@@H]([C@@H](O([H]))[C@H](O([H]))[C@H]1O([H]))CO", "glucosides", "InChI=1/C16H32O6/c1-2-3-4-5-6-7-8-9-10-21-16-15(20)14(19)13(18)12(11-17)22-16/h12-20H,2-11H2,1H3/t12-,13-,14+,15-,16-/m1/s1" }, new String[] { "glycosides", "[O,N,S]([C,c])[C@@H]1O[C@H](CO)[C@H](O([H]))[C@H](O([H]))[C@H]1O([H])", "galactosides", "InChI=1/C14H15BrClNO6/c15-5-1-2-6-9(10(5)16)7(3-17-6)22-14-13(21)12(20)11(19)8(4-18)23-14/h1-3,8,11-14,17-21H,4H2/t8-,11+,12+,13-,14-/m1/s1" }, new String[] { "glycosides", "CSC1([H])(OCCCC1)", "thio_pyranoside", "InChI=1S/C14H28O5S/c1-2-3-4-5-6-7-8-20-14-13(18)12(17)11(16)10(9-15)19-14/h10-18H,2-9H2,1H3/t10-,11-,12+,13-,14+/m1/s1" }, new String[] { "glycosides", "C~NC1([H])(OCCCC1)", "pyranosyl_amines", "InChI=1S/C8H14N4O6/c9-12-10-1-4(14)11-8-7(17)6(16)5(15)3(2-13)18-8/h3,5-8,13,15-17H,1-2H2,(H,11,14)/t3-,5-,6+,7-,8-/m1/s1" }, new String[] { "glycosides", "[c,C]CC1([H])(OC([!O;!S;!N])CCC1)", "c_pyranosides", "InChI=1S/C21H22O9/c22-6-8-4-10-14(21-20(29)19(28)17(26)13(7-23)30-21)9-2-1-3-11(24)15(9)18(27)16(10)12(25)5-8/h1-5,13-14,17,19-26,28-29H,6-7H2/t13-,14+,17-,19+,20-,21+/m1/s1" }, new 
String[] { "glycosides", "COC1([H])(OCCC1)", "oxy_furanosides", "InChI=1S/C28H42O8/c1-26-8-5-17(35-25-24(32)23(31)21(13-29)36-25)12-16(26)3-4-20-19(26)6-9-27(2)18(7-10-28(20,27)33)15-11-22(30)34-14-15/h11,16-21,23-25,29,31-33H,3-10,12-14H2,1-2H3/t16-,17+,18-,19+,20-,21+,23+,24-,25-,26+,27-,28?/m1/s1" }, new String[] { "glycosides", "CSC1([H])(OCCC1)", "thiofuranosides", "InChI=1S/C8H16O5S/c1-2-14-8-6(12)5(11)7(13-8)4(10)3-9/h4-12H,2-3H2,1H3/t4-,5-,6-,7-,8-/m1/s1" }, new String[] { "glycosides", "CNC1([H])(OCCC1)", "furanosyl_amines", "InChI=1S/C9H13N3O5/c10-5-1-2-12(9(16)11-5)8-7(15)6(14)4(3-13)17-8/h1-2,4,6-8,13-15H,3H2,(H2,10,11,16)/t4-,6-,7+,8-/m1/s1" }, // new String[] { "glycosides", "[H]OP(=O)(OC[S,N,O]C)OP(=O)(O[H])OCC1(OC(n)CC1)", "nucleotide_sugars", "InChI: 1S/C20H31N4O16P/c1-7(26)22-12-8(27)4-20(18(32)33,39-16(12)13(29)9(28)5-25)40-41(35,36)37-6-10-14(30)15(31)17(38-10)24-3-2-11(21)23-19(24)34/h2-3,8-10,12-17,25,27-31H,4-6H2,1H3,(H,22,26)(H,32,33)(H,35,36)(H2,21,23,34)/t8-,9+,10+,12+,13+,14+,15+,16+,17+,20+/m0/s1" }, new String[] { "glycosides", "CCC1([H])(OC([!O;!S;!N])CC1)", "c_furanosides", "" }, new String[] { "glycosides", "OC1(CCCCS1)", "thio_pyranosides", "" }, new String[] { "glycosides", "OC1(CCCS1)", "thio_furanosides", "" }, new String[] { "glycosides", "SC1(CCCCS1)", "dithio_pyranosides", "" }, new String[] { "glycosides", "SC1(CCCS1)", "dithio_furanosides", "" }, new String[] { "glycosides", "OC1(CCCCN1)", "amino_pyranosides", "" }, new String[] { "glycosides", "OC1(CCCN1)", "amino_furanosides", "" }, new String[] { "glycosides", "NC1(CCCCN1)", "diamino_pyranosides", "" }, new String[] { "glycosides", "NC1(CCCN1)", "diamino_furanosides", "" }, new String[] { "glycosides", "SC1(CCCCN1)", "pyranoside_S_N", "" }, new String[] { "glycosides", "SC1(CCCN1)", "furanoside_S_N", "" }, new String[] { "glycosides", "NC1(CCCCS1)", "pyranoside_N_S", "" }, new String[] { "glycosides", "NC1(CCCS1)", "furanoside_N_S", "" }, // new String[] { "glycosides", 
"C[O,N,S]COC([H])(C#N)C", "cyanogenic_glycosides", "InChI=1S/C20H27NO11/c21-6-10(9-4-2-1-3-5-9)30-20-18(28)16(26)14(24)12(32-20)8-29-19-17(27)15(25)13(23)11(7-22)31-19/h1-5,10-20,22-28H,7-8H2/t10-,11+,12+,13+,14+,15-,16-,17+,18+,19+,20+/m0/s1" }, new String[] { "b_arylamines", "cCCN", "aryl_ethylamine", "InChI=1S/C9H13NO3/c1-10-5-9(13)6-2-3-7(11)8(12)4-6/h2-4,9-13H,5H2,1H3/t9-/m0/s1" }, new String[] { "b_arylamines", "c1ccc(cc1)CC[n,N]", "all_phe", "InChI=1S/C9H13NO3/c1-10-5-9(13)6-2-3-7(11)8(12)4-6/h2-4,9-13H,5H2,1H3/t9-/m0/s1" }, new String[] { "b_arylamines", "c1ccc2c(c1)c(cn2)CC[n,N]", "all_trp", "InChI=1S/C13H16N2O2/c1-9(16)14-6-5-10-8-15-13-4-3-11(17-2)7-12(10)13/h3-4,7-8,15H,5-6H2,1-2H3,(H,14,16)" }, new String[] { "b_arylamines", "n1cc(nc1)CC[n,N]", "all_his", "InChI=1S/C5H9N3/c6-2-1-5-3-7-4-8-5/h3-4H,1-2,6H2,(H,7,8)" }, new String[] { "b_arylamines", "c1ccc(cc1)CCN", "phenethylamines", "InChI=1S/C9H13NO3/c1-10-5-9(13)6-2-3-7(11)8(12)4-6/h2-4,9-13H,5H2,1H3/t9-/m0/s1" }, new String[] { "b_arylamines", "c1ccc2c(c1)c(cn2)CCN", "tryptamines", "InChI=1S/C13H16N2O2/c1-9(16)14-6-5-10-8-15-13-4-3-11(17-2)7-12(10)13/h3-4,7-8,15H,5-6H2,1-2H3,(H,14,16)" }, new String[] { "b_arylamines", "n1cc(nc1)CCN", "histamines", "InChI=1S/C5H9N3/c6-2-1-5-3-7-4-8-5/h3-4H,1-2,6H2,(H,7,8)" }, new String[] { "prostaglandins", "CCCCCCCC1(CCCC1(C~CCCCCCC))", "prostaglandins", "InChI=1S/C20H34O5/c1-2-3-6-9-15(21)12-13-17-16(18(22)14-19(17)23)10-7-4-5-8-11-20(24)25/h12-13,15-17,19,21,23H,2-11,14H2,1H3,(H,24,25)/t15-,16+,17+,19+/m0/s1" }, // new String[] { "prostaglandins", "CCC1(CCCC1(CC))", "prostaglandin_short", "InChI=1S/C20H34O5/c1-2-3-6-9-15(21)12-13-17-16(18(22)14-19(17)23)10-7-4-5-8-11-20(24)25/h12-13,15-17,19,21,23H,2-11,14H2,1H3,(H,24,25)/t15-,16+,17+,19+/m0/s1" }, new String[] { "mesogens", "c1ccc(cc1)c2ccccc2", "biphenyl", "InChI=1S/C14H10/c1-2-12-8-10-14(11-9-12)13-6-4-3-5-7-13/h1,3-11H" }, new String[] { "mesogens", "c1ccc2ccccc2c1", "napthalenes", 
"InChI=1S/C10H8/c1-2-6-10-8-4-3-7-9(10)5-1/h1-8H" }, new String[] { "mesogens", "c3ccc2cc1ccccc1cc2c3", "Anthracene", "InChI=1S/C15H11Cl/c16-10-15-13-7-3-1-5-11(13)9-12-6-2-4-8-14(12)15/h1-9H,10H2" }, new String[] { "mesogens", "c1ccc2c(c1)ccc3ccccc32", "Phenanthrene", "InChI=1S/C15H10O2/c16-15(17)14-9-10-5-1-2-6-11(10)12-7-3-4-8-13(12)14/h1-9H,(H,16,17)" }, new String[] { "mesogens", "c1cc2ccc3cccc4c3c2c(c1)cc4", "pyrenes", "InChI=1S/C16H8N2O4/c19-17(20)13-7-3-9-1-2-10-4-8-14(18(21)22)12-6-5-11(13)15(9)16(10)12/h1-8H" }, new String[] { "mesogens", "C1(CCC2(CCCCC2(C1)))", "reduced_napthalene", "" }, new String[] { "mesogens", "C1(CCC2(CC3(CCCCC3(CC2(C1)))))", "reduced_anthracene", "" }, new String[] { "mesogens", "C1(CCC3(C(C1)CCC2(CCCCC23)))", "reduced_phenanthrene", "" }, new String[] { "mesogens", "C1CCC2C(C1)CCC3C2CCC4C3CCC4", "steroid_gonane", "InChI=1S/C17H28/c1-2-6-14-12(4-1)8-10-17-15-7-3-5-13(15)9-11-16(14)17/h12-17H,1-11H2/t12?,13-,14-,15+,16+,17-/m0/s1" }, new String[] { "mesogens", "[c,C,n,N,O]1([c,C,n,N,O][c,C,n,N,O]2([c,C,n,N,O][c,C,n,N,O][c,C,n,N,O][c,C,n,N,O]3([c,C,n,N,O][c,C,n,N,O][c,C,n,N,O][c,C,n,N,O]([c,C,n,N,O]1)[c,C,n,N,O]23)))", "hetero_three_fused", "" }, new String[] { "mesogens", "[c,C,n,N,O]1([c,C,n,N,O][c,C,n,N,O][c,C,n,N,O]3([c,C,n,N,O]([c,C,n,N,O]1)[c,C,n,N,O][c,C,n,N,O][c,C,n,N,O]2([c,C,n,N,O][c,C,n,N,O][c,C,n,N,O][c,C,n,N,O][c,C,n,N,O]23)))", "hetero_phenanthrenes", "InChI=1S/C15H10O2/c16-15(17)14-9-10-5-1-2-6-11(10)12-7-3-4-8-13(12)14/h1-9H,(H,16,17)" }, new String[] { "mesogens", "[c,C,n,N,O]1([c,C,n,N,O][c,C,n,N,O][c,C,n,N,O]2([c,C,n,N,O][c,C,n,N,O]3([c,C,n,N,O][c,C,n,N,O][c,C,n,N,O][c,C,n,N,O][c,C,n,N,O]3([c,C,n,N,O][c,C,n,N,O]2([c,C,n,N,O]1)))))", "hetero_anthracenes", "InChI=1S/C15H11Cl/c16-10-15-13-7-3-1-5-11(13)9-12-6-2-4-8-14(12)15/h1-9H,10H2" }, new String[] { "mesogens", "C#C", "acetylenes", "InChI=1S/C2H2/c1-2/h1-2H" }, new String[] { "dioxolenes", "c1ccc(c(c1)O([H]))O([H])", "catechols", 
"InChI=1S/C8H11NO2/c9-4-3-6-1-2-7(10)8(11)5-6/h1-2,5,10-11H,3-4,9H2" }, new String[] { "dioxolenes", "O([H])ccO([H])", "aromatic_vic_diol", "InChI=1S/C8H11NO2/c9-4-3-6-1-2-7(10)8(11)5-6/h1-2,5,10-11H,3-4,9H2" }, new String[] { "dioxolenes", "c2OCOc2", "methylenedioxy_arene", "InChI=1S/C15H19NO3.ClH/c1-2-12(16-7-3-4-8-16)15(17)11-5-6-13-14(9-11)19-10-18-13;/h5-6,9,12H,2-4,7-8,10H2,1H3;1H" }, new String[] { "dioxolenes", "O=C1C=CC=CC1=O", "o_benzoquinone", "InChI=1S/C7H6O3/c1-10-5-2-3-6(8)7(9)4-5/h2-4H,1H3" }, new String[] { "electroactive", "n1c3c(nc2c1cccc2)cccc3", "phenazines", "InChI=1S/C15H16N4.ClH/c1-9-6-13-15(8-11(9)16)18-14-7-10(19(2)3)4-5-12(14)17-13;/h4-8H,16H2,1-3H3;1H" }, new String[] { "electroactive", "n([H])1c([H])ccc([H])1", "poly_pyrrole", "InChI=1S/C4H4N2O2/c7-6(8)4-1-2-5-3-4/h1-3,5H" }, new String[] { "electroactive", "n([H])1c([H])ccc(C([H])([H])([H]))1", "methyl_pyrrole", "InChI=1S/C5H7N/c1-5-3-2-4-6-5/h2-4,6H,1H3" }, new String[] { "electroactive", "c1cc(ccc1([H]))N([H])[H]", "poly_anilines", "InChI=1S/C6H5F2N/c7-4-1-2-5(8)6(9)3-4/h1-3H,9H2" }, new String[] { "electroactive", "s1c([H])ccc([H])1", "poly_p_phenylene_sulfide", "InChI=1S/C5H4OS/c6-3-5-1-2-7-4-5/h1-4H" }, new String[] { "electroactive", "s1cccc1", "thiophenes", "InChI=1S/C5H4OS/c6-3-5-1-2-7-4-5/h1-4H" }, // new String[] { "electroactive", "n(c)(c)", "diaryl_amine", "InChI=1S/C12H11N/c1-3-7-11(8-4-1)13-12-9-5-2-6-10-12/h1-10,13H" }, // new String[] { "electroactive", "n(c)(c)(c)", "triaryl_amine", "InChI=1S/C18H15N/c1-4-10-16(11-5-1)19(17-12-6-2-7-13-17)18-14-8-3-9-15-18/h1-15H" }, // new String[] { "electroactive", "C1=C([H])C=C2C(=C1)NC3=CC=C([H])C=C3O2", "poly_phenoxazine", "InChI=1S/C12H9NO/c1-3-7-11-9(5-1)13-10-6-2-4-8-12(10)14-11/h1-8,13H" }, new String[] { "photoactive", "c[C,N]=[C,N]c", "benzylidene", "InChI=1S/C14H12/c1-3-7-13(8-4-1)11-12-14-9-5-2-6-10-14/h1-12H/b12-11+" }, new String[] { "photoactive", "C(=O)C=cc1ccccc1", "cinnamates", "" }, new String[] { "photoactive", 
"C1=CC(=O)C=CC1=O", "one_four_benzoquinone", "" }, new String[] { "photoactive", "O=C1c2ccccc2C(=O)c3ccccc13", "anthraquinone", "" }, new String[] { "photoactive", "cC(=O)C=C-c", "chalcone_generalized", "InChI=1S/C16H14O5/c1-21-16-9-11(17)4-5-12(16)13(18)6-2-10-3-7-14(19)15(20)8-10/h2-9,17,19-20H,1H3/b6-2+" }, new String[] { "photoactive", "C(c)(c)(c)", "tryaryl_methane", "" }, new String[] { "photoactive", "C=C2C(=Cc)C[O,S,C]C2", "fulgides_open", "" }, new String[] { "photoactive", "C=1C=CC3(C(C=1)=CC2=C(C[O,S,C]C2)C3)", "fulgides_closed", "" }, new String[] { "photoactive", "COCC1(c2ccccc2(OC(=O)C1))", "coumaryl_ester", "" }, new String[] { "photoactive", "[N+](=O)([O-])c1ccccc1C[O,S,N]C", "o_nitrobenzyl_ether", "" }, new String[] { "photoactive", "c1ccccc1C=Cc2ccccc2", "stilbenes", "InChI=1S/C14H12/c1-3-7-13(8-4-1)11-12-14-9-5-2-6-10-14/h1-12H/b12-11+" }, new String[] { "photoactive", "N(=Nc1ccccc1)c2ccccc2", "azobenzenes", "InChI=1S/C15H16N2/c1-12(2)13-8-10-15(11-9-13)17-16-14-6-4-3-5-7-14/h3-12H,1-2H3" }, new String[] { "photoactive", "c1ccccc1C=Nc2ccccc2", "benzylidene_aniline", "" }, new String[] { "photoactive", "C=CC=CC=C", "c6_pi", "" }, new String[] { "photoactive", "C=CC=CC=CC=C", "c8_pi", "" }, new String[] { "photoactive", "C=CC=CC=CC=CC=C", "c10_pi", "" }, new String[] { "photoactive", "C=CC=CC=CC=CC=CC=C", "c12_pi", "" }, new String[] { "photoactive", "C=CC=CC=CC=CC=CC=CC=C", "c14_pi", "" }, new String[] { "photoactive", "C=CC=CC=CC=CC=CC=CC=CC=C", "c16_pi", "" }, new String[] { "photoactive", "C=CC=CC=CC=CC=CC=CC=CC=CC=C", "c18_pi", "InChI=1S/C30H40O/c1-24(13-8-9-14-25(2)16-11-18-27(4)23-31)15-10-17-26(3)20-21-29-28(5)19-12-22-30(29,6)7/h8-11,13-18,20-21,23H,12,19,22H2,1-7H3/b9-8+,15-10+,16-11+,21-20+,24-13+,25-14+,26-17+,27-18+" }, new String[] { "photoactive", "C=CC=CC=CC=CC=CC=CC=CC=CC=CC=C", "c20_pi", "" }, new String[] { "photoactive", "N=CC=CN", "n1_cyanines", "" }, new String[] { "photoactive", "N=CC=CC=CN", "n2_cyanines", "" }, new String[] 
{ "photoactive", "N=CC=CC=CC=CN", "n3_cyanines", "" }, new String[] { "photoactive", "N=CC=CC=CC=CC=CN", "n4_cyanines", "" }, new String[] { "photoactive", "N=CC=CccO", "o_cyanines", "" }, new String[] { "photoactive", "[o+]2c1ccccc1ccc2c3ccccc3", "anthocyanidin_ionized", "" }, new String[] { "photoactive", "O=C(C=Cc1ccccc1)c2ccccc2", "chalcones", "" }, // new String[] { "photoactive", "C1=CC=C2C(=C1)NC3=CC=CC=C3O2", "phenoxazine", "InChI=1S/C12H9NO/c1-3-7-11-9(5-1)13-10-6-2-4-8-12(10)14-11/h1-8,13H" }, // new String[] { "ligands", "c1ccc2c(c1)ccc(=O)o2", "coumarine", "InChI=1S/C9H6O2/c10-9-6-5-7-3-1-2-4-8(7)11-9/h1-6H" }, // new String[] { "ligands", "c1cc2cc3ccc(cc4ccc(cc5ccc(cc1n2)n5)n4)n3", "porphyrins", "InChI=1S/C32H18N8/c1-2-10-18-17(9-1)25-33-26(18)38-28-21-13-5-6-14-22(21)30(35-28)40-32-24-16-8-7-15-23(24)31(36-32)39-29-20-12-4-3-11-19(20)27(34-29)37-25/h1-16H,(H2,33,34,35,36,37,38,39,40)" }, new String[] { "ligands", "C1=CC2(OCC1)(NCCC2)", "spiropyrans", "" }, new String[] { "ligands", "O=C([H])c1ccccc1O([H])", "salicylaldehydes", "InChI=1S/C7H4Br2O2/c8-5-1-4(3-10)7(11)6(9)2-5/h1-3,11H" }, new String[] { "ligands", "[H]Oc1cccc2cccnc12", "eight_hydroxyquinolines", "InChI=1S/C9H7NO/c11-8-5-1-3-7-4-2-6-10-9(7)8/h1-6,11H" }, new String[] { "ligands", "N([H])([H])c1ccccc1N([H])([H])", "o_phenylenediamines", "InChI=1S/C6H8N2/c7-5-3-1-2-4-6(5)8/h1-4H,7-8H2" }, new String[] { "ligands", "c1ccnc(c1)c2ccccn2", "two_two_bipyridyls", "InChI=1S/C10H8N2/c1-3-7-11-9(5-1)10-6-2-4-8-12-10/h1-8H" }, new String[] { "ligands", "O=C(C)C([H])C(=O)C", "betadiketones", "InChI=1S/C5H8O2/c1-4(6)3-5(2)7/h3H2,1-2H3" }, new String[] { "ligands", "O=C(C)C(=O)([H])", "alpha_ketoacetaldehyde", "InChI=1S/C7H8O2/c8-5-7(9)6-3-1-2-4-6/h1-2,5-6H,3-4H2" }, new String[] { "ligands", "O=C(C)C([H])C(=O)([H])", "beta_ketoacetaldehyde", "" }, new String[] { "ligands", "[H]OC(=O)CCO[H]", "betahydroxyacids", "InChI=1S/C5H10O3/c1-5(2,8)3-4(6)7/h8H,3H2,1-2H3,(H,6,7)" }, new String[] { "ligands", 
"c1ccc(c(c1)C(=O)O)O", "salicylates", "InChI=1S/C7H6O3/c8-6-4-2-1-3-5(6)7(9)10/h1-4,8H,(H,9,10)" }, new String[] { "ligands", "[H]OccCcc(O[H])", "two_two_arylbisphenols", "InChI=1S/C13H12O2/c14-12-7-3-1-5-10(12)9-11-6-2-4-8-13(11)15/h1-8,14-15H,9H2" }, new String[] { "olefins", "C=C", "generic_olefins", "" }, new String[] { "olefins", "C=C([H])", "hydro_olefins", "" }, new String[] { "olefins", "C=C([H])([H])", "syn_dihydro_olefins", "" }, new String[] { "olefins", "C([H])=C([H])", "anti_dihydro_olefins", "InChI=1S/C10H12O/c1-3-4-9-5-7-10(11-2)8-6-9/h3-8H,1-2H3/b4-3+" }, new String[] { "olefins", "C([H])=C([H])([H])", "vinylogous_olefins", "InChI=1S/C2H3Cl/c1-2-3/h2H,1H2" }, new String[] { "condensation_monomers", "[H]OC(=O)CN([H])[H]", "alpha_amino_acids", "InChI=1S/C9H10BrNO2/c10-7-3-1-6(2-4-7)5-8(11)9(12)13/h1-4,8H,5,11H2,(H,12,13)" }, new String[] { "condensation_monomers", "[H]OC(=O)CCN([H])[H]", "beta_amino_acids", "" }, new String[] { "condensation_monomers", "[H]OC(=O)C([H])C(=O)O[H]", "malonates", "InChI=1S/C9H8O4/c10-8(11)7(9(12)13)6-4-2-1-3-5-6/h1-5,7H,(H,10,11)(H,12,13)" }, new String[] { "condensation_monomers", "[H]OC(=O)CO[H]", "alpha_hydroxy_acids", "InChI=1S/C6H12O3/c1-4(2)3-5(7)6(8)9/h4-5,7H,3H2,1-2H3,(H,8,9)" }, new String[] { "reactive_handles", "cN([H])", "aryl_amine_oneH", "InChI=1S/C6H5F2N/c7-4-1-2-5(8)6(9)3-4/h1-3H,9H2" }, new String[] { "reactive_handles", "CN([H])", "alkyl_amine_oneH", "InChI=1S/CH5N/c1-2/h2H2,1H3" }, new String[] { "reactive_handles", "cC([H])=O", "aryl_aldehyde", "InChI=1S/C7H6O/c8-6-7-4-2-1-3-5-7/h1-6H" }, new String[] { "reactive_handles", "CC([H])=O", "alkyl_aldehyde", "InChI=1S/C2H4O/c1-2-3/h2H,1H3" }, new String[] { "reactive_handles", "cC(C([H])([H])([H]))=O", "acetophenones", "InChI=1S/C8H8O/c1-7(9)8-5-3-2-4-6-8/h2-6H,1H3" }, new String[] { "reactive_handles", "CCl", "alkylchloride", "" }, new String[] { "reactive_handles", "CBr", "alkylbromide", "" }, new String[] { "reactive_handles", "CI", "alkyliodide", "" }, 
new String[] { "reactive_handles", "cCl", "arylchloride", "InChI=1S/C6H5Cl/c7-6-4-2-1-3-5-6/h1-5H" }, new String[] { "reactive_handles", "cBr", "arylbromide", "InChI=1S/C6H5Br/c7-6-4-2-1-3-5-6/h1-5H" }, new String[] { "reactive_handles", "cI", "aryliodide", "InChI=1S/C6H5I/c7-6-4-2-1-3-5-6/h1-5H" }, new String[] { "reactive_handles", "NN([H])([H])", "hydrazine", "InChI=1S/C6H7N3O/c7-9-6(10)5-2-1-3-8-4-5/h1-4H,7H2,(H,9,10)" }, // new String[] { "reactive_handles", "cN(O)O", "aryl_nitrates", "InChI=1S/C12H10N2/c1-3-7-11(8-4-1)13-14-12-9-5-2-6-10-12/h1-10H/b14-13+" }, new String[] { "reactive_handles", "cN([H])([H])", "exocyclic_aryl_amine", "InChI=1S/C12H11N3/c13-10-6-8-12(9-7-10)15-14-11-4-2-1-3-5-11/h1-9H,13H2/b15-14+" }, new String[] { "reactive_handles", "c([H])1ccc(cc1)O", "p_hydro_phenols", "InChI=1S/C6H6O/c7-6-4-2-1-3-5-6/h1-5,7H" }, new String[] { "reactive_handles", "c([H])1ccc(cc1)N", "p_hydro_anilines", "InChI=1S/C6H7N/c7-6-4-2-1-3-5-6/h1-5H,7H2" }, new String[] { "polymeric_metabolites", "C(=O)CNC(=O)CN", "dipeptide", "InChI=1S/C75H125N21O23/c1-33(2)29-35(5)57(102)38(8)62(106)88-45(31-51(77)100)64(108)86-43(17-15-26-84-74(80)81)58(103)59(104)70(114)91-53(36(6)37(7)61(79)105)66(110)93-55-40(10)119-73(117)49-19-13-14-28-96(49)72(116)47(32-52(78)101)90-69(113)56(60(118-12)41-20-22-42(98)23-21-41)94-65(109)48(24-25-50(76)99)95(11)71(115)46(30-34(3)4)89-63(107)44(18-16-27-85-75(82)83)87-67(111)54(39(9)97)92-68(55)112/h20-23,33-40,43-49,53-60,97-98,102-104H,13-19,24-32H2,1-12H3,(H2,76,99)(H2,77,100)(H2,78,101)(H2,79,105)(H,86,108)(H,87,111)(H,88,106)(H,89,107)(H,90,113)(H,91,114)(H,92,112)(H,93,110)(H,94,109)(H4,80,81,84)(H4,82,83,85)/t35?,36-,37+,38?,39+,40+,43-,44+,45-,46-,47+,48-,49-,53-,54+,55+,56?,57?,58?,59?,60?/m0/s1" }, // new String[] { "polymeric_metabolites", "C(=O)CNC(=O)CNC(=O)CN", "tripeptide", 
"InChI=1S/C75H125N21O23/c1-33(2)29-35(5)57(102)38(8)62(106)88-45(31-51(77)100)64(108)86-43(17-15-26-84-74(80)81)58(103)59(104)70(114)91-53(36(6)37(7)61(79)105)66(110)93-55-40(10)119-73(117)49-19-13-14-28-96(49)72(116)47(32-52(78)101)90-69(113)56(60(118-12)41-20-22-42(98)23-21-41)94-65(109)48(24-25-50(76)99)95(11)71(115)46(30-34(3)4)89-63(107)44(18-16-27-85-75(82)83)87-67(111)54(39(9)97)92-68(55)112/h20-23,33-40,43-49,53-60,97-98,102-104H,13-19,24-32H2,1-12H3,(H2,76,99)(H2,77,100)(H2,78,101)(H2,79,105)(H,86,108)(H,87,111)(H,88,106)(H,89,107)(H,90,113)(H,91,114)(H,92,112)(H,93,110)(H,94,109)(H4,80,81,84)(H4,82,83,85)/t35?,36-,37+,38?,39+,40+,43-,44+,45-,46-,47+,48-,49-,53-,54+,55+,56?,57?,58?,59?,60?/m0/s1" }, // new String[] { "polymeric_metabolites", "C(=O)CNC(=O)CNC(=O)CNC(=O)CN", "four_peptide", "InChI=1S/C75H125N21O23/c1-33(2)29-35(5)57(102)38(8)62(106)88-45(31-51(77)100)64(108)86-43(17-15-26-84-74(80)81)58(103)59(104)70(114)91-53(36(6)37(7)61(79)105)66(110)93-55-40(10)119-73(117)49-19-13-14-28-96(49)72(116)47(32-52(78)101)90-69(113)56(60(118-12)41-20-22-42(98)23-21-41)94-65(109)48(24-25-50(76)99)95(11)71(115)46(30-34(3)4)89-63(107)44(18-16-27-85-75(82)83)87-67(111)54(39(9)97)92-68(55)112/h20-23,33-40,43-49,53-60,97-98,102-104H,13-19,24-32H2,1-12H3,(H2,76,99)(H2,77,100)(H2,78,101)(H2,79,105)(H,86,108)(H,87,111)(H,88,106)(H,89,107)(H,90,113)(H,91,114)(H,92,112)(H,93,110)(H,94,109)(H4,80,81,84)(H4,82,83,85)/t35?,36-,37+,38?,39+,40+,43-,44+,45-,46-,47+,48-,49-,53-,54+,55+,56?,57?,58?,59?,60?/m0/s1" }, // new String[] { "polymeric_metabolites", "C(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CN", "five_peptide", 
"InChI=1S/C75H125N21O23/c1-33(2)29-35(5)57(102)38(8)62(106)88-45(31-51(77)100)64(108)86-43(17-15-26-84-74(80)81)58(103)59(104)70(114)91-53(36(6)37(7)61(79)105)66(110)93-55-40(10)119-73(117)49-19-13-14-28-96(49)72(116)47(32-52(78)101)90-69(113)56(60(118-12)41-20-22-42(98)23-21-41)94-65(109)48(24-25-50(76)99)95(11)71(115)46(30-34(3)4)89-63(107)44(18-16-27-85-75(82)83)87-67(111)54(39(9)97)92-68(55)112/h20-23,33-40,43-49,53-60,97-98,102-104H,13-19,24-32H2,1-12H3,(H2,76,99)(H2,77,100)(H2,78,101)(H2,79,105)(H,86,108)(H,87,111)(H,88,106)(H,89,107)(H,90,113)(H,91,114)(H,92,112)(H,93,110)(H,94,109)(H4,80,81,84)(H4,82,83,85)/t35?,36-,37+,38?,39+,40+,43-,44+,45-,46-,47+,48-,49-,53-,54+,55+,56?,57?,58?,59?,60?/m0/s1" }, // new String[] { "polymeric_metabolites", "C(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CN", "six_peptide", "InChI=1S/C75H125N21O23/c1-33(2)29-35(5)57(102)38(8)62(106)88-45(31-51(77)100)64(108)86-43(17-15-26-84-74(80)81)58(103)59(104)70(114)91-53(36(6)37(7)61(79)105)66(110)93-55-40(10)119-73(117)49-19-13-14-28-96(49)72(116)47(32-52(78)101)90-69(113)56(60(118-12)41-20-22-42(98)23-21-41)94-65(109)48(24-25-50(76)99)95(11)71(115)46(30-34(3)4)89-63(107)44(18-16-27-85-75(82)83)87-67(111)54(39(9)97)92-68(55)112/h20-23,33-40,43-49,53-60,97-98,102-104H,13-19,24-32H2,1-12H3,(H2,76,99)(H2,77,100)(H2,78,101)(H2,79,105)(H,86,108)(H,87,111)(H,88,106)(H,89,107)(H,90,113)(H,91,114)(H,92,112)(H,93,110)(H,94,109)(H4,80,81,84)(H4,82,83,85)/t35?,36-,37+,38?,39+,40+,43-,44+,45-,46-,47+,48-,49-,53-,54+,55+,56?,57?,58?,59?,60?/m0/s1" }, // new String[] { "polymeric_metabolites", "C(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CN", "seven_peptide", 
"InChI=1S/C75H125N21O23/c1-33(2)29-35(5)57(102)38(8)62(106)88-45(31-51(77)100)64(108)86-43(17-15-26-84-74(80)81)58(103)59(104)70(114)91-53(36(6)37(7)61(79)105)66(110)93-55-40(10)119-73(117)49-19-13-14-28-96(49)72(116)47(32-52(78)101)90-69(113)56(60(118-12)41-20-22-42(98)23-21-41)94-65(109)48(24-25-50(76)99)95(11)71(115)46(30-34(3)4)89-63(107)44(18-16-27-85-75(82)83)87-67(111)54(39(9)97)92-68(55)112/h20-23,33-40,43-49,53-60,97-98,102-104H,13-19,24-32H2,1-12H3,(H2,76,99)(H2,77,100)(H2,78,101)(H2,79,105)(H,86,108)(H,87,111)(H,88,106)(H,89,107)(H,90,113)(H,91,114)(H,92,112)(H,93,110)(H,94,109)(H4,80,81,84)(H4,82,83,85)/t35?,36-,37+,38?,39+,40+,43-,44+,45-,46-,47+,48-,49-,53-,54+,55+,56?,57?,58?,59?,60?/m0/s1" }, // new String[] { "polymeric_metabolites", "C(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CN", "eight_peptide", "InChI=1S/C75H125N21O23/c1-33(2)29-35(5)57(102)38(8)62(106)88-45(31-51(77)100)64(108)86-43(17-15-26-84-74(80)81)58(103)59(104)70(114)91-53(36(6)37(7)61(79)105)66(110)93-55-40(10)119-73(117)49-19-13-14-28-96(49)72(116)47(32-52(78)101)90-69(113)56(60(118-12)41-20-22-42(98)23-21-41)94-65(109)48(24-25-50(76)99)95(11)71(115)46(30-34(3)4)89-63(107)44(18-16-27-85-75(82)83)87-67(111)54(39(9)97)92-68(55)112/h20-23,33-40,43-49,53-60,97-98,102-104H,13-19,24-32H2,1-12H3,(H2,76,99)(H2,77,100)(H2,78,101)(H2,79,105)(H,86,108)(H,87,111)(H,88,106)(H,89,107)(H,90,113)(H,91,114)(H,92,112)(H,93,110)(H,94,109)(H4,80,81,84)(H4,82,83,85)/t35?,36-,37+,38?,39+,40+,43-,44+,45-,46-,47+,48-,49-,53-,54+,55+,56?,57?,58?,59?,60?/m0/s1" }, new String[] { "polymeric_metabolites", "[$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))]", "e1_polyketide", 
"InChI=1S/C37H67NO13/c1-14-25-37(10,45)30(41)20(4)27(39)18(2)16-35(8,44)32(51-34-28(40)24(38(11)12)15-19(3)47-34)21(5)29(22(6)33(43)49-25)50-26-17-36(9,46-13)31(42)23(7)48-26/h18-26,28-32,34,40-42,44-45H,14-17H2,1-13H3/t18-,19-,20+,21+,22-,23+,24+,25-,26+,28-,29+,30-,31+,32-,34+,35-,36-,37-/m1/s1" }, new String[] { "polymeric_metabolites", "[$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))]", "e2_polyketide", "InChI=1S/C37H67NO13/c1-14-25-37(10,45)30(41)20(4)27(39)18(2)16-35(8,44)32(51-34-28(40)24(38(11)12)15-19(3)47-34)21(5)29(22(6)33(43)49-25)50-26-17-36(9,46-13)31(42)23(7)48-26/h18-26,28-32,34,40-42,44-45H,14-17H2,1-13H3/t18-,19-,20+,21+,22-,23+,24+,25-,26+,28-,29+,30-,31+,32-,34+,35-,36-,37-/m1/s1" }, new String[] { "polymeric_metabolites", "[$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))]", "e3_polyketide", "InChI=1S/C37H67NO13/c1-14-25-37(10,45)30(41)20(4)27(39)18(2)16-35(8,44)32(51-34-28(40)24(38(11)12)15-19(3)47-34)21(5)29(22(6)33(43)49-25)50-26-17-36(9,46-13)31(42)23(7)48-26/h18-26,28-32,34,40-42,44-45H,14-17H2,1-13H3/t18-,19-,20+,21+,22-,23+,24+,25-,26+,28-,29+,30-,31+,32-,34+,35-,36-,37-/m1/s1" }, new String[] { "polymeric_metabolites", 
"[$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))]", "e4_polyketide", "InChI=1S/C37H67NO13/c1-14-25-37(10,45)30(41)20(4)27(39)18(2)16-35(8,44)32(51-34-28(40)24(38(11)12)15-19(3)47-34)21(5)29(22(6)33(43)49-25)50-26-17-36(9,46-13)31(42)23(7)48-26/h18-26,28-32,34,40-42,44-45H,14-17H2,1-13H3/t18-,19-,20+,21+,22-,23+,24+,25-,26+,28-,29+,30-,31+,32-,34+,35-,36-,37-/m1/s1" }, new String[] { "polymeric_metabolites", "[$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))]", "e5_polyketide", "InChI=1S/C37H67NO13/c1-14-25-37(10,45)30(41)20(4)27(39)18(2)16-35(8,44)32(51-34-28(40)24(38(11)12)15-19(3)47-34)21(5)29(22(6)33(43)49-25)50-26-17-36(9,46-13)31(42)23(7)48-26/h18-26,28-32,34,40-42,44-45H,14-17H2,1-13H3/t18-,19-,20+,21+,22-,23+,24+,25-,26+,28-,29+,30-,31+,32-,34+,35-,36-,37-/m1/s1" }, new String[] { "polymeric_metabolites", 
"[$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))][$(*C([C,N,O,F,Cl,H])C(=O)),$(*C([C,N,O,F,Cl,H])C([H])(O([H,C]))),$(*C([C,N,O,F,Cl,H])=C),$(*C([H])([C,N,O,F,Cl,H])C([H]))]", "e6_polyketide", "InChI=1S/C37H67NO13/c1-14-25-37(10,45)30(41)20(4)27(39)18(2)16-35(8,44)32(51-34-28(40)24(38(11)12)15-19(3)47-34)21(5)29(22(6)33(43)49-25)50-26-17-36(9,46-13)31(42)23(7)48-26/h18-26,28-32,34,40-42,44-45H,14-17H2,1-13H3/t18-,19-,20+,21+,22-,23+,24+,25-,26+,28-,29+,30-,31+,32-,34+,35-,36-,37-/m1/s1" }, new String[] { "rare_atoms", "P([O])([O])([O])=O", "phosphates", "InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1" }, new String[] { "rare_atoms", "[#2]", "helium_atom", "" }, new String[] { "rare_atoms", "[#3]", "lithium_atom", "InChI=1S/ClH.Li/h1H;/q;+1/p-1" }, new String[] { "rare_atoms", "[#4]", "berylium_atom", "" }, new String[] { "rare_atoms", "[#5]", "boron_atom", "" }, new String[] { "rare_atoms", "[#9]", "fluorine_atom", "" }, new String[] { "rare_atoms", "[#10]", "neon_atom", "" }, new String[] { "rare_atoms", "[#11]", "sodium_atom", "" }, new String[] { "rare_atoms", "[#12]", "magnesium_atom", "" }, new String[] { "rare_atoms", "[#13]", "aluminum_atom", "" }, new String[] { "rare_atoms", "[#14]", "silicon_atom", 
"InChI=1S/C10H30O5Si5/c1-16(2)11-17(3,4)13-19(7,8)15-20(9,10)14-18(5,6)12-16/h1-10H3" }, new String[] { "rare_atoms", "[#15]", "phosphorus_atom", "" }, new String[] { "rare_atoms", "[#17]", "chlorine_atom", "" }, new String[] { "rare_atoms", "[#18]", "argon_atom", "" }, new String[] { "rare_atoms", "[#19]", "potassium_atom", "" }, new String[] { "rare_atoms", "[#20]", "calcium_atom", "" }, new String[] { "rare_atoms", "[#21]", "sc_atom", "" }, new String[] { "rare_atoms", "[#22]", "titanium_atom", "" }, new String[] { "rare_atoms", "[#23]", "vanadium_atom", "" }, new String[] { "rare_atoms", "[#24]", "chromium_atom", "" }, new String[] { "rare_atoms", "[#25]", "manganese_atom", "" }, new String[] { "rare_atoms", "[#26]", "iron_atom", "" }, new String[] { "rare_atoms", "[#27]", "cobalt_atom", "" }, new String[] { "rare_atoms", "[#28]", "nickel_atom", "" }, new String[] { "rare_atoms", "[#29]", "copper_atom", "" }, new String[] { "rare_atoms", "[#30]", "zinc_atom", "" }, new String[] { "rare_atoms", "[#31]", "ga_atom", "" }, new String[] { "rare_atoms", "[#32]", "germanium_atom", "" }, new String[] { "rare_atoms", "[#33]", "as_atom", "" }, new String[] { "rare_atoms", "[#34]", "selenium_atom", "" }, new String[] { "rare_atoms", "[#35]", "bromine_atom", "InChI=1S/CH3Br/c1-2/h1H3" }, new String[] { "rare_atoms", "[#36]", "kr_atom", "" }, new String[] { "rare_atoms", "[#37]", "rb_atom", "" }, new String[] { "rare_atoms", "[#38]", "sr_atom", "" }, new String[] { "rare_atoms", "[#39]", "y_atom", "" }, new String[] { "rare_atoms", "[#40]", "zr_atom", "" }, new String[] { "rare_atoms", "[#41]", "nb_atom", "" }, new String[] { "rare_atoms", "[#42]", "mo_atom", "" }, new String[] { "rare_atoms", "[#43]", "tc_atom", "" }, new String[] { "rare_atoms", "[#44]", "ruthenium_atom", "" }, new String[] { "rare_atoms", "[#45]", "rhodium_atom", "" }, new String[] { "rare_atoms", "[#46]", "palladium_atom", "" }, new String[] { "rare_atoms", "[#47]", "silver_atom", "" }, new String[] { 
"rare_atoms", "[#48]", "cadmium_atom", "" }, new String[] { "rare_atoms", "[#49]", "indium_atom", "" }, new String[] { "rare_atoms", "[#50]", "tin_atom", "" }, new String[] { "rare_atoms", "[#51]", "sb_atom", "" }, new String[] { "rare_atoms", "[#52]", "te_atom", "" }, new String[] { "rare_atoms", "[#53]", "iodine_atom", "" }, new String[] { "rare_atoms", "[#54]", "xe_atom", "" }, new String[] { "rare_atoms", "[#55]", "cesium_atom", "" }, new String[] { "rare_atoms", "[#56]", "barium_atom", "" }, new String[] { "rare_atoms", "[#57]", "la_atom", "" }, new String[] { "rare_atoms", "[#58]", "cesium_atom", "" }, new String[] { "rare_atoms", "[#59]", "pr_atom", "" }, new String[] { "rare_atoms", "[#60]", "nd_atom", "" }, new String[] { "rare_atoms", "[#61]", "pm_atom", "" }, new String[] { "rare_atoms", "[#62]", "sm_atom", "" }, new String[] { "rare_atoms", "[#63]", "eu_atom", "" }, new String[] { "rare_atoms", "[#64]", "gd_atom", "" }, new String[] { "rare_atoms", "[#65]", "tb_atom", "" }, new String[] { "rare_atoms", "[#66]", "dy_atom", "" }, new String[] { "rare_atoms", "[#67]", "ho_atom", "" }, new String[] { "rare_atoms", "[#68]", "er_atom", "" }, new String[] { "rare_atoms", "[#69]", "tm_atom", "" }, new String[] { "rare_atoms", "[#70]", "yb_atom", "" }, new String[] { "rare_atoms", "[#71]", "lu_atom", "" }, new String[] { "rare_atoms", "[#72]", "hf_atom", "" }, new String[] { "rare_atoms", "[#73]", "ta_atom", "" }, new String[] { "rare_atoms", "[#74]", "w_atom", "" }, new String[] { "rare_atoms", "[#75]", "re_atom", "" }, new String[] { "rare_atoms", "[#76]", "os_atom", "" }, new String[] { "rare_atoms", "[#77]", "iridum_atom", "" }, new String[] { "rare_atoms", "[#78]", "platinum_atom", "" }, new String[] { "rare_atoms", "[#79]", "gold_atom", "" }, new String[] { "rare_atoms", "[#80]", "mercury_atom", "" }, new String[] { "rare_atoms", "[#81]", "tl_atom", "" }, new String[] { "rare_atoms", "[#82]", "lead_atom", "" }, new String[] { "rare_atoms", "[#83]", 
"bl_atom", "" }, new String[] { "rare_atoms", "[#84]", "po_atom", "" }, new String[] { "rare_atoms", "[#85]", "at_atom", "" }, new String[] { "rare_atoms", "[#86]", "rn_atom", "" }, new String[] { "alkaloids", "N1(C)[C]2CC[C]1CCC2", "tropane", "InChI=1S/C17H21NO4/c1-18-12-8-9-13(18)15(17(20)21-2)14(10-12)22-16(19)11-6-4-3-5-7-11/h3-7,12-15H,8-10H2,1-2H3/t12-,13+,14-,15+/m0/s1" }, // new String[] { "alkaloids", "N12CCCC1CCC2", "pyrrolizidine", "InChI=1S/C8H13NO2/c10-5-6-1-3-9-4-2-7(11)8(6)9/h1,7-8,10-11H,2-5H2/t7-,8-/m1/s1" }, new String[] { "alkaloids", "C1CCNCC1", "piperidine", "InChI=1S/C10H14N2/c1-2-7-12-10(5-1)9-4-3-6-11-8-9/h3-4,6,8,10,12H,1-2,5,7H2" }, new String[] { "alkaloids", "C1CCN2CCCCC2C1", "quinolizidine", "InChI=1S/C15H24N2O2/c18-12-4-5-16-8-10-6-11(14(16)7-12)9-17-13(10)2-1-3-15(17)19/h10-14,18H,1-9H2/t10-,11-,12+,13-,14-/m1/s1" }, new String[] { "alkaloids", "N12CCCC1CCCC2", "indolizidine", "InChI=1S/C8H15NO3/c10-5-2-1-3-9-4-6(11)8(12)7(5)9/h5-8,10-12H,1-4H2/t5-,6-,7-,8-/m1/s1" }, new String[] { "alkaloids", "n1cccc1", "pyrrole", "InChI=1S/C5H7N/c1-5-3-2-4-6-5/h2-4,6H,1H3" }, new String[] { "alkaloids", "n1ccccc1", "pyridine", "InChI=1S/C5H4BrN/c6-5-2-1-3-7-4-5/h1-4H" }, // new String[] { "alkaloids", "C1(C=NC=C2)=C2C=CC=C1", "Isoquinoline", "InChI=1S/C10H6N2/c11-5-9-7-12-6-8-3-1-2-4-10(8)9/h1-4,6-7H" }, // new String[] { "alkaloids", "C1=COC=N1", "oxazole", "InChI=1S/C8H6ClNO/c1-5-10-7-4-6(9)2-3-8(7)11-5/h2-4H,1H3" }, new String[] { "alkaloids", "n1occc1", "isoxazole", "InChI=1S/C18H22N2O2.ClH/c1-2-14(13-20-10-6-7-11-20)18(21)17-12-16(19-22-17)15-8-4-3-5-9-15;/h3-5,8-9,12,14H,2,6-7,10-11,13H2,1H3;1H" }, new String[] { "alkaloids", "n1ccsc1", "thiazole", "InChI=1S/C4H3NOS/c6-2-4-1-5-3-7-4/h1-3H" }, // new String[] { "alkaloids", "c1ccc2c(c1)cncn2", "quinazoline", "InChI=1S/C8H8ClN5/c9-6-3(10)1-2-4-5(6)7(11)14-8(12)13-4/h1-2H,10H2,(H4,11,12,13,14)" }, new String[] { "alkaloids", "c1nc2ccccc2(nc1)", "quinoxaline", 
"InChI=1S/C9H5N3/c10-5-7-6-11-8-3-1-2-4-9(8)12-7/h1-4,6H" }, // new String[] { "alkaloids", "n1c3c(cc2c1cccc2)cccc3", "acridine", "InChI=1S/C14H11ClN2O/c1-18-9-3-5-12-11(7-9)14(16)10-4-2-8(15)6-13(10)17-12/h2-7H,1H3,(H2,16,17)" }, new String[] { "alkaloids", "n1cccc2ccccc12", "quinoline", "InChI=1S/C18H11NO2/c20-17-12-6-2-3-7-13(12)18(21)16(17)15-10-9-11-5-1-4-8-14(11)19-15/h1-10,16H" }, // new String[] { "alkaloids", "C12=C(C=CN2)C=CC=C1", "indole", "InChI=1S/C8H6BrN/c9-7-2-1-6-3-4-10-8(6)5-7/h1-5,10H" }, // new String[] { "alkaloids", "c1cncN1", "imidazole", "InChI=1S/C7H7N3/c8-7-9-5-3-1-2-4-6(5)10-7/h1-4H,(H3,8,9,10)" }, // new String[] { "alkaloids", "c1c2c(nc[nH]2)ncn1", "purine", "InChI=1S/C10H13N5/c1-7(2)3-4-11-9-8-10(13-5-12-8)15-6-14-9/h3,5-6H,4H2,1-2H3,(H2,11,12,13,14,15)" }, new String[] { "alkaloids", "C3C1=CCC=CC=C1c2c(cccc2)CC3", "colchicine_skeleton", "InChI=1S/C22H25NO6/c1-12(24)23-16-8-6-13-10-19(27-3)21(28-4)22(29-5)20(13)14-7-9-18(26-2)17(25)11-15(14)16/h7,9-11,16H,6,8H2,1-5H3,(H,23,24)/t16-/m0/s1" }, new String[] { "alkaloids", "c1ccc(cc1)CN", "benzylamine", "InChI=1S/C7H9N/c8-6-7-4-2-1-3-5-7/h1-5H,6,8H2" }, new String[] { "alkaloids", "NCCCCN", "putrescine", "InChI=1S/C9H19N3O3/c10-5-1-2-6-12-7(9(14)15)3-4-8(11)13/h7,12H,1-6,10H2,(H2,11,13)(H,14,15)/t7-/m0/s1" }, new String[] { "alkaloids", "NCCCCCN", "cadaverine", "InChI=1S/C5H14N2/c6-4-2-1-3-5-7/h1-7H2" }, // new String[] { "alkaloids", "C1CCC(NC1)C2=COC=C2", "nuphar", "InChI=1S/C15H25NO2/c1-11-4-5-14(12-7-9-18-10-12)16-13(11)6-8-15(2,3)17/h7,9-11,13-14,16-17H,4-6,8H2,1-3H3/t11-,13+,14+/m1/s1" }, // new String[] { "alkaloids", "c1ccc2c(c1)CCN3CCC4(CCCCC234)", "erithrinins", "InChI=1S/C18H21NO3/c1-21-16-9-12-5-7-19-8-6-13-3-4-14(20)11-18(13,19)15(12)10-17(16)22-2/h3-4,6,9-10,14,20H,5,7-8,11H2,1-2H3/t14-,18-/m0/s1" }, new String[] { "alkaloids", "O=C1OCc2ccccc12", "phthalides", "InChI=1S/C12H13NO2/c1-13-8-4-7-12(13)10-6-3-2-5-9(10)11(14)15-12/h2-3,5-6H,4,7-8H2,1H3" }, // new String[] { 
"alkaloids", "C1=C2C=COC2=NC3=CC=CC=C31", "furoquinoline", "InChI=1S/C12H9NO2/c1-14-11-8-4-2-3-5-10(8)13-12-9(11)6-7-15-12/h2-7H,1H3" }, new String[] { "carbon_rings", "[C,c]1[C,c][C,c]1", "cyclopropane", "" }, new String[] { "carbon_rings", "[C,c]1[C,c][C,c][C,c]1", "cyclobutane", "InChI=1S/C4H8/c1-2-4-3-1/h1-4H2" }, new String[] { "carbon_rings", "[C,c]1[C,c][C,c][C,c][C,c]1", "cyclopentane", "" }, new String[] { "carbon_rings", "[C,c]1[C,c][C,c][C,c][C,c][C,c]1", "cyclohexane", "" }, new String[] { "carbon_rings", "[C,c]1[C,c][C,c][C,c][C,c][C,c][C,c]1", "cycloheptane", "" }, new String[] { "carbon_rings", "[C,c]1[C,c][C,c][C,c][C,c][C,c][C,c][C,c]1", "cyclooctane", "" }, new String[] { "carbon_rings", "[C,c]1[C,c][C,c][C,c][C,c][C,c][C,c][C,c][C,c]1", "cyclononane", "" }, new String[] { "carbon_rings", "[C,c]1[C,c][C,c][C,c][C,c][C,c][C,c][C,c][C,c][C,c]1", "cyclodecane", "" }, new String[] { "hetero_rings", "[c,C,n,N,o,O,s,S,p,P]1[c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P]1", "hetero_cyclopropane", "" }, new String[] { "hetero_rings", "[c,C,n,N,o,O,s,S,p,P]1[c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P]1", "hetero_cyclobutane", "InChI=1S/C4H8/c1-2-4-3-1/h1-4H2" }, new String[] { "hetero_rings", "[c,C,n,N,o,O,s,S,p,P]1[c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P]1", "hetero_cyclopentane", "" }, new String[] { "hetero_rings", "[c,C,n,N,o,O,s,S,p,P]1[c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P]1", "hetero_cyclohexane", "" }, new String[] { "hetero_rings", "[c,C,n,N,o,O,s,S,p,P]1[c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P]1", "hetero_cycloheptane", "" }, new String[] { "hetero_rings", "[c,C,n,N,o,O,s,S,p,P]1[c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P]1", 
"hetero_cyclooctane", "" }, new String[] { "hetero_rings", "[c,C,n,N,o,O,s,S,p,P]1[c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P]1", "hetero_cyclononane", "" }, new String[] { "hetero_rings", "[c,C,n,N,o,O,s,S,p,P]1[c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P][c,C,n,N,o,O,s,S,p,P]1", "hetero_cyclodecane", "" }, new String[] { "terpenoids", "CCC(C)C", "isopentenyl", "" }, new String[] { "terpenoids", "CCC(C)CCCC(C)C", "geranyl", "" }, new String[] { "terpenoids", "CCC(C)CCCC(C)CCCC(C)C", "farnesyl", "" }, new String[] { "terpenoids", "CCC(C)CCCC(C)CCCC(C)CCCC(C)C", "geranyl_geranyl", "" }, new String[] { "terpenoids", "CC(C)CCCC(C)CCCC(C)CCCCC(C)CCCC(C)CCCC(C)C", "squalenes", "" }, new String[] { "terpenoids", "C1CC1(C)C", "one_three_isopentenyl", "" }, new String[] { "terpenoids", "C1CC(C1)C", "one_four_isopentenyl", "" }, new String[] { "terpenoids", "C1CC(C)C1", "one_five_isopentenyl", "" }, new String[] { "terpenoids", "CC1C(C1)C", "two_four_isopentenyl", "" }, new String[] { "terpenoids", "CC1C(C)C1", "two_five_isopentenyl", "" }, new String[] { "terpenoids", "C1CC(C)CC1CC(C)C", "one_one_geranyl", "" }, new String[] { "terpenoids", "C1CC(C)CCC1C(C)C", "one_two_geranyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC1(C)C", "one_three_geranyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C1)C", "one_four_geranyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C)C1", "one_five_geranyl", "" }, new String[] { "terpenoids", "CC1C(C)CC1CC(C)C", "two_one_geranyl", "" }, new String[] { "terpenoids", "CC1C(C)CCC1C(C)C", "two_two_geranyl", "" }, new String[] { "terpenoids", "CC1C(C)CCCC1(C)C", "two_three_geranyl", "" }, new String[] { "terpenoids", "CC1C(C)CCCC(C1)C", "two_four_geranyl", "" }, new String[] { "terpenoids", 
"CC1C(C)CCCC(C)C1", "two_five_geranyl", "" }, new String[] { "terpenoids", "CCC1(C)CC1CC(C)C", "three_one_geranyl", "" }, new String[] { "terpenoids", "CCC1(C)CCC1C(C)C", "three_two_geranyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC1(C)C", "three_three_geranyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C1)C", "three_four_geranyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C)C1", "three_five_geranyl", "" }, new String[] { "terpenoids", "CCC(C1)CC1CC(C)C", "four_one_geranyl", "" }, new String[] { "terpenoids", "CCC(C1)CCC1C(C)C", "four_two_geranyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC1(C)C", "four_three_geranyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C1)C", "four_four_geranyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C)C1", "four_five_geranyl", "" }, // new String[] { "terpenoids", "CCC(C)C1C1CC(C)C", "five_one_geranyl", "" }, new String[] { "terpenoids", "CCC(C)C1CC1C(C)C", "five_two_geranyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC1(C)C", "five_three_geranyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C1)C", "five_five_geranyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C)C1", "five_five_geranyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C)CC1CC(C)C", "one_one_farnesyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C)CCC1C(C)C", "one_two_farnesyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C)CCCC1(C)C", "one_three_farnesyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C)CCCC(C1)C", "one_four_farnesyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C)CCCC(C)C1", "one_five_farnesyl", "" }, new String[] { "terpenoids", "CC1C(C)CCCC(C)CC1CC(C)C", "two_one_farnesyl", "" }, new String[] { "terpenoids", "CC1C(C)CCCC(C)CCC1C(C)C", "two_two_farnesyl", "" }, new String[] { "terpenoids", "CC1C(C)CCCC(C)CCCC1(C)C", "two_three_farnesyl", "" }, new String[] { "terpenoids", "CC1C(C)CCCC(C)CCCC(C1)C", "two_four_farnesyl", "" }, new String[] { "terpenoids", "CC1C(C)CCCC(C)CCCC(C)C1", 
"two_five_farnesyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C)CC1CC(C)C", "three_one_farnesyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C)CCC1C(C)C", "three_two_farnesyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C)CCCC1(C)C", "three_three_farnesyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C)CCCC(C1)C", "three_four_farnesyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C)CCCC(C)C1", "three_five_farnesyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C)CC1CC(C)C", "four_one_farnesyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C)CCC1C(C)C", "four_two_farnesyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C)CCCC1(C)C", "four_three_farnesyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C)CCCC(C1)C", "four_four_farnesyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C)CCCC(C)C1", "four_five_farnesyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C)CC1CC(C)C", "five_one_farnesyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C)CCC1C(C)C", "five_two_farnesyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C)CCCC1(C)C", "five_three_farnesyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C)CCCC(C1)C", "five_five_farnesyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C)CCCC(C)C1", "five_five_farnesyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C)CCCC(C)CC1CC(C)C", "one_one_geranyl_geranyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C)CCCC(C)CCC1C(C)C", "one_two_geranyl_geranyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C)CCCC(C)CCCC1(C)C", "one_three_geranyl_geranyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C)CCCC(C)CCCC(C1)C", "one_four_geranyl_geranyl", "" }, new String[] { "terpenoids", "C1CC(C)CCCC(C)CCCC(C)CCCC(C)C1", "one_five_geranyl_geranyl", "" }, new String[] { "terpenoids", "CC1C(C)CCCC(C)CCCC(C)CC1CC(C)C", "two_one_geranyl_geranyl", "" }, new String[] { "terpenoids", "CC1C(C)CCCC(C)CCCC(C)CCC1C(C)C", "two_two_geranyl_geranyl", "" }, new String[] { "terpenoids", 
"CC1C(C)CCCC(C)CCCC(C)CCCC1(C)C", "two_three_geranyl_geranyl", "" }, new String[] { "terpenoids", "CC1C(C)CCCC(C)CCCC(C)CCCC(C1)C", "two_four_geranyl_geranyl", "" }, new String[] { "terpenoids", "CC1C(C)CCCC(C)CCCC(C)CCCC(C)C1", "two_five_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C)CCCC(C)CC1CC(C)C", "three_one_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C)CCCC(C)CCC1C(C)C", "three_two_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C)CCCC(C)CCCC1(C)C", "three_three_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C)CCCC(C)CCCC(C1)C", "three_four_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC1(C)CCCC(C)CCCC(C)CCCC(C)C1", "three_five_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C)CCCC(C)CC1CC(C)C", "four_one_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C)CCCC(C)CCC1C(C)C", "four_two_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C)CCCC(C)CCCC1(C)C", "four_three_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C)CCCC(C)CCCC(C1)C", "four_four_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC(C1)CCCC(C)CCCC(C)CCCC(C)C1", "four_five_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C)CCCC(C)CC1CC(C)C", "five_one_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C)CCCC(C)CCC1C(C)C", "five_two_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C)CCCC(C)CCCC1(C)C", "five_three_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C)CCCC(C)CCCC(C1)C", "five_four_geranyl_geranyl", "" }, new String[] { "terpenoids", "CCC(C)C1CCC(C)CCCC(C)CCCC(C)C1", "five_five_geranyl_geranyl", "" }, // Nucleotide inhibitor patterns // See Anderson email Subject "Nucleotide inhibitors" Data "Oct 16 2014" // new String[] { "nucleotide_inhibitors", "[N,n]C2*C(C*)C(*)C2(*)", "nucleotide1", // 
"InChI=1S/C10H13N5O4/c1-5-3-15(10(18)12-9(5)17)8-2-6(13-14-11)7(4-16)19-8/h3,6-8,16H,2,4H2,1H3,(H,12,17,18)/t6-,7+,8+/m0/s1" }, // new String[] { "nucleotide_inhibitors", "[N,n,C,c]C2*C(C*)C(*)C2(*)", "nucleotide2", // "InChI=1S/C10H13N5O4/c1-5-3-15(10(18)12-9(5)17)8-2-6(13-14-11)7(4-16)19-8/h3,6-8,16H,2,4H2,1H3,(H,12,17,18)/t6-,7+,8+/m0/s1" }, // new String[] { "nucleotide_inhibitors", "OCC[N,n,C,c,S,s]1[N,n,C,c,S,s][N,n,C,c,S,s][N,n,C,c,S,s][N,n,C,c,S,s][N,n,C,c,S,s]1", "nucleotide3", // "InChI=1S/C27H52N3O7P/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-19-35-20-16-21-37-38(33,34)24-36-25(23-31)22-30-18-17-26(28)29-27(30)32/h17-18,25,31H,2-16,19-24H2,1H3,(H,33,34)(H2,28,29,32)/t25-/m0/s1" }, new String[] { "nucleotide_inhibitors", "OCC[N,n,C,c,S,s]1[N,n,C,c,S,s][N,n,C,c,S,s][N,n,C,c,S,s][N,n,C,c,S,s]1", "nucleotide4", "InChI=1S/C9H14N5O4P/c1-6(18-5-19(15,16)17)2-14-4-13-7-8(10)11-3-12-9(7)14/h3-4,6H,2,5H2,1H3,(H2,10,11,12)(H2,15,16,17)/t6-/m1/s1" }, // new String[] { "nucleotide_inhibitors", "O=C1N=C(N)C=CN1C", "nucleotide5c", // "InChI=1S/C8H11N3O3S/c9-5-1-2-11(8(13)10-5)6-4-15-7(3-12)14-6/h1-2,6-7,12H,3-4H2,(H2,9,10,13)/t6-,7+/m1/s1" }, new String[] { "nucleotide_inhibitors", "n2cnc1c(ncn1C)c2N", "nucleotide5a", "InChI=1S/C10H13N5O4/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(18)6(17)4(1-16)19-10/h2-4,6-7,10,16-18H,1H2,(H2,11,12,13)/t4-,6-,7+,10-/m1/s1" }, // new String[] { "nucleotide_inhibitors", "O=C1NC(=O)N(C=C1[C,H])C", "nucleotide5ut", // "InChI=1S/C10H13FN2O4/c1-5-3-13(10(16)12-9(5)15)8-2-6(11)7(4-14)17-8/h3,6-8,14H,2,4H2,1H3,(H,12,15,16)/t6-,7+,8+/m0/s1" }, // new String[] { "nucleotide_inhibitors", "O=C2NC(=Nc1c2(ncn1C))N", "nucleotide5g", // "InChI=1S/C9H13N5O4/c10-9-12-7-6(8(17)13-9)11-3-14(7)4-18-5(1-15)2-16/h3,5,15-16H,1-2,4H2,(H3,10,12,13,17)" }, new String[] { "nucleotide_inhibitors", "OCC3OC(n2cnc1c(ncnc12)N([H])([H]))C([O,H])C3(O)", "adenosines", 
"InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1" }, // new String[] { "nucleotide_inhibitors", "O=C1C=CN(C(=O)N1)C2OC(CO)C(O)C2([O,H])", "uridines", // "InChI=1S/C9H12N2O6.2H3O4P/c12-3-4-6(14)7(15)8(17-4)11-2-1-5(13)10-9(11)16;2*1-5(2,3)4/h1-2,4,6-8,12,14-15H,3H2,(H,10,13,16);2*(H3,1,2,3,4)/t4-,6-,7-,8-;;/m1../s1" }, // new String[] { "nucleotide_inhibitors", "O=C3NC(=Nc1c3(ncn1C2OC(CO)C(O)C2([O,H])))N([H])([H])", "guanosines", // "InChI=1S/C10H13N5O5/c11-10-13-7-4(8(19)14-10)12-2-15(7)9-6(18)5(17)3(1-16)20-9/h2-3,5-6,9,16-18H,1H2,(H3,11,13,14,19)/t3-,5-,6-,9-/m1/s1" }, // new String[] { "nucleotide_inhibitors", "O=C1N=C(N([H])([H]))C=CN1C2OC(CO)C(O)C2([O,H])", "cytosines", // "InChI=1S/C9H13N3O5.3H3O4P/c10-5-1-2-12(9(16)11-5)8-7(15)6(14)4(3-13)17-8;3*1-5(2,3)4/h1-2,4,6-8,13-15H,3H2,(H2,10,11,16);3*(H3,1,2,3,4)/t4-,6-,7-,8-;;;/m1.../s1" }, // new String[] { "nucleotide_inhibitors", "CC1=CN(C(=O)NC1=O)C2OC(CO)C(O)C2([O,H])", "thymidines", // "InChI=1S/C10H15N2O8P/c1-5-3-12(10(15)11-9(5)14)8-2-6(13)7(20-8)4-19-21(16,17)18/h3,6-8,13H,2,4H2,1H3,(H,11,14,15)(H2,16,17,18)/p-2" }, }; }