/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.twentyn.search.substructure; import chemaxon.formats.MolFormatException; import chemaxon.sss.SearchConstants; import chemaxon.sss.search.MolSearch; import chemaxon.sss.search.MolSearchOptions; import chemaxon.sss.search.SearchException; import chemaxon.struc.Molecule; import chemaxon.util.MolHandler; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.Consumer; public class SubstructureSearch { // TODO: are these options sufficient? Are there others we might want to use? /* Chemaxon exposes a very non-uniform means of configuring substructure search. Hence the mess of lambdas below. * Consumer solves the Function<T, void> problem. */ private static final Map<String, Consumer<MolSearchOptions>> SEARCH_OPTION_ENABLERS = Collections.unmodifiableMap(new HashMap<String, Consumer<MolSearchOptions>>() {{ put("CHARGE_MATCHING_EXACT", (so -> so.setChargeMatching(SearchConstants.CHARGE_MATCHING_EXACT))); put("CHARGE_MATCHING_IGNORE", (so -> so.setChargeMatching(SearchConstants.CHARGE_MATCHING_IGNORE))); put("IMPLICIT_H_MATCHING_ENABLED", (so -> so.setImplicitHMatching(SearchConstants.IMPLICIT_H_MATCHING_ENABLED))); put("IMPLICIT_H_MATCHING_DISABLED", (so -> so.setImplicitHMatching(SearchConstants.IMPLICIT_H_MATCHING_DISABLED))); put("IMPLICIT_H_MATCHING_IGNORE", (so -> so.setImplicitHMatching(SearchConstants.IMPLICIT_H_MATCHING_IGNORE))); put("STEREO_EXACT", (so -> so.setStereoSearchType(SearchConstants.STEREO_EXACT))); put("STEREO_IGNORE", (so -> so.setStereoSearchType(SearchConstants.STEREO_IGNORE))); put("STEREO_MODEL_COMPREHENSIVE", (so -> so.setStereoModel(SearchConstants.STEREO_MODEL_COMPREHENSIVE))); put("STEREO_MODEL_GLOBAL", (so -> so.setStereoModel(SearchConstants.STEREO_MODEL_GLOBAL))); put("STEREO_MODEL_LOCAL", (so -> so.setStereoModel(SearchConstants.STEREO_MODEL_LOCAL))); put("TAUTOMER_SEARCH_ON", (so -> so.setTautomerSearch(SearchConstants.TAUTOMER_SEARCH_ON))); put("TAUTOMER_SEARCH_OFF", (so -> so.setTautomerSearch(SearchConstants.TAUTOMER_SEARCH_OFF))); put("TAUTOMER_SEARCH_ON_IGNORE_TAUTOMERSTEREO", (so -> so.setTautomerSearch(SearchConstants.TAUTOMER_SEARCH_ON_IGNORE_TAUTOMERSTEREO))); put("VAGUE_BOND_OFF", (so -> so.setVagueBondLevel(SearchConstants.VAGUE_BOND_OFF))); put("VAGUE_BOND_LEVEL_HALF", (so -> so.setVagueBondLevel(SearchConstants.VAGUE_BOND_LEVEL_HALF))); put("VAGUE_BOND_LEVEL1", (so -> so.setVagueBondLevel(SearchConstants.VAGUE_BOND_LEVEL1))); put("VAGUE_BOND_LEVEL2", (so -> so.setVagueBondLevel(SearchConstants.VAGUE_BOND_LEVEL2))); put("VAGUE_BOND_LEVEL3", (so -> so.setVagueBondLevel(SearchConstants.VAGUE_BOND_LEVEL3))); put("VAGUE_BOND_LEVEL4", (so -> so.setVagueBondLevel(SearchConstants.VAGUE_BOND_LEVEL4))); }}); private static final MolSearchOptions DEFAULT_SEARCH_OPTIONS = new MolSearchOptions(SearchConstants.SUBSTRUCTURE); static { DEFAULT_SEARCH_OPTIONS.setImplicitHMatching(SearchConstants.IMPLICIT_H_MATCHING_DEFAULT); DEFAULT_SEARCH_OPTIONS.setVagueBondLevel(SearchConstants.VAGUE_BOND_DEFAULT); DEFAULT_SEARCH_OPTIONS.setTautomerSearch(SearchConstants.TAUTOMER_SEARCH_DEFAULT); DEFAULT_SEARCH_OPTIONS.setStereoSearchType(SearchConstants.STEREO_IGNORE); // TODO: is this preferable? } public SubstructureSearch() { } public MolSearch constructSearch(String smiles, List<String> extraOpts) throws MolFormatException { // Process any custom options. MolSearchOptions searchOptions; if (extraOpts == null || extraOpts.size() == 0) { searchOptions = DEFAULT_SEARCH_OPTIONS; } else { searchOptions = new MolSearchOptions(SearchConstants.SUBSTRUCTURE); // Apply all the specified extra search options using the key -> function mapping above. for (String opt : extraOpts) { if (!SEARCH_OPTION_ENABLERS.containsKey(opt)) { throw new IllegalArgumentException(String.format("Unrecognized search option: %s", opt)); } SEARCH_OPTION_ENABLERS.get(opt).accept(searchOptions); } } // Import the query and set it + the specified or default search options. MolSearch ms = new MolSearch(); ms.setSearchOptions(searchOptions); Molecule query = new MolHandler(smiles, true).getMolecule(); ms.setQuery(query); return ms; } public boolean matchSubstructure(Molecule target, MolSearch search) throws SearchException { search.setTarget(target); /* hits are arrays of atom ids in the target that matched the query. If multiple sites in the target matched, * then there should be multiple arrays of atom ids (but we don't care since we're just looking for any match). */ int[][] hits = search.findAll(); if (hits != null) { for (int i = 0; i < hits.length; i++) { if (hits[i].length > 0) { return true; } } } return false; } }