/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.act.biointerpretation.mechanisminspection; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileInputStream; import java.io.InputStream; import java.io.InputStreamReader; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Predicate; import java.util.stream.Collectors; public class ErosCorpus implements Iterable<Ero> { private static final Logger LOGGER = LogManager.getFormatterLogger(ErosCorpus.class); private final Class INSTANCE_CLASS_LOADER = getClass(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final String VALIDATION_EROS_FILE_NAME = "validation_eros.json"; private List<Ero> ros; private Map<Integer, Ero> roIdToEroMap; public ErosCorpus(List<Ero> ros) { this.ros = new ArrayList<>(ros); roIdToEroMap = new HashMap<>(); } public ErosCorpus() { this(new ArrayList<>()); } public List<Ero> getRos() { return ros; } public void setRos(List<Ero> eros) { this.ros = eros; } /** * Loads the mechanistic validation RO corpus from the resources directory. * * @throws IOException */ public void loadValidationCorpus() throws IOException { InputStream erosStream = INSTANCE_CLASS_LOADER.getResourceAsStream(VALIDATION_EROS_FILE_NAME); loadCorpus(erosStream); } /** * Loads an RO corpus from the supplied input stream. * * @param erosStream The input stream to load from. * @throws IOException */ public void loadCorpus(InputStream erosStream) throws IOException { ErosCorpus erosCorpus = OBJECT_MAPPER.readValue(erosStream, ErosCorpus.class); setRos(erosCorpus.getRos()); } /** * Get the list of RO ids in this corpus. * * @return The list of Ids. */ public List<Integer> getRoIds() { return ros.stream().map(ro -> ro.getId()).collect(Collectors.toList()); } /** * Filter by arbitrary predicate. * @filter the predicate to match. */ public void filterCorpus(Predicate<Ero> filter) { ros.removeIf(ro -> !filter.test(ro)); } /** * Filter corpus to contain only RO ids in the supplied list. * * @param roIdList The list of relevant ids. */ public void filterCorpusById(List<Integer> roIdList) { Set<Integer> roSet = new HashSet<>(roIdList); filterCorpus(ro -> roSet.contains(ro.getId())); } /** * Filter corpus to contain only RO with IDs in the supplied file. * * @param roIdFile A file containing RO ids, one per line. */ public void filterCorpusByIdFile(File roIdFile) throws IOException { List<Integer> roIds = getRoIdListFromFile(roIdFile); filterCorpusById(roIds); } /** * Builds an ro list from only the ros specified in the given file. * * @param file A file with one ro id per line. * @return List of relevant Eros from the corpus. */ private List<Integer> getRoIdListFromFile(File file) throws IOException { List<Integer> roIdList = new ArrayList<>(); try (BufferedReader eroReader = getErosReader(file)) { String roId; while ((roId = eroReader.readLine()) != null) { String trimmedId = roId.trim(); if (!trimmedId.equals(roId)) { LOGGER.warn("Leading or trailing whitespace found in ro id file."); } if (trimmedId.equals("")) { LOGGER.warn("Blank line detected in ro id file and ignored."); continue; } roIdList.add(Integer.parseInt(trimmedId)); } } return roIdList; } /** * Filter corpus to only contain ROs with the given number of substrates. * * @param count The required number of substrates. */ public void filterCorpusBySubstrateCount(Integer count) { filterCorpus(ro -> ro.getSubstrate_count().equals(count)); } /** * Filter corpus to only contain ROs with the given number of products. * * @param count The required number of products. */ public void filterCorpusByProductCount(Integer count) { filterCorpus(ro -> ro.getProduct_count().equals(count)); } /** * Retain only ROs with a name in this corpus. */ public void retainNamedRos() { ros.removeIf(ro -> ro.getName().isEmpty()); } /** * Gets the ERO with the given roId from the corpus. * * @param roId The ro id. * @return The Ero. */ public Ero getEro(Integer roId) { // If map already has entry for this roId, return it Ero result; if ((result = roIdToEroMap.get(roId)) != null) { return result; } // Otherwise build map in hopes of finding the correct ID along the way for (Ero ro : ros) { roIdToEroMap.put(ro.getId(), ro); } // Now, the ID should be there! if ((result = roIdToEroMap.get(roId)) != null) { return result; } // Now, if the ID is not there, throw an exception! throw new IllegalArgumentException("Supplied RO ID is not in corpus!"); } /** * Gets a reader for the RO ID file. * * @param erosFile A file containing the RO ids, with one RO ID per line. * @return A reader for the list of RO Ids. */ private BufferedReader getErosReader(File erosFile) throws FileNotFoundException { FileInputStream erosInputStream = new FileInputStream(erosFile); BufferedReader erosReader = new BufferedReader(new InputStreamReader(erosInputStream)); return erosReader; } @Override public Iterator<Ero> iterator() { return getRos().iterator(); } }