/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.lcms.db.io.parser;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.tuple.Pair;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parses a construct analysis file into a list of (construct id, chemicals) pairs.
 *
 * <p>The expected layout, as implemented by {@link #parse(File)}:
 * <pre>
 * &gt;construct id
 * chemical&lt;TAB&gt;kind
 * chemical&lt;TAB&gt;kind
 * &gt;another construct id
 * ...
 * </pre>
 * A line matching {@link #CONSTRUCT_DESIGNATOR_PATTERN} starts a new construct; every other line
 * is treated as a chemical entry belonging to the most recently started construct.
 *
 * <p>Results are accumulated in an instance-level list, so calling {@link #parse(File)} more than
 * once on the same parser appends to the earlier results.
 */
public class ConstructAnalysisFileParser {
  /** Separator between the chemical and kind fields of a chemical entry line. */
  public static final String PRODUCT_KIND_SEPARATOR = "\t";
  /** Matches a construct header line: a leading {@code >} followed by the construct id (group 1). */
  public static final Pattern CONSTRUCT_DESIGNATOR_PATTERN = Pattern.compile("^>(.*)$");
  /**
   * Not referenced inside this class.
   * NOTE(review): presumably the {@code kind} value that marks an intermediate product; confirm
   * against callers.
   */
  public static final String INTERMEDIATE_PRODUCT_DESIGNATOR = "INTERMEDIATE";

  /** Parsed constructs, in the order they were encountered. */
  private List<Pair<String, List<ConstructAssociatedChemical>>> constructProducts = new ArrayList<>();

  /**
   * Reads {@code inFile} and appends one entry per construct to the list returned by
   * {@link #getConstructProducts()}.
   *
   * <p>The file is decoded as UTF-8 regardless of the platform default charset, so the same input
   * yields the same strings on every machine.
   *
   * <p>Chemical entry lines that do not split into exactly two tab-separated fields are reported on
   * {@code System.err} and skipped.
   *
   * @param inFile the construct analysis file to read
   * @throws IOException if the file cannot be opened or read
   * @throws RuntimeException if a non-header line appears before any construct header; constructs
   *     completed before the offending line have already been recorded at that point
   */
  public void parse(File inFile) throws IOException {
    // An explicit charset is used instead of FileReader, which falls back to the platform default
    // on older JDKs. InputStreamReader keeps FileReader's lenient handling of malformed bytes.
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(new FileInputStream(inFile), StandardCharsets.UTF_8))) {
      String line;
      String constructId = null;
      List<ConstructAssociatedChemical> products = null;

      while ((line = reader.readLine()) != null) {
        Matcher matcher = CONSTRUCT_DESIGNATOR_PATTERN.matcher(line);
        if (matcher.matches()) {
          // A new header closes out the construct that was being collected, if any.
          if (constructId != null) {
            handleConstructProductsList(constructId, products);
          }
          constructId = matcher.group(1).trim();
          products = new ArrayList<>();
        } else {
          if (constructId == null || products == null) {
            throw new RuntimeException("Found construct product step line without a pre-defined construct");
          }

          // Preserve-all-tokens splitting keeps empty fields, so the field count is exact.
          String[] fields = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, PRODUCT_KIND_SEPARATOR);
          if (fields.length != 2) {
            System.err.format("Skipping line with unexpected number of fields (%d): %s\n", fields.length, line);
            continue;
          }

          String chemical = fields[0];
          String kind = fields[1];
          products.add(new ConstructAssociatedChemical(chemical, kind));
        }
      }

      // Finish processing anything that's left over.
      if (constructId != null) {
        handleConstructProductsList(constructId, products);
      }
    }
  }

  /**
   * Assigns indices to a completed construct's chemicals and records the construct.
   *
   * <p>Indices are assigned in reverse file order: the last chemical listed gets index 0, the one
   * before it index 1, and so on.
   *
   * @param construct the construct id
   * @param products the chemicals collected for the construct, in file order
   */
  private void handleConstructProductsList(String construct, List<ConstructAssociatedChemical> products) {
    int step = 0;
    for (int i = products.size() - 1; i >= 0; i--) {
      products.get(i).setIndex(step);
      step++;
    }
    constructProducts.add(Pair.of(construct, products));
  }

  /**
   * Returns the constructs parsed so far.
   *
   * @return the parser's own backing list (not a copy) of construct id / chemical list pairs
   */
  public List<Pair<String, List<ConstructAssociatedChemical>>> getConstructProducts() {
    return constructProducts;
  }

  /** A chemical entry attached to a construct: its index, chemical string and kind string. */
  public static class ConstructAssociatedChemical {
    /** Position assigned by the parser; {@code null} until set. */
    Integer index;
    /** First tab-separated field of the entry line. */
    String chemical;
    /** Second tab-separated field of the entry line. */
    String kind;

    /**
     * Creates an entry whose index has not been assigned yet.
     *
     * @param chemical the chemical field
     * @param kind the kind field
     */
    public ConstructAssociatedChemical(String chemical, String kind) {
      this.chemical = chemical;
      this.kind = kind;
    }

    /**
     * Creates an entry with an explicit index.
     *
     * @param index the index of this chemical within its construct
     * @param chemical the chemical field
     * @param kind the kind field
     */
    public ConstructAssociatedChemical(Integer index, String chemical, String kind) {
      this.index = index;
      this.chemical = chemical;
      this.kind = kind;
    }

    /** @return the assigned index, or {@code null} if none has been set */
    public Integer getIndex() {
      return index;
    }

    /** @param index the index to assign to this chemical */
    protected void setIndex(Integer index) {
      this.index = index;
    }

    /** @return the chemical field */
    public String getChemical() {
      return chemical;
    }

    /** @return the kind field */
    public String getKind() {
      return kind;
    }
  }
}