/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package act.installer.genbank;

import java.io.FileInputStream;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.File;
import java.io.FilenameFilter;
import java.io.FileNotFoundException;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import org.json.XML;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONArray;
import act.shared.Seq;
import act.installer.sequence.SequenceEntry;
import act.server.MongoDB;

/**
 * Accumulates sequence entries parsed from ".seq" data files located in the
 * immediate subdirectories of a source directory, and writes the accumulated
 * entries to a database.
 */
public class Genbank {
  /** Root directory; data file names are resolved relative to it. */
  String sourceDir;

  /** Entries collected so far by every call to one of the process methods. */
  HashSet<SequenceEntry> entries;

  /**
   * Creates an installer with no entries loaded yet.
   *
   * @param dir root directory that contains the data subdirectories
   */
  public Genbank(String dir) {
    this.sourceDir = dir;
    this.entries = new HashSet<SequenceEntry>();
  }

  /**
   * Processes the data files whose position in the list returned by
   * {@link #getDataFileNames(String)} lies in the range [start, end).
   *
   * @param start index of the first file to process (inclusive)
   * @param end index one past the last file to process (exclusive)
   * @throws IndexOutOfBoundsException if the range does not fit the file list
   * @throws IllegalArgumentException if the source directory cannot be listed
   */
  public void process(int start, int end) {
    List<String> files = getDataFileNames(this.sourceDir);
    process(files.subList(start, end));
  }

  /**
   * Processes only the source files whose names are passed.
   *
   * @param files file names relative to the source directory
   */
  public void process(List<String> files) {
    for (String file : files) {
      String fname = this.sourceDir + "/" + file;
      this.entries.addAll(readEntries(fname));
    }
  }

  /**
   * Writes every accumulated entry to the given database.
   *
   * @param db database that receives the entries
   */
  public void sendToDB(MongoDB db) {
    // NOTE(review): entries are written with the swissprot source tag even
    // though this class is the GenBank installer -- confirm this is intended.
    for (SequenceEntry e : this.entries) {
      e.writeToDB(db, Seq.AccDB.swissprot);
    }
  }

  /**
   * Parses all entries contained in one data file. Any parse failure is
   * reported on stderr and terminates the JVM with exit status -1.
   *
   * @param file path of the data file to parse
   * @return the entries extracted from the file
   */
  private Set<SequenceEntry> readEntries(String file) {
    Set<SequenceEntry> extracted = new HashSet<SequenceEntry>();
    try {
      extracted.addAll(GenbankEntry.parsePossiblyMany(file));
    } catch (Exception e) {
      System.err.println("Err reading: " + file + ". Abort.");
      // Report the underlying failure too; the file name alone does not
      // explain why the run was aborted.
      System.err.println("Cause: " + e);
      System.exit(-1);
    }
    return extracted;
  }

  /**
   * Lists the ".seq" files found in the immediate subdirectories of a
   * directory. Subdirectories, and the files within each of them, are visited
   * in sorted name order so that indexes into the result are stable between
   * runs.
   *
   * @param dir root directory to scan
   * @return names of the form "subdir/file.seq", relative to {@code dir}
   * @throws IllegalArgumentException if {@code dir} or one of its
   *     subdirectories cannot be listed
   */
  public static List<String> getDataFileNames(String dir) {
    FilenameFilter subdirfltr = new FilenameFilter() {
      @Override
      public boolean accept(File dir, String sd) {
        return new File(dir, sd).isDirectory();
      }
    };
    FilenameFilter seqfltr = new FilenameFilter() {
      @Override
      public boolean accept(File dir, String nm) {
        return nm.endsWith(".seq");
      }
    };

    List<String> all = new ArrayList<String>();
    for (String subdir : listSorted(new File(dir), subdirfltr)) {
      for (String seqfile : listSorted(new File(dir, subdir), seqfltr)) {
        all.add(subdir + "/" + seqfile);
      }
    }
    return all;
  }

  /**
   * Returns the names in a directory accepted by a filter, sorted by name.
   *
   * @throws IllegalArgumentException if the directory cannot be listed
   */
  private static String[] listSorted(File directory, FilenameFilter filter) {
    String[] names = directory.list(filter);
    if (names == null) {
      // File.list yields null when the path is not a directory or an I/O
      // error occurs; fail with the offending path instead of a bare NPE.
      throw new IllegalArgumentException(
          "Cannot list directory: " + directory.getPath());
    }
    // File.list does not guarantee any particular order.
    Arrays.sort(names);
    return names;
  }
}