/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.dataimport.genes;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.erasmusmc.utilities.ReadTextFile;
import org.erasmusmc.utilities.WriteTextFile;
/**
 * Copies selected records from a Swiss-Prot style flat file to a new file.
 *
 * <p>A record is every line up to and including a terminator line that starts
 * with {@code //}. A record is kept when its {@code OX ... NCBI_TaxID=<id>}
 * line names a taxon identifier contained in the requested set.
 */
public class SwissProtOrganismFilter {

    /** Two-letter line code that opens a taxonomy cross-reference line. */
    private static final String TAXON_LINE_CODE = "OX";

    /** Key that directly precedes the taxon identifier on such a line. */
    private static final String TAXON_KEY = "NCBI_TaxID=";

    /** Prefix of the line that terminates a record. */
    private static final String RECORD_TERMINATOR = "//";

    /**
     * Runs the filter with hard-coded input, output and taxon settings.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Set<String> taxons = new HashSet<String>();
        // NOTE(review): 9031 is presumably the NCBI taxon of chicken, while the
        // output file is named "worm.txt" - confirm which organism is intended.
        taxons.add("9031");
        filterSwissProt("/home/data/Swiss-Prot/uniprot_sprot.dat", "/home/data/Swiss-Prot/worm.txt", taxons);
    }

    /**
     * Writes every record of {@code source} whose taxon identifier is in
     * {@code taxons} to {@code target}, preserving the record lines unchanged.
     *
     * <p>Lines after the last terminator line (an unterminated trailing record)
     * are not written.
     *
     * @param source path of the flat file to read
     * @param target path of the file to write the selected records to
     * @param taxons taxon identifiers (as decimal strings) of the records to keep
     */
    public static void filterSwissProt(String source, String target, Set<String> taxons) {
        ReadTextFile in = new ReadTextFile(source);
        WriteTextFile out = new WriteTextFile(target);
        try {
            // Lines of the record currently being read; flushed or dropped at "//".
            List<String> record = new ArrayList<String>();
            boolean include = false;
            for (String line : in) {
                record.add(line);
                String taxID = extractTaxonId(line);
                if (taxID != null) {
                    include = taxons.contains(taxID);
                }
                if (line.startsWith(RECORD_TERMINATOR)) {
                    if (include) {
                        for (String recordLine : record) {
                            out.writeln(recordLine);
                        }
                    }
                    record.clear();
                    include = false;
                }
            }
        } finally {
            // Close the writer even when reading or writing fails, so the
            // output file is not left open.
            out.close();
        }
    }

    /**
     * Extracts the taxon identifier from a taxonomy cross-reference line.
     *
     * <p>The line must start with {@code OX}, followed by one or more spaces and
     * the key {@code NCBI_TaxID=}. The identifier is the run of digits directly
     * after the key, so anything that follows it (a terminating {@code ;}, or
     * other trailing text) does not end up in the identifier.
     *
     * @param line a line of the flat file
     * @return the digits following the key (possibly empty), or {@code null}
     *         when the line is not a taxonomy cross-reference line
     */
    private static String extractTaxonId(String line) {
        if (!line.startsWith(TAXON_LINE_CODE)) {
            return null;
        }
        // Skip the spaces between the line code and the key; the number of
        // spaces is not fixed here so differently padded files both work.
        int keyStart = TAXON_LINE_CODE.length();
        while (keyStart < line.length() && line.charAt(keyStart) == ' ') {
            keyStart++;
        }
        if (keyStart == TAXON_LINE_CODE.length() || !line.startsWith(TAXON_KEY, keyStart)) {
            return null;
        }
        int idStart = keyStart + TAXON_KEY.length();
        int idEnd = idStart;
        while (idEnd < line.length() && Character.isDigit(line.charAt(idEnd))) {
            idEnd++;
        }
        return line.substring(idStart, idEnd);
    }
}