/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.dataimport.genes;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Set;
import org.erasmusmc.ids.DatabaseID;
import org.erasmusmc.ontology.Ontology;
import org.erasmusmc.ontology.OntologyManager;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.utilities.StringUtilities;
/**
 * Command-line tool that builds a concept-ID to PMID mapping.
 *
 * <p>On construction it fetches an ontology from the database, builds the ontology's
 * database-ID index, reads a tab-separated Entrez-Gene PMID file, adds one entry to a
 * {@code CID2PMID} for every concept the ontology returns for a gene identifier, then
 * filters the mapping and saves it to a file. All input and output locations are
 * hard-coded in the constructor.
 */
public class ExtractGenePMIDsPerCID {

    /** Character buffer size handed to every {@link BufferedReader} opened by this tool. */
    private static final int READ_BUFFER_SIZE = 1000000;

    /** Accumulates the (concept ID, PMID) pairs found in the input files. */
    private final CID2PMID cid2PMID = new CID2PMID();

    /** Ontology used to translate database identifiers into concept IDs. */
    private Ontology ontology;

    /**
     * Program entry point; all work happens in the constructor.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new ExtractGenePMIDsPerCID();
    }

    /**
     * Runs the complete extraction pipeline, printing a timestamped progress line before
     * each stage.
     */
    public ExtractGenePMIDsPerCID() {
        System.out.println(StringUtilities.now() + "\tFetching ontology");
        OntologyManager manager = new OntologyManager();
        ontology = manager.fetchStoreFromDatabase("Homologene_curated_min3_090107");

        System.out.println(StringUtilities.now() + "\tCreating databaseID index");
        ((OntologyStore) ontology).createIndexForDatabaseIDs();

        System.out.println(StringUtilities.now() + "\tProcessing Entrez-Gene PMID file");
        processEGFile("C:/Data/Entrez-Genes/gene2pubmed");

        // The additional PMID files are currently disabled; only the progress line is printed.
        System.out.println(StringUtilities.now() + "\tProcessing other PMID files");
        //processFile("C:/home/public/Thesauri/GenesNonHuman/PMID2RGD.txt");
        //processFile("C:/home/public/Thesauri/GenesNonHuman/PMID2MGD.txt");

        System.out.println(StringUtilities.now() + "\tFiltering");
        // NOTE(review): the meaning of the threshold 25 is defined by CID2PMID.filter - confirm there.
        System.out.println("Removed " + cid2PMID.filter(25) + " PMIDs");
        System.out.println("Removed " + cid2PMID.removedRefCount + " reference");

        System.out.println(StringUtilities.now() + "\tSaving to file");
        cid2PMID.saveToFile("/temp/cid2pmid.txt");
    }

    /**
     * Opens a file for buffered line-by-line reading using the platform default charset.
     *
     * @param filename path of the file to open
     * @return a reader the caller is responsible for closing
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    private static BufferedReader openReader(String filename) throws FileNotFoundException {
        return new BufferedReader(
                new InputStreamReader(new FileInputStream(filename)), READ_BUFFER_SIZE);
    }

    /**
     * Reads a tab-separated Entrez-Gene PMID file, skipping its first line, and feeds every
     * remaining line to {@link #processEGLine(String)}. The running line count is printed
     * every 100000 lines. I/O failures are reported on standard error and end the read;
     * they are not propagated.
     *
     * @param filename path of the file to read
     */
    private void processEGFile(String filename) {
        int count = 0;
        // try-with-resources closes the reader even when a line fails to process.
        try (BufferedReader reader = openReader(filename)) {
            reader.readLine(); // skip first line
            String line;
            // readLine() returning null is the end-of-file signal; ready() only says
            // whether a read would block.
            while ((line = reader.readLine()) != null) {
                processEGLine(line);
                count++;
                if (count % 100000 == 0) {
                    System.out.println(count);
                }
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException from openReader.
            e.printStackTrace();
        }
    }

    /**
     * Handles one tab-separated line: column 1 is looked up in the ontology as an
     * {@code "LL"} database identifier and column 2 is parsed as the integer PMID that is
     * stored for every concept found. Lines with fewer than three columns are reported on
     * standard error and skipped.
     *
     * @param string one line of the input file, without its line terminator
     * @throws NumberFormatException if column 2 is not a valid integer
     */
    private void processEGLine(String string) {
        String[] cols = string.split("\t");
        if (cols.length < 3) {
            System.err.println("Skipping malformed line: " + string);
            return;
        }
        // A filter on column 0 is currently disabled:
        //if (cols[0].equals("9606") || cols[0].equals("10090") ||cols[0].equals("10116")){
        DatabaseID databaseID = new DatabaseID("LL", cols[1]);
        Set<Integer> cids = ontology.getConceptIDs(databaseID);
        if (cids != null) {
            int pmid = Integer.parseInt(cols[2]);
            for (Integer cid : cids) {
                cid2PMID.put(cid, pmid);
            }
        }
        //}
    }

    /**
     * Reads a file line by line and feeds every line to {@link #processLine(String)}. The
     * running line count is printed every 10000 lines. I/O failures are reported on
     * standard error and end the read; they are not propagated.
     *
     * @param filename path of the file to read
     */
    private void processFile(String filename) {
        System.out.println(StringUtilities.now() + "\tNow processing " + filename);
        int count = 0;
        // try-with-resources closes the reader even when a line fails to process.
        try (BufferedReader reader = openReader(filename)) {
            String line;
            while ((line = reader.readLine()) != null) {
                processLine(line);
                count++;
                if (count % 10000 == 0) {
                    System.out.println(count);
                }
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException from openReader.
            e.printStackTrace();
        }
    }

    /**
     * Handles one line of the form {@code <pmid>=<database>_<id>}: the part after
     * {@code '='} is split on {@code '_'} into a database name and an identifier, which
     * are looked up in the ontology; the integer before {@code '='} is stored as PMID for
     * every concept found. Lines that lack the {@code '='} or {@code '_'} separator are
     * reported on standard error and skipped.
     *
     * @param string one line of the input file, without its line terminator
     * @throws NumberFormatException if the part before {@code '='} is not a valid integer
     */
    private void processLine(String string) {
        String[] cols = string.split("=");
        if (cols.length < 2) {
            System.err.println("Skipping malformed line: " + string);
            return;
        }
        String[] components = cols[1].split("_");
        if (components.length < 2) {
            System.err.println("Skipping malformed line: " + string);
            return;
        }
        DatabaseID databaseID = new DatabaseID(components[0], components[1]);
        Set<Integer> cids = ontology.getConceptIDs(databaseID);
        if (cids != null) {
            int pmid = Integer.parseInt(cols[0]);
            for (Integer cid : cids) {
                cid2PMID.put(cid, pmid);
            }
        }
    }
}