/*
 * Concept profile generation tool suite
 * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
 * Rotterdam, The Netherlands
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>
 */
package org.erasmusmc.dataimport.genes;

import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.erasmusmc.collections.CountingSet;

/**
 * Sorted mapping from a CID to the set of PMIDs associated with it.
 *
 * <p>Besides the inherited {@link TreeMap} operations, the class offers a
 * convenience {@link #put(Integer, Integer)} for adding a single CID/PMID pair,
 * a {@link #filter(int)} that drops PMIDs referenced by too many CIDs, and
 * {@link #saveToFile(String)} for writing the mapping as tab-separated text.
 */
public class CID2PMID extends TreeMap<Integer, Set<Integer>> {

    private static final long serialVersionUID = 1L;

    /** Size, in chars, of the buffer used by {@link #saveToFile(String)}. */
    private static final int WRITE_BUFFER_SIZE = 1000000;

    /**
     * Number of individual CID-to-PMID references removed by the most recent
     * call to {@link #filter(int)}; reset to zero at the start of each call.
     */
    public int removedRefCount = 0;

    /**
     * Adds a single PMID to the set stored for the given CID, creating the set
     * (a {@link TreeSet}) when the CID has no entry yet.
     *
     * @param cid  the key whose PMID set is extended
     * @param pmid the PMID to add to that set
     */
    public void put(Integer cid, Integer pmid) {
        Set<Integer> pmids = get(cid);
        if (pmids == null) {
            pmids = new TreeSet<Integer>();
            put(cid, pmids);
        }
        pmids.add(pmid);
    }

    /**
     * Removes every PMID that occurs in the sets of more than
     * {@code maxCIDsPerPMID} CIDs, and drops CIDs whose set becomes empty.
     *
     * <p>The number of removed CID-to-PMID references is stored in
     * {@link #removedRefCount}.
     *
     * @param maxCIDsPerPMID highest number of CID sets a PMID may appear in
     *                       and still be kept
     * @return the value of {@code CountingSet.size()} after the kept PMIDs
     *         have been pruned from the counter -- presumably the number of
     *         distinct PMIDs that were removed (depends on the project's
     *         {@code CountingSet}; verify there)
     */
    public int filter(int maxCIDsPerPMID) {
        // Count number of occurrences of PMIDs:
        CountingSet<Integer> allPMIDs = new CountingSet<Integer>();
        for (Set<Integer> pmids : values()) {
            for (Integer pmid : pmids) {
                allPMIDs.add(pmid);
            }
        }

        // Select PMIDs for removal: discard the counts at or below the
        // threshold so that only the PMIDs to be removed remain in allPMIDs.
        Iterator<Map.Entry<Integer, CountingSet.Count>> countIterator =
                allPMIDs.key2count.entrySet().iterator();
        while (countIterator.hasNext()) {
            if (countIterator.next().getValue().count <= maxCIDsPerPMID) {
                countIterator.remove();
            }
        }

        // Remove PMIDs:
        removedRefCount = 0;
        Iterator<Map.Entry<Integer, Set<Integer>>> entryIterator = entrySet().iterator();
        while (entryIterator.hasNext()) {
            Set<Integer> pmids = entryIterator.next().getValue();
            Iterator<Integer> refIterator = pmids.iterator();
            while (refIterator.hasNext()) {
                if (allPMIDs.contains(refIterator.next())) {
                    refIterator.remove();
                    removedRefCount++;
                }
            }
            // A CID left without any PMID is dropped from the map altogether.
            if (pmids.size() == 0) {
                entryIterator.remove();
            }
        }
        return allPMIDs.size();
    }

    /**
     * Writes the mapping to a text file, one line per CID in key order:
     * the CID followed by its PMIDs, all separated by tab characters.
     *
     * <p>The text is encoded with the platform default charset. I/O failures
     * are not propagated; their stack trace is printed to standard error.
     * The writer is always closed, also when writing fails part-way.
     *
     * @param filename path of the file to create or overwrite
     */
    public void saveToFile(String filename) {
        try {
            FileOutputStream outputStream = new FileOutputStream(filename);
            BufferedWriter writer =
                    new BufferedWriter(new OutputStreamWriter(outputStream), WRITE_BUFFER_SIZE);
            try {
                for (Map.Entry<Integer, Set<Integer>> entry : entrySet()) {
                    StringBuilder line = new StringBuilder();
                    line.append(entry.getKey());
                    line.append("\t");
                    Iterator<Integer> pmidIterator = entry.getValue().iterator();
                    while (pmidIterator.hasNext()) {
                        line.append(pmidIterator.next());
                        if (pmidIterator.hasNext()) {
                            line.append("\t");
                        }
                    }
                    writer.write(line.toString());
                    writer.newLine();
                }
            } finally {
                // close() flushes the buffer first and also closes the
                // underlying file stream, so the file handle is released
                // even when a write above has thrown.
                writer.close();
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}