/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.medline;
/***Usage:
* Use this class if you want to process a series of Medline records. Records can be selected either
* through a file containing a list of PMIDs, or by specifying a year-range.
* Call the iterate method to start retrieving the records, pass an object implementing the MedlineListener
* interface as parameter and it will be called with batches of records.
* Note: uses multi-threading, so expect the next call to be made immediately after the last batch is processed
*/
import java.util.ArrayList;
import java.util.List;
import org.erasmusmc.utilities.StringUtilities;
import org.erasmusmc.utilities.ReadTextFile;
public class MedlineIterator {
public String pmidsFile = "";
public List<Integer> givenPmids = null;
public int beginYear = -1;
public int endYear = -1;
public int batchsize = 1000;
public List<Integer> unretrievedPmids;
public Boolean verbose = true;
public boolean fetchTitleAndAbstract = true;
public boolean fetchMesh = true;
public boolean fetchGeneSymbol = false;
public boolean fetchJournal = false;
public boolean fetchJournalShortForm = false;
public boolean fetchSubstances = false;
public boolean fetchPublicationType = false;
public boolean fetchPublicationDate = false;
public boolean fetchAuthors = false;
public boolean fetchLanguage = false;
public boolean fetchAffiliation = false;
public boolean fetchISSN = false;
private FetchRecordsthread medlineThread;
private MedlineListener listener;
public void iterate(MedlineListener listener) {
this.listener = listener;
if (verbose)
System.out.println("Connecting to Medline server. " + StringUtilities.now());
medlineThread = new FetchRecordsthread();
medlineThread.fetchTitleAndAbstract = fetchTitleAndAbstract;
medlineThread.fetchGeneSymbol = fetchGeneSymbol;
medlineThread.fetchMesh = fetchMesh;
medlineThread.fetchGeneSymbol = fetchGeneSymbol;
medlineThread.fetchJournal = fetchJournal;
medlineThread.fetchJournalShortForm = fetchJournalShortForm;
medlineThread.fetchSubstances = fetchSubstances;
medlineThread.fetchPublicationType = fetchPublicationType;
medlineThread.fetchPublicationDate = fetchPublicationDate;
medlineThread.fetchAuthors = fetchAuthors;
medlineThread.fetchLanguage = fetchLanguage;
medlineThread.fetchAffiliation = fetchAffiliation;
medlineThread.fetchISSN = fetchISSN;
int cycles = 1;
List<Integer> pmids = null;
for (int cycle = 0; cycle < cycles; cycle++) {
if (verbose)
System.out.println("Loading PMIDs. " + StringUtilities.now());
if (givenPmids != null) {
process(givenPmids);
System.out.println("Processed " + givenPmids.size() + " PMIDs");
} else {
// Load from file
ReadTextFile textFile = new ReadTextFile(pmidsFile);
int pmidsPerBatch = 10000;
int counter = 0;
List<String> pmidsString = textFile.loadFromFileInBatches(pmidsPerBatch);
while (pmidsString.size() > 0) {
pmids = new ArrayList<Integer>(pmidsString.size());
counter += pmidsString.size();
System.out.println(counter);
for (String pmid: pmidsString) {
try {
pmids.add(Integer.parseInt(pmid.trim()));
} catch (NumberFormatException n) {
System.out.println("Could not convert \"" + pmid + "\" to int");
}
}
process(pmids);
pmidsString = textFile.loadFromFileInBatches(pmidsPerBatch);
}
if (verbose)
System.out.println("Found " + counter + " PMIDs in file.");
}
if (verbose)
System.out.println("Processing texts. " + StringUtilities.now());
}
unretrievedPmids = medlineThread.unretrievedPmids;
if (verbose)
System.out.println("Could not retrieve " + unretrievedPmids.size() + " pmids from Database");
medlineThread.terminate();
}
private void process(List<Integer> pmids) {
List<MedlineRecord> inputBuffer = new ArrayList<MedlineRecord>();
int offset = 0;
boolean done = false;
boolean fetching = false;
boolean processed = true;
while (!done) {
if (!fetching && offset < pmids.size()) {
medlineThread.pmids.clear();
for (int i = 0; i < batchsize && i + offset < pmids.size(); i++) {
medlineThread.pmids.add(pmids.get(i + offset));
}
offset = offset + medlineThread.pmids.size();
// System.out.println(offset);
medlineThread.proceed();
fetching = true;
}
if (processed) {
if (fetching) {
medlineThread.waitUntilFinished();
inputBuffer = medlineThread.records;
processed = false;
fetching = false;
}
else {
done = true;
}
}
else {
listener.processMedlineRecords(inputBuffer);
processed = true;
}
}
}
}