/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.medline;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.erasmusmc.utilities.ReadTextFile;
/**
* Class for retrieving Medline records from our local database using multithreading.
*
* @author schuemie
*
*/
public class MedlineRecordIterator implements Iterator<MedlineRecord> {

    /** Worker that fetches one batch of records per {@code proceed()} call. */
    private FetchRecordsthread medlineThread;

    /** Iterator over the lines of the PMID file (one PMID per line). */
    private Iterator<String> lineIterator;

    /** Iterator over the records of the most recently completed batch. */
    private Iterator<MedlineRecord> bufferIterator;

    /** True while a batch has been handed to the fetch thread and not yet collected. */
    private boolean isFetching = false;

    /** Maximum number of PMIDs handed to the fetch thread per batch. */
    public static int batchSize = 1000;

    /**
     * Some test code. Please ignore.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        FetchSettings fetchSettings = new FetchSettings();
        fetchSettings.fetchMesh = true;
        MedlineRecordIterator iterator = new MedlineRecordIterator("/home/public/PMIDs/Random10.000.PMIDs", fetchSettings);
        //MedlineRecordIterator iterator = new MedlineRecordIterator("/home/temp/PMIDsToBeIndexed.txt");
        int count = 0;
        while (iterator.hasNext()) {
            count++;
            MedlineRecord record = iterator.next();
            // Only print every 1000th record to keep the output readable.
            if (count % 1000 == 0) {
                System.out.println(count + "\t" + record.title);
                for (MeSHHeader header : record.meshHeaders) {
                    if (header.qualifier == null) {
                        System.out.println("- " + header.descriptor + (header.descriptorMajor ? "*" : ""));
                    } else {
                        System.out.println("- " + header.descriptor + (header.descriptorMajor ? "*" : "") + "/" + header.qualifier + (header.qualifierMajor ? "*" : ""));
                    }
                }
            }
        }
        System.out.println("Retrieved: " + count);
        System.out.println("Unretrieved: " + iterator.getUnretrievedPMIDs().size());
    }

    /**
     * Creates an iterator that iterates over the records with PMIDs specified in the pmid file (one PMID per line).
     * Uses the standard fetch settings (only titles and abstracts).
     *
     * @param pmidFilename path of the text file holding one PMID per line
     */
    public MedlineRecordIterator(String pmidFilename) {
        init(pmidFilename, new FetchSettings());
    }

    /**
     * Creates an iterator that iterates over the records with PMIDs specified in the pmid file (one PMID per line)
     * using the fetch settings specified.
     *
     * @param pmidFilename  path of the text file holding one PMID per line
     * @param fetchSettings flags selecting which parts of each record are fetched; must not be null
     * @throws NullPointerException if {@code fetchSettings} is null
     */
    public MedlineRecordIterator(String pmidFilename, FetchSettings fetchSettings) {
        init(pmidFilename, fetchSettings);
    }

    /**
     * Returns the PMIDs that could not be retrieved from the database (so far).
     *
     * @return the fetch thread's list of unretrieved PMIDs (the live list, not a copy)
     */
    public List<Integer> getUnretrievedPMIDs() {
        return medlineThread.unretrievedPmids;
    }

    /**
     * Opens the PMID file, configures the fetch thread from the given settings and
     * pre-loads the buffer so that {@link #hasNext()} is meaningful right after construction.
     */
    private void init(String pmidFilename, FetchSettings fetchSettings) {
        // Fail before the file reader and the fetch thread object are created.
        if (fetchSettings == null) {
            throw new NullPointerException("fetchSettings");
        }

        ReadTextFile pmidFile = new ReadTextFile(pmidFilename);
        lineIterator = pmidFile.iterator();

        // Each setting is copied to the fetch thread field of the same name.
        medlineThread = new FetchRecordsthread();
        medlineThread.fetchTitleAndAbstract = fetchSettings.fetchTitleAndAbstract;
        medlineThread.fetchMesh = fetchSettings.fetchMesh;
        medlineThread.fetchGeneSymbol = fetchSettings.fetchGeneSymbol;
        medlineThread.fetchJournal = fetchSettings.fetchJournal;
        medlineThread.fetchJournalShortForm = fetchSettings.fetchJournalShortForm;
        medlineThread.fetchSubstances = fetchSettings.fetchSubstances;
        medlineThread.fetchPublicationType = fetchSettings.fetchPublicationType;
        medlineThread.fetchPublicationDate = fetchSettings.fetchPublicationDate;
        medlineThread.fetchAuthors = fetchSettings.fetchAuthors;
        medlineThread.fetchLanguage = fetchSettings.fetchLanguage;
        medlineThread.fetchAffiliation = fetchSettings.fetchAffiliation;
        medlineThread.fetchISSN = fetchSettings.fetchISSN;
        medlineThread.fetchVolumeIssuePages = fetchSettings.fetchVolumeIssuePages;

        // Request the first batch, then keep collecting batches (and requesting the
        // following one) until a batch actually contains records or nothing is left.
        startFetch();
        do {
            copyFetchedToBuffer();
            startFetch();
        } while (!bufferIterator.hasNext() && isFetching);
    }

    /**
     * Collects the result of the outstanding batch (waiting for the fetch thread if one
     * is in flight) and points the buffer iterator at the fetch thread's record list.
     */
    private void copyFetchedToBuffer() {
        if (isFetching) {
            medlineThread.waitUntilFinished();
        }
        isFetching = false;
        bufferIterator = medlineThread.records.iterator();
    }

    /**
     * Reads up to {@link #batchSize} PMIDs from the PMID file and hands them to the
     * fetch thread. When no PMIDs are left, the fetch thread is terminated instead.
     *
     * @throws NumberFormatException if a non-blank line is not a valid integer
     */
    private void startFetch() {
        List<Integer> pmids = new ArrayList<Integer>(batchSize);
        while (lineIterator.hasNext() && pmids.size() < batchSize) {
            String line = lineIterator.next();
            if (line == null) {
                continue;
            }
            // Tolerate surrounding whitespace (e.g. a stray '\r') and blank lines,
            // which Integer.parseInt would otherwise reject.
            String pmid = line.trim();
            if (pmid.length() == 0) {
                continue;
            }
            pmids.add(Integer.parseInt(pmid));
        }

        if (pmids.size() == 0) {
            isFetching = false;
            medlineThread.terminate();
        } else {
            medlineThread.pmids = pmids;
            medlineThread.proceed();
            isFetching = true;
        }
    }

    /**
     * @return true if at least one more record is available in the current buffer
     */
    @Override
    public boolean hasNext() {
        return bufferIterator.hasNext();
    }

    /**
     * Returns the next record. If this drains the current buffer while a batch is still
     * outstanding, the buffer is refilled before returning so that a subsequent
     * {@link #hasNext()} reflects the remaining data.
     *
     * @return the next record
     */
    @Override
    public MedlineRecord next() {
        MedlineRecord next = bufferIterator.next();
        // Loop rather than refill once: a collected batch can be empty while
        // further batches are still pending.
        while (!bufferIterator.hasNext() && isFetching) {
            copyFetchedToBuffer();
            startFetch();
        }
        return next;
    }

    /**
     * Removal is not supported by this iterator.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException(
                "remove() is not supported by " + this.getClass().getCanonicalName());
    }

    /**
     * Flags selecting which parts of a record are fetched. Each flag is copied to the
     * fetch thread field of the same name; by default only titles and abstracts are fetched.
     */
    public static class FetchSettings {
        public boolean fetchTitleAndAbstract = true;
        public boolean fetchMesh = false;
        public boolean fetchGeneSymbol = false;
        public boolean fetchJournal = false;
        public boolean fetchJournalShortForm = false;
        public boolean fetchSubstances = false;
        public boolean fetchPublicationType = false;
        public boolean fetchPublicationDate = false;
        public boolean fetchAuthors = false;
        public boolean fetchLanguage = false;
        public boolean fetchAffiliation = false;
        public boolean fetchISSN = false;
        public boolean fetchVolumeIssuePages = false;
    }
}