/* * JBoss, Home of Professional Open Source * Copyright 2009 Red Hat Inc. and/or its affiliates and other * contributors as indicated by the @author tags. All rights reserved. * See the copyright.txt in the distribution for a full listing of * individual contributors. * * This is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this software; if not, write to the Free * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA, or see the FSF site: http://www.fsf.org. */ package org.infinispan.ec2demo; import com.martiansoftware.jsap.JSAPResult; import org.infinispan.Cache; import org.infinispan.remoting.transport.Address; import org.infinispan.util.LegacyKeySupportSystemProperties; import org.infinispan.util.logging.Log; import org.infinispan.util.logging.LogFactory; import org.xml.sax.SAXException; import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Random; /** * @author noconnor@redhat.com * */ public class InfluenzaDataLoader { private CacheBuilder cbuilder; private Cache<String, Influenza_N_P_CR_Element> influenzaCache; private Cache<String, Nucleotide_Protein_Element> proteinCache; private Cache<String, Nucleotide_Protein_Element> nucleiodCache; private Nucleotide_Protein_Parser npParser; private Influenza_Parser iParser; private static final Log log = LogFactory.getLog(InfluenzaDataLoader.class); public void createCache(String configFile) throws IOException { String cfgFileName = LegacyKeySupportSystemProperties.getProperty("infinispan.configuration", "infinispan.demo.cfg"); if (cfgFileName == null) cfgFileName = configFile; cbuilder = new CacheBuilder(cfgFileName); influenzaCache = cbuilder.getCacheManager().getCache("InfluenzaCache"); proteinCache = cbuilder.getCacheManager().getCache("ProteinCache"); nucleiodCache = cbuilder.getCacheManager().getCache("NucleotideCache"); } /** * @param config * @throws SAXException */ public void populateCache(JSAPResult config) throws SAXException { try { npParser = new Nucleotide_Protein_Parser(); iParser = new Influenza_Parser(); System.out.println("Caches created....Starting CacheManager"); cbuilder.getCacheManager().start(); int loadLimit = config.getInt("count"); // Dump the cluster list List<Address> z = cbuilder.getCacheManager().getMembers(); for (Address k : z) if (k != null) System.out.println("Cache Address=" + k.toString()); System.out.println("Parsing files...."); if (config.getString("ifile") != null) { log.info("Parsing Influenza data"); List<Influenza_N_P_CR_Element> iList = iParser.parseFile(config.getString("ifile")); boolean rQuery = config.getBoolean("randomquery"); int lSize = iList.size() - 1; if (rQuery) { System.out.println("Performing random queries"); Random randomGenerator = new Random(); while (true) { int currRec = randomGenerator.nextInt(lSize); Influenza_N_P_CR_Element curreElem = iList.get(currRec); this.searchCache(curreElem.getGanNucleoid()); try { Thread.sleep(1000); } catch (InterruptedException ex) { // do nothing, yea I know its naughty... } } } else { System.out.println("About to load " + iList.size() + " influenza elements into influenzaCache"); int loopCount = 0; influenzaCache.startBatch(); for (Influenza_N_P_CR_Element x : iList) { influenzaCache.put(x.getGanNucleoid(), x); loopCount++; if ((loopCount % 5000) == 0) { System.out.println("Added " + loopCount + " Influenza records"); influenzaCache.endBatch(true); influenzaCache.startBatch(); } if (loopCount == loadLimit) { System.out.println("Limited to " + loadLimit + " records"); break; } } influenzaCache.endBatch(true); System.out.println("Loaded " + influenzaCache.size() + " influenza elements into influenzaCache"); } } if (config.getString("pfile") != null) { log.info("Parsing Protein data"); List<Nucleotide_Protein_Element> npList = npParser.parseFile(config.getString("pfile")); System.out.println("About to load " + npList.size() + " protein elements into ProteinCache"); int loopCount = 0; proteinCache.startBatch(); for (Nucleotide_Protein_Element x : npList) { proteinCache.put(x.getGenbankAccessionNumber(), x); loopCount++; if ((loopCount % 5000) == 0) { System.out.println("Added " + loopCount + " protein records"); proteinCache.endBatch(true); proteinCache.startBatch(); } if (loopCount == loadLimit) { System.out.println("Limited to " + loadLimit + " records"); break; } } proteinCache.endBatch(true); System.out.println("Loaded " + proteinCache.size() + " protein elements into ProteinCache"); } if (config.getString("nfile") != null) { log.info("Parsing Nucleotide data"); List<Nucleotide_Protein_Element> npList = npParser.parseFile(config.getString("nfile")); System.out.println("About to load " + npList.size() + " nucleotide elements into NucleiodCache"); int loopCount = 0; nucleiodCache.startBatch(); for (Nucleotide_Protein_Element x : npList) { nucleiodCache.put(x.getGenbankAccessionNumber(), x); loopCount++; if ((loopCount % 5000) == 0) { System.out.println("Added " + loopCount + " Nucleotide records"); nucleiodCache.endBatch(true); nucleiodCache.startBatch(); } if (loopCount == loadLimit) { System.out.println("Limited to " + loadLimit + " records"); break; } } nucleiodCache.endBatch(true); System.out.println("Loaded " + nucleiodCache.size() + " nucleotide elements into NucleiodCache"); } System.out.println("Parsing files....Done"); } catch (IOException e) { e.printStackTrace(); } } public void searchCache(String inGBAN) { log.trace("Searching influenzaCache for " + inGBAN); // Find the virus details Influenza_N_P_CR_Element myRec = influenzaCache.get(inGBAN); if (myRec != null) { System.out.println("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); System.out.println("Virus Details->" + myRec); log.trace("Searching nucleiodCache for " + myRec.getGanNucleoid()); Nucleotide_Protein_Element nucldet = nucleiodCache.get(myRec.getGanNucleoid()); System.out.println("Nucleotide details->" + nucldet); // Display the protein details Map<String, String> myProt = myRec.getProtein_Data(); for (String x : myProt.keySet()) { System.out.println("========================================================================="); log.trace("Searching proteinCache for " + x); Nucleotide_Protein_Element myProtdet = proteinCache.get(x); System.out.println("Protein->" + myProtdet); String protein_CR = myProt.get(x); System.out.println("Protein coding region->" + protein_CR); } System.out.println("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); } else { log.trace("No virus data found for " + inGBAN); System.out.println("No virus data found for " + inGBAN); } } public String cacheSizes(){ return "Protein/Influenza/Nucleotide Cache Size-->" + proteinCache.size() + "/" + influenzaCache.size() + "/" + nucleiodCache.size(); } }