/*
* Copyright (C) 2010-2011 "Bio4j"
*
* This file is part of Bio4j
*
* Bio4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package com.bio4j.neo4jdb.programs;
import com.bio4j.neo4jdb.model.nodes.AlternativeProductNode;
import com.bio4j.neo4jdb.model.nodes.SequenceCautionNode;
import com.bio4j.neo4jdb.model.relationships.aproducts.AlternativeProductInitiationRel;
import com.bio4j.neo4jdb.model.relationships.aproducts.AlternativeProductPromoterRel;
import com.bio4j.neo4jdb.model.relationships.aproducts.AlternativeProductRibosomalFrameshiftingRel;
import com.bio4j.neo4jdb.model.relationships.aproducts.AlternativeProductSplicingRel;
import com.bio4j.neo4jdb.model.relationships.protein.*;
import com.bio4j.neo4jdb.model.relationships.sc.*;
import com.bio4j.neo4jdb.model.util.Bio4jManager;
import com.ohnosequences.util.Executable;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.FileHandler;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.logging.SimpleFormatter;
import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
import org.neo4j.unsafe.batchinsert.*;
/**
 * Initializes the Bio4j DB: creates the fixed alternative-product and
 * sequence-caution nodes and registers each of them in the node-type index
 * and in the main-nodes index.
 *
 * <p>Expected arguments:
 * <ol>
 * <li>Bio4j DB folder</li>
 * <li>Batch inserter .properties file name</li>
 * </ol>
 *
 * @author Pablo Pareja Tobes <ppareja@era7.com>
 */
public class InitBio4jDB implements Executable {

    //--------indexing API constants-----
    private static final String PROVIDER_ST = "provider";
    private static final String EXACT_ST = "exact";
    //private static String FULL_TEXT_ST = "fulltext";
    private static final String LUCENE_ST = "lucene";
    private static final String TYPE_ST = "type";
    //-----------------------------------

    private static final Logger logger = Logger.getLogger("InitBio4jDB");

    // Relationship instances built with a null argument. They are not referenced
    // inside this class; they are public, so they are kept untouched in case other
    // programs read them (NOTE(review): confirm external usage before removing).
    public static ErroneousGeneModelPredictionRel erroneousGeneModelPredictionRel = new ErroneousGeneModelPredictionRel(null);
    public static ErroneousInitiationRel erroneousInitiationRel = new ErroneousInitiationRel(null);
    public static ErroneousTerminationRel erroneousTerminationRel = new ErroneousTerminationRel(null);
    public static ErroneousTranslationRel erroneousTranslationRel = new ErroneousTranslationRel(null);
    public static FrameshiftRel frameshiftRel = new FrameshiftRel(null);
    public static MiscellaneousDiscrepancyRel miscellaneousDiscrepancyRel = new MiscellaneousDiscrepancyRel(null);
    public static AlternativeProductInitiationRel alternativeProductInitiationRel = new AlternativeProductInitiationRel(null);
    public static AlternativeProductPromoterRel alternativeProductPromoterRel = new AlternativeProductPromoterRel(null);
    public static AlternativeProductSplicingRel alternativeProductSplicingRel = new AlternativeProductSplicingRel(null);
    public static AlternativeProductRibosomalFrameshiftingRel alternativeProductRibosomalFrameshiftingRel = new AlternativeProductRibosomalFrameshiftingRel(null);

    /**
     * Runs the program with the given argument list by delegating to
     * {@link #main(String[])}.
     *
     * @param array program arguments (DB folder, batch inserter properties file)
     */
    @Override
    public void execute(ArrayList<String> array) {
        main(array.toArray(new String[array.size()]));
    }

    /**
     * Creates the initial nodes of the Bio4j DB using a batch inserter.
     *
     * <p>Any exception raised while initializing is logged and not rethrown.
     * The index provider, the batch inserter and the log file handler are
     * released independently of each other, so a failure while opening or
     * closing one of them does not prevent the others from being closed.
     *
     * @param args {@code args[0]} is the Bio4j DB folder, {@code args[1]} is the
     *             batch inserter .properties file name; with any other number of
     *             arguments a usage message is printed and nothing else happens
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.out.println("This program expects the following parameters:\n"
                    + "1. Bio4j DB folder \n"
                    + "2. Batch inserter .properties file name");
            return;
        }

        FileHandler fh = null;
        BatchInserter inserter = null;
        BatchInserterIndexProvider indexProvider = null;

        try {
            // Configure the logger with a file handler and a plain-text formatter.
            fh = new FileHandler("InitBio4jDB.log", false);
            fh.setFormatter(new SimpleFormatter());
            logger.addHandler(fh);
            logger.setLevel(Level.ALL);

            // create the batch inserter
            inserter = BatchInserters.inserter(args[0], MapUtil.load(new File(args[1])));

            // create the batch index service
            indexProvider = new LuceneBatchInserterIndexProvider(inserter);
            BatchInserterIndex nodeTypeIndex = indexProvider.nodeIndex(Bio4jManager.NODE_TYPE_INDEX_NAME,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex mainNodesIndex = indexProvider.nodeIndex(Bio4jManager.MAIN_NODES_INDEX_NAME,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));

            // A few nodes which must be initialized first. The creation order
            // below is the same as it has always been, so node ids do not change.

            //------------------ALTERNATIVE PRODUCTS--------------------
            createIndexedNode(inserter, nodeTypeIndex, mainNodesIndex,
                    AlternativeProductNode.NODE_TYPE_PROPERTY, AlternativeProductNode.NODE_TYPE,
                    AlternativeProductNode.NAME_PROPERTY, AlternativeProductInitiationRel.UNIPROT_ATTRIBUTE_TYPE_VALUE,
                    Bio4jManager.ALTERNATIVE_PRODUCT_INITIATION);
            createIndexedNode(inserter, nodeTypeIndex, mainNodesIndex,
                    AlternativeProductNode.NODE_TYPE_PROPERTY, AlternativeProductNode.NODE_TYPE,
                    AlternativeProductNode.NAME_PROPERTY, AlternativeProductPromoterRel.UNIPROT_ATTRIBUTE_TYPE_VALUE,
                    Bio4jManager.ALTERNATIVE_PRODUCT_PROMOTER);
            createIndexedNode(inserter, nodeTypeIndex, mainNodesIndex,
                    AlternativeProductNode.NODE_TYPE_PROPERTY, AlternativeProductNode.NODE_TYPE,
                    AlternativeProductNode.NAME_PROPERTY, AlternativeProductSplicingRel.UNIPROT_ATTRIBUTE_TYPE_VALUE,
                    Bio4jManager.ALTERNATIVE_PRODUCT_SPLICING);
            createIndexedNode(inserter, nodeTypeIndex, mainNodesIndex,
                    AlternativeProductNode.NODE_TYPE_PROPERTY, AlternativeProductNode.NODE_TYPE,
                    AlternativeProductNode.NAME_PROPERTY, AlternativeProductRibosomalFrameshiftingRel.UNIPROT_ATTRIBUTE_TYPE_VALUE,
                    Bio4jManager.ALTERNATIVE_PRODUCT_RIBOSOMAL_FRAMESHIFTING);

            //---------------------SEQUENCE CAUTION------------------------
            createIndexedNode(inserter, nodeTypeIndex, mainNodesIndex,
                    SequenceCautionNode.NODE_TYPE_PROPERTY, SequenceCautionNode.NODE_TYPE,
                    SequenceCautionNode.NAME_PROPERTY, ProteinErroneousInitiationRel.UNIPROT_ATTRIBUTE_TYPE_VALUE,
                    Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_INITIATION);
            createIndexedNode(inserter, nodeTypeIndex, mainNodesIndex,
                    SequenceCautionNode.NODE_TYPE_PROPERTY, SequenceCautionNode.NODE_TYPE,
                    SequenceCautionNode.NAME_PROPERTY, ProteinErroneousTranslationRel.UNIPROT_ATTRIBUTE_TYPE_VALUE,
                    Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_TRANSLATION);
            createIndexedNode(inserter, nodeTypeIndex, mainNodesIndex,
                    SequenceCautionNode.NODE_TYPE_PROPERTY, SequenceCautionNode.NODE_TYPE,
                    SequenceCautionNode.NAME_PROPERTY, ProteinFrameshiftRel.UNIPROT_ATTRIBUTE_TYPE_VALUE,
                    Bio4jManager.SEQUENCE_CAUTION_FRAMESHIFT);
            createIndexedNode(inserter, nodeTypeIndex, mainNodesIndex,
                    SequenceCautionNode.NODE_TYPE_PROPERTY, SequenceCautionNode.NODE_TYPE,
                    SequenceCautionNode.NAME_PROPERTY, ProteinErroneousTerminationRel.UNIPROT_ATTRIBUTE_TYPE_VALUE,
                    Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_TERMINATION);
            createIndexedNode(inserter, nodeTypeIndex, mainNodesIndex,
                    SequenceCautionNode.NODE_TYPE_PROPERTY, SequenceCautionNode.NODE_TYPE,
                    SequenceCautionNode.NAME_PROPERTY, ProteinMiscellaneousDiscrepancyRel.UNIPROT_ATTRIBUTE_TYPE_VALUE,
                    Bio4jManager.SEQUENCE_CAUTION_MISCELLANEOUS_DISCREPANCY);
            createIndexedNode(inserter, nodeTypeIndex, mainNodesIndex,
                    SequenceCautionNode.NODE_TYPE_PROPERTY, SequenceCautionNode.NODE_TYPE,
                    SequenceCautionNode.NAME_PROPERTY, ProteinErroneousGeneModelPredictionRel.UNIPROT_ATTRIBUTE_TYPE_VALUE,
                    Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_GENE_MODEL_PREDICTION);

        } catch (Exception e) {
            // Passing the throwable keeps the exception type, message and cause chain
            // in the log, and avoids logging a null message.
            logger.log(Level.SEVERE, "Bio4j DB initialization failed", e);
        } finally {
            // Shutdown makes sure all changes are written to disk. The index provider
            // goes first, then the inserter (same order as before). Each resource may
            // still be null if its creation failed, and each one is closed in its own
            // try block so one failure cannot skip the remaining ones.
            if (indexProvider != null) {
                try {
                    indexProvider.shutdown();
                } catch (Exception e) {
                    logger.log(Level.SEVERE, "Error shutting down the batch index provider", e);
                }
            }
            if (inserter != null) {
                try {
                    inserter.shutdown();
                } catch (Exception e) {
                    logger.log(Level.SEVERE, "Error shutting down the batch inserter", e);
                }
            }
            // Close the log file last so the shutdown errors above still reach it, and
            // detach it so repeated runs do not accumulate handlers on the static logger.
            if (fh != null) {
                logger.removeHandler(fh);
                fh.close();
            }
        }
    }

    /**
     * Creates one node holding a node-type property and a name property, then
     * adds it to the node-type index and to the main-nodes index.
     *
     * @param inserter         batch inserter used to create the node
     * @param nodeTypeIndex    index receiving the node under {@code Bio4jManager.NODE_TYPE_INDEX_NAME}
     * @param mainNodesIndex   index receiving the node under {@code Bio4jManager.MAIN_NODES_INDEX_NAME}
     * @param nodeTypeProperty name of the property that stores the node type
     * @param nodeType         node type value, stored on the node and used as node-type index value
     * @param nameProperty     name of the property that stores the node name
     * @param name             node name value
     * @param mainIndexValue   value under which the node is added to the main-nodes index
     * @return id of the created node
     */
    private static long createIndexedNode(BatchInserter inserter,
            BatchInserterIndex nodeTypeIndex,
            BatchInserterIndex mainNodesIndex,
            String nodeTypeProperty,
            Object nodeType,
            String nameProperty,
            Object name,
            Object mainIndexValue) {
        Map<String, Object> properties = new HashMap<>();
        properties.put(nodeTypeProperty, nodeType);
        properties.put(nameProperty, name);

        long nodeId = inserter.createNode(properties);
        nodeTypeIndex.add(nodeId, MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, nodeType));
        mainNodesIndex.add(nodeId, MapUtil.map(Bio4jManager.MAIN_NODES_INDEX_NAME, mainIndexValue));
        return nodeId;
    }
}