/** * Software License, Version 1.0 * * Copyright 2003 The Trustees of Indiana University. All rights reserved. * * *Redistribution and use in source and binary forms, with or without *modification, are permitted provided that the following conditions are met: * *1) All redistributions of source code must retain the above copyright notice, * the list of authors in the original source code, this list of conditions and * the disclaimer listed in this license; *2) All redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the disclaimer listed in this license in * the documentation and/or other materials provided with the distribution; *3) Any documentation included with all redistributions must include the * following acknowledgement: * *"This product includes software developed by the Community Grids Lab. For * further information contact the Community Grids Lab at * http://communitygrids.iu.edu/." * * Alternatively, this acknowledgement may appear in the software itself, and * wherever such third-party acknowledgments normally appear. * *4) The name Indiana University or Community Grids Lab or NaradaBrokering, * shall not be used to endorse or promote products derived from this software * without prior written permission from Indiana University. For written * permission, please contact the Advanced Research and Technology Institute * ("ARTI") at 351 West 10th Street, Indianapolis, Indiana 46202. *5) Products derived from this software may not be called NaradaBrokering, * nor may Indiana University or Community Grids Lab or NaradaBrokering appear * in their name, without prior written permission of ARTI. * * * Indiana University provides no reassurances that the source code provided * does not infringe the patent or any other intellectual property rights of * any other entity. Indiana University disclaims any liability to any * recipient for claims brought by any other entity based on infringement of * intellectual property rights or otherwise. * *LICENSEE UNDERSTANDS THAT SOFTWARE IS PROVIDED "AS IS" FOR WHICH NO *WARRANTIES AS TO CAPABILITIES OR ACCURACY ARE MADE. INDIANA UNIVERSITY GIVES *NO WARRANTIES AND MAKES NO REPRESENTATION THAT SOFTWARE IS FREE OF *INFRINGEMENT OF THIRD PARTY PATENT, COPYRIGHT, OR OTHER PROPRIETARY RIGHTS. *INDIANA UNIVERSITY MAKES NO WARRANTIES THAT SOFTWARE IS FREE FROM "BUGS", *"VIRUSES", "TROJAN HORSES", "TRAP DOORS", "WORMS", OR OTHER HARMFUL CODE. *LICENSEE ASSUMES THE ENTIRE RISK AS TO THE PERFORMANCE OF SOFTWARE AND/OR *ASSOCIATED MATERIALS, AND TO THE PERFORMANCE AND VALIDITY OF INFORMATION *GENERATED USING SOFTWARE. */ package edu.indiana.soic.ts.mapreduce.pwd; import java.io.IOException; import java.util.HashMap; import java.util.List; import edu.indiana.soic.ts.dist.DistanceFunction; import edu.indiana.soic.ts.utils.Utils; import edu.indiana.soic.ts.utils.VectorPoint; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Mapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class SWGMap extends Mapper<LongWritable, Text, LongWritable, SWGWritable> { private static final Logger LOG = LoggerFactory.getLogger(SWGMap.class); private long blockSize; private long noOfSequences; private long noOfDivisions; private DistanceFunction distFunc; @Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); Configuration conf = context.getConfiguration(); this.blockSize = conf.getLong(Constants.BLOCK_SIZE, 1000); this.noOfSequences = conf.getLong(Constants.NO_OF_SEQUENCES, blockSize * 10); this.noOfDivisions = conf.getLong(Constants.NO_OF_DIVISIONS, noOfSequences / blockSize); String distFuncName = conf.get(Constants.DIST_FUNC); this.distFunc = (DistanceFunction) Utils.loadObject(distFuncName); this.distFunc.prepare(new HashMap<>()); } public void map(LongWritable blockIndex, Text value, Context context) throws IOException, InterruptedException { long startTime = System.nanoTime(); Configuration conf = context.getConfiguration(); Counter alignmentCounter = context .getCounter(Constants.RecordCounters.ALIGNMENTS); String valString = value.toString(); String valArgs[] = valString.split(Constants.BREAK); long rowBlock = Long.parseLong(valArgs[0]); long columnBlock = Long.parseLong(valArgs[1]); boolean isDiagonal = Boolean.parseBoolean(valArgs[2]); LOG.info("row column" + rowBlock + " " + columnBlock + " " + isDiagonal + " " + valArgs[2]); long row = rowBlock * blockSize; long column = columnBlock * blockSize; long parseStartTime = System.nanoTime(); FileSystem fs = FileSystem.getLocal(conf); // parse the inputFilePart for row Path rowPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + rowBlock); FSDataInputStream rowInStream = fs.open(rowPath); List<VectorPoint> rowSequences = SequenceParser.ParseFile(rowInStream); // parse the inputFilePart for column if this is not a diagonal block List<VectorPoint> colSequences; if (isDiagonal) { colSequences = rowSequences; } else { // parse the inputFilePart for column Path colPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + columnBlock); FSDataInputStream colInStream = fs.open(colPath); colSequences = SequenceParser.ParseFile(colInStream); } LOG.info("Parsing time : " + ((System.nanoTime() - parseStartTime) / 1000000) + "ms"); short[][] alignments = new short[(int) blockSize][(int) blockSize]; double [][]doubleDistances = new double[(int)blockSize][(int)blockSize]; double max = Double.MIN_VALUE; for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) { int columnIndex = 0; for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) { double alignment; alignment = distFunc.calc(rowSequences.get(rowIndex), colSequences.get(columnIndex)); if (alignment > max) { max = alignment; } // Get the identity and make it percent identity doubleDistances[rowIndex][columnIndex] = alignment; } alignmentCounter.increment(columnIndex); } // divide by max to get the range to 0 to 1 and then convert to short and output for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) { int columnIndex = 0; for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) { double alignment = doubleDistances[rowIndex][columnIndex] / max; short scaledScore = (short) (alignment * Short.MAX_VALUE); alignments[rowIndex][columnIndex] = scaledScore; } } SWGWritable dataWritable = new SWGWritable(rowBlock, columnBlock, blockSize, false); dataWritable.setMax(max); dataWritable.setAlignments(alignments); context.write(new LongWritable(rowBlock), dataWritable); if (!isDiagonal) { // Create the transpose matrix of (rowBlock, colBlock) block to fill the // (colBlock, rowBlock) block. SWGWritable inverseDataWritable = new SWGWritable(columnBlock, rowBlock, blockSize, true); inverseDataWritable.setAlignments(alignments); context.write(new LongWritable(columnBlock), inverseDataWritable); } LOG.info("Map time : " + ((System.nanoTime() - startTime) / 1000000) + "ms"); } }