/**
 * Copyright by Chinamobile
 *
 * HashWritePartition.java
 */
package com.chinamobile.bcbsp.partition;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

import com.chinamobile.bcbsp.api.Partitioner;
import com.chinamobile.bcbsp.api.Vertex;
import com.chinamobile.bcbsp.bspstaff.*;
import com.chinamobile.bcbsp.bspstaff.BSPStaff.WorkerAgentForStaffInterface;
import com.chinamobile.bcbsp.io.RecordReader;
import com.chinamobile.bcbsp.util.ThreadPool;
import com.chinamobile.bcbsp.util.ThreadSignle;

/**
 * HashWritePartition
 *
 * Implements the hash-based partition method. The user must provide a
 * no-argument constructor.
 *
 * @author
 * @version
 */
public class HashWritePartition extends WritePartition {

  public static final Log LOG = LogFactory.getLog(HashWritePartition.class);

  public HashWritePartition() {
  }

  public HashWritePartition(WorkerAgentForStaffInterface workerAgent,
      BSPStaff staff, Partitioner<Text> partitioner) {
    this.workerAgent = workerAgent;
    this.staff = staff;
    this.partitioner = partitioner;
  }

  /**
   * Partitions the graph vertices, writing each vertex to its corresponding
   * partition. For every record this method calls the recordParse method to
   * obtain the vertex id, then calls the partitioner's getPartitionID method
   * to compute the id of the partition that the vertex belongs to. If the
   * vertex belongs to the local partition it is written into the local graph
   * data; otherwise it is buffered and sent to the appropriate partition.
   *
   * @param recordReader
   *        the reader that supplies the raw key/value records
   * @throws IOException
   *         if reading or serializing a record fails
   * @throws InterruptedException
   *         if a send thread is interrupted
   */
  @SuppressWarnings("unchecked")
  @Override
  public void write(RecordReader recordReader) throws IOException,
      InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    // Split the total cache evenly among the per-partition send buffers and
    // the send threads.
    int bufferSize = (int) ((this.TotalCacheSize * 1024 * 1024)
        / (this.staff.getStaffNum() + this.sendThreadNum));
    byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
    int[] bufindex = new int[this.staff.getStaffNum()];
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    try {
      this.keyserializer.open(bb);
      this.valueserializer.open(bb);
    } catch (IOException e) {
      throw e;
    }
    try {
      while (recordReader != null && recordReader.nextKeyValue()) {
        headNodeNum++;
        Text key = new Text(recordReader.getCurrentKey().toString());
        Text value = new Text(recordReader.getCurrentValue().toString());
        // LOG.info("KEY:" + key.toString() + "\tVALUE:" + value.toString());
        int pid = -1;
        Text vertexID = this.recordParse.getVertexID(key);
        if (vertexID != null) {
          pid = this.partitioner.getPartitionID(vertexID);
        } else {
          lost++;
          continue;
        }
        if (pid == this.staff.getPartition()) {
          // The vertex belongs to the local partition.
          local++;
          Vertex vertex = this.recordParse.recordParse(key.toString(),
              value.toString());
          if (vertex == null) {
            lost++;
            continue;
          }
          staff.getGraphData().addForAll(vertex);
        } else {
          // The vertex belongs to a remote partition: serialize the record
          // and append it to that partition's send buffer.
          send++;
          bb.reset();
          this.keyserializer.serialize(key);
          kbytes.set(bb.getData(), 0, bb.getLength());
          ksize = kbytes.getLength();
          bb.reset();
          this.valueserializer.serialize(value);
          vbytes.set(bb.getData(), 0, bb.getLength());
          vsize = vbytes.getLength();
          if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
            // There is enough space left in the buffer.
            System.arraycopy(kbytes.getBytes(), 0, buffer[pid],
                bufindex[pid], ksize);
            bufindex[pid] += ksize;
            System.arraycopy(vbytes.getBytes(), 0, buffer[pid],
                bufindex[pid], vsize);
            bufindex[pid] += vsize;
          } else if (buffer[pid].length < (ksize + vsize)) {
            // Super record: the record is larger than the whole buffer,
            // so send it directly.
            ThreadSignle t = tpool.getThread();
            while (t == null) {
              // Busy-wait until a send thread becomes available.
              t = tpool.getThread();
            }
            t.setWorker(this.workerAgent.getWorker(staff.getJobID(),
                staff.getStaffID(), pid));
            t.setJobId(staff.getJobID());
            t.setTaskId(staff.getStaffID());
            t.setBelongPartition(pid);
            BytesWritable data = new BytesWritable();
            byte[] tmp = new byte[vsize + ksize];
            System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
            System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
            data.set(tmp, 0, (ksize + vsize));
            t.setData(data);
            tmp = null;
            LOG.info("Using Thread is: " + t.getThreadNumber());
            LOG.info("this is a super record");
            t.setStatus(true);
          } else {
            // Not enough space: flush the buffer to the remote partition,
            // then store the current record at the start of the buffer.
            ThreadSignle t = tpool.getThread();
            while (t == null) {
              t = tpool.getThread();
            }
            t.setWorker(this.workerAgent.getWorker(staff.getJobID(),
                staff.getStaffID(), pid));
            t.setJobId(staff.getJobID());
            t.setTaskId(staff.getStaffID());
            t.setBelongPartition(pid);
            BytesWritable data = new BytesWritable();
            data.set(buffer[pid], 0, bufindex[pid]);
            t.setData(data);
            LOG.info("Using Thread is: " + t.getThreadNumber());
            t.setStatus(true);
            bufindex[pid] = 0;
            // Store the current record.
            System.arraycopy(kbytes.getBytes(), 0, buffer[pid],
                bufindex[pid], ksize);
            bufindex[pid] += ksize;
            System.arraycopy(vbytes.getBytes(), 0, buffer[pid],
                bufindex[pid], vsize);
            bufindex[pid] += vsize;
          }
        }
      }
      // Flush the remaining buffered records of every remote partition.
      for (int i = 0; i < this.staff.getStaffNum(); i++) {
        if (bufindex[i] != 0) {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(),
              staff.getStaffID(), i));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(i);
          BytesWritable data = new BytesWritable();
          data.set(buffer[i], 0, bufindex[i]);
          t.setData(data);
          LOG.info("Using Thread is: " + t.getThreadNumber());
          t.setStatus(true);
        }
      }
      tpool.cleanup();
      tpool = null;
      buffer = null;
      bufindex = null;
      LOG.info("The number of vertices that were read from the input file: "
          + headNodeNum);
      LOG.info("The number of vertices that were put into the partition: "
          + local);
      LOG.info("The number of vertices that were sent to other partitions: "
          + send);
      LOG.info("The number of vertices in the partition that could not be parsed: "
          + lost);
    } catch (IOException e) {
      throw e;
    } catch (InterruptedException e) {
      throw e;
    }
  }
}
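
/*
 * A minimal sketch (an assumption, not part of BC-BSP) of the kind of
 * hash-based Partitioner that HashWritePartition expects: the partition id is
 * derived from the hash code of the vertex id, modulo the number of
 * partitions, so the same vertex id always maps to the same partition. Only
 * the getPartitionID(Text) call is taken from the code above; the class and
 * field names here are illustrative.
 */
class SimpleHashPartitionerSketch {
  /** The total number of partitions (normally the staff number). */
  private final int numPartition;

  SimpleHashPartitionerSketch(int numPartition) {
    this.numPartition = numPartition;
  }

  /** Maps a vertex id to a partition id in [0, numPartition). */
  int getPartitionID(Text vertexID) {
    // Mask the sign bit so the result of the modulo is never negative.
    return (vertexID.hashCode() & Integer.MAX_VALUE) % numPartition;
  }
}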