/** * Created by zhenhong.gzh on 16/7/13. */ import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import com.aliyun.odps.Odps; import com.aliyun.odps.PartitionSpec; import com.aliyun.odps.account.Account; import com.aliyun.odps.account.AliyunAccount; import com.aliyun.odps.data.RecordReader; import com.aliyun.odps.tunnel.TableTunnel; import com.aliyun.odps.tunnel.TableTunnel.DownloadSession; import com.aliyun.odps.tunnel.TunnelException; /** * use thread pool to download data */ public class TunnelMultiThreadDownloadSample { private static String accessId = "<your access id>"; private static String accessKey = "<your access Key>"; private static String odpsUrl = "<your odps endpoint>"; private static String project = "<your project>"; private static String table = "<your table name>"; // partitions of a partitioned table, eg: "pt=\'1\',ds=\'2\'" // if the table is not a partitioned table, do not need it private static String partition = "<your partition spec>"; // thread number private static int threadNum = 10; public static void main(String args[]) { Account account = new AliyunAccount(accessId, accessKey); Odps odps = new Odps(account); odps.setEndpoint(odpsUrl); odps.setDefaultProject(project); TableTunnel tunnel = new TableTunnel(odps); // if the table is not a partitioned table, do not need it PartitionSpec partitionSpec = new PartitionSpec(partition); DownloadSession downloadSession; try { // create download session for thread pool downloadSession = tunnel.createDownloadSession(project, table, partitionSpec); System.out.println("Session Status is : " + downloadSession.getStatus().toString()); long count = downloadSession.getRecordCount(); System.out.println("RecordCount is: " + count); // create thread pool ExecutorService pool = Executors.newFixedThreadPool(threadNum); ArrayList<Callable<Long>> callers = new ArrayList<Callable<Long>>(); // split total count long step = count / threadNum; for (int i = 0; i < threadNum - 1; i++) { // open record reader with specific record count for each thread // read count is step RecordReader recordReader = downloadSession.openRecordReader( step * i, step); callers.add(new DownloadThread( i, recordReader, downloadSession.getSchema())); } // the last thread, read count is (count- ((threadNum - 1) * step) RecordReader recordReader = downloadSession.openRecordReader(step * (threadNum - 1), count - ((threadNum - 1) * step)); callers.add(new DownloadThread( threadNum - 1, recordReader, downloadSession.getSchema())); Long downloadNum = 0L; // invoke the thread pool to download record List<Future<Long>> recordNum = pool.invokeAll(callers); for (Future<Long> num : recordNum) downloadNum += num.get(); System.out.println("Record Count is: " + downloadNum); pool.shutdown(); } catch (TunnelException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } catch (ExecutionException e) { e.printStackTrace(); } } }