package org.arong.egdownloader.spider; /** * 专注互联网,分享创造价值 * maoxiang@gmail.com * 2010-3-30下午04:40:06 */ import java.io.BufferedInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.RandomAccessFile; import java.net.HttpURLConnection; import java.net.InetSocketAddress; import java.net.Proxy; import java.net.SocketAddress; import java.net.URL; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.codec.binary.Base64; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * 一个多线程支持断点续传的工具类<br/> * 2010-03 用Htpp Component重写 */ public class HttpDownloader { private final Log log = LogFactory.getLog(getClass().getName()); private int threads = 5; // 总共的线程数 private int maxThreads = 10; // 最大的线程数 private String destUrl; // 目标的URL private String savePath; // 保存的路径 private File lockFile;// 用来保存进度的文件 private String userAgent = "jHttpDownload"; private boolean useProxy = false; private String proxyServer; private int proxyPort; private String proxyUser; private String proxyPassword; private int blockSize = 1024 * 4; // 4K 一个块 // 1个位代表一个块,,用来标记是否下载完成 private byte[] blockSet; private int blockPage; // 每个线程负责的大小 private int blocks; private boolean running; // 是否运行中,避免线程不能释放 // =======下载进度信息 private long beginTime; private AtomicLong downloaded = new AtomicLong(0); // 已下载的字节数/ private long fileLength; // 总的字节数 // 监控线程,用来保存进度和汇报进度 private MonitorThread monitorThread = new MonitorThread(); public HttpDownloader(String destUrl, String savePath, int threads) { this.threads = threads; this.destUrl = destUrl; this.savePath = savePath; } public HttpDownloader(String destUrl, String savePath) { this(destUrl, savePath, 5); } /** * 开始下载 */ public boolean download() { log.info("下载文件" + destUrl + ",保存路径=" + savePath); beginTime = System.currentTimeMillis(); boolean ok = false; try { File saveFile = new File(savePath); lockFile = new File(savePath + ".lck"); if (lockFile.exists() && !lockFile.canWrite()) { throw new Exception("文件被锁住,或许已经在下载中了"); } File parent = saveFile.getParentFile(); if (!parent.exists()) { log.info("创建目录=" + parent.getAbsolutePath()); } if (!parent.canWrite()) { throw new Exception("保存目录不可写"); } if (saveFile.exists()) { if (!saveFile.canWrite()) { throw new Exception("保存文件不可写,无法继续下载"); } log.info("检查之前下载的文件"); if (lockFile.exists()) { log.info("加载之前下载进度"); loadPrevious(); } } else { lockFile.createNewFile(); } // 1初始化httpClient HttpURLConnection httpUrlConnection = getHttpConnection(0); String contentLength = httpUrlConnection .getHeaderField("Content-Length"); if (contentLength != null) { try { fileLength = Long.parseLong(contentLength); } catch (Exception e) { } } log.info("下载文件的大小:" + fileLength); if (fileLength <= 0) { // 不支持多线程下载,采用单线程下载 log.info("服务器不能返回文件大小,采用单线程下载"); threads = 1; } if (httpUrlConnection.getHeaderField("Content-Range") == null) { log.info("服务器不支持断点续传"); threads = 1; } else { log.info("服务器支持断点续传"); } // if (fileLength > 0 && parent.getFreeSpace() < fileLength) { throw new Exception("磁盘空间不够"); } if (fileLength > 0) { int i = (int) (fileLength / blockSize); if (fileLength % blockSize > 0) { i++; } blocks = i; } else { // 一个块 blocks = 1; } if (blockSet != null) { log.info("检查文件,是否能够续传"); if (blockSet.length != fileLength / (8l * blockSize)) { log.info("文件大小已改变,需要重新下载"); blockSet = null; } } if (blockSet == null) { blockSet = BitUtil.createBit(blocks); } log.info("文件的块数:" + blocks + "," + blockSet.length); if (threads > maxThreads) { log.info("超过最大线程数,使用最大线程数"); threads = maxThreads; } blockPage = blocks / threads; // 每个线程负责的块数 log.info("分配线程。线程数量=" + threads + ",块总数=" + blocks + ",总字节数=" + fileLength + ",每块大小=" + blockSize + ",块/线程=" + blockPage); // 检查 running = true; // 创建一个线程组,方便观察和调试 ThreadGroup downloadGroup = new ThreadGroup("download"); for (int i = 0; i < threads; i++) { int begin = i * blockPage; int end = (i + 1) * blockPage; if (i == threads - 1 && blocks % threads > 0) { // 如果最后一个线程,有余数,需要修正 end = blocks; } // 扫描每个线程的块是否有需要下载的 boolean needDownload = false; for (int j = begin; j < end; j++) { if (!BitUtil.getBit(blockSet, j)) { needDownload = true; break; } } if (!needDownload) { log.info("所有块已经下载完毕.Begin=" + begin + ",End=" + end); } // 启动下载其他线程 DownloadThread downloadThread = new DownloadThread( downloadGroup, i, begin, end); downloadThread.start(); } monitorThread.setStop(false); monitorThread.start(); // 也可以用Thread.join 实现等待进程完成 while (downloadGroup.activeCount() > 0) { Thread.sleep(2000); } ok = true; } catch (Exception e) { e.printStackTrace(); log.error(e); } finally { // closeHttpClient(); } if (ok) { log.info("删除进度文件:" + lockFile.getAbsolutePath()); lockFile.delete(); } monitorThread.setStop(true); log.info("下载完成,耗时:" + getTime((System.currentTimeMillis() - beginTime) / 1000)); return ok; } private void loadPrevious() throws Exception { ByteArrayOutputStream outStream = new ByteArrayOutputStream(); FileInputStream inStream = new FileInputStream(lockFile); byte[] buffer = new byte[4096]; int n = 0; while (-1 != (n = inStream.read(buffer))) { outStream.write(buffer, 0, n); } outStream.close(); inStream.close(); blockSet = outStream.toByteArray(); log.info("之前的文件大小应该是:" + blockSet.length * 8l * blockSize + ",一共有:" + blockSet.length + "块"); } private HttpURLConnection httpConnection0; // 用来快速返回,减少一次连接 private HttpURLConnection getHttpConnection(long pos) throws IOException { if (pos == 0 && httpConnection0 != null) { return httpConnection0; } URL url = new URL(destUrl); log.debug("开始一个Http请求连接。Url=" + url + "定位:" + pos + "/n"); // 默认的会处理302请求 HttpURLConnection.setFollowRedirects(false); HttpURLConnection httpConnection = null; if (useProxy) { log.debug("使用代理进行连接.ProxyServer=" + proxyServer + ",ProxyPort=" + proxyPort); SocketAddress addr = new InetSocketAddress(proxyServer, proxyPort); Proxy proxy = new Proxy(Proxy.Type.HTTP, addr); httpConnection = (HttpURLConnection) url.openConnection(proxy); if (proxyUser != null && proxyPassword != null) { String encoded = new String(Base64.encodeBase64(new String( proxyUser + ":" + proxyPassword).getBytes())); httpConnection.setRequestProperty("Proxy-Authorization", "Basic " + encoded); } } else { httpConnection = (HttpURLConnection) url.openConnection(); } httpConnection.setRequestProperty("User-Agent", userAgent); httpConnection.setRequestProperty("RANGE", "bytes=" + pos + "-"); int responseCode = httpConnection.getResponseCode(); log.debug("服务器返回:" + responseCode); Map<String, List<String>> headers = httpConnection.getHeaderFields(); Iterator<String> iterator = headers.keySet().iterator(); while (iterator.hasNext()) { String key = iterator.next(); String value = ""; for (String v : headers.get(key)) { value = ";" + v; } log.debug(key + "=" + value); } if (responseCode < 200 || responseCode >= 400) { throw new IOException("服务器返回无效信息:" + responseCode); } if (pos == 0) { httpConnection0 = httpConnection; } return httpConnection; } public void returnConnection(HttpURLConnection connecton) { if (connecton != null) connecton.disconnect(); connecton = null; } private String getDesc() { long downloadBytes = downloaded.longValue(); return String .format( "已下载/总大小=%s/%s(%s),速度:%s,耗时:%s,剩余大小:%d", getFileSize(downloadBytes), getFileSize(fileLength), getProgress(fileLength, downloadBytes), getFileSize(downloadBytes / ((System.currentTimeMillis() - beginTime) / 1000 + 1)), getTime((System.currentTimeMillis() - beginTime) / 1000), fileLength - downloadBytes); } private String getFileSize(long totals) { // 计算文件大小 int i = 0; String j = "BKMGT"; float s = totals; while (s > 1024) { s /= 1024; i++; } return String.format("%.2f", s) + j.charAt(i); } private String getProgress(long totals, long read) { if (totals == 0) return "0%"; return String.format("%d", read * 100 / totals) + "%"; } private String getTime(long seconds) { int i = 0; String j = "秒分时天"; long s = seconds; String result = ""; while (s > 0) { if (s % 60 > 0) { result = String.valueOf(s % 60) + (char) j.charAt(i) + result; } s /= 60; i++; } return result; } /** * 一个下载线程. */ private class DownloadThread extends Thread { private RandomAccessFile destFile; // 用来实现保存的随机文件 private int id = 0; private int blockBegin = 0; // 开始块 private int blockEnd = 0; // 结束块 private long pos;// 绝对指针 private String getThreadName() { return "DownloadThread-" + id + "=>"; } public DownloadThread(ThreadGroup group, int id, int blockBegin, int blockEnd) throws Exception { super(group, "downloadThread-" + id); this.id = id; this.blockBegin = blockBegin; this.blockEnd = blockEnd; this.pos = 1l * blockBegin * blockSize; // 转换为长整型 destFile = new RandomAccessFile(savePath, "rw"); } public void run() { BufferedInputStream inputStream = null; try { log.info(getThreadName() + "下载线程." + this.toString()); log.info(getThreadName() + ":定位文件位置.Pos=" + 1l * blockBegin * blockSize); destFile.seek(1l * blockBegin * blockSize); log.info(getThreadName() + ":开始下载.[ " + blockBegin + " - " + blockEnd + "]"); HttpURLConnection httpConnection = getHttpConnection(pos); inputStream = new BufferedInputStream(httpConnection .getInputStream()); byte[] b = new byte[blockSize]; while (blockBegin < blockEnd) { if (!running) { log.info(getThreadName() + ":停止下载.当前块:" + blockBegin); return; } log.debug(getThreadName() + "下载块=" + blockBegin); int counts = 0; // 已下载字节数 if (BitUtil.getBit(blockSet, blockBegin)) { log.debug(getThreadName() + ":块下载已经完成=" + blockBegin); destFile.skipBytes(blockSize); int skips = 0; while (skips < blockSize) { skips += inputStream.skip(blockSize - skips); } downloaded.addAndGet(blockSize); } else { while (counts < blockSize) { int read = inputStream.read(b, 0, blockSize - counts); if (read < 0) break; counts += read; destFile.write(b, 0, read); downloaded.addAndGet(read); } BitUtil.setBit(blockSet, blockBegin, true); // 标记已经下载完成 } blockBegin++; } httpConnection.disconnect(); log.info(getThreadName() + "下载完成."); return; } catch (Exception e) { log.error(getThreadName() + "下载错误:" + e.getMessage()); e.printStackTrace(); } finally { try { if (inputStream != null) inputStream.close(); } catch (Exception te) { log.error(te); } try { if (destFile != null) destFile.close(); } catch (Exception te) { log.error(te); } } } } // 监控线程,并保存进度,方便下次断点续传 private class MonitorThread extends Thread { boolean stop = false; public void setStop(boolean stop) { this.stop = stop; } public void run() { FileOutputStream saveStream = null; try { while (running && !stop) { saveStream = new FileOutputStream(lockFile); log.info(getDesc()); // 保存进度 saveStream.write(blockSet, 0, blockSet.length); sleep(1000); saveStream.close(); } } catch (Exception e) { e.printStackTrace(); log.error(e); } finally { } } } // 用来操作位的工具 private static class BitUtil { public static byte[] createBit(int len) { int size = len / Byte.SIZE; if (len % Byte.SIZE > 0) { size++; } return new byte[size]; } /** 取出某位,是0 还是1 */ public static boolean getBit(byte[] bits, int pos) { int i = pos / Byte.SIZE; int b = bits[i]; int j = pos % Byte.SIZE; byte c = (byte) (0x80 >>> (j - 1)); return b == c; } /** 设置某位,是0 还是1 */ public static void setBit(byte[] bits, int pos, boolean flag) { int i = pos / Byte.SIZE; byte b = bits[i]; int j = pos % Byte.SIZE; byte c = (byte) (0x80 >>> (j - 1)); if (flag) { bits[i] = (byte) (b | c); } else { c = (byte) (0xFF ^ c); bits[i] = (byte) (b & c); } } } }