package com.bao.lc.site.sx;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpResponse;
import org.apache.http.params.HttpProtocolParams;
import com.bao.lc.AppConfig;
import com.bao.lc.client.BrowserClient;
import com.bao.lc.client.utils.HttpClientUtils;
import com.bao.lc.common.exception.ParseException;
import com.bao.lc.util.AppUtils;
import com.bao.lc.util.MiscUtils;
public class CnetDownload
{
private static Log log = LogFactory.getLog(CnetDownload.class);
private String url;
private BrowserClient session;
public CnetDownload(String url)
{
this.url = url;
session = new BrowserClient();
HttpProtocolParams.setUserAgent(session.getParams(), BrowserClient.AGENT_CHROME);
}
public int download()
{
int rc = 0;
HttpResponse rsp = null;
String content = null;
String nextUrl = null;
try
{
// 1. Get url user page content
rsp = session.execute(url);
content = HttpClientUtils.saveToString(rsp.getEntity(), "UTF-8");
nextUrl = extractDirectLink(content);
// 2. Get direct download link page
rsp = session.execute(nextUrl);
content = HttpClientUtils.saveToString(rsp.getEntity(), "UTF-8");
nextUrl = extraceSrcFileURL(content);
log.info(nextUrl);
// 3. Download file
rsp = session.execute(nextUrl);
String fileName = extraceFileName(nextUrl);
HttpClientUtils.saveToFile(rsp.getEntity(), AppUtils.getOutputFilePath(fileName));
}
catch(Exception e)
{
log.error(e.toString(), e);
rc = -1;
}
finally
{
session.getConnectionManager().shutdown();
}
return rc;
}
private String extractDirectLink(String content) throws ParseException
{
String regex = "<div class=\"dlLinkWrapper(.*?)\"> <a href=\"(.+?)\" (.*?)>Direct Download Link</a>";
List<String> valueList = new ArrayList<String>();
int matchCount = MiscUtils.getRegexValue(content, regex, valueList, true, 0);
if(matchCount < 1)
{
throw new ParseException("Failed to find direct download link. matchCount = " + matchCount);
}
for(int i = 0; i < matchCount; i++)
{
String dlUrl = valueList.get(i * 3 + 2);
String id = valueList.get(i * 3 + 3);
if(id.contains("loggedInUserDlLink"))
{
return dlUrl;
}
}
valueList.remove(0);
log.error("Failed to find Direct Download Link. Match Result: " + valueList);
return null;
}
private String extraceSrcFileURL(String content) throws ParseException
{
String regex = "src:'(.+)'";
String url = MiscUtils.getRegexValueOnce(content, regex, 1);
return url;
}
private String extraceFileName(String url)
{
int beginIndex = url.lastIndexOf('/');
if(beginIndex == -1)
{
return null;
}
int endIndex = url.indexOf('?', beginIndex + 1);
if(endIndex == -1)
{
return null;
}
return url.substring(beginIndex + 1, endIndex);
}
public static void grab(String url, int count, int interval)
{
Random rand = new Random(System.currentTimeMillis());
int nSucc = 0, nFail = 0;
int rc = 0;
for(int i = 0; i < count; i++)
{
CnetDownload grabber = new CnetDownload(url);
rc = grabber.download();
if(rc == 0)
{
nSucc++;
}
else
{
nFail++;
}
//Sleep if needed
if(i < (count - 1) && interval != 0)
{
MiscUtils.sleep(interval, rand);
}
}
String result = String.format(
"Result of grab:\n\turl=[%s]\n\tcount=[%d]\n\tinterval=[%d]\n\n\tOK=[%02d], Fail=[%02d]\n",
url, count, interval, nSucc, nFail);
log.info(result);
}
@SuppressWarnings("unused")
private void unitTest()
{
try
{
String content = IOUtils.toString(new FileInputStream("cnet.html"), "UTF-8");
String url = extractDirectLink(content);
log.info(url);
}
catch(Exception e)
{
log.error(e.toString(), e);
}
}
/**
* @param args
*/
public static void main(String[] args)
{
String url = AppConfig.getInstance().getPropInput("cnet.dl.url");
int count = MiscUtils.toInt(AppConfig.getInstance().getPropInput("cnet.dl.count"));
int interval = MiscUtils.toInt(AppConfig.getInstance().getPropInput("cnet.dl.interval"));
String value = null;
for(int i = 0; args != null && i < args.length; i++)
{
if(args[i].startsWith("-url="))
{
value = args[i].substring("-url=".length());
url = value;
}
else if(args[i].startsWith("-count="))
{
value = args[i].substring("-count=".length());
count = Integer.parseInt(value);
}
else if(args[i].startsWith("-interval="))
{
value = args[i].substring("-interval=".length());
interval = Integer.parseInt(value);
}
}
log.info(String.format("url=%s, count=%d, interval=%d", url, count, interval));
grab(url, count, interval);
}
}