/*
* Copyright (C) 2014 Davide Pastore
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package neembuu.release1.externalImpl.linkhandler;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import neembuu.release1.api.file.OnlineFile;
import neembuu.release1.api.file.PropertyProvider;
import neembuu.release1.api.linkhandler.LinkHandler;
import neembuu.release1.api.linkhandler.LinkHandlerProvider;
import neembuu.release1.api.linkhandler.TrialLinkHandler;
import neembuu.release1.captcha.Captcha;
import neembuu.release1.defaultImpl.file.BasicOnlineFile;
import neembuu.release1.defaultImpl.file.BasicPropertyProvider;
import neembuu.release1.httpclient.NHttpClient;
import neembuu.release1.httpclient.utils.NHttpClientUtils;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import davidepastore.StringUtils;
import java.net.URLEncoder;
import neembuu.release1.api.log.LoggerUtil;
import neembuu.release1.defaultImpl.external.ELHProvider;
import neembuu.release1.defaultImpl.linkhandler.BasicLinkHandler;
import neembuu.release1.defaultImpl.linkhandler.Utils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
*
* @author davidepastore
*/
@ELHProvider(checkingRegex = YoutubeLinkHandlerProvider.REG_EXP)
public class YoutubeLinkHandlerProvider implements LinkHandlerProvider {
/** Logger shared by this provider and its nested trial handler. */
private static final Logger LOGGER = LoggerUtil.getLogger(YoutubeLinkHandlerProvider.class.getName()); // all logs go into an html file
/** Prefix of the reCAPTCHA challenge address; handleCaptcha appends K_CHALLENGE_CODE to it. */
private final String K_CHALLENGE_URL = "https://www.google.com/recaptcha/api/challenge?k=";
/** Value appended to K_CHALLENGE_URL (the "k" query parameter) when a captcha challenge is requested. */
private final String K_CHALLENGE_CODE = "6LcVessSAAAAAH73irTtpZYKknjeBvN3nuUzJ2G3";
/**
 * Regular expression describing the YouTube links this provider accepts
 * (www.youtube.com/watch?v=... and youtu.be/... forms). It is the checkingRegex
 * of the ELHProvider annotation on this class; YT_TLH.canHandle() matches
 * against an identical literal.
 */
static final String REG_EXP = "https?://(www.youtube.com/watch\\?(feature=player_embedded&)?v=|youtu.be/)([\\w\\-\\_]*)(&(amp;)?[\\w\\?=((\\w)|(\\W))]*)?";
/**
 * Wraps the given link in a YouTube trial handler. No validation is done
 * here; the returned object's {@code canHandle()} tells whether the link
 * is actually a supported YouTube address.
 *
 * @param url the link to try.
 * @return a new trial handler bound to the (normalized) link.
 */
@Override
public TrialLinkHandler tryHandling(final String url) {
    final YT_TLH trial = new YT_TLH(url);
    return trial;
}
/**
 * Builds the link handler for a trial handler previously produced by this
 * provider.
 *
 * @param tlh the trial handler to resolve.
 * @return the built handler, or {@code null} when {@code tlh} was not created
 *         by this provider or reports that it cannot handle its link.
 * @throws Exception if the extraction through clipconverter.cc fails.
 */
@Override
public LinkHandler getLinkHandler(TrialLinkHandler tlh) throws Exception {
    final boolean createdHere = tlh instanceof YT_TLH;
    if (!createdHere) {
        return null;
    }
    if (!tlh.canHandle()) {
        return null;
    }
    // linkYoutubeExtraction(tlh) is the alternative extraction back-end.
    return clipConverterExtraction(tlh).build();
}
// Empty private placeholder; nothing in this class calls it.
private void xzz(){}
/**
 * Legacy title grabber. Reads the page title found between {@code <title>}
 * and {@code " - YouTube"}, appends {@code .webm} when the stream's content
 * type is {@code video/webm}, logs the result and then always fails, because
 * there is no longer a field to store the file name in.
 *
 * <p>An earlier, now removed, variant read the {@code <meta name="title">}
 * tag instead and stripped characters that are not valid in file names
 * ({@code < > : / \ | ? * " %}).
 *
 * @param text the HTML of the video page.
 * @param url the stream url whose content type is probed.
 * @throws IllegalStateException always, after the title has been logged.
 */
private void grabTitle(String text, String url) {
    final String pageTitle = StringUtils.stringBetweenTwoStrings(text, "<title>", " - YouTube");
    final String contentType = NHttpClientUtils.getContentType(url, NHttpClient.getNewInstance());
    final String suffix = contentType.equals("video/webm") ? ".webm" : "";
    LOGGER.log(Level.INFO, "Title: " + pageTitle + suffix);
    // The complete file name used to be stored in a field at this point.
    throw new IllegalStateException("Legacy code");
}
/**
 * Extracts the stream urls from the {@code url_encoded_fmt_stream_map} entry
 * of a YouTube page.
 *
 * <p>Legacy code: after collecting the urls it calls
 * {@link #grabTitle(String, String)}, which currently always throws
 * {@link IllegalStateException}, so this method does not return normally.
 * When no url could be extracted, {@code finalUrls.get(0)} throws
 * {@link IndexOutOfBoundsException} instead.
 *
 * @param text the text.
 * @return a list of the urls.
 */
private ArrayList<String> findTextData(String text){
    ArrayList<String> finalUrls = new ArrayList<String>();
    try {
        String encodedUrl = StringUtils.stringBetweenTwoStrings(text, "\"url_encoded_fmt_stream_map\": \"", "\"");
        LOGGER.log(Level.INFO, "encoded url: {0}", encodedUrl);
        // Cut everything from a double quote to the end of its line. Two passes are
        // kept on purpose: '.' does not cross line terminators, so the second pass
        // can still remove a quote sitting on a following line.
        encodedUrl = encodedUrl.replaceFirst("\".*", "");
        encodedUrl = encodedUrl.replaceFirst("\".*", "");
        // Undo one level of percent-encoding of '%', the JSON escape of '&' and any
        // remaining backslashes.
        encodedUrl = encodedUrl.replace("%25","%");
        encodedUrl = encodedUrl.replace("\\u0026", "&");
        encodedUrl = encodedUrl.replace("\\", "");
        String[] urls = encodedUrl.split(",");
        for(int i = 0; i< urls.length; i++){
            // [0] = parameters preceding the url, [1] = the url itself (without "url=http").
            String[] fmtUrlPair = urls[i].split("url=http", 2);
            // Move the leading parameters behind the url.
            fmtUrlPair[1] = "url=http"+fmtUrlPair[1]+"&"+fmtUrlPair[0];
            // Read the itag number: one digit, plus one more for each of the
            // "two digits" / "three digits" patterns that match.
            fmtUrlPair[0] = fmtUrlPair[1].substring(fmtUrlPair[1].indexOf("itag=")+5, fmtUrlPair[1].indexOf("itag=")+5+1+(fmtUrlPair[1].matches(".*itag=[0-9]{2}.*")?1:0)+(fmtUrlPair[1].matches(".*itag=[0-9]{3}.*")?1:0));
            fmtUrlPair[1] = fmtUrlPair[1].replaceFirst("url=http%3A%2F%2F", "http://");
            // Decode the reserved characters and rename the signature parameter.
            fmtUrlPair[1] = fmtUrlPair[1].replaceAll("%3F","?").replaceAll("%2F", "/").replaceAll("%3B",";").replaceAll("%2C",",").replaceAll("%3D","=").replaceAll("%26", "&").replaceAll("%252C", "%2C").replaceAll("sig=", "signature=").replaceAll("&s=", "&signature=").replaceAll("\\?s=", "?signature=");
            // remove duplicated &itag=xy
            if (StringUtils.countString(fmtUrlPair[1], "itag=") == 2){
                fmtUrlPair[1] = fmtUrlPair[1].replaceFirst("itag=[0-9]{1,3}", "");
            }
            LOGGER.log(Level.INFO, "fmtUrlPair[1]: {0}\nfmtUrlPair[0]: {1}", new Object[]{fmtUrlPair[1], fmtUrlPair[0]});
            finalUrls.add(fmtUrlPair[1]);
        }
    } catch (Exception ex) {
        // Best effort: keep whatever was collected so far, but record the failure
        // in the regular log instead of on stderr.
        LOGGER.log(Level.INFO, "error", ex);
    }
    printUrls(finalUrls);
    //Setting filename
    grabTitle(text, finalUrls.get(0)); //the first quality
    return finalUrls;
}
/**
 * Resolves the download links of a video through the
 * http://www.linkyoutube.com service, starting with a retry counter of zero.
 *
 * @param tlh the trial handler holding the youtube url.
 * @return A BasicLinkHandler.Builder with all the urls found for this video.
 * @throws Exception if the extraction keeps failing after the allowed retries.
 */
private BasicLinkHandler.Builder linkYoutubeExtraction(TrialLinkHandler tlh) throws Exception {
    final int firstAttempt = 0;
    return linkYoutubeExtraction(tlh, firstAttempt);
}
/**
 * Resolves the download links of a video through the
 * http://www.linkyoutube.com service.
 *
 * <p>Every anchor below {@code #download_links} whose target is not
 * {@code "#"} and whose size can be determined becomes one file of the
 * returned builder. The first duration found in a link is also assigned to
 * the files whose own link carried none.
 *
 * @param tlh the trial handler holding the youtube url; must be a YT_TLH
 *            for the retry limit to be read.
 * @param retryCount number of retries already performed.
 * @return A BasicLinkHandler.Builder with all the urls found for this video.
 * @throws Exception the last failure, once {@code retryCount} exceeds the
 *         retry limit of the trial handler.
 */
private BasicLinkHandler.Builder linkYoutubeExtraction(TrialLinkHandler tlh, int retryCount) throws Exception {
    String url = tlh.getReferenceLinkString();
    BasicLinkHandler.Builder linkHandlerBuilder = BasicLinkHandler.Builder.create();
    try {
        DefaultHttpClient httpClient = NHttpClient.getNewInstance();
        String requestUrl = "http://www.linkyoutube.com/watch/index.php?video=" + URLEncoder.encode(url, "UTF-8");
        final String responseString = NHttpClientUtils.getData(requestUrl, httpClient);
        //Set the group name as the name of the video
        String nameOfVideo = getVideoName(url);
        linkHandlerBuilder.setGroupName(nameOfVideo);
        long c_duration = -1; // first duration seen, used as fallback below
        Document doc = Jsoup.parse(responseString);
        Elements elements = doc.select("#download_links a");
        for (Element element : elements) {
            String singleUrl = element.attr("href");
            if (singleUrl.equals("#")) {
                continue; // placeholder anchor, not a download link
            }
            String fileName = element.text();
            long length = NHttpClientUtils.calculateLength(singleUrl, httpClient);
            singleUrl = Utils.normalize(singleUrl);
            LOGGER.log(Level.INFO,"Normalized URL: " + singleUrl);
            if (length == 0) {
                // second chance with the normalized address
                length = NHttpClientUtils.calculateLength(singleUrl, httpClient);
            }
            if (length <= 0) {
                continue; // skip this url
            }
            BasicOnlineFile.Builder fileBuilder = linkHandlerBuilder
                    .createFile();
            try { // finding video/audio length
                String dur = StringUtils.stringBetweenTwoStrings(singleUrl, "dur=", "&");
                // long cast: an int cast would clamp the millisecond value to the int range
                long duration = (long) (Double.parseDouble(dur) * 1000);
                if (c_duration < 0) {
                    c_duration = duration;
                }
                fileBuilder.putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, duration);
                LOGGER.log(Level.INFO,"dur="+dur);
            } catch (Exception ignored) {
                // A link without a usable "dur=" value simply has no duration. Catching
                // only NumberFormatException (as before) let any other failure of the
                // lookup abort and retry the whole extraction; the clipconverter variant
                // already ignores every exception here.
            }
            try { // finding the quality short name
                String type = fileName.substring(fileName.indexOf("(")+1);
                type = type.substring(0, type.indexOf(")"));
                fileBuilder.putStringPropertyValue(PropertyProvider.StringProperty.VARIANT_DESCRIPTION, type);
                LOGGER.log(Level.INFO,"type="+type);
            } catch (Exception a) {
                LOGGER.log(Level.INFO, "error", a);
            }
            fileName = nameOfVideo + " " + fileName;
            fileBuilder.setName(fileName)
                    .setUrl(singleUrl)
                    .setSize(length).next();
        }
        // Give files without their own duration the first duration that was found.
        for (OnlineFile of : linkHandlerBuilder.getFiles()) {
            long dur = of.getPropertyProvider().getLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS);
            if (dur < 0 && c_duration > 0 &&
                    of.getPropertyProvider() instanceof BasicPropertyProvider) {
                ((BasicPropertyProvider) of.getPropertyProvider())
                        .putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, c_duration);
            }
        }
    } catch (Exception ex) {
        int retryLimit = ((YT_TLH) tlh).retryLimit;
        LOGGER.log(Level.INFO, "retry no. = " + retryCount, ex);
        if (retryCount > retryLimit) {
            throw ex;
        }
        // Retried from the catch block, so a failure of the retry propagates to the caller.
        return linkYoutubeExtraction(tlh, retryCount + 1);
    }
    return linkHandlerBuilder;
}
/**
 * Downloads the video page and reads its title from the
 * {@code <meta name="title">} tag.
 *
 * @param url The url of the video.
 * @return the {@code content} attribute of the title meta tag.
 * @throws Exception if the page cannot be downloaded.
 */
private String getVideoName(String url) throws Exception {
    final DefaultHttpClient client = NHttpClient.getNewInstance();
    final String html = NHttpClientUtils.getData(url, client);
    final Elements titleMeta = Jsoup.parse(html).select("meta[name=title]");
    return titleMeta.attr("content");
}
/**
 * Resolves the download links of a video through the clipconverter.cc
 * service, starting with a retry counter of zero.
 *
 * @param tlh the trial handler holding the youtube url.
 * @return A BasicLinkHandler.Builder with all the urls found for this video.
 * @throws Exception if the extraction keeps failing after the allowed retries.
 */
private BasicLinkHandler.Builder clipConverterExtraction(TrialLinkHandler tlh) throws Exception {
    final int firstAttempt = 0;
    return clipConverterExtraction(tlh, firstAttempt);
}
/**
 * Resolves the download links of a video through the clipconverter.cc
 * service, retrying on failure.
 *
 * <p>The attempts run in a loop rather than by recursion from inside the
 * {@code try} block: with the recursive form a failure of the nested call was
 * caught again by every outer frame, multiplying the retries, and a wrongly
 * answered captcha increased the counter without ever comparing it with the
 * limit.
 *
 * <p>A correctly answered captcha repeats the request without consuming a
 * retry. A failed attempt, or a captcha that was not solved, consumes one.
 *
 * @param tlh the trial handler holding the youtube url; must be a YT_TLH
 *            for the retry limit to be read.
 * @param retryCount number of retries already performed.
 * @return A BasicLinkHandler.Builder with all the urls found for this video.
 * @throws Exception the last failure once the retry counter exceeds the retry
 *         limit of the trial handler; an {@link IllegalStateException} when
 *         the last attempt ended with an unsolved captcha.
 */
private BasicLinkHandler.Builder clipConverterExtraction(TrialLinkHandler tlh, int retryCount) throws Exception {
    final String url = tlh.getReferenceLinkString();
    int attempt = retryCount;
    while (true) {
        Exception failure = null;
        try {
            final DefaultHttpClient httpClient = NHttpClient.getNewInstance();
            final JSONObject response = queryClipConverter(url, httpClient);
            if (!response.has("redirect")) {
                return buildFromClipConverterResponse(response, httpClient);
            }
            // The service asks for a captcha first.
            if (handleCaptcha(response)) {
                continue; // solved: ask again, no retry consumed
            }
        } catch (Exception ex) {
            failure = ex;
        }

        if (failure != null) {
            LOGGER.log(Level.INFO, "retry no. = " + attempt, failure);
        } else {
            LOGGER.log(Level.INFO, "captcha not solved, retry no. = " + attempt);
        }
        final int retryLimit = ((YT_TLH) tlh).retryLimit;
        if (attempt > retryLimit) {
            if (failure != null) {
                throw failure;
            }
            throw new IllegalStateException("Captcha could not be solved for " + url);
        }
        attempt++;
    }
}

/** Posts the video url to clipconverter.cc's check.php and parses the JSON answer. */
private JSONObject queryClipConverter(String url, DefaultHttpClient httpClient) throws Exception {
    HttpPost httpPost = new HttpPost("http://www.clipconverter.cc/check.php");
    List<NameValuePair> formparams = new ArrayList<NameValuePair>();
    formparams.add(new BasicNameValuePair("mediaurl", url));
    httpPost.setEntity(new UrlEncodedFormEntity(formparams, "UTF-8"));
    HttpResponse httpResponse = httpClient.execute(httpPost);
    final String responseString = EntityUtils.toString(httpResponse.getEntity());
    return new JSONObject(responseString);
}

/** Turns a clipconverter.cc answer (its "url" array and "filename") into a builder holding one file per usable link. */
private BasicLinkHandler.Builder buildFromClipConverterResponse(JSONObject response, DefaultHttpClient httpClient) throws Exception {
    // A fresh builder per attempt, so a failed attempt leaves no half-filled state behind.
    BasicLinkHandler.Builder linkHandlerBuilder = BasicLinkHandler.Builder.create();
    JSONArray jSonArray = response.getJSONArray("url");
    LOGGER.log(Level.INFO,"urls: " + jSonArray);
    //Set the group name as the name of the video
    String nameOfVideo = response.getString("filename");
    linkHandlerBuilder.setGroupName(nameOfVideo);
    // Note kept from the original author: the file name must not be stored in a
    // field of the provider, because the same provider object handles all
    // Youtube links, in different threads (neembuu.release1.ui.actions.LinkActionsImpl,
    // reAddAction(boolean anotherThread)).
    long c_duration = -1; // first duration seen, used as fallback below
    for (int i = 0; i < jSonArray.length(); i++) {
        JSONObject variant = jSonArray.getJSONObject(i);
        String fileName = variant.getString("text");
        LOGGER.log(Level.INFO,"Filename: " + fileName);
        final String extension = variant.getString("filetype").toLowerCase();
        // "text" is markup; keep only what stands between '>' and '<'.
        fileName = StringUtils.stringBetweenTwoStrings(fileName, ">", "<");
        fileName = fileName + "." + extension;
        String singleUrl = variant.getString("url");
        LOGGER.log(Level.INFO,"Before normalization URL: " + singleUrl);
        // The raw url may carry a "size=" hint, which is looked up before normalization.
        long length = tryFindingSize(singleUrl);
        singleUrl = Utils.normalize(singleUrl);
        LOGGER.log(Level.INFO,"Normalized URL: " + singleUrl);
        if (length == 0) {
            length = NHttpClientUtils.calculateLength(singleUrl, httpClient);
        }
        if (length <= 0) {
            continue; // skip this url
        }
        BasicOnlineFile.Builder fileBuilder = linkHandlerBuilder
                .createFile();
        try { // finding video/audio length
            String dur = StringUtils.stringBetweenTwoStrings(singleUrl, "dur=", "&");
            // long cast: an int cast would clamp the millisecond value to the int range
            long duration = (long) (Double.parseDouble(dur) * 1000);
            if (c_duration < 0) {
                c_duration = duration;
            }
            fileBuilder.putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, duration);
            LOGGER.log(Level.INFO,"dur="+dur);
        } catch (Exception ignored) {
            // a link without a usable "dur=" value simply has no duration
        }
        try { // finding the quality short name
            String type = fileName.substring(fileName.indexOf("(")+1);
            type = type.substring(0, type.indexOf(")"));
            fileBuilder.putStringPropertyValue(PropertyProvider.StringProperty.VARIANT_DESCRIPTION, type);
            if (type.contains("480") || type.contains("1080")) {
                fileBuilder.putBooleanPropertyValue(PropertyProvider.BooleanProperty.UNSTABLE_VARIANT, true);
            }
            LOGGER.log(Level.INFO,"type="+type);
        } catch (Exception a) {
            LOGGER.log(Level.INFO, "error", a);
        }
        fileName = nameOfVideo + " " + fileName;
        fileBuilder.setName(fileName)
                .setUrl(singleUrl)
                .setSize(length).next();
    }
    // Give files without their own duration the first duration that was found.
    for (OnlineFile of : linkHandlerBuilder.getFiles()) {
        long dur = of.getPropertyProvider().getLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS);
        if (dur < 0 && c_duration > 0 &&
                of.getPropertyProvider() instanceof BasicPropertyProvider) {
            ((BasicPropertyProvider) of.getPropertyProvider())
                    .putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, c_duration);
        }
    }
    return linkHandlerBuilder;
}
/**
 * Reads the numeric value of the first {@code size=} occurrence in a raw url.
 * The value ends at the next {@code '#'} or {@code '&'}, or at the end of the
 * string.
 *
 * <p>A missing {@code size=} is an expected situation (the caller then
 * measures the length itself), so it is reported as 0 without any stack
 * trace. Previously a url without {@code size=} was parsed from its fifth
 * character on, which could return an unrelated number.
 *
 * @param rawURL the url as delivered by the service, may be {@code null}.
 * @return the parsed size, or 0 when no parsable size is present.
 */
private long tryFindingSize(String rawURL) {
    if (rawURL == null) {
        return 0;
    }
    final String key = "size=";
    final int keyAt = rawURL.indexOf(key);
    if (keyAt < 0) {
        return 0;
    }
    final int start = keyAt + key.length();
    int end = rawURL.length();
    for (char terminator : new char[] {'#', '&'}) {
        final int at = rawURL.indexOf(terminator, start);
        if (at >= 0 && at < end) {
            end = at;
        }
    }
    try {
        return Long.parseLong(rawURL.substring(start, end));
    } catch (NumberFormatException notANumber) {
        // size present but not numeric: treat as unknown
        return 0;
    }
}
/**
 * Writes every url to the log, framed by a start and an end marker
 * (debug purpose).
 *
 * @param urls the urls to print, one log entry each.
 */
private void printUrls(ArrayList<String> urls) {
    LOGGER.log(Level.INFO,"\n***** START PRINTING YOUTUBE URLS *****");
    for (int index = 0; index < urls.size(); index++) {
        final String current = urls.get(index);
        LOGGER.log(Level.INFO, current);
    }
    LOGGER.log(Level.INFO,"***** END PRINTING YOUTUBE URLS *****\n");
}
/**
 * Answers the captcha clipconverter.cc asks for: obtains a challenge, gets
 * the captcha string from the Captcha helper and posts both to the redirect
 * address contained in the service's answer.
 *
 * @param jSonObject The JSONObject with the redirect url.
 * @return true when the service's reply does not contain "Invalid captcha!";
 *         false when it does, or when anything goes wrong on the way (the
 *         problem is logged).
 */
private boolean handleCaptcha(JSONObject jSonObject) {
    try {
        LOGGER.log(Level.INFO,"Handling captcha.");
        final String target = "http://www.clipconverter.cc" + jSonObject.getString("redirect");
        final DefaultHttpClient client = NHttpClient.getNewInstance();

        //Get the captcha code
        final Captcha captcha = new Captcha();
        captcha.setFormTitle("Captcha for Youtube.com");
        if (captcha.findCCaptchaUrlFromK(K_CHALLENGE_URL + K_CHALLENGE_CODE) == null) {
            // handled by the catch below, like any other failure
            throw new Exception("Captcha generic error");
        }
        captcha.findCaptchaImageURL();
        final String answer = captcha.getCaptchaString();

        final HttpPost post = new HttpPost(target);
        final List<NameValuePair> fields = new ArrayList<>();
        fields.add(new BasicNameValuePair("recaptcha_challenge_field", captcha.getCCaptchaUrl()));
        fields.add(new BasicNameValuePair("recaptcha_response_field", answer));
        post.setEntity(new UrlEncodedFormEntity(fields, "UTF-8"));

        final HttpResponse reply = client.execute(post);
        final String replyText = EntityUtils.toString(reply.getEntity());
        final boolean rejected = replyText.contains("Invalid captcha!");
        return !rejected;
    } catch (Exception ex) {
        // covers JSON problems as well as network and captcha failures
        LOGGER.log(Level.INFO,"error",ex);
        return false;
    }
}
/**
 * Trial handler for a single YouTube link. It stores the normalized link and
 * the retry limit the extraction methods of the enclosing provider read.
 */
static final class YT_TLH implements TrialLinkHandler {
    /**
     * Compiled once instead of on every canHandle() call. The expression is
     * the same literal as REG_EXP of the enclosing class.
     * Inspired by: <a href="http://stackoverflow.com/questions/3717115/regular-expression-for-youtube-links">Stack Overflow</a>
     */
    private static final java.util.regex.Pattern LINK_PATTERN = java.util.regex.Pattern.compile(
            "https?://(www.youtube.com/watch\\?(feature=player_embedded&)?v=|youtu.be/)([\\w\\-\\_]*)(&(amp;)?[\\w\\?=((\\w)|(\\W))]*)?");

    private final String url;
    /** Upper bound the extraction methods compare their retry counter with. */
    private int retryLimit = 5;

    public void setRetryLimit(int retryLimit) {
        this.retryLimit = retryLimit;
    }

    YT_TLH(String url) {
        // normalize the url here
        this.url = Utils.normalize(url);
    }

    /**
     * Checks the whole link against the YouTube link pattern and logs the
     * outcome.
     *
     * @return true when the link is a supported YouTube address.
     */
    @Override
    public boolean canHandle() {
        boolean result = LINK_PATTERN.matcher(url).matches();
        LOGGER.log(Level.INFO, "Youtube can handle {0} ? {1}", new Object[]{url,result});
        return result;
    }

    /** @return {@code null} when the link can be handled, an error text otherwise. */
    @Override
    public String getErrorMessage() {
        return canHandle() ? null : "Cannot handle";
    }

    @Override
    public boolean containsMultipleLinks() {
        return true;
    }

    /** @return the link itself, used as display name until the real name is known. */
    @Override
    public String tempDisplayName() {
        return url;
    }

    @Override
    public String getReferenceLinkString() {
        return url;
    }
}
}