/* * Copyright (C) 2014 Davide Pastore * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package neembuu.release1.externalImpl.linkhandler; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import neembuu.release1.api.file.OnlineFile; import neembuu.release1.api.file.PropertyProvider; import neembuu.release1.api.linkhandler.LinkHandler; import neembuu.release1.api.linkhandler.LinkHandlerProvider; import neembuu.release1.api.linkhandler.TrialLinkHandler; import neembuu.release1.captcha.Captcha; import neembuu.release1.defaultImpl.file.BasicOnlineFile; import neembuu.release1.defaultImpl.file.BasicPropertyProvider; import neembuu.release1.httpclient.NHttpClient; import neembuu.release1.httpclient.utils.NHttpClientUtils; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.message.BasicNameValuePair; import org.apache.http.util.EntityUtils; import davidepastore.StringUtils; import java.net.URLEncoder; import neembuu.release1.api.log.LoggerUtil; import neembuu.release1.defaultImpl.external.ELHProvider; import neembuu.release1.defaultImpl.linkhandler.BasicLinkHandler; import neembuu.release1.defaultImpl.linkhandler.Utils; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; /** * * @author davidepastore */ @ELHProvider(checkingRegex = YoutubeLinkHandlerProvider.REG_EXP) public class YoutubeLinkHandlerProvider implements LinkHandlerProvider { private static final Logger LOGGER = LoggerUtil.getLogger(YoutubeLinkHandlerProvider.class.getName()); // all logs go into an html file private final String K_CHALLENGE_URL = "https://www.google.com/recaptcha/api/challenge?k="; private final String K_CHALLENGE_CODE = "6LcVessSAAAAAH73irTtpZYKknjeBvN3nuUzJ2G3"; static final String REG_EXP = "https?://(www.youtube.com/watch\\?(feature=player_embedded&)?v=|youtu.be/)([\\w\\-\\_]*)(&(amp;)?[\\w\\?=((\\w)|(\\W))]*)?"; @Override public TrialLinkHandler tryHandling(final String url) { return new YT_TLH(url); } @Override public LinkHandler getLinkHandler(TrialLinkHandler tlh) throws Exception { if( !(tlh instanceof YT_TLH) || !tlh.canHandle()){return null;} BasicLinkHandler.Builder linkHandlerBuilder = clipConverterExtraction(tlh); //linkYoutubeExtraction(tlh); return linkHandlerBuilder.build(); } private void xzz(){} /** * Grab the title. * @param text * @param url */ private void grabTitle(String text, String url) { String grabbedTitle; /*= text.replaceFirst("(.*)<meta name=\"title\" content=", "").trim(); // change html characters to their UTF8 counterpart grabbedTitle = (grabbedTitle); grabbedTitle = grabbedTitle.replaceFirst("^\"", "").replaceFirst("\">$", ""); // http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247%28v=vs.85%29.aspx // grabbedTitle = grabbedTitle.replaceAll("<", ""); grabbedTitle = grabbedTitle.replaceAll(">", ""); grabbedTitle = grabbedTitle.replaceAll(":", ""); grabbedTitle = grabbedTitle.replaceAll("/", " "); grabbedTitle = grabbedTitle.replaceAll("\\\\", " "); grabbedTitle = grabbedTitle.replaceAll("|", ""); grabbedTitle = grabbedTitle.replaceAll("\\?", ""); grabbedTitle = grabbedTitle.replaceAll("\\*", ""); grabbedTitle = grabbedTitle.replaceAll("/", " "); grabbedTitle = grabbedTitle.replaceAll("\"", " "); grabbedTitle = grabbedTitle.replaceAll("%", ""); */ grabbedTitle = StringUtils.stringBetweenTwoStrings(text, "<title>", " - YouTube"); String contentType = NHttpClientUtils.getContentType(url,NHttpClient.getNewInstance()); if(contentType.equals("video/webm")){ grabbedTitle += ".webm"; } LOGGER.log(Level.INFO,"Title: " + grabbedTitle); throw new IllegalStateException("Legacy code"); //this.filename = grabbedTitle; // complete file name without path } /** * Find text data. * @param text the text. * @return a list of the urls. */ private ArrayList<String> findTextData(String text){ ArrayList<String> finalUrls = new ArrayList<String>(); try { String encodedUrl = StringUtils.stringBetweenTwoStrings(text, "\"url_encoded_fmt_stream_map\": \"", "\""); LOGGER.log(Level.INFO, "encoded url: {0}", encodedUrl); LOGGER.log(Level.INFO,"encoded url: " + encodedUrl); encodedUrl = encodedUrl.replaceFirst("\".*", ""); encodedUrl = encodedUrl.replaceFirst("\".*", ""); encodedUrl = encodedUrl.replace("%25","%"); encodedUrl = encodedUrl.replace("\\u0026", "&"); encodedUrl = encodedUrl.replace("\\", ""); String[] urls = encodedUrl.split(","); for(int i = 0; i< urls.length; i++){ String[] fmtUrlPair = urls[i].split("url=http", 2); fmtUrlPair[1] = "url=http"+fmtUrlPair[1]+"&"+fmtUrlPair[0]; fmtUrlPair[0] = fmtUrlPair[1].substring(fmtUrlPair[1].indexOf("itag=")+5, fmtUrlPair[1].indexOf("itag=")+5+1+(fmtUrlPair[1].matches(".*itag=[0-9]{2}.*")?1:0)+(fmtUrlPair[1].matches(".*itag=[0-9]{3}.*")?1:0)); fmtUrlPair[1] = fmtUrlPair[1].replaceFirst("url=http%3A%2F%2F", "http://"); fmtUrlPair[1] = fmtUrlPair[1].replaceAll("%3F","?").replaceAll("%2F", "/").replaceAll("%3B",";").replaceAll("%2C",",").replaceAll("%3D","=").replaceAll("%26", "&").replaceAll("%252C", "%2C").replaceAll("sig=", "signature=").replaceAll("&s=", "&signature=").replaceAll("\\?s=", "?signature="); // remove duplicated &itag=xy if (StringUtils.countString(fmtUrlPair[1], "itag=") == 2){ // LOGGER.log(Level.INFO,"Deleting itag!"); fmtUrlPair[1] = fmtUrlPair[1].replaceFirst("itag=[0-9]{1,3}", ""); } //LOGGER.log(Level.INFO,"url[" + i + "]: " + urls[i]); LOGGER.log(Level.INFO, "fmtUrlPair[1]: {0}\nfmtUrlPair[0]: {1}", new Object[]{fmtUrlPair[1], fmtUrlPair[0]}); // LOGGER.log(Level.INFO,"fmtUrlPair[1]: "+ fmtUrlPair[1] +"\nfmtUrlPair[0]: " + fmtUrlPair[0]); finalUrls.add(fmtUrlPair[1]); } } catch (Exception ex) { ex.printStackTrace(); } printUrls(finalUrls); //Setting filename grabTitle(text, finalUrls.get(0)); //the first quality return finalUrls; } /** * Use http://www.linkyoutube.com service to get the urls. * @param url the youtube url. * @return A BasicLinkHandler.Builder with all the urls found for this video. */ private BasicLinkHandler.Builder linkYoutubeExtraction(TrialLinkHandler tlh)throws Exception{ return linkYoutubeExtraction(tlh, 0); } private BasicLinkHandler.Builder linkYoutubeExtraction(TrialLinkHandler tlh, int retryCount)throws Exception{ String url = tlh.getReferenceLinkString(); BasicLinkHandler.Builder linkHandlerBuilder = BasicLinkHandler.Builder.create(); try { DefaultHttpClient httpClient = NHttpClient.getNewInstance(); String requestUrl = "http://www.linkyoutube.com/watch/index.php?video=" + URLEncoder.encode(url, "UTF-8"); final String responseString = NHttpClientUtils.getData(requestUrl, httpClient); //Set the group name as the name of the video String nameOfVideo = getVideoName(url); String fileName = "text"; linkHandlerBuilder.setGroupName(nameOfVideo); long c_duration = -1; Document doc = Jsoup.parse(responseString); Elements elements = doc.select("#download_links a"); for (Element element : elements) { String singleUrl = element.attr("href"); fileName = element.text(); if(!singleUrl.equals("#")){ long length = NHttpClientUtils.calculateLength(singleUrl, httpClient); singleUrl = Utils.normalize(singleUrl); LOGGER.log(Level.INFO,"Normalized URL: " + singleUrl); if(length==0){ length = NHttpClientUtils.calculateLength(singleUrl,httpClient); } //LOGGER.log(Level.INFO,"Length: " + length); if(length <= 0){ continue; /*skip this url*/ } BasicOnlineFile.Builder fileBuilder = linkHandlerBuilder .createFile(); try{ // finding video/audio length String dur = StringUtils.stringBetweenTwoStrings(singleUrl, "dur=", "&"); long duration = (int)(Double.parseDouble(dur)*1000); if(c_duration < 0 ){ c_duration = duration; } fileBuilder.putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, duration); LOGGER.log(Level.INFO,"dur="+dur); }catch(NumberFormatException a){ // ignore } try{ // finding the quality short name String type = fileName.substring(fileName.indexOf("(")+1); type = type.substring(0, type.indexOf(")")); fileBuilder.putStringPropertyValue(PropertyProvider.StringProperty.VARIANT_DESCRIPTION, type); LOGGER.log(Level.INFO,"type="+type); }catch(Exception a){ a.printStackTrace(); } fileName = nameOfVideo + " " +fileName; fileBuilder.setName(fileName) .setUrl(singleUrl) .setSize(length).next(); } } for(OnlineFile of : linkHandlerBuilder.getFiles()){ long dur = of.getPropertyProvider().getLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS); if(dur < 0 && c_duration > 0 && of.getPropertyProvider() instanceof BasicPropertyProvider){ ((BasicPropertyProvider)of.getPropertyProvider()) .putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS,c_duration); } } } catch (Exception ex) { int retryLimit = ((YT_TLH)tlh).retryLimit; ex.printStackTrace(); LOGGER.log(Level.INFO,"retry no. = " + retryCount); if(retryCount > retryLimit) throw ex; return linkYoutubeExtraction(tlh, retryCount + 1); } return linkHandlerBuilder; } /** * Returns the name of the video. * @param url The url of the video. * @return Returns the title of the video. */ private String getVideoName(String url) throws Exception{ final String responseString = NHttpClientUtils.getData(url, NHttpClient.getNewInstance()); Document doc = Jsoup.parse(responseString); return doc.select("meta[name=title]").attr("content"); } /** * Use cliconverter.cc service to get the urls. * @param url the youtube url. * @return A BasicLinkHandler.Builder with all the urls found for this video. */ private BasicLinkHandler.Builder clipConverterExtraction(TrialLinkHandler tlh)throws Exception{ return clipConverterExtraction(tlh, 0); } private BasicLinkHandler.Builder clipConverterExtraction(TrialLinkHandler tlh, int retryCount)throws Exception{ String url = tlh.getReferenceLinkString(); BasicLinkHandler.Builder linkHandlerBuilder = BasicLinkHandler.Builder.create(); try { DefaultHttpClient httpClient = NHttpClient.getNewInstance(); HttpPost httpPost = new HttpPost("http://www.clipconverter.cc/check.php"); List<NameValuePair> formparams = new ArrayList<NameValuePair>(); formparams.add(new BasicNameValuePair("mediaurl", url)); UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8"); httpPost.setEntity(entity); HttpResponse httpResponse = httpClient.execute(httpPost); final String responseString = EntityUtils.toString(httpResponse.getEntity()); JSONObject jSonObject = new JSONObject(responseString); //LOGGER.log(Level.INFO,jSonObject); if(jSonObject.has("redirect")){ int count = retryCount; //If captcha is incorrect, add a count if(!handleCaptcha(jSonObject)){ count++; } return clipConverterExtraction(tlh, count); } JSONArray jSonArray = jSonObject.getJSONArray("url"); LOGGER.log(Level.INFO,"urls: " + jSonArray); //Set the group name as the name of the video String nameOfVideo = jSonObject.getString("filename"); //normalize name of video //nameOfVideo = jpfm.util.UniversallyValidFileName.makeUniversallyValidFileName(nameOfVideo); linkHandlerBuilder.setGroupName(nameOfVideo); // Davide you cannot create a this.fileName field // this.filename = jSonObject.getString("filename") + ".mp4"; // The same YoutubeLinkHandler object will be used for hanlding // all Youtube links. We "do" it in different threads in // neembuu.release1.ui.actions.LinkActionsImpl line 128 // void reAddAction(boolean anotherThread) long c_duration = -1; for (int i = 0; i < jSonArray.length(); i++) { jSonObject = (JSONObject) jSonArray.get(i); String fileName = jSonObject.getString("text"); LOGGER.log(Level.INFO,"Filename: " + fileName); final String extension = jSonObject.getString("filetype").toLowerCase(); fileName = StringUtils.stringBetweenTwoStrings(fileName, ">", "<"); fileName = fileName + "." + extension; String singleUrl = jSonObject.getString("url"); //singleUrl = singleUrl.substring(0, singleUrl.indexOf("#")); //did some changes, but this doesn't help :( LOGGER.log(Level.INFO,"Before normalization URL: " + singleUrl); long length = tryFindingSize(singleUrl); singleUrl = Utils.normalize(singleUrl); LOGGER.log(Level.INFO,"Normalized URL: " + singleUrl); if(length==0){ length = NHttpClientUtils.calculateLength(singleUrl,httpClient); } //LOGGER.log(Level.INFO,"Length: " + length); if(length <= 0){ continue; /*skip this url*/ } BasicOnlineFile.Builder fileBuilder = linkHandlerBuilder .createFile(); try{ // finding video/audio length String dur = StringUtils.stringBetweenTwoStrings(singleUrl, "dur=", "&"); long duration = (int)(Double.parseDouble(dur)*1000); if(c_duration < 0 ){ c_duration = duration; } fileBuilder.putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, duration); LOGGER.log(Level.INFO,"dur="+dur); }catch(Exception a){ // ignore } try{ // finding the quality short name String type = fileName.substring(fileName.indexOf("(")+1); type = type.substring(0,type.indexOf(")")); fileBuilder.putStringPropertyValue(PropertyProvider.StringProperty.VARIANT_DESCRIPTION, type); if(type.contains("480")||type.contains("1080")){ fileBuilder.putBooleanPropertyValue(PropertyProvider.BooleanProperty.UNSTABLE_VARIANT, true); } LOGGER.log(Level.INFO,"type="+type); }catch(Exception a){ a.printStackTrace(); } fileName = nameOfVideo + " " +fileName; fileBuilder.setName(fileName) .setUrl(singleUrl) .setSize(length).next(); } for(OnlineFile of : linkHandlerBuilder.getFiles()){ long dur = of.getPropertyProvider().getLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS); if(dur < 0 && c_duration > 0 && of.getPropertyProvider() instanceof BasicPropertyProvider){ ((BasicPropertyProvider)of.getPropertyProvider()) .putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS,c_duration); } } } catch (Exception ex) { int retryLimit = ((YT_TLH)tlh).retryLimit; ex.printStackTrace(); LOGGER.log(Level.INFO,"retry no. = "+retryCount); if(retryCount > retryLimit) throw ex; return clipConverterExtraction(tlh,retryCount+1); } return linkHandlerBuilder; } private long tryFindingSize(String rawURL){ try{ String s = "size="; String sz = rawURL.substring(rawURL.indexOf(s)+s.length()); if(sz.contains("#")){ sz = sz.substring(0,sz.indexOf("#")); } long size = Long.parseLong(sz); return size; }catch(Exception a){ /*size not found ignore*/ a.printStackTrace(); } return 0; } /** * Print all the url (debug purpose). * @param urls ArrayList<String> with all the urls. */ private void printUrls(ArrayList<String> urls) { LOGGER.log(Level.INFO,"\n***** START PRINTING YOUTUBE URLS *****"); for (String url : urls) { LOGGER.log(Level.INFO,url); } LOGGER.log(Level.INFO,"***** END PRINTING YOUTUBE URLS *****\n"); } /** * Handle the captcha string. * @param jSonObject The JSONObject with the redirect url. * @return Returns true if the captcha is correct, false otherwise. */ private boolean handleCaptcha(JSONObject jSonObject) { try { LOGGER.log(Level.INFO,"Handling captcha."); final String redirect = jSonObject.getString("redirect"); final String url = "http://www.clipconverter.cc" + redirect; final DefaultHttpClient httpClient = NHttpClient.getNewInstance(); //Get the captcha code Captcha captcha = new Captcha(); captcha.setFormTitle("Captcha for Youtube.com"); if (captcha.findCCaptchaUrlFromK(K_CHALLENGE_URL + K_CHALLENGE_CODE) != null) { captcha.findCaptchaImageURL(); final String captchaString = captcha.getCaptchaString(); HttpPost httpPost = new HttpPost(url); List<NameValuePair> formparams = new ArrayList<>(); formparams.add(new BasicNameValuePair("recaptcha_challenge_field", captcha.getCCaptchaUrl())); formparams.add(new BasicNameValuePair("recaptcha_response_field", captchaString)); UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8"); httpPost.setEntity(entity); HttpResponse httpResponse = httpClient.execute(httpPost); final String responseString = EntityUtils.toString(httpResponse.getEntity()); return !responseString.contains("Invalid captcha!"); } else { throw new Exception("Captcha generic error"); } } catch (JSONException ex) { //ex.printStackTrace(); LOGGER.log(Level.INFO,"error",ex); } catch (Exception ex) { //ex.printStackTrace(); LOGGER.log(Level.INFO,"error",ex); } return false; } static final class YT_TLH implements TrialLinkHandler { private final String url; private int retryLimit = 5; public void setRetryLimit(int retryLimit) { this.retryLimit = retryLimit; } YT_TLH(String url) { // normalize the url here this.url = Utils.normalize(url); } /** * Inspired by: <a href="http://stackoverflow.com/questions/3717115/regular-expression-for-youtube-links">Stack Overflow</a> * @param url * @return */ @Override public boolean canHandle() { boolean result = url.matches("https?://(www.youtube.com/watch\\?(feature=player_embedded&)?v=|youtu.be/)([\\w\\-\\_]*)(&(amp;)?[\\w\\?=((\\w)|(\\W))]*)?"); LOGGER.log(Level.INFO, "Youtube can handle {0} ? {1}", new Object[]{url,result}); return result; } @Override public String getErrorMessage() { return canHandle() ? null : "Cannot handle"; } @Override public boolean containsMultipleLinks() { return true; } @Override public String tempDisplayName() { return url; } @Override public String getReferenceLinkString() { return url; } }; }