package com.gettingmobile.goodnews.download; import android.graphics.Bitmap; import android.graphics.Bitmap.CompressFormat; import android.graphics.BitmapFactory; import android.graphics.BitmapFactory.Options; import android.util.Log; import com.gettingmobile.io.IOUtils; import java.io.*; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.regex.Matcher; import java.util.regex.Pattern; class ImageProcessor { private static final String LOG_TAG = "goodnews.ImageProcessor"; public static final int HTTPTIMEOUT = 5000; private static final Pattern PATTERN_IMG = Pattern.compile("(?i)<img [^>]*>"); private static final Pattern PATTERN_SRC = Pattern.compile("(?i)(?<=src=\")[^\"]*(?=\")"); private static final Pattern PATTERN_ATTR = Pattern.compile("(?i)(?<=( ))[a-z]*?=\"[^\"]*\""); private static final Pattern PATTERN_A_TAG = Pattern.compile("(?i)<[/]?a[ >]"); private final File itemDir; private final String prefix; private int imageCounter = 0; private final URL baseUrl; private final URL pathUrl; private String html; private final int displaySmallSize; private final int displayLargeSize; private final boolean downscale; /** * For testing purposes only, see ImageProcessor(URL pageUrl, StringBuilder html, int displayLargeSize) * @param itemDir directory to store images in * @param prefix name prefix for image files * @param pageUrl The URL of the original page * @param html The html document of the original page * @param displaySmallSize the size of the shorter edge of the display in pixels. * @param displayLargeSize the size of the longer edge of the display in pixels. * @param downscale whether to scale the image down to the display or not. */ public ImageProcessor(File itemDir, String prefix, String pageUrl, String html, int displaySmallSize, int displayLargeSize, boolean downscale) { this.itemDir = itemDir; this.prefix = prefix; this.baseUrl = getBaseUrl(pageUrl, false); this.pathUrl = getBaseUrl(pageUrl, true); this.html = html; this.displaySmallSize = displaySmallSize; this.displayLargeSize = displayLargeSize; this.downscale = downscale; } /** * Get the base URL of the given base URL * @param pageUrl URL of the web page * @param withPath if the path should be included, or just the server name i.e. http://www.dom.com/path/ or just http://www.dom.com/ * @return The url of the path of the webpage, minus the file name, ending with / */ public static URL getBaseUrl(String pageUrl, boolean withPath) { try { if (pageUrl == null) return null; final URL url = new URL(pageUrl); final int i = url.getPath().lastIndexOf("/"); final String path = (i > -1 ? url.getPath().substring(0, i) : ""); if (!withPath) { return new URL(url.getProtocol()+"://"+url.getAuthority() + "/"); } else { return new URL(url.getProtocol()+"://"+url.getAuthority() + path + "/"); } } catch (MalformedURLException ex) { Log.w(LOG_TAG, "malformed url " + pageUrl, ex); return null; } } /** * Method that does the actual work, finds all img tags using regex, * all html outside of img tags is just appended to the output StringBuilder. * @throws java.io.IOException if it went wrong */ public void processImages() throws IOException { try { int lastTagEnd = 0; //Last end of img tag regex match, initially start of string boolean lastFragmentEndedWithinLink = false; final StringBuilder htmlOut = new StringBuilder(); //Somewhere to put our new html doc final Matcher matcher = PATTERN_IMG.matcher(html); while(matcher.find()) { // Append all non-img tag content since last match final String htmlSinceLastImg = html.substring(lastTagEnd, matcher.start()); htmlOut.append(htmlSinceLastImg); lastTagEnd = matcher.end(); //set the last tag end to end of current match String imgTag = matcher.group(0); // the <img....> tag lastFragmentEndedWithinLink = endsWithinLinkTag(htmlSinceLastImg, lastFragmentEndedWithinLink); imgTag = processImage(imgTag, !lastFragmentEndedWithinLink); htmlOut.append(imgTag); } htmlOut.append(html.substring(lastTagEnd)); // finally append all non-img data since the last match html = htmlOut.toString(); } catch (OutOfMemoryError ex) { // not much we can do here but forwarding the error to be gracefully handled throw new IOException("Failed to download images because I'm out of memory"); } } /** * Process a single image tag. * @param imgTag The original img tag to create replacement for with inline/embedded image * @param createLinkToOrigImg Whether or not to wrap img tag in link to the original image * @return a string for the new img tag, i.e. <img src="data:image/png;base64,asdasdlkjlasjd..." alt="other attributes are preserved"/> * @throws java.io.IOException if it went wrong */ private String processImage(String imgTag, boolean createLinkToOrigImg) throws IOException { String imgOut = imgTag; final Matcher matcher = PATTERN_SRC.matcher(imgTag); if(matcher.find()) { final String sourceRel = matcher.group(0); try { final URL imageUrl; if(sourceRel.contains("://")) { imageUrl = new URL(sourceRel); //img src on different site } else if(sourceRel.startsWith("/")) { imageUrl = (baseUrl != null) ? new URL(baseUrl, sourceRel) : null; } else { imageUrl = (pathUrl != null) ? new URL(pathUrl, sourceRel) : null; //img src relative to page url. } if (imageUrl != null) { // Get the resized image and encode into Base64 final ImageInfo imageInfo = fetchAndResizeImage(imageUrl, downscale ? displayLargeSize : 0); if(imageInfo != null) { // Create the new tag with the image data, and applicable attributes from original img tag final StringBuilder newTag = new StringBuilder(); newTag.append("<img"); /* * only include image dimensions (width and height attribute) if they are remarkable smaller * than the display, so that we can do CSS downscaling when displaying the item in all other cases. * (we are deviding the size by two to ensure that the image is really not getting to large, if the * item view contains borders or stuff like that). */ final int maxExplicitDimensionSize = displaySmallSize / 2; if (imageInfo.width < maxExplicitDimensionSize && imageInfo.height < maxExplicitDimensionSize) { newTag.append(" width=\"").append(imageInfo.width). append("\" height=\"").append(imageInfo.height).append('"'); } final String imageSrc = imageInfo.fileName; Log.i(LOG_TAG, "Setting image source to " + imageSrc); newTag.append(" src=\"").append(imageSrc).append('"'); /* * append all other image attributes to the new tag */ final Matcher matcher2 = PATTERN_ATTR.matcher(imgTag); while(matcher2.find()) { final String a = matcher2.group(0).toLowerCase(); if(!a.startsWith("src") && !a.startsWith("height") && !a.startsWith("width")) { newTag.append(" ").append(matcher2.group(0)); } } newTag.append("/>"); Log.d(LOG_TAG, "dimensions of new image: width="+ imageInfo.width+", height="+ imageInfo.height); if(createLinkToOrigImg) { newTag.insert(0, "<a href=\"" + imageUrl.toExternalForm() + "\">").append("</a>"); } imgOut = newTag.toString(); } } } catch(MalformedURLException ex) { Log.w(LOG_TAG, "ignoring image with invalid path " + sourceRel); } } return imgOut; } /** * Fetches the image in the URL, resizes it to the max size set in maxPixelSize in constructor, * saves it to the base directory and returns the relative path name. * @param imageUrl the URL of the image to fetch. * @param maxPixelSize The maximum size, width or height, of the page, or 0 to return unresized image * @return the path of the image relative to the base dir. * @throws java.io.IOException if an error occured fetching the image. */ public ImageInfo fetchAndResizeImage(URL imageUrl, int maxPixelSize) throws IOException { Log.i(LOG_TAG, "Opening imageUrl: " + imageUrl.toExternalForm()); final String fileName = prefix + Integer.toString(imageCounter++); ImageInfo imageInfo = null; // set up the HttpURLConnection final URLConnection connection = imageUrl.openConnection(); if (connection instanceof HttpURLConnection) { final HttpURLConnection conn = (HttpURLConnection) connection; try { conn.setRequestMethod("GET"); conn.setReadTimeout(HTTPTIMEOUT); conn.setConnectTimeout(HTTPTIMEOUT); conn.setDoInput(true); conn.connect(); // Check if server responds ok 200 // may be we should follow 301/302 redirects here in the future? if (conn.getResponseCode() == HttpURLConnection.HTTP_OK) { final ByteArrayOutputStream baos1 = new ByteArrayOutputStream(); final InputStream is = conn.getInputStream(); try { byte[] buf = new byte[256]; int i; while((i=is.read(buf))>-1) { baos1.write(buf, 0, i); } } finally { IOUtils.closeQuietly(is); } final byte[] origImgBytes = baos1.toByteArray(); Log.d(LOG_TAG, "Size of original image: " + origImgBytes.length); if(maxPixelSize <=0 ) { // Resize disabled, just get the image dimensions and return raw array in Image writeImage(fileName, origImgBytes); imageInfo = new ImageInfo(fileName, origImgBytes); } else try { // A maximum size is set, might have to resize if original size exceeds max // We'll assume so and just load the entire image, converting it to PNG in the process final Bitmap origImg = BitmapFactory.decodeByteArray(origImgBytes, 0, origImgBytes.length); if(origImg == null) { Log.w(LOG_TAG, "Could not create image from connection inputstream"); } else { final int origWidth = origImg.getWidth(); final int origHeight = origImg.getHeight(); // Is resize needed? final boolean resize = origWidth > maxPixelSize || origHeight >maxPixelSize; if (!resize) { writeImage(fileName, origImgBytes); imageInfo = new ImageInfo(fileName, origImgBytes); } else { // Image not within maxSize // Find ratios for height and width final float ratioW = ((float)maxPixelSize)/origWidth; final float ratioH = ((float)maxPixelSize)/origHeight; // Both sides must be < displayLargeSize, so get the smallest of the ratios final float ratio = (ratioW<ratioH ? ratioW : ratioH); // Calculate the new widths imageInfo = new ImageInfo( fileName, Math.round(ratio * origWidth), Math.round(ratio * origHeight), resize); // Resize the image and mark the old for garbage collection Bitmap retImg = Bitmap.createScaledBitmap(origImg, imageInfo.width, imageInfo.height, true); origImg.recycle(); // Save the final image to byte array in PNG format, then mark image for garbage collection final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024); retImg.compress(CompressFormat.PNG, 50, baos); // PNG format ignores image quality param retImg.recycle(); final byte[] newImgBytes = baos.toByteArray(); Log.i(LOG_TAG, "Size of resized image: " + newImgBytes.length); writeImage(fileName, newImgBytes); } } } catch (OutOfMemoryError error) { Log.w(LOG_TAG, "Encountered out of memory while processing image " + imageUrl); imageInfo = new ImageInfo(fileName, origImgBytes); } } else { /* * non-success return code */ Log.w(LOG_TAG, "Failed to download image with return code " + conn.getResponseCode()); } } catch (NullPointerException ex) { // HttpURLConnection.connect() throws this in some strange situations :-( throw new IOException("NullPointerException while trying to connect"); } catch (IndexOutOfBoundsException ex) { // HttpURLConnection.connect() throws this in some strange situations :-( throw new IOException("IndexOutOfBoundsException while trying to connect"); } finally { conn.disconnect(); } } return imageInfo; } private void writeImage(String fileName, byte[] image) throws IOException { IOUtils.ensureDirExists(itemDir); final File file = new File(itemDir, fileName); OutputStream out = null; try { Log.i(LOG_TAG, "Writing image of " + image.length + " bytes to " + file); out = new FileOutputStream(file); out.write(image); } finally { IOUtils.closeQuietly(out); } } /** * Tests if the html fragment given ends within a link tag <a> * @param html The fragment of html to test * @param previousFragmentEndedInLink ??? * @return If the fragment ends within an <a></a> or not */ public static boolean endsWithinLinkTag(String html, boolean previousFragmentEndedInLink) { boolean inLinkTag = previousFragmentEndedInLink; Matcher matcher = ImageProcessor.PATTERN_A_TAG.matcher(html); while(matcher.find()) { String tag = matcher.group(0).toLowerCase(); if(tag.startsWith("</a")) { inLinkTag = false; } else if(tag.startsWith("<a")) { inLinkTag = true; } } return inLinkTag; } public String getPageWithInlineImages() throws IOException { processImages(); return html; } /** * Data transfer class for transfer of resized image with metadata * @author Tor * */ static class ImageInfo { public final String fileName; public final int width; public final int height; public final boolean wasResized; public ImageInfo(String fileName, int width, int height, boolean wasResized) { this.fileName = fileName; this.width = width; this.height = height; this.wasResized = wasResized; } public ImageInfo(String fileName, byte[] image) { this.fileName = fileName; final Options opts = new Options(); opts.inJustDecodeBounds=true; BitmapFactory.decodeByteArray(image, 0, image.length, opts); width = opts.outWidth; height = opts.outHeight; wasResized = false; } } }