/*******************************************************************************
 * Copyright (c) 2010, 2011 Obeo.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Obeo - initial API and implementation
 *******************************************************************************/
package org.eclipse.mylyn.docs.intent.markup.resource.wikimedia;

import com.google.common.collect.Iterators;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.eclipse.emf.common.notify.Notification;
import org.eclipse.emf.common.util.URI;
import org.eclipse.emf.ecore.EObject;
import org.eclipse.emf.ecore.InternalEObject;
import org.eclipse.emf.ecore.resource.URIConverter;
import org.eclipse.emf.ecore.resource.impl.ResourceImpl;
import org.eclipse.mylyn.docs.intent.markup.builder.ModelDocumentBuilder;
import org.eclipse.mylyn.docs.intent.markup.markup.Document;
import org.eclipse.mylyn.docs.intent.markup.markup.Image;
import org.eclipse.mylyn.docs.intent.markup.markup.Link;
import org.eclipse.mylyn.docs.intent.markup.markup.MarkupFactory;
import org.eclipse.mylyn.wikitext.core.parser.MarkupParser;
import org.eclipse.mylyn.wikitext.core.util.IgnoreDtdEntityResolver;
import org.eclipse.mylyn.wikitext.mediawiki.core.MediaWikiLanguage;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

/**
 * A resource implementation for web-based pages on wikimedia.
 * <p>
 * Loading this resource fetches the page content through the MediaWiki
 * {@code api.php} endpoint, parses the wiki markup into the Intent markup
 * model, resolves image URLs through a second API call, and turns outgoing
 * wiki links into EMF proxies so they can be resolved lazily.
 *
 * @author <a href="mailto:cedric.brun@obeo.fr">Cedric Brun</a>
 */
public class WikimediaResource extends ResourceImpl {

	/**
	 * Constant for the buffer size.
	 */
	private static final int BUFFER_SIZE = 0x10000;

	/**
	 * Creates a new {@link WikimediaResource}.
	 *
	 * @param eUri
	 *            the {@link URI} used to create the resource
	 */
	public WikimediaResource(URI eUri) {
		super(eUri);
	}

	/**
	 * {@inheritDoc}
	 *
	 * @see org.eclipse.emf.ecore.resource.impl.ResourceImpl#load(java.util.Map)
	 */
	@Override
	public void load(Map<?, ?> options) throws IOException {
		// let's build the http URI...
		URI uri = getURI();
		WikimediaURI wURI = new WikimediaURI(uri);
		String pageName = wURI.pageName();
		// Query the page's latest revision content as XML.
		String apiURI = wURI.baseServer() + "/api.php?format=xml&action=query&prop=revisions&titles="
				+ pageName + "&rvprop=content";
		URI eApiURI = URI.createURI(apiURI);
		Map<?, ?> response = null;
		if (options != null) {
			response = (Map<?, ?>)options.get(URIConverter.OPTION_RESPONSE);
		}
		if (response == null) {
			response = new HashMap<Object, Object>();
		}
		InputStream inputStream = getInputStream(eApiURI);
		// Second query: URLs of all images referenced by the page.
		URI eImgURI = URI.createURI(wURI.baseServer() + "/api.php?action=query&titles=" + pageName
				+ "&generator=images&prop=imageinfo&iiprop=url&format=xml");
		InputStream inputImage = getInputStream(eImgURI);
		try {
			wikimediaLoad(inputStream, options);
			/*
			 * Now let's get more information about the images
			 */
			handleImagesData(eImgURI, wURI.baseServer(), inputImage);
		} catch (SAXException e) {
			// TODO proper logging
			throw new RuntimeException(e);
		} catch (ParserConfigurationException e) {
			throw new RuntimeException(e);
		} finally {
			inputStream.close();
			inputImage.close();
			Long timeStamp = (Long)response.get(URIConverter.RESPONSE_TIME_STAMP_PROPERTY);
			if (timeStamp != null) {
				setTimeStamp(timeStamp);
			}
		}
		prepareProxyFromLinks();
	}

	/**
	 * Gets additional information about the image at the given URI.
	 * <p>
	 * Parses the imageinfo XML response and rewrites the URL of every
	 * {@link Image} element in this resource whose title matches an entry of
	 * the response, so that relative image names become absolute server URLs.
	 *
	 * @param eImgURI
	 *            the image URI (used only for error reporting)
	 * @param baseServer
	 *            the base Server URL
	 * @param input
	 *            the input stream
	 * @throws ParserConfigurationException
	 *             if parser cannot be created
	 * @throws SAXException
	 *             if file is invalid
	 * @throws IOException
	 *             if file cannot be properly accessed
	 */
	private void handleImagesData(URI eImgURI, String baseServer, InputStream input)
			throws ParserConfigurationException, SAXException, IOException {
		final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
		parserFactory.setNamespaceAware(true);
		parserFactory.setValidating(false);
		SAXParser saxParser = parserFactory.newSAXParser();
		XMLReader xmlReader = saxParser.getXMLReader();
		// NOTE(review): DTD entities are ignored, but external entity features are
		// not explicitly disabled; consider hardening against XXE since the input
		// comes from a remote server.
		xmlReader.setEntityResolver(IgnoreDtdEntityResolver.getInstance());
		ImageFetchingContentHandler contentHandler = new ImageFetchingContentHandler();
		xmlReader.setContentHandler(contentHandler);
		try {
			xmlReader.parse(new InputSource(input));
		} catch (IOException e) {
			throw new RuntimeException(
					String.format("Unexpected exception retrieving data from %s", eImgURI), e); //$NON-NLS-1$
		}
		if (contentHandler.imageTitleToUrl.size() > 0) {
			Iterator<Image> it = Iterators.filter(getAllContents(), Image.class);
			while (it.hasNext()) {
				Image cur = it.next();
				// The API reports titles prefixed with "Image:".
				String completeURL = contentHandler.imageTitleToUrl.get("Image:" + cur.getUrl());
				if (completeURL != null) {
					cur.setUrl(baseServer + "/" + completeURL);
				}
			}
		}
	}

	/**
	 * Creates and return an input stream from the given URI.
	 *
	 * @param eApiURI
	 *            the URI on which an input stream should be created
	 * @return an input stream from the given URI
	 * @throws IOException
	 *             if file cannot be properly accessed
	 */
	private InputStream getInputStream(URI eApiURI) throws IOException {
		// If an input stream can't be created, ensure that the resource is
		// still considered loaded after the failure,
		// and do all the same processing we'd do if we actually were able to
		// create a valid input stream.
		//
		InputStream inputStream = null;
		try {
			inputStream = getURIConverter().createInputStream(eApiURI);
		} catch (IOException exception) {
			Notification notification = setLoaded(true);
			isLoading = true;
			if (errors != null) {
				errors.clear();
			}
			if (warnings != null) {
				warnings.clear();
			}
			isLoading = false;
			if (notification != null) {
				eNotify(notification);
			}
			setModified(false);
			throw exception;
		}
		return inputStream;
	}

	/**
	 * Creates a proxy for all links, that will be used as long as they are not resolved.
	 */
	private void prepareProxyFromLinks() {
		Iterator<Link> it = Iterators.filter(getAllContents(), Link.class);
		while (it.hasNext()) {
			Link lnk = it.next();
			String href = lnk.getHrefOrHashName();
			// Guard against links without an href (would otherwise NPE).
			if (lnk.getTarget() == null && href != null && href.startsWith("/wiki/")) {
				String targetPageName = href.substring(href.indexOf("/wiki/") + 6);
				URI uri = getURI();
				// Point the proxy at the first document ("#/0") of the target page's
				// resource, under the same base URI as this resource.
				URI targetUri = uri.trimSegments(uri.segmentCount());
				targetUri = URI.createURI(targetUri.toString() + targetPageName + "#/0");
				Document proxifiedDoc = MarkupFactory.eINSTANCE.createDocument();
				((InternalEObject)proxifiedDoc).eSetProxyURI(targetUri);
				lnk.setTarget(proxifiedDoc);
			}
		}
	}

	/**
	 * Loads the resource (markup elements corresponding to the wiki file held by the given input stream will
	 * be parsed on the fly).
	 * <p>
	 * The stream is expected to be a MediaWiki API XML response; the wiki
	 * markup is extracted from the first {@code <rev>...</rev>} element and
	 * parsed into model elements that become this resource's contents.
	 *
	 * @param is
	 *            the input stream of the wiki file to load as a model
	 * @param options
	 *            loading options
	 * @throws SAXException
	 *             if file is invalid
	 * @throws IOException
	 *             if file cannot be properly accessed or the API response does
	 *             not contain a revision element
	 */
	private void wikimediaLoad(InputStream is, Map<?, ?> options) throws SAXException, IOException {
		final char[] buffer = new char[BUFFER_SIZE];
		StringBuilder out = new StringBuilder();
		Reader in = new InputStreamReader(is, "UTF-8");
		int read;
		do {
			read = in.read(buffer, 0, buffer.length);
			if (read > 0) {
				out.append(buffer, 0, read);
			}
		} while (read >= 0);
		String outString = out.toString();
		// Extract the revision content between <rev> and </rev>. The previous
		// implementation computed the end index as "end - begin", silently
		// truncating the tail of the revision content.
		final String startTag = "<rev>";
		final String endTag = "</rev>";
		int begin = outString.indexOf(startTag);
		int end = outString.indexOf(endTag);
		if (begin == -1 || end == -1 || end < begin) {
			throw new IOException("Unexpected Wikimedia API response: no <rev> element found"); //$NON-NLS-1$
		}
		String revisionContent = outString.substring(begin + startTag.length(), end);
		MarkupParser parser = new MarkupParser(new MediaWikiLanguage());
		ModelDocumentBuilder builder = new ModelDocumentBuilder();
		parser.setBuilder(builder);
		parser.parse(revisionContent, true);
		Collection<EObject> roots = builder.getRoots();
		getContents().addAll(roots);
	}
}