/* LanguageTool, a natural language style checker
* Copyright (C) 2013 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.dev.wikipedia.atom;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
 * Parse the Atom feed of Wikipedia's latest changes.
 * @since 2.4
 */
class AtomFeedParser {

  /**
   * Reads the given Atom XML and builds one {@link AtomFeedItem} per {@code summary} element.
   * <p>
   * Elements are matched by their local name only. The most recently seen {@code id},
   * {@code title} and {@code updated} values are combined with the text of the following
   * {@code summary} element; after an item has been created these three values are reset.
   * The event reader created here is always closed; this method does not call
   * {@code close()} on {@code xml} itself.
   *
   * @param xml stream with the Atom feed
   * @return the items in document order, empty if the feed contains no {@code summary} element
   * @throws XMLStreamException if the XML cannot be read
   * @throws RuntimeException if an {@code updated} value cannot be parsed as a date, or if a
   *         {@code summary} element is found while id, title or date is not set
   */
  List<AtomFeedItem> getAtomFeedItems(InputStream xml) throws XMLStreamException {
    String id = null;
    String title = null;
    Date date = null;
    // Created on the first "updated" element and reused for the rest of this feed, so the
    // factory lookup is not repeated for every entry. Kept local, not shared between calls.
    DatatypeFactory datatypeFactory = null;
    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
    // The feed is external data: do not process DTDs or resolve external entities (XXE).
    inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
    inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
    XMLEventReader eventReader = inputFactory.createXMLEventReader(xml);
    try {
      List<AtomFeedItem> items = new ArrayList<>();
      while (eventReader.hasNext()) {
        XMLEvent event = eventReader.nextEvent();
        if (!event.isStartElement()) {
          continue;
        }
        String localPart = event.asStartElement().getName().getLocalPart();
        switch (localPart) {
          case "id":
            id = getCharacterData(eventReader);
            break;
          case "title":
            title = getCharacterData(eventReader);
            break;
          case "updated":
            String dateString = getCharacterData(eventReader);
            try {
              if (datatypeFactory == null) {
                datatypeFactory = DatatypeFactory.newInstance();
              }
              // e.g. 2013-12-03T09:48:29Z - got this from http://stackoverflow.com/questions/6038136,
              // with SimpleDateParser the hour is off by one:
              date = datatypeFactory.newXMLGregorianCalendar(dateString).toGregorianCalendar().getTime();
            } catch (Exception e) {
              // deliberately broad: any failure here is reported together with the offending value
              throw new RuntimeException("Could not parse date string '" + dateString + "'", e);
            }
            break;
          case "summary":
            if (id == null || title == null || date == null) {
              throw new RuntimeException("id, title and/or date is null: id=" + id + ", title=" + title + ", date=" + date);
            }
            items.add(new AtomFeedItem(id, title, getCharacterData(eventReader), date));
            // reset so that values of this item cannot end up in the next one
            id = null;
            title = null;
            date = null;
            break;
        }
      }
      return items;
    } finally {
      eventReader.close();
    }
  }

  /**
   * Concatenates the data of the consecutive character events that follow the current position.
   * The first non-character event ends the text; that event is consumed from the reader, too.
   *
   * @param eventReader reader positioned directly after a start element
   * @return the collected text, or an empty string if the next event is not a character event
   * @throws XMLStreamException if reading the next event fails
   */
  private String getCharacterData(XMLEventReader eventReader) throws XMLStreamException {
    StringBuilder sb = new StringBuilder();
    XMLEvent event = eventReader.nextEvent();
    // Text may arrive split over several character events, so collect all of them.
    while (event.isCharacters()) {
      sb.append(event.asCharacters().getData());
      event = eventReader.nextEvent();
    }
    return sb.toString();
  }

}