/*
* Copyright (c) 2010, Thomas F. Morris
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of Google nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.extension.gdata;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.json.JSONObject;
import com.google.gdata.client.spreadsheet.CellQuery;
import com.google.gdata.client.spreadsheet.SpreadsheetService;
import com.google.gdata.data.spreadsheet.Cell;
import com.google.gdata.data.spreadsheet.CellEntry;
import com.google.gdata.data.spreadsheet.CellFeed;
import com.google.gdata.data.spreadsheet.SpreadsheetEntry;
import com.google.gdata.data.spreadsheet.WorksheetEntry;
import com.google.gdata.util.ServiceException;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TabularImportingParserBase;
import com.google.refine.importers.TabularImportingParserBase.TableDataReader;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
/**
* OpenRefine parser for Google Spreadsheets.
*
* @author Tom Morris <tfmorris@gmail.com>
* @copyright 2010 Thomas F. Morris
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
*/
public class GDataImporter {
static public void parse(
String token,
Project project,
ProjectMetadata metadata,
final ImportingJob job,
int limit,
JSONObject options,
List<Exception> exceptions) {
String docType = JSONUtilities.getString(options, "docType", null);
if ("spreadsheet".equals(docType)) {
SpreadsheetService service = GDataExtension.getSpreadsheetService(token);
parse(
service,
project,
metadata,
job,
limit,
options,
exceptions
);
} else if ("table".equals(docType)) {
FusionTableImporter.parse(token,
project,
metadata,
job,
limit,
options,
exceptions
);
}
}
static public void parse(
SpreadsheetService service,
Project project,
ProjectMetadata metadata,
final ImportingJob job,
int limit,
JSONObject options,
List<Exception> exceptions) {
String docUrlString = JSONUtilities.getString(options, "docUrl", null);
String worksheetUrlString = JSONUtilities.getString(options, "sheetUrl", null);
if (docUrlString != null && worksheetUrlString != null) {
try {
parseOneWorkSheet(
service,
project,
metadata,
job,
new URL(docUrlString),
new URL(worksheetUrlString),
limit,
options,
exceptions);
} catch (MalformedURLException e) {
e.printStackTrace();
exceptions.add(e);
}
}
}
static public void parseOneWorkSheet(
SpreadsheetService service,
Project project,
ProjectMetadata metadata,
final ImportingJob job,
URL docURL,
URL worksheetURL,
int limit,
JSONObject options,
List<Exception> exceptions) {
try {
WorksheetEntry worksheetEntry = service.getEntry(worksheetURL, WorksheetEntry.class);
String spreadsheetName = docURL.toExternalForm();
try {
SpreadsheetEntry spreadsheetEntry = service.getEntry(docURL, SpreadsheetEntry.class);
spreadsheetName = spreadsheetEntry.getTitle().getPlainText();
} catch (ServiceException e) { // RedirectRequiredException among others
// fall back to just using the URL (better for traceability anyway?)
}
String fileSource = spreadsheetName + " # " +
worksheetEntry.getTitle().getPlainText();
setProgress(job, fileSource, 0);
TabularImportingParserBase.readTable(
project,
metadata,
job,
new WorksheetBatchRowReader(job, fileSource, service, worksheetEntry, 20),
fileSource,
limit,
options,
exceptions
);
setProgress(job, fileSource, 100);
} catch (IOException e) {
e.printStackTrace();
exceptions.add(e);
} catch (ServiceException e) {
e.printStackTrace();
exceptions.add(e);
}
}
static private void setProgress(ImportingJob job, String fileSource, int percent) {
job.setProgress(percent, "Reading " + fileSource);
}
static private class WorksheetBatchRowReader implements TableDataReader {
final ImportingJob job;
final String fileSource;
final SpreadsheetService service;
final WorksheetEntry worksheet;
final int batchSize;
final int totalRows;
int nextRow = 0; // 0-based
int batchRowStart = 0; // 0-based
List<List<Object>> rowsOfCells = null;
public WorksheetBatchRowReader(ImportingJob job, String fileSource,
SpreadsheetService service, WorksheetEntry worksheet,
int batchSize) {
this.job = job;
this.fileSource = fileSource;
this.service = service;
this.worksheet = worksheet;
this.batchSize = batchSize;
this.totalRows = worksheet.getRowCount();
}
@Override
public List<Object> getNextRowOfCells() throws IOException {
if (rowsOfCells == null || (nextRow >= batchRowStart + rowsOfCells.size() && nextRow < totalRows)) {
int newBatchRowStart = batchRowStart + (rowsOfCells == null ? 0 : rowsOfCells.size());
try {
rowsOfCells = getRowsOfCells(
service,
worksheet,
newBatchRowStart + 1, // convert to 1-based
batchSize);
batchRowStart = newBatchRowStart;
setProgress(job, fileSource, batchRowStart * 100 / totalRows);
} catch (ServiceException e) {
throw new IOException(e);
}
}
if (rowsOfCells != null && nextRow - batchRowStart < rowsOfCells.size()) {
return rowsOfCells.get(nextRow++ - batchRowStart);
} else {
return null;
}
}
List<List<Object>> getRowsOfCells(
SpreadsheetService service,
WorksheetEntry worksheet,
int startRow, // 1-based
int rowCount
) throws IOException, ServiceException {
URL cellFeedUrl = worksheet.getCellFeedUrl();
int minRow = startRow;
int maxRow = Math.min(worksheet.getRowCount(), startRow + rowCount - 1);
int cols = worksheet.getColCount();
int rows = worksheet.getRowCount();
CellQuery cellQuery = new CellQuery(cellFeedUrl);
cellQuery.setMinimumRow(minRow);
cellQuery.setMaximumRow(maxRow);
cellQuery.setMaximumCol(cols);
cellQuery.setMaxResults(rows * cols);
cellQuery.setReturnEmpty(false);
CellFeed cellFeed = service.query(cellQuery, CellFeed.class);
List<CellEntry> cellEntries = cellFeed.getEntries();
List<List<Object>> rowsOfCells = new ArrayList<List<Object>>(rowCount);
for (CellEntry cellEntry : cellEntries) {
Cell cell = cellEntry.getCell();
if (cell != null) {
int row = cell.getRow() - startRow;
int col = cell.getCol() - 1;
while (row >= rowsOfCells.size()) {
rowsOfCells.add(new ArrayList<Object>());
}
List<Object> rowOfCells = rowsOfCells.get(row);
while (col >= rowOfCells.size()) {
rowOfCells.add(null);
}
rowOfCells.set(col, cell.getValue());
}
}
return rowsOfCells;
}
}
}