/* * Copyright (c) 2010,2013 Thomas F. Morris and other contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of Google nor the names of its contributors may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package com.google.refine.extension.gdata; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.json.JSONObject; import com.google.api.services.fusiontables.Fusiontables; import com.google.api.services.fusiontables.model.Column; import com.google.api.services.fusiontables.model.Sqlresponse; import com.google.api.services.fusiontables.model.Table; import com.google.refine.ProjectMetadata; import com.google.refine.importers.TabularImportingParserBase; import com.google.refine.importers.TabularImportingParserBase.TableDataReader; import com.google.refine.importing.ImportingJob; import com.google.refine.model.Project; import com.google.refine.util.JSONUtilities; /** * OpenRefine parser for Google Spreadsheets. * * @author Tom Morris <tfmorris@gmail.com> * @copyright 2010 Thomas F. Morris * @license New BSD http://www.opensource.org/licenses/bsd-license.php */ public class FusionTableImporter { static public void parse( String token, Project project, ProjectMetadata metadata, final ImportingJob job, int limit, JSONObject options, List<Exception> exceptions) { Fusiontables service = FusionTableHandler.getFusionTablesService(token); parse( service, project, metadata, job, limit, options, exceptions ); } static void setProgress(ImportingJob job, String fileSource, int percent) { job.setProgress(percent, "Reading " + fileSource); } static private class FusionTableBatchRowReader implements TableDataReader { final ImportingJob job; final String fileSource; final Fusiontables service; final List<FTColumnData> columns; final int batchSize; final String baseQuery; int nextRow = 0; // 0-based int batchRowStart = 0; // 0-based boolean end = false; List<List<Object>> rowsOfCells = null; boolean usedHeaders = false; public FusionTableBatchRowReader(ImportingJob job, String fileSource, Fusiontables service, String tableId, List<FTColumnData> columns, int batchSize) { this.job = job; this.fileSource = fileSource; this.service = service; this.columns = columns; this.batchSize = batchSize; StringBuffer sb = new StringBuffer(); sb.append("SELECT "); boolean first = true; for (FTColumnData cd : columns) { if (first) { first = false; } else { sb.append(","); } sb.append("'"); sb.append(cd.name); sb.append("'"); } sb.append(" FROM "); sb.append(tableId); baseQuery = sb.toString(); } @Override public List<Object> getNextRowOfCells() throws IOException { if (!usedHeaders) { List<Object> row = new ArrayList<Object>(columns.size()); for (FTColumnData cd : columns) { row.add(cd.name); } usedHeaders = true; return row; } if (rowsOfCells == null || (nextRow >= batchRowStart + rowsOfCells.size() && !end)) { int newBatchRowStart = batchRowStart + (rowsOfCells == null ? 0 : rowsOfCells.size()); rowsOfCells = getRowsOfCells(newBatchRowStart); batchRowStart = newBatchRowStart; setProgress(job, fileSource, -1 /* batchRowStart * 100 / totalRows */); } if (rowsOfCells != null && nextRow - batchRowStart < rowsOfCells.size()) { return rowsOfCells.get(nextRow++ - batchRowStart); } else { return null; } } private List<List<Object>> getRowsOfCells(int startRow) throws IOException { List<List<Object>> rowsOfCells = new ArrayList<List<Object>>(batchSize); String query = baseQuery + " OFFSET " + startRow + " LIMIT " + batchSize; Sqlresponse sqlresponse = FusionTableHandler.runFusionTablesSelect(service, query); List<List<Object>> rows = sqlresponse.getRows(); if (rows.size() > 1) { for (int i = 1; i < rows.size(); i++) { List<Object> row = rows.get(i); List<Object> rowOfCells = new ArrayList<Object>(row.size()); for (int j = 0; j < row.size() && j < columns.size(); j++) { String text = String.valueOf(row.get(j)); if (text.isEmpty()) { rowOfCells.add(null); } else { FTColumnData cd = columns.get(j); if (cd.type == FTColumnType.NUMBER) { try { rowOfCells.add(Long.parseLong(text)); continue; } catch (NumberFormatException e) { // ignore } try { double d = Double.parseDouble(text); if (!Double.isInfinite(d) && !Double.isNaN(d)) { rowOfCells.add(d); continue; } } catch (NumberFormatException e) { // ignore } } rowOfCells.add(text); } } rowsOfCells.add(rowOfCells); } } end = rows.size() < batchSize + 1; return rowsOfCells; } } static public void parse( Fusiontables service, Project project, ProjectMetadata metadata, final ImportingJob job, int limit, JSONObject options, List<Exception> exceptions) { String docUrlString = JSONUtilities.getString(options, "docUrl", null); String id = getFTid(docUrlString); // Use GDataExtension.getFusionTableKey(url) ? // TODO: Allow arbitrary Fusion Tables URL instead of (in addition to?) constructing our own? try { List<FTColumnData> columns = new ArrayList<FusionTableImporter.FTColumnData>(); Table table = service.table().get(id).execute(); for (Column col : table.getColumns()) { FTColumnData cd = new FTColumnData(); cd.name = col.getName(); String type = col.getType(); if (type.equals("STRING")) { cd.type = FTColumnType.STRING; } else if (type.equals("NUMBER")) { cd.type = FTColumnType.NUMBER; } else if (type.equals("DATETIME")) { cd.type = FTColumnType.DATETIME; } else if (type.equals("LOCATION")) { cd.type = FTColumnType.LOCATION; } else { // TODO: unknown type cd.type = FTColumnType.STRING; } columns.add(cd); } setProgress(job, docUrlString, -1); // Force these options for the next call because each fusion table // is strictly structured with a single line of headers. JSONUtilities.safePut(options, "ignoreLines", 0); // number of blank lines at the beginning to ignore JSONUtilities.safePut(options, "headerLines", 1); // number of header lines TabularImportingParserBase.readTable( project, metadata, job, new FusionTableBatchRowReader(job, docUrlString, service, id, columns, 100), docUrlString, limit, options, exceptions ); setProgress(job, docUrlString, 100); } catch (IOException e) { e.printStackTrace(); exceptions.add(e); } } static private String getFTid(String url) { if (url == null) { return null; } int equal = url.lastIndexOf('='); if (equal < 0) { return null; } return url.substring(equal + 1); } static enum FTColumnType { STRING, NUMBER, DATETIME, LOCATION } final static class FTColumnData { String name; FTColumnType type; } }