/** * Copyright 2012 Universitat Pompeu Fabra. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * */ package org.onexus.ui.workspace.internal.wizards.data; import org.apache.commons.lang3.StringUtils; import org.apache.wicket.extensions.wizard.WizardModel; import org.apache.wicket.extensions.wizard.WizardStep; import org.apache.wicket.markup.html.form.ListMultipleChoice; import org.apache.wicket.markup.html.form.RadioChoice; import org.apache.wicket.model.IModel; import org.apache.wicket.model.PropertyModel; import org.apache.wicket.request.mapper.parameter.PageParameters; import org.onexus.collection.api.Collection; import org.onexus.collection.api.Field; import org.onexus.collection.api.Link; import org.onexus.collection.api.types.Text; import org.onexus.collection.api.utils.LinkUtils; import org.onexus.data.api.IDataManager; import org.onexus.resource.api.Folder; import org.onexus.resource.api.IResourceManager; import org.onexus.resource.api.Loader; import org.onexus.resource.api.ORI; import org.onexus.resource.api.Parameter; import org.onexus.resource.api.Property; import org.onexus.resource.api.Resource; import org.onexus.ui.api.pages.resource.ResourcesPage; import org.onexus.ui.api.wizards.AbstractWizard; import javax.inject.Inject; import javax.inject.Named; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; public class CreateCollectionWizard extends AbstractWizard { @Inject private IDataManager dataManager; @Inject private IResourceManager resourceManager; // Formats private static final String CSV = "Comma separated values"; private static final String TSV = "Tab separated values"; private static final List<String> FORMATS = Arrays.asList(new String[]{TSV, CSV}); // Maximum lines to load to deduce the datatype private static final int MAXIMUM_LINES = 10000; private static final int MAXIMUM_UNIQUE_VALUES = 1500; private String selected = TSV; private List<String> primaryKeys = new ArrayList<String>(); private ORI sourceURI; // Data information private String headers[]; private Map<String, Set<String>> sampleData; private int nullEmpty = 0; private int nullDash = 0; private int nullString = 0; private int nullNA = 0; public CreateCollectionWizard(String id, IModel<? extends Resource> model) { super(id); sourceURI = model.getObject().getORI(); WizardModel wizardModel = new WizardModel(); wizardModel.add(new ChooseFormat()); wizardModel.add(new PrimaryKeys()); init(wizardModel); } private void readData() throws IOException { String separator = " "; if (selected.equals(CSV)) { separator = ","; } if (selected.equals(TSV)) { separator = "\t"; } Iterator<InputStream> streams = dataManager.load(sourceURI).iterator(); if (!streams.hasNext()) { return; } BufferedReader fr = new BufferedReader(new InputStreamReader(streams.next())); // Get headers headers = fr.readLine().split(separator); // Build values map sampleData = new HashMap<String, Set<String>>(); for (String header : headers) { sampleData.put(header, new HashSet<String>()); } String line = fr.readLine(); for (int i = 0; i < MAXIMUM_LINES && line != null; i++) { String values[] = line.split(separator); for (int h = 0; h < headers.length && h < values.length; h++) { if (values[h] == null || values[h].isEmpty()) { nullEmpty++; } else if (values[h].equalsIgnoreCase("-")) { nullDash++; } else if (values[h].equalsIgnoreCase("NULL")) { nullString++; } else if (values[h].equalsIgnoreCase("NA")) { nullNA++; } Set<String> columnValues = sampleData.get(headers[h]); if (columnValues.size() < MAXIMUM_UNIQUE_VALUES) { sampleData.get(headers[h]).add(values[h]); } } line = fr.readLine(); } fr.close(); } @Override public void onFinish() { super.onFinish(); // Create collection Collection collection = newCollection(); // Collect fields from other collections in the same folder Map<String, Field> otherFields = collectFields(); List<Field> fields = new ArrayList<Field>(); for (String header : headers) { String shortName, title; if (otherFields.containsKey(header)) { Field field = otherFields.get(header); shortName = field.getLabel(); title = field.getTitle(); } else { String lower = StringUtils.lowerCase(header); shortName = StringUtils.abbreviate(lower, 20); title = StringUtils.capitalize(lower); } Field field = new Field(header, shortName, title, deduceClass(sampleData.get(header))); if (header.toLowerCase().contains("pvalue") || header.toLowerCase().contains("qvalue")) { field.setProperties(Arrays.asList(new Property[]{ new Property("BROWSER_DECORATOR", "PVALUE2") })); } if (primaryKeys.contains(header)) { field.setPrimaryKey(Boolean.TRUE); } fields.add(field); } collection.setFields(fields); // Deduce links from other collections in the same folder Map<String, Link> otherLinks = collectLinks(); List<Link> links = new ArrayList<Link>(); List<Collection> allProjectCollections = new ArrayList<Collection>(); addAllCollections(allProjectCollections, resourceManager.getProject(sourceURI.getProjectUrl()).getORI()); for (String header : headers) { if (otherLinks.containsKey(header)) { Link otherLink = otherLinks.get(header); Link link = new Link(); link.setCollection(otherLink.getCollection()); link.getFields().add(otherLink.getFields().get(0)); links.add(link); } else { for (Collection col : allProjectCollections) { Field field = col.getField(header); if (field != null && (header.toLowerCase().endsWith("id") || header.toLowerCase().endsWith("key"))) { // Only link to collections without any link if (col.getLinks() == null || col.getLinks().isEmpty()) { Link link = new Link(); link.setCollection(new ORI((String) null, col.getORI().getPath())); link.getFields().add(header); links.add(link); } } } } } collection.setLinks(links); Loader loader = new Loader(); loader.setPlugin("tsv-loader"); List<Parameter> parameters = new ArrayList<Parameter>(); parameters.add(new Parameter("data", sourceURI.getPath())); if (nullEmpty > nullDash && nullEmpty > nullString && nullEmpty > nullNA) { parameters.add(new Parameter("NULL_VALUE", "")); } if (nullString > nullDash && nullString > nullEmpty && nullString > nullNA) { parameters.add(new Parameter("NULL_VALUE", "NULL")); } if (nullNA > nullDash && nullNA > nullString && nullNA > nullEmpty) { parameters.add(new Parameter("NULL_VALUE", "NA")); } loader.setParameters(parameters); collection.setLoader(loader); resourceManager.save(collection); PageParameters params = new PageParameters().add(ResourcesPage.PARAMETER_RESOURCE, collection.getORI()); setResponsePage(ResourcesPage.class, params); } private Map<String, Link> collectLinks() { Map<String, Link> links = new HashMap<String, Link>(); List<Collection> collections = new ArrayList<Collection>(); addAllCollections(collections, sourceURI.getParent()); for (Collection collection : collections) { if (collection.getLinks() != null) { for (Link link : collection.getLinks()) { // Only simple links (not composed) if (link.getFields().size() == 1) { String field = LinkUtils.getFromFieldName(link.getFields().get(0)); links.put(field, link); } } } } return links; } private Map<String, Field> collectFields() { Map<String, Field> fields = new HashMap<String, Field>(); List<Collection> collections = new ArrayList<Collection>(); addAllCollections(collections, sourceURI.getParent()); for (Collection collection : collections) { for (Field field : collection.getFields()) { fields.put(field.getId(), field); } } return fields; } private Collection newCollection() { String sourceName = sourceURI.getPath(); String collectionName; int punt = sourceName.lastIndexOf('.'); if (punt != -1) { collectionName = sourceName.substring(0, punt); } else { collectionName = sourceName + ".col"; } ORI collectionURI = new ORI(sourceURI.getProjectUrl(), collectionName); Collection collection = new Collection(); collection.setORI(collectionURI); collection.setTitle(collectionName); return collection; } private void addAllCollections(List<Collection> collections, ORI parentUri) { collections.addAll(resourceManager.loadChildren(Collection.class, parentUri)); List<Folder> folders = resourceManager.loadChildren(Folder.class, parentUri); for (Folder folder : folders) { addAllCollections(collections, folder.getORI()); } } public String getSelected() { return selected; } public void setSelected(String selected) { this.selected = selected; } private static Class<?> deduceClass(Set<String> values) { boolean longString = false; boolean integerType = true; boolean doubleType = false; for (String value : values) { // Skip null and empty values if (value == null || value.isEmpty() || value.equals("-")) { continue; } if (integerType) { try { Integer.valueOf(value); } catch (NumberFormatException e) { integerType = false; doubleType = true; } } if (doubleType) { try { Double.valueOf(value); } catch (NumberFormatException e) { doubleType = false; } } if (value.length() > 127) { longString = true; } } if (integerType) { return Integer.class; } if (doubleType) { return Double.class; } if (longString) { return Text.class; } else { return String.class; } } public List<String> getPrimaryKeys() { return primaryKeys; } public void setPrimaryKeys(List<String> primaryKeys) { this.primaryKeys = primaryKeys; } private final class ChooseFormat extends WizardStep { public ChooseFormat() { super("Create collection", "Choose one file format"); RadioChoice<String> commandOptions = new RadioChoice<String>("formats", new PropertyModel<String>(CreateCollectionWizard.this, "selected"), FORMATS); add(commandOptions); } @Override public void applyState() { try { readData(); } catch (IOException e) { error(e.getMessage()); } } } private final class PrimaryKeys extends WizardStep { public PrimaryKeys() { super("Create collection", "Select primary keys"); } @Override protected void onBeforeRender() { addOrReplace(new ListMultipleChoice<String>("primaryKeys", Arrays.asList(headers))); super.onBeforeRender(); } } }