/* * JBoss, Home of Professional Open Source. * See the COPYRIGHT.txt file distributed with this work for information * regarding copyright ownership. Some portions may be licensed * to Red Hat, Inc. under one or more contributor license agreements. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301 USA. */ package org.teiid.query.processor.relational; import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.teiid.api.exception.query.ExpressionEvaluationException; import org.teiid.client.plan.PlanNode; import org.teiid.common.buffer.BlockedException; import org.teiid.common.buffer.BufferManager; import org.teiid.common.buffer.TupleBatch; import org.teiid.core.TeiidComponentException; import org.teiid.core.TeiidProcessingException; import org.teiid.core.TeiidRuntimeException; import org.teiid.core.types.ClobImpl; import org.teiid.core.types.ClobType; import org.teiid.core.types.DataTypeManager; import org.teiid.core.types.TransformationException; import org.teiid.dqp.internal.process.RequestWorkItem; import org.teiid.query.QueryPlugin; import org.teiid.query.analysis.AnalysisRecord; import org.teiid.query.processor.ProcessorDataManager; import org.teiid.query.sql.LanguageObject; import org.teiid.query.sql.lang.TextTable; import org.teiid.query.sql.lang.TextTable.TextColumn; import org.teiid.query.sql.symbol.Expression; import org.teiid.query.util.CommandContext; /** * Handles text file processing. * * TODO: allow for a configurable line terminator */ public class TextTableNode extends SubqueryAwareRelationalNode { private TextTable table; //initialized state private int skip = 0; private int header = -1; private boolean noQuote; private char quote; private char delimiter; private int lineWidth; private int[] projectionIndexes; private Map<String, List<String>> parentLines; //per file state private BufferedReader reader; private int textLine = 0; private Map<String, Integer> nameIndexes; private String systemId; private long rowNumber; private boolean cr; private boolean eof; private volatile boolean running; private volatile TeiidRuntimeException asynchException; private int limit = -1; private boolean noTrim; private char newLine = '\n'; private boolean crNewLine = true; public TextTableNode(int nodeID) { super(nodeID); } @Override public void initialize(CommandContext context, BufferManager bufferManager, ProcessorDataManager dataMgr) { super.initialize(context, bufferManager, dataMgr); if (projectionIndexes != null) { return; } if (table.getSkip() != null) { skip = table.getSkip(); } if (table.getHeader() != null) { skip = Math.max(table.getHeader(), skip); header = table.getHeader() - 1; } if (table.isFixedWidth()) { for (TextColumn col : table.getColumns()) { lineWidth += col.getWidth(); } } else { if (table.getDelimiter() == null) { delimiter = ','; } else { delimiter = table.getDelimiter(); } if (table.getQuote() == null) { quote = '"'; } else { noQuote = table.isEscape(); quote = table.getQuote(); } for (TextColumn column : table.getColumns()) { if (column.getSelector() != null) { if (parentLines == null) { parentLines = new HashMap<String, List<String>>(); } parentLines.put(column.getSelector(), null); } } lineWidth = table.getColumns().size() * DataTypeManager.MAX_STRING_LENGTH; } if (table.isUsingRowDelimiter()) { Character c = table.getRowDelimiter(); if (c != null) { this.newLine = c; this.crNewLine = false; } } Map<Expression, Integer> elementMap = createLookupMap(table.getProjectedSymbols()); this.projectionIndexes = getProjectionIndexes(elementMap, getElements()); } @Override public void closeDirect() { super.closeDirect(); reset(); } @Override public void reset() { super.reset(); if (this.reader != null) { try { this.reader.close(); } catch (IOException e) { } this.reader = null; } this.nameIndexes = null; this.textLine = 0; this.rowNumber = 0; this.cr = false; this.eof = false; if (this.parentLines != null) { for (Map.Entry<String, List<String>> entry : this.parentLines.entrySet()) { entry.setValue(null); } } this.running = false; this.asynchException = null; this.limit = -1; } public void setTable(TextTable table) { this.table = table; this.noTrim = table.isNoTrim(); } @Override public TextTableNode clone() { TextTableNode clone = new TextTableNode(getID()); this.copyTo(clone); clone.setTable(table); return clone; } @Override public void open() throws TeiidComponentException, TeiidProcessingException { super.open(); if (getParent() instanceof LimitNode) { LimitNode parent = (LimitNode)getParent(); if (parent.getLimit() > 0) { limit = parent.getLimit() + parent.getOffset(); } } } @Override protected synchronized TupleBatch nextBatchDirect() throws BlockedException, TeiidComponentException, TeiidProcessingException { if (reader == null) { initReader(); } if (reader == null) { terminateBatches(); return pullBatch(); } if (isLastBatch()) { return pullBatch(); } if (isBatchFull()) { TupleBatch result = pullBatch(); processAsynch(); // read ahead return result; } unwrapException(asynchException); processAsynch(); if (this.getContext().getWorkItem() == null) { //this is for compatibility with engine tests that are below the level of using the work item synchronized (this) { while (running) { try { this.wait(); } catch (InterruptedException e) { throw new TeiidRuntimeException(e); } } } } throw BlockedException.block("Blocking on results from file processing."); //$NON-NLS-1$ } private void processAsynch() { if (!running) { running = true; getContext().getExecutor().execute(new Runnable() { @Override public void run() { try { process(); } catch (TeiidRuntimeException e) { asynchException = e; } catch (Throwable e) { asynchException = new TeiidRuntimeException(e); } finally { running = false; RequestWorkItem workItem = TextTableNode.this.getContext().getWorkItem(); if (workItem != null) { workItem.moreWork(); } else { synchronized (TextTableNode.this) { TextTableNode.this.notifyAll(); } } } } }); } } private void process() throws TeiidProcessingException { while (true) { synchronized (this) { if (isBatchFull()) { return; } StringBuilder line = readLine(lineWidth, table.isFixedWidth()); if (line == null) { terminateBatches(); break; } String parentSelector = null; if (table.getSelector() != null) { if (line.length() < table.getSelector().length()) { continue; } if (!line.substring(0, table.getSelector().length()).equals(table.getSelector())) { if (parentLines == null) { continue; //doesn't match any selector } parentSelector = line.substring(0, table.getSelector().length()); if (!parentLines.containsKey(parentSelector)) { continue; //doesn't match any selector } } } List<String> vals = parseLine(line); if (parentSelector != null) { this.parentLines.put(parentSelector, vals); continue; } else if (table.getSelector() != null && !table.getSelector().equals(vals.get(0))) { continue; } rowNumber++; List<Object> tuple = new ArrayList<Object>(projectionIndexes.length); for (int output : projectionIndexes) { TextColumn col = table.getColumns().get(output); String val = null; int index = output; if (col.isOrdinal()) { if (rowNumber > Integer.MAX_VALUE) { throw new TeiidRuntimeException(new TeiidProcessingException(QueryPlugin.Event.TEIID31174, QueryPlugin.Util.gs(QueryPlugin.Event.TEIID31174))); } tuple.add((int)rowNumber); continue; } if (col.getSelector() != null) { vals = this.parentLines.get(col.getSelector()); index = col.getPosition() - 1; } else if (nameIndexes != null) { index = nameIndexes.get(col.getName()); } if (vals == null || index >= vals.size()) { //throw new TeiidProcessingException(QueryPlugin.Util.getString("TextTableNode.no_value", col.getName(), textLine, systemId)); //$NON-NLS-1$ tuple.add(null); continue; } val = vals.get(index); try { tuple.add(DataTypeManager.transformValue(val, table.getColumns().get(output).getSymbol().getType())); } catch (TransformationException e) { throw new TeiidProcessingException(QueryPlugin.Event.TEIID30176, e, QueryPlugin.Util.gs(QueryPlugin.Event.TEIID30176, col.getName(), textLine, systemId)); } } addBatchRow(tuple); if (rowNumber == limit) { terminateBatches(); break; } } } } private StringBuilder readLine(int maxLength, boolean exact) throws TeiidProcessingException { if (eof) { return null; } StringBuilder sb = new StringBuilder(exact ? maxLength : (maxLength >> 4)); while (true) { char c = readChar(); if (c == newLine) { if (sb.length() == 0) { if (eof) { return null; } if (table.isUsingRowDelimiter()) { continue; //skip empty lines } } if (table.isUsingRowDelimiter()) { return sb; } } sb.append(c); if (exact && sb.length() == maxLength && !table.isUsingRowDelimiter()) { return sb; } if (sb.length() > maxLength) { if (exact) { sb.deleteCharAt(sb.length() - 1); //we're not forcing them to fully specify the line, so just drop the rest //TODO: there should be a max read length while (readChar() != newLine) { } return sb; } //protects non-fixed width processing from run-away values //TODO it is possible that string values could be desired that are longer than the max and/or returned as clobs throw new TeiidProcessingException(QueryPlugin.Event.TEIID30178, QueryPlugin.Util.gs(QueryPlugin.Event.TEIID30178, textLine+1, systemId, maxLength)); } } } private char readChar() throws TeiidProcessingException { try { int c = reader.read(); if (cr) { if (c == newLine) { c = reader.read(); } cr = false; } switch (c) { case '\r': if (crNewLine) { cr = true; textLine++; return newLine; } break; case -1: eof = true; textLine++; return newLine; } if (c == newLine) { textLine++; return newLine; } return (char)c; } catch (IOException e) { throw new TeiidProcessingException(QueryPlugin.Event.TEIID30179, e, QueryPlugin.Util.gs(QueryPlugin.Event.TEIID30179, systemId)); } } private void initReader() throws ExpressionEvaluationException, BlockedException, TeiidComponentException, TeiidProcessingException { setReferenceValues(this.table); ClobType file = (ClobType)getEvaluator(Collections.emptyMap()).evaluate(table.getFile(), null); if (file == null) { return; } //get the reader try { this.systemId = "Unknown"; //$NON-NLS-1$ if (file.getReference() instanceof ClobImpl) { this.systemId = ((ClobImpl)file.getReference()).getStreamFactory().getSystemId(); if (this.systemId == null) { this.systemId = "Unknown"; //$NON-NLS-1$ } } Reader r = file.getCharacterStream(); if (!(r instanceof BufferedReader)) { reader = new BufferedReader(r); } else { reader = (BufferedReader)r; } } catch (SQLException e) { throw new TeiidProcessingException(QueryPlugin.Event.TEIID30180, e); } //process the skip field if (skip <= 0) { return; } while (textLine < skip) { boolean isHeader = textLine == header; if (isHeader) { StringBuilder line = readLine(DataTypeManager.MAX_STRING_LENGTH * 16, false); if (line == null) { //just return an empty batch reset(); return; } processHeader(parseLine(line)); } else { while (readChar() != newLine) { } } } } private void processHeader(List<String> line) throws TeiidProcessingException { nameIndexes = new HashMap<String, Integer>(); this.lineWidth = DataTypeManager.MAX_STRING_LENGTH * line.size(); for (String string : line) { if (string == null) { continue; } nameIndexes.put(string.toUpperCase(), nameIndexes.size()); } for (TextColumn col : table.getColumns()) { if (col.isOrdinal()) { continue; } String name = col.getName().toUpperCase(); if (col.getHeader() != null) { name = col.getHeader().toUpperCase(); } Integer index = nameIndexes.get(name); if (index == null) { throw new TeiidProcessingException(QueryPlugin.Event.TEIID30181, QueryPlugin.Util.gs(QueryPlugin.Event.TEIID30181, col.getName(), systemId)); } nameIndexes.put(col.getName(), index); } } private List<String> parseLine(StringBuilder line) throws TeiidProcessingException { if (table.isFixedWidth()) { return parseFixedWidth(line); } return parseDelimitedLine(line); } private List<String> parseDelimitedLine(StringBuilder line) throws TeiidProcessingException { ArrayList<String> result = new ArrayList<String>(); StringBuilder builder = new StringBuilder(); boolean escaped = false; boolean wasQualified = false; boolean qualified = false; while (true) { if (line == null) { if (escaped) { //allow for escaped new lines if (cr) { builder.append('\r'); } builder.append(newLine); escaped = false; line = readLine(lineWidth, false); continue; } if (!qualified) { //close the last entry addValue(result, wasQualified || noTrim, builder.toString()); return result; } line = readLine(lineWidth, false); if (line == null) { throw new TeiidProcessingException(QueryPlugin.Event.TEIID30182, QueryPlugin.Util.gs(QueryPlugin.Event.TEIID30182, systemId)); } } for (int i = 0; i < line.length(); i++) { char chr = line.charAt(i); if (chr == delimiter) { if (escaped || qualified) { builder.append(chr); escaped = false; } else { addValue(result, wasQualified || noTrim, builder.toString()); wasQualified = false; builder = new StringBuilder(); //next entry } } else if (chr == quote) { if (noQuote) { //it's the escape char if (escaped) { builder.append(quote); } escaped = !escaped; } else { if (qualified) { qualified = false; } else { if (wasQualified) { qualified = true; builder.append(chr); } else { if (builder.toString().trim().length() != 0) { throw new TeiidProcessingException(QueryPlugin.Event.TEIID30183, QueryPlugin.Util.gs(QueryPlugin.Event.TEIID30183, textLine, systemId)); } qualified = true; builder = new StringBuilder(); //start the entry over wasQualified = true; } } } } else { if (escaped) { //don't understand other escape sequences yet throw new TeiidProcessingException(QueryPlugin.Event.TEIID30184, QueryPlugin.Util.gs(QueryPlugin.Event.TEIID30184, chr, textLine, systemId)); } if (wasQualified && !qualified) { if (!Character.isWhitespace(chr)) { throw new TeiidProcessingException(QueryPlugin.Event.TEIID30183, QueryPlugin.Util.gs(QueryPlugin.Event.TEIID30183, textLine, systemId)); } //else just ignore } else { builder.append(chr); } } } line = null; } } private void addValue(ArrayList<String> result, boolean wasQualified, String val) { if (!wasQualified) { val = val.trim(); if (val.length() == 0) { val = null; } } result.add(val); } private List<String> parseFixedWidth(StringBuilder line) { ArrayList<String> result = new ArrayList<String>(); int beginIndex = 0; for (TextColumn col : table.getColumns()) { if (beginIndex >= line.length()) { result.add(null); } else { String val = new String(line.substring(beginIndex, Math.min(line.length(), beginIndex + col.getWidth()))); addValue(result, col.isNoTrim(), val); beginIndex += col.getWidth(); } } return result; } @Override public Collection<? extends LanguageObject> getObjects() { return Arrays.asList(this.table.getFile()); } @Override public PlanNode getDescriptionProperties() { PlanNode props = super.getDescriptionProperties(); AnalysisRecord.addLanaguageObjects(props, AnalysisRecord.PROP_TABLE_FUNCTION, Arrays.asList(this.table)); return props; } }