/*******************************************************************************
 * Gisgraphy Project
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
 *
 * Copyright 2008 Gisgraphy project
 * David Masclet <davidmasclet@gisgraphy.com>
 *
 *
 *******************************************************************************/
/**
 *
 */
package com.gisgraphy.importer;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Required;
import org.springframework.transaction.PlatformTransactionManager;
import org.springframework.transaction.TransactionDefinition;
import org.springframework.transaction.TransactionStatus;
import org.springframework.transaction.annotation.Isolation;
import org.springframework.transaction.support.DefaultTransactionDefinition;

import com.gisgraphy.domain.repository.GisFeatureDao;
import com.gisgraphy.domain.valueobject.Constants;
import com.gisgraphy.domain.valueobject.ImporterStatus;
import com.gisgraphy.service.IInternationalisationService;

/**
 * Base class for the line-oriented importers. It drives the whole import of
 * one or more text files: it opens each file, reads it line by line, hands
 * every relevant line to {@link #processData(String)}, and wraps the work in
 * transactions that are committed every
 * {@link #getMaxInsertsBeforeFlush()} read lines.
 * <p>
 * Subclasses provide the files ({@link #getFiles()}), the per-line logic
 * ({@link #processData(String)}) and a few template hooks.
 *
 * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
 */
public abstract class AbstractSimpleImporterProcessor implements IImporterProcessor {

    /** Number of lines read so far over all the files of the current run. */
    protected int totalReadLine = 0;

    /** Number of lines read so far in the file currently processed. */
    protected int readFileLine = 0;

    /** Human readable message describing the last error, empty if none. */
    protected String statusMessage = "";

    /** Current status of this importer. */
    protected ImporterStatus status = ImporterStatus.WAITING;

    @Autowired
    protected IInternationalisationService internationalisationService;

    /**
     * Cached total number of lines of the files to process; 0 means that it
     * has not been computed yet.
     *
     * @see IImporterProcessor#getNumberOfLinesToProcess()
     */
    int numberOfLinesToProcess = 0;

    /**
     * This field is used to generate a unique featureId when importing
     * features, because the featureId is not known yet and the field is
     * required. It should be multiplied by -1 to be sure that it does not
     * conflict with the Geonames ones, which are all positive.
     *
     * @see GisFeatureDao#getDirties()
     */
    static Long nbGisInserted = 0L;

    protected ImporterConfig importerConfig;

    /**
     * The logger
     */
    protected static final Logger logger = LoggerFactory
            .getLogger(AbstractSimpleImporterProcessor.class);

    /** The files of the current run, as returned by {@link #getFiles()}. */
    private File[] filesToProcess;

    /**
     * Lines starting with this prefix are considered as comments
     */
    protected String COMMENT_START = "#";

    /** Whether the first relevant line of the current file has been skipped. */
    private boolean hasConsumedFirstLine = false;

    /**
     * Whether the end of the document has been reached
     */
    private boolean endOfDocument = false;

    /**
     * The reader of the file currently processed
     */
    protected BufferedReader in;

    /**
     * The transaction manager
     */
    protected PlatformTransactionManager transactionManager;

    /**
     * Template method: whether the processor should ignore the first line of
     * the input.
     *
     * @return true if the processor should ignore the first line
     */
    protected abstract boolean shouldIgnoreFirstLine();

    /**
     * Should flush and clear all the Daos that are used by the processor.
     * It is called by {@link #commit()} right before the transaction is
     * committed.
     */
    protected abstract void flushAndClear();

    /**
     * Hook called right after every transaction start; implementations are
     * expected to configure the flush mode of their Daos.
     *
     * @see #flushAndClear()
     */
    protected abstract void setCommitFlushMode();

    /** Status of the transaction currently opened, null before the first one. */
    protected TransactionStatus txStatus = null;

    /** Definition used to open the current transaction. */
    protected DefaultTransactionDefinition txDefinition;

    /**
     * @return the number of fields the processed file should have
     */
    protected abstract int getNumberOfColumns();

    /**
     * Whether the processor should ignore the comments (i.e. lines starting
     * with {@link #COMMENT_START})
     *
     * @see AbstractSimpleImporterProcessor#COMMENT_START
     */
    protected abstract boolean shouldIgnoreComments();

    /**
     * Tells whether a line has to be processed as data. A line is skipped
     * only when comments are ignored <em>and</em> it starts with
     * {@link #COMMENT_START}.
     *
     * @param input
     *            the line to test
     * @return true if the line must not be treated as a comment
     */
    private boolean isNotComment(String input) {
        return !shouldIgnoreComments() || !input.startsWith(COMMENT_START);
    }

    /**
     * Default constructor
     */
    public AbstractSimpleImporterProcessor() {
        super();
    }

    /**
     * The file currently processed
     */
    protected File currentFile;

    /**
     * Template method that can be overridden. It is called once before the
     * process starts, not once per file.
     */
    protected void setup() {
    }

    /**
     * @return The files to process
     * @see ImporterHelper
     */
    protected abstract File[] getFiles();

    /*
     * (non-Javadoc)
     *
     * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#getCurrentFile()
     */
    public String getCurrentFileName() {
        if (this.currentFile != null) {
            return this.currentFile.getName();
        }
        return "unknow";
    }

    /**
     * Reads the next line of the current file and processes it when needed:
     * comment lines (if comments are ignored) and the first relevant line (if
     * {@link #shouldIgnoreFirstLine()} is true) are read but not processed.
     * When no more line is available the end-of-document flag is raised.
     * <p>
     * Missing-field and wrong-number-of-fields errors are either rethrown as
     * {@link ImporterException} or only logged, depending on the importer
     * configuration. Any other exception is always rethrown as an
     * {@link ImporterException}.
     *
     * @return The number of lines that have been read for the current file
     * @throws IllegalArgumentException
     *             if the end of the document has already been reached
     * @throws ImporterException
     *             if the line can not be read or processed
     */
    public int readLineAndProcessData() throws ImporterException {
        if (this.isEndOfDocument()) {
            throw new IllegalArgumentException(
                    "Must NOT be called when it is the end of the document");
        }

        String input;
        try {
            input = (this.in).readLine();
        } catch (IOException e1) {
            throw new ImporterException("can not read line ", e1);
        }

        if (input == null) {
            // nothing left to read in this file
            this.endOfDocument = true;
            return readFileLine;
        }

        readFileLine++;
        if (!isNotComment(input)) {
            return readFileLine;
        }
        if (this.shouldIgnoreFirstLine() && !hasConsumedFirstLine) {
            // header line: consume it without processing
            hasConsumedFirstLine = true;
            return readFileLine;
        }

        try {
            this.processData(getInput(input));
        } catch (MissingRequiredFieldException mrfe) {
            if (this.importerConfig.isMissingRequiredFieldThrows()) {
                logger.error("A requrired field is missing "
                        + mrfe.getMessage());
                throw new ImporterException("A requrired field is missing "
                        + mrfe.getMessage(), mrfe);
            }
            logger.warn(mrfe.getMessage());
        } catch (WrongNumberOfFieldsException wnofe) {
            if (this.importerConfig.isWrongNumberOfFieldsThrows()) {
                logger.error("wrong number of fields during import "
                        + wnofe.getMessage());
                throw new ImporterException(
                        "Wrong number of fields during import "
                                + wnofe.getMessage(), wnofe);
            }
            logger.warn(wnofe.getMessage());
        } catch (Exception e) {
            String message = "An Error occurred on Line " + readFileLine
                    + " for " + input + " : " + e.getMessage();
            throw new ImporterException(message, e);
        }
        return readFileLine;
    }

    /**
     * Processes one line of the file; must be implemented by the concrete
     * class.
     *
     * @param line
     *            the line to process (already decoded by
     *            {@link #getInput(String)})
     */
    protected abstract void processData(String line) throws ImporterException;

    /**
     * Entry point of the import: manages the status, calls {@link #setup()},
     * processes every file returned by {@link #getFiles()} and finally calls
     * {@link #tearDown()}.
     * <p>
     * The status ends up as SKIPPED (importer skipped or no file), PROCESSED
     * (success) or ERROR. Errors raised while processing are delegated to
     * {@link #processError(Exception)}, which rethrows them; errors raised by
     * {@link #tearDown()} are recorded in the status message and logged but
     * not rethrown.
     */
    public void process() {
        try {
            if (shouldBeSkipped()) {
                this.status = ImporterStatus.SKIPPED;
                return;
            }
            this.status = ImporterStatus.PROCESSING;
            // triggers the line count, which is only computed while PROCESSING
            this.getNumberOfLinesToProcess();
            setup();
            this.filesToProcess = getFiles();
            if (this.filesToProcess.length == 0) {
                logger.info("there is 0 file to process for "
                        + this.getClass().getSimpleName());
                this.status = ImporterStatus.SKIPPED;
                return;
            }
            for (int i = 0; i < filesToProcess.length; i++) {
                currentFile = filesToProcess[i];
                this.endOfDocument = false;
                getBufferReader(filesToProcess[i]);
                processFile();
                closeBufferReader();
                onFileProcessed(filesToProcess[i]);
            }
        } catch (Exception e) {
            processError(e);
        } finally {
            try {
                tearDown();
                this.status = this.status == ImporterStatus.PROCESSING ? ImporterStatus.PROCESSED
                        : this.status;
                if (this.status != ImporterStatus.ERROR) {
                    this.statusMessage = "";
                }
            } catch (Exception e) {
                this.status = ImporterStatus.ERROR;
                String teardownErrorMessage = "An error occured on teardown (the import is done but maybe not optimzed) :"
                        + e.getMessage();
                // compare by content: '!=' on strings only compares references
                boolean hasPreviousMessage = this.statusMessage != null
                        && this.statusMessage.length() > 0;
                this.statusMessage = hasPreviousMessage ? this.statusMessage
                        + " and " + teardownErrorMessage
                        : teardownErrorMessage;
                logger.error(statusMessage, e);
            }
        }
    }

    /**
     * Called by {@link #process()} when an exception occurs: sets the status
     * to ERROR, records and logs the message, then rethrows the error as an
     * {@link ImporterException}. {@link #tearDown()} is called after this
     * method.
     *
     * @param e
     *            the exception that interrupted the process
     */
    protected void processError(Exception e) {
        this.status = ImporterStatus.ERROR;
        this.statusMessage = "An error occurred when processing "
                + this.getClass().getSimpleName() + " : " + e.getMessage();
        logger.error(statusMessage, e);
        throw new ImporterException(statusMessage, causeOrSelf(e));
    }

    /**
     * Returns the cause of the given exception, or the exception itself when
     * it has no cause, so that a wrapping exception never ends up without any
     * stack trace.
     */
    private static Throwable causeOrSelf(Exception e) {
        Throwable cause = e.getCause();
        return cause != null ? cause : e;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.gisgraphy.domain.geoloc.importer.IImporterProcessor#shouldBeSkipped()
     */
    public boolean shouldBeSkipped() {
        return false;
    }

    /**
     * Decodes a raw line. When the import key is null or 0 the line is
     * returned as is; otherwise the key is subtracted from the code unit of
     * every character.
     *
     * @param data
     *            the raw line read from the file
     * @return the line to give to {@link #processData(String)}
     */
    protected final String getInput(String data) {
        Integer importKey = getImportKey();
        if (importKey == null || importKey.intValue() == 0) {
            return data;
        }
        int shift = importKey.intValue();
        StringBuilder result = new StringBuilder(data.length());
        for (char c : data.toCharArray()) {
            result.append((char) (c - shift));
        }
        return result.toString();
    }

    /**
     * @return the key used by {@link #getInput(String)}, read from the
     *         importer configuration
     */
    Integer getImportKey() {
        return importerConfig.getKey();
    }

    /**
     * Opens the given file and stores the reader in {@link #in}, decoding the
     * content with {@link Constants#CHARSET}.
     *
     * @param file
     *            the file to open
     * @throws RuntimeException
     *             if the file does not exist or the charset is not supported
     */
    private void getBufferReader(File file) {
        InputStream inInternal;
        // uses a BufferedInputStream for better performance
        try {
            inInternal = new BufferedInputStream(new FileInputStream(file));
        } catch (FileNotFoundException e) {
            throw new RuntimeException(e);
        }
        try {
            this.in = new BufferedReader(new InputStreamReader(inInternal,
                    Constants.CHARSET));
        } catch (UnsupportedEncodingException e) {
            // the stream is already opened: do not leak the file handle
            try {
                inInternal.close();
            } catch (IOException ignored) {
                // best effort, the encoding error is the one to report
            }
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates the transaction definition used by this importer:
     * PROPAGATION_REQUIRED, READ_UNCOMMITTED isolation, read-write.
     */
    private static DefaultTransactionDefinition newTransactionDefinition() {
        DefaultTransactionDefinition definition = new DefaultTransactionDefinition();
        definition
                .setPropagationBehavior(TransactionDefinition.PROPAGATION_REQUIRED);
        definition.setIsolationLevel(Isolation.READ_UNCOMMITTED.value());
        definition.setReadOnly(false);
        return definition;
    }

    /**
     * Processes the file currently opened in {@link #in}: reads it until the
     * end, committing and reopening a transaction each time
     * {@link #needCommit()} is true, then commits what remains.
     *
     * @throws ImporterException
     *             if anything goes wrong; a rollback is attempted first
     */
    private void processFile() throws ImporterException {
        try {
            hasConsumedFirstLine = false;
            readFileLine = 0;
            logger.info("will process " + getCurrentFileName());
            // Transaction Definition (kept here so that an overridden
            // startTransaction() still finds a non-null definition)
            txDefinition = newTransactionDefinition();

            startTransaction();
            setCommitFlushMode();
            while (!isEndOfDocument()) {
                this.readLineAndProcessData();
                incrementReadedFileLine(1);
                if (needCommit()) {
                    logger.info("We need to commit, flushing and clearing: "
                            + totalReadLine);
                    // and commit !
                    commit();
                    startTransaction();
                    setCommitFlushMode();
                }
            }
            commit();
            // remove a processed line because the counter has been
            // incremented one time too many (for the end-of-document read)
            decrementReadedFileLine(1);
        } catch (Exception e) {
            try {
                rollbackTransaction();
            } catch (Exception rollbackFailure) {
                // do not let a failing rollback hide the original error
                logger.error("can not rollback the transaction for "
                        + getCurrentFileName() + " : "
                        + rollbackFailure.getMessage(), rollbackFailure);
            }
            throw new ImporterException("An error occurred when processing "
                    + getCurrentFileName() + " : " + e.getMessage(),
                    causeOrSelf(e));
        }
    }

    /**
     * Adds the given value to the total number of read lines.
     *
     * @param increment
     *            the number of lines to add
     * @return the new total number of read lines
     */
    protected int incrementReadedFileLine(int increment) {
        totalReadLine = totalReadLine + increment;
        return totalReadLine;
    }

    /**
     * Subtracts the given value from the total number of read lines.
     *
     * @param decrement
     *            the number of lines to remove
     * @return the new total number of read lines
     */
    protected int decrementReadedFileLine(int decrement) {
        totalReadLine = totalReadLine - decrement;
        return totalReadLine;
    }

    /**
     * Rolls back the current transaction.
     */
    protected void rollbackTransaction() {
        transactionManager.rollback(txStatus);
    }

    /**
     * @return true when the total number of read lines is a multiple of
     *         {@link #getMaxInsertsBeforeFlush()}
     */
    protected boolean needCommit() {
        return totalReadLine % this.getMaxInsertsBeforeFlush() == 0;
    }

    /**
     * Opens a new transaction and stores its definition and status in
     * {@link #txDefinition} and {@link #txStatus}.
     */
    protected void startTransaction() {
        txDefinition = newTransactionDefinition();
        txStatus = transactionManager.getTransaction(txDefinition);
    }

    /**
     * Template method that can be overridden. It is called once after the end
     * of the process (even on error), not once per file. You should always
     * call super.tearDown() when you override this method.
     */
    protected void tearDown() {
        closeBufferReader();
    }

    /**
     * Closes the current reader, if any. Failures are only logged because
     * nothing useful can be done about them at this point.
     */
    private void closeBufferReader() {
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                logger.debug("can not close the reader of "
                        + getCurrentFileName() + " : " + e.getMessage());
            }
        }
    }

    /**
     * Hook called when a file has been processed without error. When the
     * configuration asks for it, the file is renamed by appending ".done" to
     * its absolute path.
     *
     * @param file
     *            the file that has just been processed
     */
    protected void onFileProcessed(File file) {
        if (!importerConfig.isRenameFilesAfterProcessing()) {
            return;
        }
        // process() passes the current file; fall back on it if null is given
        File processed = file != null ? file : currentFile;
        if (processed == null) {
            return;
        }
        File renamed = new File(processed.getAbsoluteFile() + ".done");
        if (!processed.renameTo(renamed)) {
            // renameTo reports failure through its return value only
            logger.warn("can not rename " + processed + " to " + renamed);
        }
    }

    /**
     * Flushes and clears the Daos, then commits the current transaction.
     */
    protected void commit() {
        flushAndClear();
        transactionManager.commit(this.txStatus);
    }

    /**
     * Checks whether the field at the specified position is missing or
     * empty. A field is considered empty when the array is null or empty,
     * when the position is out of bounds, or when the value is blank once
     * trimmed or equal to two double quotes (<code>""</code>).
     * <p>
     * Note that a <code>null</code> element inside the array is NOT reported
     * as empty: the method returns false for it.
     *
     * @param fields
     *            The array to test
     * @param position
     *            the position of the field to test in the array
     * @param required
     *            if an exception should be thrown if the field is empty
     * @return true if the field at the specified position is empty
     * @throws MissingRequiredFieldException
     *             if the field is empty and required is true
     */
    protected static boolean isEmptyField(String[] fields, int position,
            boolean required) {
        if (fields == null) {
            if (required) {
                throw new MissingRequiredFieldException(
                        "can not chek fields if the array is null");
            }
            return true;
        }
        if (position < 0) {
            if (required) {
                throw new MissingRequiredFieldException(
                        "position can not be < 0 => position = " + position);
            }
            return true;
        }
        if (fields.length == 0) {
            if (required) {
                throw new MissingRequiredFieldException("fields is empty");
            }
            return true;
        }
        if (position > (fields.length - 1)) {
            if (required) {
                throw new MissingRequiredFieldException("fields has "
                        + (fields.length)
                        + " element(s), can not get element with position "
                        + (position) + " : " + dumpFields(fields));
            }
            return true;
        }
        String string = fields[position];
        if (string != null
                && (string.trim().equals("") || string.equals("\"\""))) {
            if (required) {
                throw new MissingRequiredFieldException("fields[" + position
                        + "] is required for featureID " + fields[0] + " : "
                        + dumpFields(fields));
            }
            return true;
        }
        return false;
    }

    /**
     * Builds a human readable representation of the array. Every element of
     * at most 100 characters is written followed by ';'; null or longer
     * elements are replaced by "&lt;...&gt;".
     *
     * @param fields
     *            The array to dump, may be null
     * @return the representation, "[]" for a null or empty array
     */
    protected static String dumpFields(String[] fields) {
        if (fields == null) {
            return "[]";
        }
        StringBuilder result = new StringBuilder("[");
        for (String element : fields) {
            if (element != null && element.length() <= 100) {
                result.append(element).append(";");
            } else {
                result.append("<...>");
            }
        }
        return result.append("]").toString();
    }

    /**
     * Utility method which throws an exception if the number of fields is not
     * the one expected (retrieved by {@link #getNumberOfColumns()})
     *
     * @see #getNumberOfColumns()
     * @param fields
     *            The array to check
     * @throws WrongNumberOfFieldsException
     *             if the array has not the expected length
     */
    protected void checkNumberOfColumn(String[] fields) {
        if (fields.length != getNumberOfColumns()) {
            throw new WrongNumberOfFieldsException(
                    "The number of fields is not correct. expected : "
                            + getNumberOfColumns() + ", founds : "
                            + fields.length + ". details :"
                            + dumpFields(fields));
        }
    }

    /**
     * @return true if the end of the document for the current processed file
     *         is reached
     */
    protected boolean isEndOfDocument() {
        return endOfDocument;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#getReadFileLine()
     */
    public long getReadFileLine() {
        return this.readFileLine;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#getTotalReadedLine()
     */
    public long getTotalReadLine() {
        return this.totalReadLine;
    }

    @Required
    public void setTransactionManager(
            PlatformTransactionManager transactionManager) {
        this.transactionManager = transactionManager;
    }

    @Required
    public void setImporterConfig(ImporterConfig importerConfig) {
        this.importerConfig = importerConfig;
    }

    /**
     * Counts the lines of the given files. If a file can not be read, a
     * warning is logged and the count reached so far is returned: the
     * remaining files are not counted.
     *
     * @param files
     *            the files to count the lines of
     * @return the number of lines counted
     */
    protected int countLines(File[] files) {
        logger.info("counting lines");
        int lines = 0;
        for (int i = 0; i < files.length; i++) {
            File countfile = files[i];
            logger.info("counting lines of " + countfile);
            BufferedInputStream bis = null;
            BufferedReader br = null;
            try {
                bis = new BufferedInputStream(new FileInputStream(countfile));
                br = new BufferedReader(new InputStreamReader(bis,
                        Constants.CHARSET));
                while (br.readLine() != null) {
                    lines++;
                }
            } catch (Exception e) {
                String filename = countfile == null ? null : countfile
                        .getName();
                logger.warn("can not count lines for " + filename + " : "
                        + e.getMessage(), e);
                logger.info("end of counting lines");
                return lines;
            } finally {
                try {
                    if (br != null) {
                        // closing the reader also closes the wrapped stream
                        br.close();
                    } else if (bis != null) {
                        bis.close();
                    }
                } catch (IOException ignored) {
                    // best effort: the count does not depend on the close
                }
            }
        }
        logger.info("There is " + lines + " to process for "
                + this.getClass().getSimpleName());
        return lines;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#getNumberOfLinesToProcess()
     */
    public long getNumberOfLinesToProcess() {
        if (this.numberOfLinesToProcess == 0
                && this.status == ImporterStatus.PROCESSING) {
            // it may not have been calculated yet
            this.numberOfLinesToProcess = countLines(getFiles());
        }
        return this.numberOfLinesToProcess;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#getStatus()
     */
    public ImporterStatus getStatus() {
        return this.status;
    }

    /**
     * @return The number of read lines between two commits
     * @see ImporterConfig#setMaxInsertsBeforeFlush(int)
     */
    protected int getMaxInsertsBeforeFlush() {
        return importerConfig.getMaxInsertsBeforeFlush();
    }

    /**
     * Puts the importer back in its initial state: WAITING status, no current
     * file, counters at 0 and empty status message.
     */
    public void resetStatus() {
        this.currentFile = null;
        this.readFileLine = 0;
        this.totalReadLine = 0;
        this.numberOfLinesToProcess = 0;
        this.status = ImporterStatus.WAITING;
        this.statusMessage = "";
    }

    /*
     * (non-Javadoc)
     *
     * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#getErrorMessage()
     */
    public String getStatusMessage() {
        return statusMessage;
    }

    public void setInternationalisationService(
            IInternationalisationService internationalisationService) {
        this.internationalisationService = internationalisationService;
    }
}