package org.fastcatsearch.job.indexing;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;

import org.fastcatsearch.cluster.Node;
import org.fastcatsearch.cluster.NodeService;
import org.fastcatsearch.common.io.Streamable;
import org.fastcatsearch.datasource.reader.AbstractDataSourceReader;
import org.fastcatsearch.datasource.reader.DefaultDataSourceReaderFactory;
import org.fastcatsearch.exception.FastcatSearchException;
import org.fastcatsearch.ir.IRService;
import org.fastcatsearch.ir.config.CollectionContext;
import org.fastcatsearch.ir.config.DataSourceConfig;
import org.fastcatsearch.ir.document.Document;
import org.fastcatsearch.ir.io.DataInput;
import org.fastcatsearch.ir.io.DataOutput;
import org.fastcatsearch.ir.settings.FieldSetting;
import org.fastcatsearch.ir.settings.SchemaSetting;
import org.fastcatsearch.ir.util.Formatter;
import org.fastcatsearch.job.Job;
import org.fastcatsearch.service.ServiceManager;

/**
 * Dumps every document read from a collection's data source into a plain-text
 * file (datasource.&lt;timestamp&gt;.txt) under the collection's file path,
 * one document per line. The job only runs on the collection's designated
 * index node.
 */
public class CollectionDatasourceDumpJob extends Job implements Streamable {

    private static final long serialVersionUID = 7991088210024664812L;

    @Override
    public void readFrom(DataInput input) throws IOException {
        args = input.readString();
    }

    @Override
    public void writeTo(DataOutput output) throws IOException {
        output.writeString((String) args);
    }

    @Override
    public JobResult doRun() throws FastcatSearchException {
        String collectionId = getStringArgs();

        IRService irService = ServiceManager.getInstance().getService(IRService.class);
        CollectionContext collectionContext = irService.collectionContext(collectionId);
        if (collectionContext == null) {
            throw new FastcatSearchException("Collection [" + collectionId + "] does not exist.");
        }

        // This job must execute on the collection's index node.
        String indexNodeId = collectionContext.collectionConfig().getIndexNode();
        NodeService nodeService = ServiceManager.getInstance().getService(NodeService.class);
        Node indexNode = nodeService.getNodeById(indexNodeId);
        if (!nodeService.isMyNode(indexNode)) {
            throw new RuntimeException("Invalid index node collection[" + collectionId + "] node[" + indexNodeId + "]");
        }

        DataSourceConfig dataSourceConfig = collectionContext.dataSourceConfig();
        File filePath = collectionContext.collectionFilePaths().file();

        // Prefer the work schema if it defines any fields; otherwise fall back
        // to the active schema.
        SchemaSetting schemaSetting = collectionContext.schema().schemaSetting();
        SchemaSetting workSchemaSetting = collectionContext.workSchemaSetting();
        if (workSchemaSetting != null) {
            List<FieldSetting> list = workSchemaSetting.getFieldSettingList();
            if (list != null && list.size() > 0) {
                schemaSetting = workSchemaSetting;
            }
        }

        Writer writer = null;
        AbstractDataSourceReader dataSourceReader = null;
        try {
            long startTime = System.currentTimeMillis();
            long lapTime = startTime;
            writer = new OutputStreamWriter(new FileOutputStream(
                    new File(filePath, "datasource." + System.currentTimeMillis() + ".txt")));
            dataSourceReader = DefaultDataSourceReaderFactory.createFullIndexingSourceReader(
                    collectionContext.collectionId(), filePath, schemaSetting, dataSourceConfig);

            int count = 0;
            while (dataSourceReader.hasNext()) {
                Document document = dataSourceReader.nextDocument();
                count++;
                // Log progress every 1000 documents.
                if (count % 1000 == 0) {
                    logger.info("{} documents dumped, lap = {} ms, elapsed = {}, mem = {}", new Object[] {
                            count,
                            System.currentTimeMillis() - lapTime,
                            Formatter.getFormatTime(System.currentTimeMillis() - startTime),
                            Formatter.getFormatSize(Runtime.getRuntime().totalMemory()) });
                    lapTime = System.currentTimeMillis();
                }
                // One document per line.
                writer.write(document.toString());
                writer.write("\n");
            }
        } catch (Exception e) {
            logger.error("", e);
            return new JobResult(false);
        } finally {
            // Release the reader as well as the writer; the original code
            // closed only the writer, leaking the data source reader.
            if (dataSourceReader != null) {
                try {
                    dataSourceReader.close();
                } catch (Exception ignore) {
                }
            }
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException ignore) {
                }
            }
        }
        return new JobResult(true);
    }
}
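/*
 * Usage sketch (illustrative, not part of the original source): the job takes
 * the target collection id as its single string argument. How it is submitted
 * depends on the surrounding job framework; the JobService, setArgs, offer and
 * take calls below are assumptions about this codebase's dispatch API and may
 * need adjusting.
 *
 *   CollectionDatasourceDumpJob job = new CollectionDatasourceDumpJob();
 *   job.setArgs("sampleCollection");                 // hypothetical collection id
 *   ResultFuture future = JobService.getInstance().offer(job);
 *   Object result = future.take();                   // JobResult(true) on success
 */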