package org.basex.build;

import static org.basex.core.Text.*;

import java.io.*;
import java.util.*;
import java.util.regex.*;
import java.util.zip.*;

import org.basex.core.*;
import org.basex.core.MainOptions.MainParser;
import org.basex.core.cmd.*;
import org.basex.io.*;
import org.basex.io.in.*;
import org.basex.util.*;
import org.basex.util.list.*;

/**
 * This class recursively scans files and directories and parses all
 * relevant files. Archives (TAR, TGZ, GZIP, ZIP) are traversed entry by entry
 * if the corresponding option is enabled. Each resource is either parsed with
 * a single-resource parser, stored as raw file, or skipped.
 *
 * @author BaseX Team 2005-17, BSD License
 * @author Christian Gruen
 */
public final class DirParser extends Parser {
  /** Number of skipped files to log. */
  private static final int SKIPLOG = 10;
  /** Skipped files. */
  private final StringList skipped = new StringList();
  /** File pattern ({@code null} if the source is a single, non-archive resource). */
  private final Pattern filter;
  /** Root directory; stripped from resource paths to obtain relative target paths. */
  private final String dir;
  /** Original path, as stored in the database meta data. */
  private final String original;
  /** Parse archives in directories. */
  private final boolean archives;
  /** Skip corrupt files in directories. */
  private final boolean skipCorrupt;
  /** Add ignored files as raw files. */
  private final boolean addRaw;
  /** DTD parsing. */
  private final boolean dtd;
  /** Raw parsing. */
  private final boolean rawParser;
  /** Prefix paths of archive entries with the path of the archive. */
  private final boolean archiveName;

  /** Database path for storing binary files ({@code null} if unknown or not required). */
  private IOFile rawPath;
  /** Last source that was reported by {@link #progressInfo()}. */
  private IO lastSrc;
  /** Reference to the currently active single-resource parser ({@code null} if none). */
  private Parser parser;
  /** Resource counter; only incremented and used for debug output. */
  private int c;

  /**
   * Constructor.
   * @param source source path
   * @param options main options
   */
  public DirParser(final IO source, final MainOptions options) {
    super(source, options);
    final boolean isDir = source.isDir();
    if(isDir) {
      // normalize directory path: ensure exactly one trailing slash
      dir = source.path().replaceAll("/$", "") + '/';
      original = dir;
    } else {
      dir = source.dir();
      original = source.path();
    }
    skipCorrupt = options.get(MainOptions.SKIPCORRUPT);
    archives = options.get(MainOptions.ADDARCHIVES);
    archiveName = options.get(MainOptions.ARCHIVENAME);
    addRaw = options.get(MainOptions.ADDRAW);
    dtd = options.get(MainOptions.DTD);
    rawParser = options.get(MainOptions.PARSER) == MainParser.RAW;
    // the name filter is only applied to directories and archives
    filter = !isDir && !source.isArchive() ? null :
      Pattern.compile(IOFile.regex(options.get(MainOptions.CREATEFILTER)));
  }

  /**
   * Constructor.
   * @param source source path
   * @param options main options
   * @param dbpath future database path (required for binary resources)
   */
  public DirParser(final IO source, final MainOptions options, final IOFile dbpath) {
    this(source, options);
    // the raw directory is only needed if raw files may be stored
    if(dbpath != null && (addRaw || rawParser)) rawPath = new IOFile(dbpath, IO.RAW);
  }

  @Override
  public void parse(final Builder build) throws IOException {
    build.meta.filesize = 0;
    build.meta.original = original;
    parse(build, source);
  }

  /**
   * Parses the specified file or its children. Directories are traversed
   * recursively; archives are processed entry by entry if archive parsing is
   * enabled; all other inputs are treated as single resources.
   * @param builder builder
   * @param input current input
   * @throws IOException I/O exception
   */
  private void parse(final Builder builder, final IO input) throws IOException {
    if(input instanceof IOFile && input.isDir()) {
      for(final IO f : ((IOFile) input).children()) parse(builder, f);
    } else if(archives && input.isArchive()) {
      final String name = input.name().toLowerCase(Locale.ENGLISH);
      // the raw stream is owned by this block: it will be closed even if the
      // creation of one of the wrapping archive streams fails
      try(InputStream in = input.inputStream()) {
        if(name.endsWith(IO.TARSUFFIX) || name.endsWith(IO.TGZSUFFIX) ||
            name.endsWith(IO.TARGZSUFFIX)) {
          // process TAR files (compressed variants are unzipped first)
          try(InputStream is = name.endsWith(IO.TARSUFFIX) ? in : new GZIPInputStream(in);
              TarInputStream tis = new TarInputStream(is)) {
            for(TarEntry ze; (ze = tis.getNextEntry()) != null;) {
              if(ze.isDirectory()) continue;
              source = newStream(tis, ze.getName(), input);
              source.length(ze.getSize());
              parseResource(builder);
            }
          }
        } else if(name.endsWith(IO.GZSUFFIX)) {
          // process GZIP archive: the resource name is derived from the archive name,
          // everything starting from the first dot is replaced with the XML suffix
          try(GZIPInputStream is = new GZIPInputStream(in)) {
            source = newStream(is, input.name().replaceAll("\\..*", IO.XMLSUFFIX), input);
            parseResource(builder);
          }
        } else {
          // process ZIP archive
          try(ZipInputStream is = new ZipInputStream(in)) {
            for(ZipEntry ze; (ze = is.getNextEntry()) != null;) {
              if(ze.isDirectory()) continue;
              source = newStream(is, ze.getName(), input);
              source.length(ze.getSize());
              parseResource(builder);
            }
          } catch(final IllegalArgumentException ex) {
            // GH-1351: catch invalid archive encodings
            throw new IOException(ex);
          }
        }
      }
    } else {
      // process regular file
      source = input;
      parseResource(builder);
    }
  }

  /**
   * Creates a new stream. If archive names are to be preserved, the path of
   * the archive is used as prefix for the path of the entry.
   * @param is input stream
   * @param path path inside archive
   * @param input input
   * @return stream
   */
  private IOStream newStream(final InputStream is, final String path, final IO input) {
    return new IOStream(is, archiveName ? (input.path() + '/' + path) : path);
  }

  /**
   * Parses the current source. Depending on the name filter and the options,
   * the source is stored as raw file, parsed and added to the builder,
   * or ignored.
   * @param builder builder instance
   * @throws IOException I/O exception
   */
  private void parseResource(final Builder builder) throws IOException {
    builder.checkStop();

    // add file size for database meta information
    final long l = source.length();
    if(l != -1) builder.meta.filesize += l;

    // use global target as path prefix
    final String name = source.name();
    String targ = target;

    // add relative path without root (prefix) and file name (suffix)
    // NOTE(review): for archive entries, the path originates from the entry name;
    // confirm that '..' segments cannot escape the raw directory further below
    String path = source.path();
    if(path.endsWith('/' + name)) {
      path = path.substring(0, path.length() - name.length());
      if(path.startsWith(dir)) path = path.substring(dir.length());
      targ = (targ + path).replace("//", "/");
    }

    // check if file passes the name filter pattern
    boolean exclude = false;
    if(filter != null) {
      final String nm = Prop.CASE ? name : name.toLowerCase(Locale.ENGLISH);
      exclude = !filter.matcher(nm).matches();
    }

    if(exclude) {
      // exclude file: check if will be added as raw file
      if(addRaw && rawPath != null) {
        Store.store(source.inputSource(), new IOFile(rawPath, targ + name));
      }
    } else {
      if(rawParser) {
        // store input in raw format if database path is known
        if(rawPath != null) {
          Store.store(source.inputSource(), new IOFile(rawPath, targ + name));
        }
      } else {
        // store input as XML
        boolean ok = true;
        IO in = source;
        if(skipCorrupt) {
          // parse file twice to ensure that it is well-formed
          try {
            // cache file contents to allow or speed up a second run
            if(!(source instanceof IOContent || dtd)) {
              in = new IOContent(source.read());
              in.name(name);
            }
            parser = Parser.singleParser(in, options, targ);
            MemBuilder.build("", parser);
          } catch(final IOException ex) {
            // corrupt input: remember the path and skip the second run
            Util.debug(ex);
            skipped.add(source.path());
            ok = false;
          }
        }

        // parse file
        if(ok) {
          parser = Parser.singleParser(in, options, targ);
          parser.parse(builder);
        }
        parser = null;
        // dump debug data: output a marker for every 1024th resource
        if(Prop.debug && (++c & 0x3FF) == 0) Util.err(";");
      }
    }
  }

  /**
   * Returns a list of the skipped files. At most {@link #SKIPLOG} paths are
   * listed, followed by the number of remaining files.
   * @return info string (empty if no files were skipped)
   */
  @Override
  public String info() {
    final TokenBuilder tb = new TokenBuilder();
    if(!skipped.isEmpty()) {
      tb.add(SKIPPED).add(COL).add(NL);
      final int s = skipped.size();
      for(int i = 0; i < s && i < SKIPLOG; i++) {
        tb.add(LI).add(skipped.get(i)).add(NL);
      }
      if(s > SKIPLOG) {
        tb.add(LI).addExt(MORE_SKIPPED_X, s - SKIPLOG).add(NL);
      }
    }
    return tb.toString();
  }

  /**
   * Returns the detailed info of the active parser, or the path of the
   * current source if no parser is active.
   * @return info string
   */
  @Override
  public String detailedInfo() {
    return parser != null ? parser.detailedInfo() : source.path();
  }

  /**
   * Returns the progress of the active parser. If no parser is active,
   * {@code 1} is returned if the source has not changed since the last call;
   * otherwise, the new source is remembered and a random value is returned.
   * @return progress value
   */
  @Override
  public double progressInfo() {
    if(parser != null) return parser.progressInfo();
    if(lastSrc == source) return 1;
    lastSrc = source;
    return Math.random();
  }

  /**
   * Closes the active parser, if there is one.
   * @throws IOException I/O exception
   */
  @Override
  public void close() throws IOException {
    if(parser != null) parser.close();
  }
}