/*******************************************************************************
 * Copyright (c) 2012-2017 Codenvy, S.A.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *   Codenvy, S.A. - initial API and implementation
 *******************************************************************************/
package org.eclipse.che.ide.api.editor.filetype;

import org.eclipse.che.ide.api.resources.VirtualFile;
import org.eclipse.che.ide.util.loging.Log;
import com.google.gwt.regexp.shared.MatchResult;
import com.google.gwt.regexp.shared.RegExp;

import java.util.Collections;
import java.util.List;

/**
 * {@link FileTypeIdentifier} that tries to recognize file by looking at the first line content.
 *
 * <p>Two kinds of first lines are recognized: an XML declaration ({@code <?xml}) and a script
 * shebang ({@code #!<loader> [option]}).
 *
 * @author "Mickaël Leduque"
 */
public class FirstLineFileTypeIdentifier implements FileTypeIdentifier {

    // format: <?xml
    /** Pattern to recognize xml that has an xml declaration. */
    private static final RegExp XML_PATTERN = RegExp.compile("^<\\?xml");

    // format: #![blanks]<loader>[blanks][option][anything up to the end of the first line]
    /**
     * Pattern to try to recognize scripts with a shebang.
     *
     * <p>Group 1 is the loader, group 2 (optional) is the first word following it. Only spaces and
     * tabs are accepted as separators so that the match can never spill over to the second line,
     * and the line may be terminated either by a newline or by the end of the content.
     */
    private static final RegExp SHEBANG_PATTERN =
            RegExp.compile("^#![ \\t]*(\\S+)[ \\t]*(\\S+)?[^\\n]*(?:\\n|$)");

    /**
     * Tries to deduce the content type of the file from its first line.
     *
     * @param file
     *         the file to identify
     * @return a single-element list holding the recognized content type, or {@code null} when the
     *         first line is neither an XML declaration nor a known shebang
     */
    @Override
    public List<String> identifyType(final VirtualFile file) {
        // TODO: file's content retrieved asynchronously
        final String content = ""/*file.getContent()*/;

        if (isXml(content)) {
            Log.debug(FirstLineFileTypeIdentifier.class, "Identified file as XML.");
            return Collections.singletonList("application/xml");
        }

        final String shebangLoader = getShebang(content);
        if (shebangLoader != null) {
            // need to consider \ as path separator for a feature that exists mostly on unixes ?
            final int lastSlash = shebangLoader.lastIndexOf('/');
            // lastIndexOf returns -1 when there is no slash, so the whole string is kept
            final String basename = shebangLoader.substring(lastSlash + 1);
            final String shebangResult = matchShebang(basename);
            if (shebangResult != null) {
                return Collections.singletonList(shebangResult);
            }
        }

        return null;
    }

    /**
     * Tells if the content starts with an XML declaration.
     *
     * @param content
     *         the content to examine
     * @return true iff the content starts with {@code <?xml}
     */
    private boolean isXml(final String content) {
        return XML_PATTERN.test(content);
    }

    /**
     * Extracts the interpreter named by the shebang line, if there is one.
     *
     * <p>When the loader is {@code /usr/bin/env}, the first word that follows it is returned
     * instead.
     *
     * @param content
     *         the content to examine
     * @return the loader as written in the shebang (path included), or {@code null} if the content
     *         has no shebang or if {@code /usr/bin/env} is used without any argument
     */
    private String getShebang(final String content) {
        final MatchResult matchResult = SHEBANG_PATTERN.exec(content);
        if (matchResult == null) {
            return null;
        }
        final String loader = matchResult.getGroup(1);
        if (loader == null || loader.isEmpty()) {
            return null;
        }
        Log.debug(FirstLineFileTypeIdentifier.class, "File may be a script with a shebang.");

        // special case for /usr/bin/env
        if ("/usr/bin/env".equals(loader)) {
            Log.debug(FirstLineFileTypeIdentifier.class, "Shebang points to /usr/bin/env. Looking at the parameter.");
            // we must use the first option as hint
            final String parameter = matchResult.getGroup(2);
            // an unmatched optional group may be reported as null or as an empty string
            if (parameter == null || parameter.isEmpty()) {
                return null;
            }
            Log.debug(FirstLineFileTypeIdentifier.class, "Shebang parameter kept: " + parameter);
            return parameter;
        }

        Log.debug(FirstLineFileTypeIdentifier.class, "Shebang loader kept: " + loader);
        return loader;
    }

    /**
     * Maps an interpreter name to a content type.
     *
     * @param shebangLoader
     *         the interpreter name, without its directory
     * @return the matching content type, or {@code null} if the interpreter is not known
     */
    private String matchShebang(final String shebangLoader) {
        // the shells that are related to sh - not csh !
        if ("sh".equals(shebangLoader)
            || "bash".equals(shebangLoader)
            || "dash".equals(shebangLoader)
            || "ksh".equals(shebangLoader)
            || "zsh".equals(shebangLoader)) {
            Log.debug(FirstLineFileTypeIdentifier.class, "File may be a bourne shell script or similar.");
            return "text/x-sh";
        }

        // python - prefix match, so that versioned names such as python3 are accepted too
        if (shebangLoader.startsWith("python")) {
            Log.debug(FirstLineFileTypeIdentifier.class, "File may be a python script.");
            return "text/x-python";
        }

        // perl
        if (shebangLoader.startsWith("perl")) {
            Log.debug(FirstLineFileTypeIdentifier.class, "File may be a perl script.");
            return "text/x-perl";
        }

        // ruby
        if (shebangLoader.startsWith("ruby")) {
            Log.debug(FirstLineFileTypeIdentifier.class, "File may be a ruby script.");
            return "text/x-ruby";
        }

        // are there any other script interpreters commonly used as shebang ?
        return null;
    }
}