/* * Copyright (C) 2012 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.tools.lint.checks; import com.android.annotations.NonNull; import com.android.tools.lint.detector.api.Category; import com.android.tools.lint.detector.api.Implementation; import com.android.tools.lint.detector.api.Issue; import com.android.tools.lint.detector.api.Location; import com.android.tools.lint.detector.api.ResourceXmlDetector; import com.android.tools.lint.detector.api.Scope; import com.android.tools.lint.detector.api.Severity; import com.android.tools.lint.detector.api.Speed; import com.android.tools.lint.detector.api.XmlContext; import org.w3c.dom.Document; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Checks that the encoding used in resource files is always UTF-8 * <p> * TODO: Add a check which looks at files which do not specify the encoding * and check the contents to see if it contains characters where it's ambiguous. */ public class Utf8Detector extends ResourceXmlDetector { /** Detects non-utf8 encodings */ public static final Issue ISSUE = Issue.create( "EnforceUTF8", //$NON-NLS-1$ "Encoding used in resource files is not UTF-8", "XML supports encoding in a wide variety of character sets. However, not all " + "tools handle the XML encoding attribute correctly, and nearly all Android " + "apps use UTF-8, so by using UTF-8 you can protect yourself against subtle " + "bugs when using non-ASCII characters.\n" + "\n" + "In particular, the Android Gradle build system will merge resource XML files " + "assuming the resource files are using UTF-8 encoding.\n", Category.I18N, 5, Severity.FATAL, new Implementation( Utf8Detector.class, Scope.RESOURCE_FILE_SCOPE)); /** See http://www.w3.org/TR/REC-xml/#NT-EncodingDecl */ private static final Pattern ENCODING_PATTERN = Pattern.compile("encoding=['\"](\\S*)['\"]");//$NON-NLS-1$ /** Constructs a new {@link Utf8Detector} */ public Utf8Detector() { } @NonNull @Override public Speed getSpeed() { return Speed.NORMAL; } @Override public void visitDocument(@NonNull XmlContext context, @NonNull Document document) { String xml = context.getContents(); if (xml == null) { return; } // AAPT: The prologue must be in the first line int lineEnd = 0; int max = xml.length(); for (; lineEnd < max; lineEnd++) { char c = xml.charAt(lineEnd); if (c == '\n' || c == '\r') { break; } } for (int i = 16; i < lineEnd - 5; i++) { // +4: Skip at least <?xml encoding=" if ((xml.charAt(i) == 'u' || xml.charAt(i) == 'U') && (xml.charAt(i + 1) == 't' || xml.charAt(i + 1) == 'T') && (xml.charAt(i + 2) == 'f' || xml.charAt(i + 2) == 'F') && (xml.charAt(i + 3) == '-' || xml.charAt(i + 3) == '_') && (xml.charAt(i + 4) == '8')) { return; } } int encodingIndex = xml.lastIndexOf("encoding", lineEnd); //$NON-NLS-1$ if (encodingIndex != -1) { Matcher matcher = ENCODING_PATTERN.matcher(xml); if (matcher.find(encodingIndex)) { String encoding = matcher.group(1); Location location = Location.create(context.file, xml, matcher.start(1), matcher.end(1)); context.report(ISSUE, null, location, String.format( "%1$s: Not using UTF-8 as the file encoding. This can lead to subtle " + "bugs with non-ascii characters", encoding)); } } } }