/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package riotcmd;
import java.io.InputStream ;
import org.apache.jena.atlas.AtlasException ;
import org.apache.jena.atlas.io.IO ;
import org.apache.jena.atlas.io.InStreamUTF8 ;
import org.apache.jena.atlas.io.InputStreamBuffered ;
/**
 * Simple program to help hunt down bad UTF-8 encoded characters.
 * <p>
 * Every command line argument is opened with {@code IO.openFile} and decoded
 * one character at a time with {@code InStreamUTF8}. For each input the
 * program prints either a summary line (character count and final line
 * number) or the line/column at which the first problem was found.
 */
public class utf8
{
    /** Line number assigned to the first line of an input. */
    private static final long INIT_LINE = 1 ;
    /** Column number assigned to the first character of a line. */
    private static final long INIT_COL = 1 ;

    /**
     * Checks each named input for characters that cannot be decoded or that
     * are not defined Unicode code points.
     *
     * @param args names of the inputs to check; when empty, the single name
     *             {@code "-"} is used (presumably standard input - confirm
     *             against {@code IO.openFile})
     */
    public static void main(String[] args) {
        if ( args.length == 0 ) {
            args = new String[]{"-"} ;
        }

        for ( String fn : args ) {
            // Error lines are only prefixed with the name when several inputs are checked.
            String label = ( args.length > 1 ) ? fn + ": " : "" ;

            InputStream in = IO.openFile(fn) ;
            in = new InputStreamBuffered(in) ;

            long charCount = 0 ;
            long lineNum = INIT_LINE ;
            long colNum = INIT_COL ;

            InStreamUTF8 utf8 = null ;
            try {
                utf8 = new InStreamUTF8(in) ;
                for ( ; ; ) {
                    int ch = utf8.read() ;
                    if ( ch == -1 ) {
                        break ;
                    }
                    // Validate before advancing the position so the reported line/column
                    // is that of the offending character itself, the same position that
                    // is reported when read() fails on the next character.
                    if ( !Character.isDefined(ch) ) {
                        throw new AtlasException(String.format("No such codepoint: 0x%04X", ch)) ;
                    }
                    charCount++ ;
                    if ( ch == '\n' ) {
                        lineNum++ ;
                        colNum = INIT_COL ;
                    } else {
                        colNum++ ;
                    }
                }
                // "lines" is the current line number, i.e. INIT_LINE plus the number of '\n' seen.
                System.out.printf("%s: chars = %d , lines = %d\n", fn, charCount, lineNum) ;
            }
            catch (AtlasException ex) {
                // The label holds a file name, so it is passed as an argument rather than
                // concatenated into the format string: a '%' in the name must not be
                // interpreted as a format specifier.
                System.out.printf("%s[line=%d, col=%d] %s\n", label, lineNum, colNum, ex.getMessage()) ;
            }
            finally {
                IO.close(utf8) ;
            }
        }
    }
}