001/* 002 * Copyright 2007-2018 The jdeb developers. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.vafer.jdeb.utils; 018 019import java.io.FilterInputStream; 020import java.io.IOException; 021import java.io.InputStream; 022import java.util.HashMap; 023import java.util.Map; 024 025public final class InformationInputStream extends FilterInputStream { 026 027 private long i; 028 private long ascii; 029 private long nonascii; 030 private long cr; 031 private long lf; 032 private long zero; 033 034 private final Map<BOM, Integer> bomPositions = new HashMap<BOM, Integer>(); 035 private final Map<Shell, Integer> shellPositions = new HashMap<Shell, Integer>(); 036 037 /** 038 * Byte Order Marks 039 */ 040 private enum BOM { 041 NONE(null), 042 UTF8("UTF-8", 0xEF, 0xBB, 0xBF), 043 UTF16LE("UTF-16LE", 0xFF, 0xFE), 044 UTF16BE("UTF-16BE", 0xFE, 0xFF); 045 046 int[] sequence; 047 String encoding; 048 049 private BOM( String encoding, int... sequence ) { 050 this.encoding = encoding; 051 this.sequence = sequence; 052 } 053 } 054 055 /** 056 * Shebang for shell scripts in various encodings. 057 */ 058 private enum Shell { 059 NONE, 060 ASCII(0x23, 0x21), 061 UTF16BE(0x00, 0x23, 0x00, 0x21), 062 UTF16LE(0x23, 0x00, 0x21, 0x00); 063 064 int[] header; 065 066 private Shell( int... header ) { 067 this.header = header; 068 } 069 } 070 071 private BOM bom = BOM.NONE; 072 private Shell shell = Shell.NONE; 073 074 public InformationInputStream( InputStream in ) { 075 super(in); 076 } 077 078 public boolean hasBom() { 079 return bom != BOM.NONE; 080 } 081 082 public boolean isShell() { 083 return shell != Shell.NONE; 084 } 085 086 public boolean hasUnixLineEndings() { 087 return cr == 0; 088 } 089 090 public String getEncoding() { 091 String encoding = bom.encoding; 092 093 if (encoding == null) { 094 // guess the encoding from the shebang 095 if (shell == Shell.UTF16BE) { 096 encoding = BOM.UTF16BE.encoding; 097 } else if (shell == Shell.UTF16LE) { 098 encoding = BOM.UTF16LE.encoding; 099 } 100 } 101 102 return encoding; 103 } 104 105 private void add( int c ) { 106 if (i < 10) { 107 if (shell == Shell.NONE) { 108 for (Shell shell : Shell.values()) { 109 int position = shellPositions.containsKey(shell) ? shellPositions.get(shell) : 0; 110 if (position < shell.header.length) { 111 if (c == shell.header[position]) { 112 shellPositions.put(shell, position + 1); 113 } else { 114 shellPositions.put(shell, 0); 115 } 116 } else { 117 this.shell = shell; 118 } 119 } 120 } 121 122 if (bom == BOM.NONE) { 123 for (BOM bom : BOM.values()) { 124 int position = bomPositions.containsKey(bom) ? bomPositions.get(bom) : 0; 125 if (position < bom.sequence.length) { 126 if (c == bom.sequence[position] && position == i) { 127 bomPositions.put(bom, position + 1); 128 } else { 129 bomPositions.put(bom, 0); 130 } 131 } else { 132 this.bom = bom; 133 } 134 } 135 } 136 } 137 138 i++; 139 140 if (c == '\n') { 141 lf++; 142 return; 143 } 144 if (c == '\r') { 145 cr++; 146 return; 147 } 148 if (c >= ' ' && c <= '~') { 149 ascii++; 150 return; 151 } 152 if (c == 0) { 153 zero++; 154 return; 155 } 156 nonascii++; 157 } 158 159 public int read() throws IOException { 160 int b = super.read(); 161 if (b != -1) { 162 add(b & 0xFF); 163 } 164 return b; 165 } 166 167 public int read( byte[] b, int off, int len ) throws IOException { 168 int length = super.read(b, off, len); 169 for (int i = 0; i < length; i++) { 170 add(b[off + i] & 0xFF); 171 } 172 return length; 173 } 174 175 public String toString() { 176 StringBuilder sb = new StringBuilder(); 177 sb.append("{"); 178 sb.append("total=").append(i); 179 sb.append(",noascii=").append(nonascii); 180 sb.append(",ascii=").append(ascii); 181 sb.append(",cr=").append(cr); 182 sb.append(",lf=").append(lf); 183 sb.append(",zero=").append(zero); 184 sb.append("}"); 185 return sb.toString(); 186 } 187}