001/*
002 * Copyright 2007-2018 The jdeb developers.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.vafer.jdeb.utils;
018
019import java.io.FilterInputStream;
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.HashMap;
023import java.util.Map;
024
/**
 * Stream wrapper that inspects the bytes passing through {@code read} and
 * gathers simple statistics about them: total count, printable ASCII,
 * non-ASCII, CR, LF and zero bytes. The first bytes of the stream are also
 * checked for a byte order mark and for a shell shebang.
 */
public final class InformationInputStream extends FilterInputStream {

    /** Number of leading bytes that are examined for a BOM or a shebang. */
    private static final int HEADER_WINDOW = 10;

    /** Total number of bytes seen so far. */
    private long total;
    private long ascii;
    private long nonascii;
    private long cr;
    private long lf;
    private long zero;

    /** Number of bytes matched so far, per BOM candidate. */
    private final Map<BOM, Integer> bomPositions = new HashMap<BOM, Integer>();
    /** Number of bytes matched so far, per shebang candidate. */
    private final Map<Shell, Integer> shellPositions = new HashMap<Shell, Integer>();

    /**
     * Byte Order Marks
     */
    private enum BOM {
        // the single null argument binds to 'encoding'; 'sequence' is an empty array
        NONE(null),
        UTF8("UTF-8", 0xEF, 0xBB, 0xBF),
        UTF16LE("UTF-16LE", 0xFF, 0xFE),
        UTF16BE("UTF-16BE", 0xFE, 0xFF);

        final int[] sequence;
        final String encoding;

        private BOM( String encoding, int... sequence ) {
            this.encoding = encoding;
            this.sequence = sequence;
        }
    }

    /**
     * Shebang for shell scripts in various encodings.
     */
    private enum Shell {
        // no arguments: 'header' is an empty array
        NONE,
        ASCII(0x23, 0x21),
        UTF16BE(0x00, 0x23, 0x00, 0x21),
        UTF16LE(0x23, 0x00, 0x21, 0x00);

        final int[] header;

        private Shell( int... header ) {
            this.header = header;
        }
    }

    private BOM bom = BOM.NONE;
    private Shell shell = Shell.NONE;

    /**
     * Wraps the given stream.
     *
     * @param in the stream whose content is analysed while it is being read
     */
    public InformationInputStream( InputStream in ) {
        super(in);
    }

    /**
     * @return {@code true} if the bytes read so far started with a known byte order mark
     */
    public boolean hasBom() {
        return bom != BOM.NONE;
    }

    /**
     * @return {@code true} if a shebang was found within the inspected leading bytes
     */
    public boolean isShell() {
        return shell != Shell.NONE;
    }

    /**
     * @return {@code true} if no carriage return byte has been read so far
     */
    public boolean hasUnixLineEndings() {
        return cr == 0;
    }

    /**
     * Returns the encoding announced by the BOM, or failing that, the one
     * implied by the byte layout of the shebang.
     *
     * @return the encoding name, or {@code null} if neither a BOM nor a
     *         UTF-16 shebang has been detected
     */
    public String getEncoding() {
        if (bom.encoding != null) {
            return bom.encoding;
        }

        // no BOM: guess the encoding from the shebang
        if (shell == Shell.UTF16BE) {
            return BOM.UTF16BE.encoding;
        }
        if (shell == Shell.UTF16LE) {
            return BOM.UTF16LE.encoding;
        }
        return null;
    }

    /**
     * Feeds one byte into the header detection and the counters.
     *
     * @param c the unsigned byte value (0-255)
     */
    private void add( int c ) {
        if (total < HEADER_WINDOW) {
            if (shell == Shell.NONE) {
                detectShell(c);
            }
            if (bom == BOM.NONE) {
                detectBom(c);
            }
        }

        total++;

        if (c == '\n') {
            lf++;
        } else if (c == '\r') {
            cr++;
        } else if (c >= ' ' && c <= '~') {
            ascii++;
        } else if (c == 0) {
            zero++;
        } else {
            nonascii++;
        }
    }

    /**
     * Advances every shebang candidate by one byte and records the first one
     * that is completely matched. The match is recorded on the byte that
     * completes it, so a stream ending right after the shebang is detected too.
     * The shebang is not anchored to offset 0 because a BOM may precede it.
     */
    private void detectShell( int c ) {
        for (Shell candidate : Shell.values()) {
            int[] header = candidate.header;
            if (header.length == 0) {
                continue; // NONE has nothing to match
            }

            Integer matched = shellPositions.get(candidate);
            int position = matched == null ? 0 : matched.intValue();
            position = c == header[position] ? position + 1 : 0;
            shellPositions.put(candidate, position);

            if (position == header.length) {
                shell = candidate;
                return;
            }
        }
    }

    /**
     * Advances every BOM candidate by one byte and records the first one that
     * is completely matched. A BOM only counts at the very beginning of the
     * stream, hence the comparison of the match position with the byte offset.
     */
    private void detectBom( int c ) {
        for (BOM candidate : BOM.values()) {
            int[] sequence = candidate.sequence;
            if (sequence.length == 0) {
                continue; // NONE has nothing to match
            }

            Integer matched = bomPositions.get(candidate);
            int position = matched == null ? 0 : matched.intValue();
            position = (c == sequence[position] && position == total) ? position + 1 : 0;
            bomPositions.put(candidate, position);

            if (position == sequence.length) {
                bom = candidate;
                return;
            }
        }
    }

    /**
     * Reads a single byte and records it, unless the end of the stream was reached.
     */
    @Override
    public int read() throws IOException {
        int b = super.read();
        if (b != -1) {
            add(b & 0xFF);
        }
        return b;
    }

    /**
     * Reads up to {@code len} bytes into {@code b} and records every byte
     * that was actually read.
     */
    @Override
    public int read( byte[] b, int off, int len ) throws IOException {
        int length = super.read(b, off, len);
        // length is -1 at end of stream, in which case nothing is recorded
        for (int n = 0; n < length; n++) {
            add(b[off + n] & 0xFF);
        }
        return length;
    }

    /**
     * @return the counters gathered so far, formatted as {@code {total=..,noascii=..,...}}
     */
    @Override
    public String toString() {
        return "{"
            + "total=" + total
            + ",noascii=" + nonascii
            + ",ascii=" + ascii
            + ",cr=" + cr
            + ",lf=" + lf
            + ",zero=" + zero
            + "}";
    }
}