001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.csv.writer;
020    
021    import java.io.BufferedReader;
022    import java.io.InputStream;
023    import java.io.InputStreamReader;
024    
025    /**
026     * Tries to guess a config based on an InputStream.
027     *
028     * @author Martin van den Bemt
029     * @version $Id: $
030     */
031    public class CSVConfigGuesser {
032    
033        /**
034         * The stream to read
035         */
036        private InputStream in;
037        /**
038         * if the file has a field header (need this info, to be able to guess better)
039         * Defaults to false
040         */
041        private boolean hasFieldHeader = false;
042        /**
043         * The found config
044         */
045        protected CSVConfig config;
046    
047        /**
048         *
049         */
050        public CSVConfigGuesser() {
051            this.config = new CSVConfig();
052        }
053    
054        /**
055         * @param in the inputstream to guess from
056         */
057        public CSVConfigGuesser(InputStream in) {
058            this();
059            setInputStream(in);
060        }
061    
062        public void setInputStream(InputStream in) {
063            this.in = in;
064        }
065    
066        /**
067         * Allow override.
068         *
069         * @return the inputstream that was set.
070         */
071        protected InputStream getInputStream() {
072            return in;
073        }
074    
075        /**
076         * Guess the config based on the first 10 (or less when less available)
077         * records of a CSV file.
078         *
079         * @return the guessed config.
080         */
081        public CSVConfig guess() {
082            try {
083                // tralalal
084                BufferedReader bIn = new BufferedReader(new InputStreamReader((getInputStream())));
085                String[] lines = new String[10];
086                String line = null;
087                int counter = 0;
088                while ((line = bIn.readLine()) != null && counter <= 10) {
089                    lines[counter] = line;
090                    counter++;
091                }
092                if (counter < 10) {
093                    // remove nulls from the array, so we can skip the null checking.
094                    String[] newLines = new String[counter];
095                    System.arraycopy(lines, 0, newLines, 0, counter);
096                    lines = newLines;
097                }
098                analyseLines(lines);
099            } catch (Exception e) {
100                e.printStackTrace();
101            } finally {
102                if (in != null) {
103                    try {
104                        in.close();
105                    } catch (Exception e) {
106                        // ignore exception.
107                    }
108                }
109            }
110            CSVConfig conf = config;
111            // cleanup the config.
112            config = null;
113            return conf;
114        }
115    
116        protected void analyseLines(String[] lines) {
117            guessFixedWidth(lines);
118            guessFieldSeperator(lines);
119        }
120    
121        /**
122         * Guess if this file is fixedwidth.
123         * Just basing the fact on all lines being of the same length
124         *
125         * @param lines
126         */
127        protected void guessFixedWidth(String[] lines) {
128            int lastLength = 0;
129            // assume fixedlength.
130            config.setFixedWidth(true);
131            for (int i = 0; i < lines.length; i++) {
132                if (i == 0) {
133                    lastLength = lines[i].length();
134                } else {
135                    if (lastLength != lines[i].length()) {
136                        config.setFixedWidth(false);
137                    }
138                }
139            }
140        }
141    
142    
143        protected void guessFieldSeperator(String[] lines) {
144            if (config.isFixedWidth()) {
145                guessFixedWidthSeperator(lines);
146                return;
147            }
148            for (int i = 0; i < lines.length; i++) {
149            }
150        }
151    
152        protected void guessFixedWidthSeperator(String[] lines) {
153            // keep track of the fieldlength
154            int previousMatch = -1;
155            for (int i = 0; i < lines[0].length(); i++) {
156                char last = ' ';
157                boolean charMatches = true;
158                for (int j = 0; j < lines.length; j++) {
159                    if (j == 0) {
160                        last = lines[j].charAt(i);
161                    }
162                    if (last != lines[j].charAt(i)) {
163                        charMatches = false;
164                        break;
165                    }
166                }
167                if (charMatches) {
168                    if (previousMatch == -1) {
169                        previousMatch = 0;
170                    }
171                    CSVField field = new CSVField();
172                    field.setName("field" + config.getFields().length + 1);
173                    field.setSize((i - previousMatch));
174                    config.addField(field);
175                }
176            }
177        }
178    
179        /**
180         * @return if the field uses a field header. Defaults to false.
181         */
182        public boolean hasFieldHeader() {
183            return hasFieldHeader;
184        }
185    
186        /**
187         * Specify if the CSV file has a field header
188         *
189         * @param hasFieldHeader true or false
190         */
191        public void setHasFieldHeader(boolean hasFieldHeader) {
192            this.hasFieldHeader = hasFieldHeader;
193        }
194    
195    
196    }