001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.csv;
018    
019    import java.io.IOException;
020    import java.io.OutputStream;
021    import java.io.PrintWriter;
022    import java.io.Writer;
023    
024    /**
025     * Print values as a comma separated list.
026     */
027    public class CSVPrinter {
028    
029        /**
030         * The place that the values get written.
031         */
032        protected final Writer out;
033        protected final CSVStrategy strategy;
034    
035        /**
036         * True if we just began a new line.
037         */
038        protected boolean newLine = true;
039    
040        protected char[] buf = new char[0];  // temporary buffer
041    
042        /**
043         * Create a printer that will print values to the given
044         * stream following the CSVStrategy.
045         * <p/>
046         * Currently, only a pure encapsulation strategy or a pure escaping strategy
047         * is supported.  Hybrid strategies (encapsulation and escaping with a different character) are not supported.
048         *
049         * @param out      stream to which to print.
050         * @param strategy describes the CSV variation.
051         */
052        public CSVPrinter(Writer out, CSVStrategy strategy) {
053            this.out = out;
054            this.strategy = strategy == null ? CSVStrategy.DEFAULT_STRATEGY : strategy;
055        }
056    
057        // ======================================================
058        //  printing implementation
059        // ======================================================
060    
061        /**
062         * Output a blank line
063         */
064        public void println() throws IOException {
065            out.write(strategy.getPrinterNewline());
066            newLine = true;
067        }
068    
069        public void flush() throws IOException {
070            out.flush();
071        }
072    
073    
074        /**
075         * Print a single line of comma separated values.
076         * The values will be quoted if needed.  Quotes and
077         * newLine characters will be escaped.
078         *
079         * @param values values to be outputted.
080         */
081        public void println(String[] values) throws IOException {
082            for (int i = 0; i < values.length; i++) {
083                print(values[i]);
084            }
085            println();
086        }
087    
088    
089        /**
090         * Put a comment among the comma separated values.
091         * Comments will always begin on a new line and occupy a
092         * least one full line. The character specified to star
093         * comments and a space will be inserted at the beginning of
094         * each new line in the comment.
095         *
096         * @param comment the comment to output
097         */
098        public void printlnComment(String comment) throws IOException {
099            if (this.strategy.isCommentingDisabled()) {
100                return;
101            }
102            if (!newLine) {
103                println();
104            }
105            out.write(this.strategy.getCommentStart());
106            out.write(' ');
107            for (int i = 0; i < comment.length(); i++) {
108                char c = comment.charAt(i);
109                switch (c) {
110                    case '\r':
111                        if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') {
112                            i++;
113                        }
114                        // break intentionally excluded.
115                    case '\n':
116                        println();
117                        out.write(this.strategy.getCommentStart());
118                        out.write(' ');
119                        break;
120                    default:
121                        out.write(c);
122                        break;
123                }
124            }
125            println();
126        }
127    
128    
129        public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException {
130            if (!checkForEscape) {
131                printSep();
132                out.write(value, offset, len);
133                return;
134            }
135    
136            if (strategy.getEncapsulator() != CSVStrategy.ENCAPSULATOR_DISABLED) {
137                printAndEncapsulate(value, offset, len);
138            } else if (strategy.getEscape() != CSVStrategy.ESCAPE_DISABLED) {
139                printAndEscape(value, offset, len);
140            } else {
141                printSep();
142                out.write(value, offset, len);
143            }
144        }
145    
146        void printSep() throws IOException {
147            if (newLine) {
148                newLine = false;
149            } else {
150                out.write(this.strategy.getDelimiter());
151            }
152        }
153    
154        void printAndEscape(char[] value, int offset, int len) throws IOException {
155            int start = offset;
156            int pos = offset;
157            int end = offset + len;
158    
159            printSep();
160    
161            char delim = this.strategy.getDelimiter();
162            char escape = this.strategy.getEscape();
163    
164            while (pos < end) {
165                char c = value[pos];
166                if (c == '\r' || c == '\n' || c == delim || c == escape) {
167                    // write out segment up until this char
168                    int l = pos - start;
169                    if (l > 0) {
170                        out.write(value, start, l);
171                    }
172                    if (c == '\n') {
173                        c = 'n';
174                    } else if (c == '\r') {
175                        c = 'r';
176                    }
177    
178                    out.write(escape);
179                    out.write(c);
180    
181                    start = pos + 1; // start on the current char after this one
182                }
183    
184                pos++;
185            }
186    
187            // write last segment
188            int l = pos - start;
189            if (l > 0) {
190                out.write(value, start, l);
191            }
192        }
193    
194        void printAndEncapsulate(char[] value, int offset, int len) throws IOException {
195            boolean first = newLine;  // is this the first value on this line?
196            boolean quote = false;
197            int start = offset;
198            int pos = offset;
199            int end = offset + len;
200    
201            printSep();
202    
203            char delim = this.strategy.getDelimiter();
204            char encapsulator = this.strategy.getEncapsulator();
205    
206            if (len <= 0) {
207                // always quote an empty token that is the first
208                // on the line, as it may be the only thing on the
209                // line. If it were not quoted in that case,
210                // an empty line has no tokens.
211                if (first) {
212                    quote = true;
213                }
214            } else {
215                char c = value[pos];
216    
217                // Hmmm, where did this rule come from?
218                if (first
219                        && (c < '0'
220                        || (c > '9' && c < 'A')
221                        || (c > 'Z' && c < 'a')
222                        || (c > 'z'))) {
223                    quote = true;
224                    // } else if (c == ' ' || c == '\f' || c == '\t') {
225                } else if (c <= '#') {
226                    // Some other chars at the start of a value caused the parser to fail, so for now
227                    // encapsulate if we start in anything less than '#'.  We are being conservative
228                    // by including the default comment char too.
229                    quote = true;
230                } else {
231                    while (pos < end) {
232                        c = value[pos];
233                        if (c == '\n' || c == '\r' || c == encapsulator || c == delim) {
234                            quote = true;
235                            break;
236                        }
237                        pos++;
238                    }
239    
240                    if (!quote) {
241                        pos = end - 1;
242                        c = value[pos];
243                        // if (c == ' ' || c == '\f' || c == '\t') {
244                        // Some other chars at the end caused the parser to fail, so for now
245                        // encapsulate if we end in anything less than ' '
246                        if (c <= ' ') {
247                            quote = true;
248                        }
249                    }
250                }
251            }
252    
253            if (!quote) {
254                // no encapsulation needed - write out the original value
255                out.write(value, offset, len);
256                return;
257            }
258    
259            // we hit something that needed encapsulation
260            out.write(encapsulator);
261    
262            // Pick up where we left off: pos should be positioned on the first character that caused
263            // the need for encapsulation.
264            while (pos < end) {
265                char c = value[pos];
266                if (c == encapsulator) {
267                    // write out the chunk up until this point
268    
269                    // add 1 to the length to write out the encapsulator also
270                    out.write(value, start, pos - start + 1);
271                    // put the next starting position on the encapsulator so we will
272                    // write it out again with the next string (effectively doubling it)
273                    start = pos;
274                }
275                pos++;
276            }
277    
278            // write the last segment
279            out.write(value, start, pos - start);
280            out.write(encapsulator);
281        }
282    
283        /**
284         * Print the string as the next value on the line. The value
285         * will be escaped or encapsulated as needed if checkForEscape==true
286         *
287         * @param value value to be outputted.
288         */
289        public void print(String value, boolean checkForEscape) throws IOException {
290            if (!checkForEscape) {
291                // write directly from string
292                printSep();
293                out.write(value);
294                return;
295            }
296    
297            if (buf.length < value.length()) {
298                buf = new char[value.length()];
299            }
300    
301            value.getChars(0, value.length(), buf, 0);
302            print(buf, 0, value.length(), checkForEscape);
303        }
304    
305        /**
306         * Print the string as the next value on the line. The value
307         * will be escaped or encapsulated as needed.
308         *
309         * @param value value to be outputted.
310         */
311        public void print(String value) throws IOException {
312            print(value, true);
313        }
314    }