1 package au.com.bytecode.opencsv;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 import java.io.IOException;
20 import java.util.ArrayList;
21 import java.util.List;
22
23
24
25
26
27
28
29
30
/**
 * Splits a single line of delimited text into its fields.
 *
 * <p>The separator, quote character and escape character are configurable.
 * A parser instance carries state between calls to
 * {@link #parseLineMulti(String)}: when a line ends inside a quoted field,
 * the partial field is remembered and continued by the next call.
 */
public class CSVParser {

    /** Separator used when the caller does not supply one. */
    public static final char DEFAULT_SEPARATOR = ',';

    /** Initial capacity of the buffer in which a field is accumulated. */
    public static final int INITIAL_READ_SIZE = 128;

    /** Quote character used when the caller does not supply one. */
    public static final char DEFAULT_QUOTE_CHARACTER = '"';

    /** Escape character used when the caller does not supply one. */
    public static final char DEFAULT_ESCAPE_CHARACTER = '\\';

    /** Strict-quotes setting used when the caller does not supply one. */
    public static final boolean DEFAULT_STRICT_QUOTES = false;

    /** Character that delimits fields. */
    private final char separator;

    /** Character that opens and closes a quoted section. */
    private final char quotechar;

    /** Character that escapes a quote or another escape inside quotes. */
    private final char escape;

    /** When true, ordinary characters outside quotes are discarded. */
    private final boolean strictQuotes;

    /** Partial quoted field carried over from the previous multi-line call, or null. */
    private String pending;

    /**
     * Creates a parser using the default separator, quote and escape characters.
     */
    public CSVParser() {
        this(DEFAULT_SEPARATOR, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Creates a parser with a custom separator and the default quote and
     * escape characters.
     *
     * @param separator the character that delimits fields
     */
    public CSVParser(char separator) {
        this(separator, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Creates a parser with a custom separator and quote character and the
     * default escape character.
     *
     * @param separator the character that delimits fields
     * @param quotechar the character that opens and closes a quoted section
     */
    public CSVParser(char separator, char quotechar) {
        this(separator, quotechar, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Creates a parser with custom separator, quote and escape characters and
     * the default strict-quotes setting.
     *
     * @param separator the character that delimits fields
     * @param quotechar the character that opens and closes a quoted section
     * @param escape    the character that escapes a quote or escape inside quotes
     */
    public CSVParser(char separator, char quotechar, char escape) {
        this(separator, quotechar, escape, DEFAULT_STRICT_QUOTES);
    }

    /**
     * Creates a fully configured parser.
     *
     * @param separator    the character that delimits fields
     * @param quotechar    the character that opens and closes a quoted section
     * @param escape       the character that escapes a quote or escape inside quotes
     * @param strictQuotes if true, ordinary characters outside quotes are dropped
     */
    public CSVParser(char separator, char quotechar, char escape, boolean strictQuotes) {
        this.separator = separator;
        this.quotechar = quotechar;
        this.escape = escape;
        this.strictQuotes = strictQuotes;
    }

    /**
     * Reports whether a quoted field from an earlier
     * {@link #parseLineMulti(String)} call is still waiting to be completed.
     *
     * @return true if a partial field is being held
     */
    public boolean isPending() {
        return pending != null;
    }

    /**
     * Parses a line that may be one part of a multi-line record. If the line
     * ends inside a quoted field, that field is held back (with a newline
     * appended) and prepended to the first field of the next call.
     *
     * @param nextLine the line to parse; null flushes any held partial field
     * @return the completed fields of this line; for a null line, a
     *         one-element array with the held text, or null if nothing was held
     * @throws IOException declared for symmetry with {@link #parseLine(String)}
     */
    public String[] parseLineMulti(String nextLine) throws IOException {
        return parseLine(nextLine, true);
    }

    /**
     * Parses a self-contained line. Any partial field held from an earlier
     * multi-line call is discarded first.
     *
     * @param nextLine the line to parse, may be null
     * @return the fields of the line, or null if {@code nextLine} is null
     * @throws IOException if the line ends inside a quoted field
     */
    public String[] parseLine(String nextLine) throws IOException {
        return parseLine(nextLine, false);
    }

    /**
     * Shared implementation behind {@link #parseLine(String)} and
     * {@link #parseLineMulti(String)}.
     *
     * @param nextLine the line to parse, may be null
     * @param multi    true to carry an unterminated quoted field over to the
     *                 next call instead of failing
     * @return the fields found, or null when {@code nextLine} is null and no
     *         partial field is held
     * @throws IOException if {@code multi} is false and the line ends inside
     *                     a quoted field
     */
    private String[] parseLine(String nextLine, boolean multi) throws IOException {
        // A single-line parse never continues an earlier record.
        if (!multi) {
            pending = null;
        }

        if (nextLine == null) {
            if (pending == null) {
                return null;
            }
            // End of input: hand back whatever was left of the open field.
            final String leftover = pending;
            pending = null;
            return new String[] {leftover};
        }

        final List<String> fields = new ArrayList<String>();
        final StringBuilder field = new StringBuilder(INITIAL_READ_SIZE);

        // A held partial field means we resume in the middle of a quoted section.
        boolean quoted = pending != null;
        if (quoted) {
            field.append(pending);
            pending = null;
        }

        final int length = nextLine.length();
        int pos = 0;
        while (pos < length) {
            final char ch = nextLine.charAt(pos);

            if (ch == escape) {
                // Keep only the escaped character; an escape character that
                // does not introduce an escapable character is dropped.
                if (isNextCharacterEscapable(nextLine, quoted, pos)) {
                    pos++;
                    field.append(nextLine.charAt(pos));
                }
            } else if (ch == quotechar) {
                if (isNextCharacterEscapedQuote(nextLine, quoted, pos)) {
                    // Doubled quote inside quotes stands for one literal quote.
                    pos++;
                    field.append(nextLine.charAt(pos));
                } else {
                    quoted = !quoted;
                    if (!strictQuotes) {
                        // A quote in the middle of a field (past index 2 and
                        // not adjacent to a separator or the end of the line)
                        // is kept as literal text.
                        final boolean precededByText =
                                pos > 2 && nextLine.charAt(pos - 1) != separator;
                        final boolean followedByText =
                                length > pos + 1 && nextLine.charAt(pos + 1) != separator;
                        if (precededByText && followedByText) {
                            field.append(ch);
                        }
                    }
                }
            } else if (ch == separator && !quoted) {
                fields.add(field.toString());
                field.setLength(0);
            } else if (quoted || !strictQuotes) {
                field.append(ch);
            }

            pos++;
        }

        if (quoted) {
            if (!multi) {
                throw new IOException("Un-terminated quoted field at end of CSV line");
            }
            // Hold the open field back; the line break is part of its content.
            field.append('\n');
            pending = field.toString();
        } else {
            fields.add(field.toString());
        }

        return fields.toArray(new String[fields.size()]);
    }

    /**
     * Tells whether the quote at index {@code i} is the first half of a
     * doubled quote inside a quoted section.
     *
     * @param nextLine the line being parsed
     * @param inQuotes whether the parser is currently inside quotes
     * @param i        index of the quote character under inspection
     * @return true if inside quotes and the following character is also a quote
     */
    private boolean isNextCharacterEscapedQuote(String nextLine, boolean inQuotes, int i) {
        if (!inQuotes || nextLine.length() <= i + 1) {
            return false;
        }
        return nextLine.charAt(i + 1) == quotechar;
    }

    /**
     * Tells whether the character following index {@code i} can be escaped,
     * i.e. the parser is inside quotes and that character is a quote or an
     * escape character.
     *
     * @param nextLine the line being parsed
     * @param inQuotes whether the parser is currently inside quotes
     * @param i        index of the escape character under inspection
     * @return true if the next character should be taken literally
     */
    protected boolean isNextCharacterEscapable(String nextLine, boolean inQuotes, int i) {
        if (!inQuotes || nextLine.length() <= i + 1) {
            return false;
        }
        final char following = nextLine.charAt(i + 1);
        return following == quotechar || following == this.escape;
    }
}