View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   */ 
19  package org.apache.rat.report.xml.writer.impl.base;
20  
21  import java.io.IOException;
22  import java.io.Writer;
23  import java.util.Arrays;
24  import java.util.HashSet;
25  import java.util.Set;
26  
27  import org.apache.commons.collections.ArrayStack;
28  
29  import org.apache.rat.report.xml.writer.IXmlWriter;
30  import org.apache.rat.report.xml.writer.InvalidXmlException;
31  import org.apache.rat.report.xml.writer.OperationNotAllowedException;
32  
33  /**
34   * <p>Lightweight {@link IXmlWriter} implementation.</p>
35   * <p>
36   * Requires a wrapper to be used safely in a multithreaded
37   * environment.</p>
38   * <p>
39   * Not intended to be subclassed. Please copy and hack!</p>
40   */
41  public final class XmlWriter implements IXmlWriter {
42  
43      private static final byte NAME_START_MASK = 1 << 1; // flag bit carried by characters that may begin an XML name
44      private static final byte NAME_MASK = 1 << 2; // flag bit carried by characters that may appear within an XML name
45      private static final byte NAME_BODY_CHAR = NAME_MASK; // table code assigned to '.', '-', combining characters, digits and extenders
46      private static final byte NAME_START_OR_BODY_CHAR = NAME_MASK | NAME_START_MASK; // table code assigned to '_', ':' and letters
47      
48      private final static boolean[] ALLOWED_CHARACTERS = new boolean[1 << 16]; // one entry per UTF-16 code unit; true marks a legal XML 1.0 Char
49      
50      static {
51          // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]; all other entries keep the default false.
52          ALLOWED_CHARACTERS[0x9] = true;
53          ALLOWED_CHARACTERS[0xA] = true;
54          ALLOWED_CHARACTERS[0xD] = true;
55          Arrays.fill(ALLOWED_CHARACTERS, 0x20, 0xD7FF + 1, true); // toIndex is exclusive, hence "+ 1" to include #xD7FF
56          Arrays.fill(ALLOWED_CHARACTERS, 0xE000, 0xFFFD + 1, true); // likewise includes #xFFFD itself
57      }
58      
59      private final static byte[] CHARACTER_CODES = new byte[1 << 16]; // one name-classification code per UTF-16 code unit; entries left at 0 carry neither name flag
60      // Arrays.fill treats toIndex as exclusive, so every inclusive range of the XML 1.0 grammar below is filled up to "last + 1".
61      static {
62          // Name ::= (Letter | '_' | ':') (NameChar)*
63          CHARACTER_CODES['_'] = NAME_START_OR_BODY_CHAR;
64          CHARACTER_CODES[':'] = NAME_START_OR_BODY_CHAR;
65          // Letter ::= BaseChar | Ideographic
66          // BaseChar
67          Arrays.fill(CHARACTER_CODES, 0x0041, 0x005A + 1, NAME_START_OR_BODY_CHAR);
68          Arrays.fill(CHARACTER_CODES, 0x0061, 0x007A + 1, NAME_START_OR_BODY_CHAR);
69          Arrays.fill(CHARACTER_CODES, 0x00C0, 0x00D6 + 1, NAME_START_OR_BODY_CHAR);
70          Arrays.fill(CHARACTER_CODES, 0x00D8, 0x00F6 + 1, NAME_START_OR_BODY_CHAR);
71          Arrays.fill(CHARACTER_CODES, 0x00F8, 0x00FF + 1, NAME_START_OR_BODY_CHAR);
72          Arrays.fill(CHARACTER_CODES, 0x0100, 0x0131 + 1, NAME_START_OR_BODY_CHAR);
73          Arrays.fill(CHARACTER_CODES, 0x0134, 0x013E + 1, NAME_START_OR_BODY_CHAR);
74          Arrays.fill(CHARACTER_CODES, 0x0141, 0x0148 + 1, NAME_START_OR_BODY_CHAR);
75          Arrays.fill(CHARACTER_CODES, 0x014A, 0x017E + 1, NAME_START_OR_BODY_CHAR);
76          Arrays.fill(CHARACTER_CODES, 0x0180, 0x01C3 + 1, NAME_START_OR_BODY_CHAR);
77          Arrays.fill(CHARACTER_CODES, 0x01CD, 0x01F0 + 1, NAME_START_OR_BODY_CHAR);
78          Arrays.fill(CHARACTER_CODES, 0x01F4, 0x01F5 + 1, NAME_START_OR_BODY_CHAR);
79          Arrays.fill(CHARACTER_CODES, 0x01FA, 0x0217 + 1, NAME_START_OR_BODY_CHAR);
80          Arrays.fill(CHARACTER_CODES, 0x0250, 0x02A8 + 1, NAME_START_OR_BODY_CHAR);
81          Arrays.fill(CHARACTER_CODES, 0x02BB, 0x02C1 + 1, NAME_START_OR_BODY_CHAR);
82          CHARACTER_CODES[0x0386] = NAME_START_OR_BODY_CHAR;
83          Arrays.fill(CHARACTER_CODES, 0x0388, 0x038A + 1, NAME_START_OR_BODY_CHAR);
84          CHARACTER_CODES[0x038C] = NAME_START_OR_BODY_CHAR;
85          Arrays.fill(CHARACTER_CODES, 0x038E, 0x03A1 + 1, NAME_START_OR_BODY_CHAR);
86          Arrays.fill(CHARACTER_CODES, 0x03A3, 0x03CE + 1, NAME_START_OR_BODY_CHAR);
87          Arrays.fill(CHARACTER_CODES, 0x03D0, 0x03D6 + 1, NAME_START_OR_BODY_CHAR);
88          CHARACTER_CODES[0x03DA] = NAME_START_OR_BODY_CHAR;
89          CHARACTER_CODES[0x03DC] = NAME_START_OR_BODY_CHAR;
90          CHARACTER_CODES[0x03DE] = NAME_START_OR_BODY_CHAR;
91          CHARACTER_CODES[0x03E0] = NAME_START_OR_BODY_CHAR;
92          Arrays.fill(CHARACTER_CODES, 0x03E2, 0x03F3 + 1, NAME_START_OR_BODY_CHAR);
93          Arrays.fill(CHARACTER_CODES, 0x0401, 0x040C + 1, NAME_START_OR_BODY_CHAR);
94          Arrays.fill(CHARACTER_CODES, 0x040E, 0x044F + 1, NAME_START_OR_BODY_CHAR);
95          Arrays.fill(CHARACTER_CODES, 0x0451, 0x045C + 1, NAME_START_OR_BODY_CHAR);
96          Arrays.fill(CHARACTER_CODES, 0x045E, 0x0481 + 1, NAME_START_OR_BODY_CHAR);
97          Arrays.fill(CHARACTER_CODES, 0x0490, 0x04C4 + 1, NAME_START_OR_BODY_CHAR);
98          Arrays.fill(CHARACTER_CODES, 0x04C7, 0x04C8 + 1, NAME_START_OR_BODY_CHAR);
99          Arrays.fill(CHARACTER_CODES, 0x04CB, 0x04CC + 1, NAME_START_OR_BODY_CHAR);
100         Arrays.fill(CHARACTER_CODES, 0x04D0, 0x04EB + 1, NAME_START_OR_BODY_CHAR);
101         Arrays.fill(CHARACTER_CODES, 0x04EE, 0x04F5 + 1, NAME_START_OR_BODY_CHAR);
102         Arrays.fill(CHARACTER_CODES, 0x04F8, 0x04F9 + 1, NAME_START_OR_BODY_CHAR);
103         Arrays.fill(CHARACTER_CODES, 0x0531, 0x0556 + 1, NAME_START_OR_BODY_CHAR);
104         CHARACTER_CODES[0x0559] = NAME_START_OR_BODY_CHAR;
105         Arrays.fill(CHARACTER_CODES, 0x0561, 0x0586 + 1, NAME_START_OR_BODY_CHAR);
106         Arrays.fill(CHARACTER_CODES, 0x05D0, 0x05EA + 1, NAME_START_OR_BODY_CHAR);
107         Arrays.fill(CHARACTER_CODES, 0x05F0, 0x05F2 + 1, NAME_START_OR_BODY_CHAR);
108         Arrays.fill(CHARACTER_CODES, 0x0621, 0x063A + 1, NAME_START_OR_BODY_CHAR);
109         Arrays.fill(CHARACTER_CODES, 0x0641, 0x064A + 1, NAME_START_OR_BODY_CHAR);
110         Arrays.fill(CHARACTER_CODES, 0x0671, 0x06B7 + 1, NAME_START_OR_BODY_CHAR);
111         Arrays.fill(CHARACTER_CODES, 0x06BA, 0x06BE + 1, NAME_START_OR_BODY_CHAR);
112         Arrays.fill(CHARACTER_CODES, 0x06C0, 0x06CE + 1, NAME_START_OR_BODY_CHAR);
113         Arrays.fill(CHARACTER_CODES, 0x06D0, 0x06D3 + 1, NAME_START_OR_BODY_CHAR);
114         CHARACTER_CODES[0x06D5] = NAME_START_OR_BODY_CHAR;
115         Arrays.fill(CHARACTER_CODES, 0x06E5, 0x06E6 + 1, NAME_START_OR_BODY_CHAR);
116         Arrays.fill(CHARACTER_CODES, 0x0905, 0x0939 + 1, NAME_START_OR_BODY_CHAR);
117         CHARACTER_CODES[0x093D] = NAME_START_OR_BODY_CHAR;
118         Arrays.fill(CHARACTER_CODES, 0x0958, 0x0961 + 1, NAME_START_OR_BODY_CHAR);
119         Arrays.fill(CHARACTER_CODES, 0x0985, 0x098C + 1, NAME_START_OR_BODY_CHAR);
120         Arrays.fill(CHARACTER_CODES, 0x098F, 0x0990 + 1, NAME_START_OR_BODY_CHAR);
121         Arrays.fill(CHARACTER_CODES, 0x0993, 0x09A8 + 1, NAME_START_OR_BODY_CHAR);
122         Arrays.fill(CHARACTER_CODES, 0x09AA, 0x09B0 + 1, NAME_START_OR_BODY_CHAR);
123         CHARACTER_CODES[0x09B2] = NAME_START_OR_BODY_CHAR;
124         Arrays.fill(CHARACTER_CODES, 0x09B6, 0x09B9 + 1, NAME_START_OR_BODY_CHAR);
125         Arrays.fill(CHARACTER_CODES, 0x09DC, 0x09DD + 1, NAME_START_OR_BODY_CHAR);
126         Arrays.fill(CHARACTER_CODES, 0x09DF, 0x09E1 + 1, NAME_START_OR_BODY_CHAR);
127         Arrays.fill(CHARACTER_CODES, 0x09F0, 0x09F1 + 1, NAME_START_OR_BODY_CHAR);
128         Arrays.fill(CHARACTER_CODES, 0x0A05, 0x0A0A + 1, NAME_START_OR_BODY_CHAR);
129         Arrays.fill(CHARACTER_CODES, 0x0A0F, 0x0A10 + 1, NAME_START_OR_BODY_CHAR);
130         Arrays.fill(CHARACTER_CODES, 0x0A13, 0x0A28 + 1, NAME_START_OR_BODY_CHAR);
131         Arrays.fill(CHARACTER_CODES, 0x0A2A, 0x0A30 + 1, NAME_START_OR_BODY_CHAR);
132         Arrays.fill(CHARACTER_CODES, 0x0A32, 0x0A33 + 1, NAME_START_OR_BODY_CHAR);
133         Arrays.fill(CHARACTER_CODES, 0x0A35, 0x0A36 + 1, NAME_START_OR_BODY_CHAR);
134         Arrays.fill(CHARACTER_CODES, 0x0A38, 0x0A39 + 1, NAME_START_OR_BODY_CHAR);
135         Arrays.fill(CHARACTER_CODES, 0x0A59, 0x0A5C + 1, NAME_START_OR_BODY_CHAR);
136         CHARACTER_CODES[0x0A5E] = NAME_START_OR_BODY_CHAR;
137         Arrays.fill(CHARACTER_CODES, 0x0A72, 0x0A74 + 1, NAME_START_OR_BODY_CHAR);
138         Arrays.fill(CHARACTER_CODES, 0x0A85, 0x0A8B + 1, NAME_START_OR_BODY_CHAR);
139         CHARACTER_CODES[0x0A8D] = NAME_START_OR_BODY_CHAR;
140         Arrays.fill(CHARACTER_CODES, 0x0A8F, 0x0A91 + 1, NAME_START_OR_BODY_CHAR);
141         Arrays.fill(CHARACTER_CODES, 0x0A93, 0x0AA8 + 1, NAME_START_OR_BODY_CHAR);
142         Arrays.fill(CHARACTER_CODES, 0x0AAA, 0x0AB0 + 1, NAME_START_OR_BODY_CHAR);
143         Arrays.fill(CHARACTER_CODES, 0x0AB2, 0x0AB3 + 1, NAME_START_OR_BODY_CHAR);
144         Arrays.fill(CHARACTER_CODES, 0x0AB5, 0x0AB9 + 1, NAME_START_OR_BODY_CHAR);
145         CHARACTER_CODES[0x0ABD] = NAME_START_OR_BODY_CHAR;
146         CHARACTER_CODES[0x0AE0] = NAME_START_OR_BODY_CHAR;
147         Arrays.fill(CHARACTER_CODES, 0x0B05, 0x0B0C + 1, NAME_START_OR_BODY_CHAR);
148         Arrays.fill(CHARACTER_CODES, 0x0B0F, 0x0B10 + 1, NAME_START_OR_BODY_CHAR);
149         Arrays.fill(CHARACTER_CODES, 0x0B13, 0x0B28 + 1, NAME_START_OR_BODY_CHAR);
150         Arrays.fill(CHARACTER_CODES, 0x0B2A, 0x0B30 + 1, NAME_START_OR_BODY_CHAR);
151         Arrays.fill(CHARACTER_CODES, 0x0B32, 0x0B33 + 1, NAME_START_OR_BODY_CHAR);
152         Arrays.fill(CHARACTER_CODES, 0x0B36, 0x0B39 + 1, NAME_START_OR_BODY_CHAR);
153         CHARACTER_CODES[0x0B3D] = NAME_START_OR_BODY_CHAR;
154         Arrays.fill(CHARACTER_CODES, 0x0B5C, 0x0B5D + 1, NAME_START_OR_BODY_CHAR);
155         Arrays.fill(CHARACTER_CODES, 0x0B5F, 0x0B61 + 1, NAME_START_OR_BODY_CHAR);
156         Arrays.fill(CHARACTER_CODES, 0x0B85, 0x0B8A + 1, NAME_START_OR_BODY_CHAR);
157         Arrays.fill(CHARACTER_CODES, 0x0B8E, 0x0B90 + 1, NAME_START_OR_BODY_CHAR);
158         Arrays.fill(CHARACTER_CODES, 0x0B92, 0x0B95 + 1, NAME_START_OR_BODY_CHAR);
159         Arrays.fill(CHARACTER_CODES, 0x0B99, 0x0B9A + 1, NAME_START_OR_BODY_CHAR);
160         CHARACTER_CODES[0x0B9C] = NAME_START_OR_BODY_CHAR;
161         Arrays.fill(CHARACTER_CODES, 0x0B9E, 0x0B9F + 1, NAME_START_OR_BODY_CHAR);
162         Arrays.fill(CHARACTER_CODES, 0x0BA3, 0x0BA4 + 1, NAME_START_OR_BODY_CHAR);
163         Arrays.fill(CHARACTER_CODES, 0x0BA8, 0x0BAA + 1, NAME_START_OR_BODY_CHAR);
164         Arrays.fill(CHARACTER_CODES, 0x0BAE, 0x0BB5 + 1, NAME_START_OR_BODY_CHAR);
165         Arrays.fill(CHARACTER_CODES, 0x0BB7, 0x0BB9 + 1, NAME_START_OR_BODY_CHAR);
166         Arrays.fill(CHARACTER_CODES, 0x0C05, 0x0C0C + 1, NAME_START_OR_BODY_CHAR);
167         Arrays.fill(CHARACTER_CODES, 0x0C0E, 0x0C10 + 1, NAME_START_OR_BODY_CHAR);
168         Arrays.fill(CHARACTER_CODES, 0x0C12, 0x0C28 + 1, NAME_START_OR_BODY_CHAR);
169         Arrays.fill(CHARACTER_CODES, 0x0C2A, 0x0C33 + 1, NAME_START_OR_BODY_CHAR);
170         Arrays.fill(CHARACTER_CODES, 0x0C35, 0x0C39 + 1, NAME_START_OR_BODY_CHAR);
171         Arrays.fill(CHARACTER_CODES, 0x0C60, 0x0C61 + 1, NAME_START_OR_BODY_CHAR);
172         Arrays.fill(CHARACTER_CODES, 0x0C85, 0x0C8C + 1, NAME_START_OR_BODY_CHAR);
173         Arrays.fill(CHARACTER_CODES, 0x0C8E, 0x0C90 + 1, NAME_START_OR_BODY_CHAR);
174         Arrays.fill(CHARACTER_CODES, 0x0C92, 0x0CA8 + 1, NAME_START_OR_BODY_CHAR);
175         Arrays.fill(CHARACTER_CODES, 0x0CAA, 0x0CB3 + 1, NAME_START_OR_BODY_CHAR);
176         Arrays.fill(CHARACTER_CODES, 0x0CB5, 0x0CB9 + 1, NAME_START_OR_BODY_CHAR);
177         CHARACTER_CODES[0x0CDE] = NAME_START_OR_BODY_CHAR;
178         Arrays.fill(CHARACTER_CODES, 0x0CE0, 0x0CE1 + 1, NAME_START_OR_BODY_CHAR);
179         Arrays.fill(CHARACTER_CODES, 0x0D05, 0x0D0C + 1, NAME_START_OR_BODY_CHAR);
180         Arrays.fill(CHARACTER_CODES, 0x0D0E, 0x0D10 + 1, NAME_START_OR_BODY_CHAR);
181         Arrays.fill(CHARACTER_CODES, 0x0D12, 0x0D28 + 1, NAME_START_OR_BODY_CHAR);
182         Arrays.fill(CHARACTER_CODES, 0x0D2A, 0x0D39 + 1, NAME_START_OR_BODY_CHAR);
183         Arrays.fill(CHARACTER_CODES, 0x0D60, 0x0D61 + 1, NAME_START_OR_BODY_CHAR);
184         Arrays.fill(CHARACTER_CODES, 0x0E01, 0x0E2E + 1, NAME_START_OR_BODY_CHAR);
185         CHARACTER_CODES[0x0E30] = NAME_START_OR_BODY_CHAR;
186         Arrays.fill(CHARACTER_CODES, 0x0E32, 0x0E33 + 1, NAME_START_OR_BODY_CHAR);
187         Arrays.fill(CHARACTER_CODES, 0x0E40, 0x0E45 + 1, NAME_START_OR_BODY_CHAR);
188         Arrays.fill(CHARACTER_CODES, 0x0E81, 0x0E82 + 1, NAME_START_OR_BODY_CHAR);
189         CHARACTER_CODES[0x0E84] = NAME_START_OR_BODY_CHAR;
190         Arrays.fill(CHARACTER_CODES, 0x0E87, 0x0E88 + 1, NAME_START_OR_BODY_CHAR);
191         CHARACTER_CODES[0x0E8A] = NAME_START_OR_BODY_CHAR;
192         CHARACTER_CODES[0x0E8D] = NAME_START_OR_BODY_CHAR;
193         Arrays.fill(CHARACTER_CODES, 0x0E94, 0x0E97 + 1, NAME_START_OR_BODY_CHAR);
194         Arrays.fill(CHARACTER_CODES, 0x0E99, 0x0E9F + 1, NAME_START_OR_BODY_CHAR);
195         Arrays.fill(CHARACTER_CODES, 0x0EA1, 0x0EA3 + 1, NAME_START_OR_BODY_CHAR);
196         CHARACTER_CODES[0x0EA5] = NAME_START_OR_BODY_CHAR;
197         CHARACTER_CODES[0x0EA7] = NAME_START_OR_BODY_CHAR;
198         Arrays.fill(CHARACTER_CODES, 0x0EAA, 0x0EAB + 1, NAME_START_OR_BODY_CHAR);
199         Arrays.fill(CHARACTER_CODES, 0x0EAD, 0x0EAE + 1, NAME_START_OR_BODY_CHAR);
200         CHARACTER_CODES[0x0EB0] = NAME_START_OR_BODY_CHAR;
201         Arrays.fill(CHARACTER_CODES, 0x0EB2, 0x0EB3 + 1, NAME_START_OR_BODY_CHAR);
202         CHARACTER_CODES[0x0EBD] = NAME_START_OR_BODY_CHAR;
203         Arrays.fill(CHARACTER_CODES, 0x0EC0, 0x0EC4 + 1, NAME_START_OR_BODY_CHAR);
204         Arrays.fill(CHARACTER_CODES, 0x0F40, 0x0F47 + 1, NAME_START_OR_BODY_CHAR);
205         Arrays.fill(CHARACTER_CODES, 0x0F49, 0x0F69 + 1, NAME_START_OR_BODY_CHAR);
206         Arrays.fill(CHARACTER_CODES, 0x10A0, 0x10C5 + 1, NAME_START_OR_BODY_CHAR);
207         Arrays.fill(CHARACTER_CODES, 0x10D0, 0x10F6 + 1, NAME_START_OR_BODY_CHAR);
208         CHARACTER_CODES[0x1100] = NAME_START_OR_BODY_CHAR;
209         Arrays.fill(CHARACTER_CODES, 0x1102, 0x1103 + 1, NAME_START_OR_BODY_CHAR);
210         Arrays.fill(CHARACTER_CODES, 0x1105, 0x1107 + 1, NAME_START_OR_BODY_CHAR);
211         CHARACTER_CODES[0x1109] = NAME_START_OR_BODY_CHAR;
212         Arrays.fill(CHARACTER_CODES, 0x110B, 0x110C + 1, NAME_START_OR_BODY_CHAR);
213         Arrays.fill(CHARACTER_CODES, 0x110E, 0x1112 + 1, NAME_START_OR_BODY_CHAR);
214         CHARACTER_CODES[0x113C] = NAME_START_OR_BODY_CHAR;
215         CHARACTER_CODES[0x113E] = NAME_START_OR_BODY_CHAR;
216         CHARACTER_CODES[0x1140] = NAME_START_OR_BODY_CHAR;
217         CHARACTER_CODES[0x114C] = NAME_START_OR_BODY_CHAR;
218         CHARACTER_CODES[0x114E] = NAME_START_OR_BODY_CHAR;
219         CHARACTER_CODES[0x1150] = NAME_START_OR_BODY_CHAR;
220         Arrays.fill(CHARACTER_CODES, 0x1154, 0x1155 + 1, NAME_START_OR_BODY_CHAR);
221         CHARACTER_CODES[0x1159] = NAME_START_OR_BODY_CHAR;
222         Arrays.fill(CHARACTER_CODES, 0x115F, 0x1161 + 1, NAME_START_OR_BODY_CHAR);
223         CHARACTER_CODES[0x1163] = NAME_START_OR_BODY_CHAR;
224         CHARACTER_CODES[0x1165] = NAME_START_OR_BODY_CHAR;
225         CHARACTER_CODES[0x1167] = NAME_START_OR_BODY_CHAR;
226         CHARACTER_CODES[0x1169] = NAME_START_OR_BODY_CHAR;
227         Arrays.fill(CHARACTER_CODES, 0x116D, 0x116E + 1, NAME_START_OR_BODY_CHAR);
228         Arrays.fill(CHARACTER_CODES, 0x1172, 0x1173 + 1, NAME_START_OR_BODY_CHAR);
229         CHARACTER_CODES[0x1175] = NAME_START_OR_BODY_CHAR;
230         CHARACTER_CODES[0x119E] = NAME_START_OR_BODY_CHAR;
231         CHARACTER_CODES[0x11A8] = NAME_START_OR_BODY_CHAR;
232         CHARACTER_CODES[0x11AB] = NAME_START_OR_BODY_CHAR;
233         Arrays.fill(CHARACTER_CODES, 0x11AE, 0x11AF + 1, NAME_START_OR_BODY_CHAR);
234         Arrays.fill(CHARACTER_CODES, 0x11B7, 0x11B8 + 1, NAME_START_OR_BODY_CHAR);
235         CHARACTER_CODES[0x11BA] = NAME_START_OR_BODY_CHAR;
236         Arrays.fill(CHARACTER_CODES, 0x11BC, 0x11C2 + 1, NAME_START_OR_BODY_CHAR);
237         CHARACTER_CODES[0x11EB] = NAME_START_OR_BODY_CHAR;
238         CHARACTER_CODES[0x11F0] = NAME_START_OR_BODY_CHAR;
239         CHARACTER_CODES[0x11F9] = NAME_START_OR_BODY_CHAR;
240         Arrays.fill(CHARACTER_CODES, 0x1E00, 0x1E9B + 1, NAME_START_OR_BODY_CHAR);
241         Arrays.fill(CHARACTER_CODES, 0x1EA0, 0x1EF9 + 1, NAME_START_OR_BODY_CHAR);
242         Arrays.fill(CHARACTER_CODES, 0x1F00, 0x1F15 + 1, NAME_START_OR_BODY_CHAR);
243         Arrays.fill(CHARACTER_CODES, 0x1F18, 0x1F1D + 1, NAME_START_OR_BODY_CHAR);
244         Arrays.fill(CHARACTER_CODES, 0x1F20, 0x1F45 + 1, NAME_START_OR_BODY_CHAR);
245         Arrays.fill(CHARACTER_CODES, 0x1F48, 0x1F4D + 1, NAME_START_OR_BODY_CHAR);
246         Arrays.fill(CHARACTER_CODES, 0x1F50, 0x1F57 + 1, NAME_START_OR_BODY_CHAR);
247         CHARACTER_CODES[0x1F59] = NAME_START_OR_BODY_CHAR;
248         CHARACTER_CODES[0x1F5B] = NAME_START_OR_BODY_CHAR;
249         CHARACTER_CODES[0x1F5D] = NAME_START_OR_BODY_CHAR;
250         Arrays.fill(CHARACTER_CODES, 0x1F5F, 0x1F7D + 1, NAME_START_OR_BODY_CHAR);
251         Arrays.fill(CHARACTER_CODES, 0x1F80, 0x1FB4 + 1, NAME_START_OR_BODY_CHAR);
252         Arrays.fill(CHARACTER_CODES, 0x1FB6, 0x1FBC + 1, NAME_START_OR_BODY_CHAR);
253         CHARACTER_CODES[0x1FBE] = NAME_START_OR_BODY_CHAR;
254         Arrays.fill(CHARACTER_CODES, 0x1FC2, 0x1FC4 + 1, NAME_START_OR_BODY_CHAR);
255         Arrays.fill(CHARACTER_CODES, 0x1FC6, 0x1FCC + 1, NAME_START_OR_BODY_CHAR);
256         Arrays.fill(CHARACTER_CODES, 0x1FD0, 0x1FD3 + 1, NAME_START_OR_BODY_CHAR);
257         Arrays.fill(CHARACTER_CODES, 0x1FD6, 0x1FDB + 1, NAME_START_OR_BODY_CHAR);
258         Arrays.fill(CHARACTER_CODES, 0x1FE0, 0x1FEC + 1, NAME_START_OR_BODY_CHAR);
259         Arrays.fill(CHARACTER_CODES, 0x1FF2, 0x1FF4 + 1, NAME_START_OR_BODY_CHAR);
260         Arrays.fill(CHARACTER_CODES, 0x1FF6, 0x1FFC + 1, NAME_START_OR_BODY_CHAR);
261         CHARACTER_CODES[0x2126] = NAME_START_OR_BODY_CHAR;
262         Arrays.fill(CHARACTER_CODES, 0x212A, 0x212B + 1, NAME_START_OR_BODY_CHAR);
263         CHARACTER_CODES[0x212E] = NAME_START_OR_BODY_CHAR;
264         Arrays.fill(CHARACTER_CODES, 0x2180, 0x2182 + 1, NAME_START_OR_BODY_CHAR);
265         Arrays.fill(CHARACTER_CODES, 0x3041, 0x3094 + 1, NAME_START_OR_BODY_CHAR);
266         Arrays.fill(CHARACTER_CODES, 0x30A1, 0x30FA + 1, NAME_START_OR_BODY_CHAR);
267         Arrays.fill(CHARACTER_CODES, 0x3105, 0x312C + 1, NAME_START_OR_BODY_CHAR);
268         Arrays.fill(CHARACTER_CODES, 0xAC00, 0xD7A3 + 1, NAME_START_OR_BODY_CHAR);
269         // Ideographic
270         Arrays.fill(CHARACTER_CODES, 0x4E00, 0x9FA5 + 1, NAME_START_OR_BODY_CHAR);
271         CHARACTER_CODES[0x3007] = NAME_START_OR_BODY_CHAR;
272         Arrays.fill(CHARACTER_CODES, 0x3021, 0x3029 + 1, NAME_START_OR_BODY_CHAR);
273         // NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
274         CHARACTER_CODES['.'] = NAME_BODY_CHAR;
275         CHARACTER_CODES['-'] = NAME_BODY_CHAR;
276         // CombiningChar
277         Arrays.fill(CHARACTER_CODES, 0x0300, 0x0345 + 1, NAME_BODY_CHAR);
278         Arrays.fill(CHARACTER_CODES, 0x0360, 0x0361 + 1, NAME_BODY_CHAR);
279         Arrays.fill(CHARACTER_CODES, 0x0483, 0x0486 + 1, NAME_BODY_CHAR);
280         Arrays.fill(CHARACTER_CODES, 0x0591, 0x05A1 + 1, NAME_BODY_CHAR);
281         Arrays.fill(CHARACTER_CODES, 0x05A3, 0x05B9 + 1, NAME_BODY_CHAR);
282         Arrays.fill(CHARACTER_CODES, 0x05BB, 0x05BD + 1, NAME_BODY_CHAR);
283         CHARACTER_CODES[0x05BF] = NAME_BODY_CHAR;
284         Arrays.fill(CHARACTER_CODES, 0x05C1, 0x05C2 + 1, NAME_BODY_CHAR);
285         CHARACTER_CODES[0x05C4] = NAME_BODY_CHAR;
286         Arrays.fill(CHARACTER_CODES, 0x064B, 0x0652 + 1, NAME_BODY_CHAR);
287         CHARACTER_CODES[0x0670] = NAME_BODY_CHAR;
288         Arrays.fill(CHARACTER_CODES, 0x06D6, 0x06DC + 1, NAME_BODY_CHAR);
289         Arrays.fill(CHARACTER_CODES, 0x06DD, 0x06DF + 1, NAME_BODY_CHAR);
290         Arrays.fill(CHARACTER_CODES, 0x06E0, 0x06E4 + 1, NAME_BODY_CHAR);
291         Arrays.fill(CHARACTER_CODES, 0x06E7, 0x06E8 + 1, NAME_BODY_CHAR);
292         Arrays.fill(CHARACTER_CODES, 0x06EA, 0x06ED + 1, NAME_BODY_CHAR);
293         Arrays.fill(CHARACTER_CODES, 0x0901, 0x0903 + 1, NAME_BODY_CHAR);
294         CHARACTER_CODES[0x093C] = NAME_BODY_CHAR;
295         Arrays.fill(CHARACTER_CODES, 0x093E, 0x094C + 1, NAME_BODY_CHAR);
296         CHARACTER_CODES[0x094D] = NAME_BODY_CHAR;
297         Arrays.fill(CHARACTER_CODES, 0x0951, 0x0954 + 1, NAME_BODY_CHAR);
298         Arrays.fill(CHARACTER_CODES, 0x0962, 0x0963 + 1, NAME_BODY_CHAR);
299         Arrays.fill(CHARACTER_CODES, 0x0981, 0x0983 + 1, NAME_BODY_CHAR);
300         CHARACTER_CODES[0x09BC] = NAME_BODY_CHAR;
301         CHARACTER_CODES[0x09BE] = NAME_BODY_CHAR;
302         CHARACTER_CODES[0x09BF] = NAME_BODY_CHAR;
303         Arrays.fill(CHARACTER_CODES, 0x09C0, 0x09C4 + 1, NAME_BODY_CHAR);
304         Arrays.fill(CHARACTER_CODES, 0x09C7, 0x09C8 + 1, NAME_BODY_CHAR);
305         Arrays.fill(CHARACTER_CODES, 0x09CB, 0x09CD + 1, NAME_BODY_CHAR);
306         CHARACTER_CODES[0x09D7] = NAME_BODY_CHAR;
307         Arrays.fill(CHARACTER_CODES, 0x09E2, 0x09E3 + 1, NAME_BODY_CHAR);
308         CHARACTER_CODES[0x0A02] = NAME_BODY_CHAR;
309         CHARACTER_CODES[0x0A3C] = NAME_BODY_CHAR;
310         CHARACTER_CODES[0x0A3E] = NAME_BODY_CHAR;
311         CHARACTER_CODES[0x0A3F] = NAME_BODY_CHAR;
312         Arrays.fill(CHARACTER_CODES, 0x0A40, 0x0A42 + 1, NAME_BODY_CHAR);
313         Arrays.fill(CHARACTER_CODES, 0x0A47, 0x0A48 + 1, NAME_BODY_CHAR);
314         Arrays.fill(CHARACTER_CODES, 0x0A4B, 0x0A4D + 1, NAME_BODY_CHAR);
315         Arrays.fill(CHARACTER_CODES, 0x0A70, 0x0A71 + 1, NAME_BODY_CHAR);
316         Arrays.fill(CHARACTER_CODES, 0x0A81, 0x0A83 + 1, NAME_BODY_CHAR);
317         CHARACTER_CODES[0x0ABC] = NAME_BODY_CHAR;
318         Arrays.fill(CHARACTER_CODES, 0x0ABE, 0x0AC5 + 1, NAME_BODY_CHAR);
319         Arrays.fill(CHARACTER_CODES, 0x0AC7, 0x0AC9 + 1, NAME_BODY_CHAR);
320         Arrays.fill(CHARACTER_CODES, 0x0ACB, 0x0ACD + 1, NAME_BODY_CHAR);
321         Arrays.fill(CHARACTER_CODES, 0x0B01, 0x0B03 + 1, NAME_BODY_CHAR);
322         CHARACTER_CODES[0x0B3C] = NAME_BODY_CHAR;
323         Arrays.fill(CHARACTER_CODES, 0x0B3E, 0x0B43 + 1, NAME_BODY_CHAR);
324         Arrays.fill(CHARACTER_CODES, 0x0B47, 0x0B48 + 1, NAME_BODY_CHAR);
325         Arrays.fill(CHARACTER_CODES, 0x0B4B, 0x0B4D + 1, NAME_BODY_CHAR);
326         Arrays.fill(CHARACTER_CODES, 0x0B56, 0x0B57 + 1, NAME_BODY_CHAR);
327         Arrays.fill(CHARACTER_CODES, 0x0B82, 0x0B83 + 1, NAME_BODY_CHAR);
328         Arrays.fill(CHARACTER_CODES, 0x0BBE, 0x0BC2 + 1, NAME_BODY_CHAR);
329         Arrays.fill(CHARACTER_CODES, 0x0BC6, 0x0BC8 + 1, NAME_BODY_CHAR);
330         Arrays.fill(CHARACTER_CODES, 0x0BCA, 0x0BCD + 1, NAME_BODY_CHAR);
331         CHARACTER_CODES[0x0BD7] = NAME_BODY_CHAR;
332         Arrays.fill(CHARACTER_CODES, 0x0C01, 0x0C03 + 1, NAME_BODY_CHAR);
333         Arrays.fill(CHARACTER_CODES, 0x0C3E, 0x0C44 + 1, NAME_BODY_CHAR);
334         Arrays.fill(CHARACTER_CODES, 0x0C46, 0x0C48 + 1, NAME_BODY_CHAR);
335         Arrays.fill(CHARACTER_CODES, 0x0C4A, 0x0C4D + 1, NAME_BODY_CHAR);
336         Arrays.fill(CHARACTER_CODES, 0x0C55, 0x0C56 + 1, NAME_BODY_CHAR);
337         Arrays.fill(CHARACTER_CODES, 0x0C82, 0x0C83 + 1, NAME_BODY_CHAR);
338         Arrays.fill(CHARACTER_CODES, 0x0CBE, 0x0CC4 + 1, NAME_BODY_CHAR);
339         Arrays.fill(CHARACTER_CODES, 0x0CC6, 0x0CC8 + 1, NAME_BODY_CHAR);
340         Arrays.fill(CHARACTER_CODES, 0x0CCA, 0x0CCD + 1, NAME_BODY_CHAR);
341         Arrays.fill(CHARACTER_CODES, 0x0CD5, 0x0CD6 + 1, NAME_BODY_CHAR);
342         Arrays.fill(CHARACTER_CODES, 0x0D02, 0x0D03 + 1, NAME_BODY_CHAR);
343         Arrays.fill(CHARACTER_CODES, 0x0D3E, 0x0D43 + 1, NAME_BODY_CHAR);
344         Arrays.fill(CHARACTER_CODES, 0x0D46, 0x0D48 + 1, NAME_BODY_CHAR);
345         Arrays.fill(CHARACTER_CODES, 0x0D4A, 0x0D4D + 1, NAME_BODY_CHAR);
346         CHARACTER_CODES[0x0D57] = NAME_BODY_CHAR;
347         CHARACTER_CODES[0x0E31] = NAME_BODY_CHAR;
348         Arrays.fill(CHARACTER_CODES, 0x0E34, 0x0E3A + 1, NAME_BODY_CHAR);
349         Arrays.fill(CHARACTER_CODES, 0x0E47, 0x0E4E + 1, NAME_BODY_CHAR);
350         CHARACTER_CODES[0x0EB1] = NAME_BODY_CHAR;
351         Arrays.fill(CHARACTER_CODES, 0x0EB4, 0x0EB9 + 1, NAME_BODY_CHAR);
352         Arrays.fill(CHARACTER_CODES, 0x0EBB, 0x0EBC + 1, NAME_BODY_CHAR);
353         Arrays.fill(CHARACTER_CODES, 0x0EC8, 0x0ECD + 1, NAME_BODY_CHAR);
354         Arrays.fill(CHARACTER_CODES, 0x0F18, 0x0F19 + 1, NAME_BODY_CHAR);
355         CHARACTER_CODES[0x0F35] = NAME_BODY_CHAR;
356         CHARACTER_CODES[0x0F37] = NAME_BODY_CHAR;
357         CHARACTER_CODES[0x0F39] = NAME_BODY_CHAR;
358         CHARACTER_CODES[0x0F3E] = NAME_BODY_CHAR;
359         CHARACTER_CODES[0x0F3F] = NAME_BODY_CHAR;
360         Arrays.fill(CHARACTER_CODES, 0x0F71, 0x0F84 + 1, NAME_BODY_CHAR);
361         Arrays.fill(CHARACTER_CODES, 0x0F86, 0x0F8B + 1, NAME_BODY_CHAR);
362         Arrays.fill(CHARACTER_CODES, 0x0F90, 0x0F95 + 1, NAME_BODY_CHAR);
363         CHARACTER_CODES[0x0F97] = NAME_BODY_CHAR;
364         Arrays.fill(CHARACTER_CODES, 0x0F99, 0x0FAD + 1, NAME_BODY_CHAR);
365         Arrays.fill(CHARACTER_CODES, 0x0FB1, 0x0FB7 + 1, NAME_BODY_CHAR);
366         CHARACTER_CODES[0x0FB9] = NAME_BODY_CHAR;
367         Arrays.fill(CHARACTER_CODES, 0x20D0, 0x20DC + 1, NAME_BODY_CHAR);
368         CHARACTER_CODES[0x20E1] = NAME_BODY_CHAR;
369         Arrays.fill(CHARACTER_CODES, 0x302A, 0x302F + 1, NAME_BODY_CHAR);
370         CHARACTER_CODES[0x3099] = NAME_BODY_CHAR;
371         CHARACTER_CODES[0x309A] = NAME_BODY_CHAR;
372         // Digit
373         Arrays.fill(CHARACTER_CODES, 0x0030, 0x0039 + 1, NAME_BODY_CHAR);
374         Arrays.fill(CHARACTER_CODES, 0x0660, 0x0669 + 1, NAME_BODY_CHAR);
375         Arrays.fill(CHARACTER_CODES, 0x06F0, 0x06F9 + 1, NAME_BODY_CHAR);
376         Arrays.fill(CHARACTER_CODES, 0x0966, 0x096F + 1, NAME_BODY_CHAR);
377         Arrays.fill(CHARACTER_CODES, 0x09E6, 0x09EF + 1, NAME_BODY_CHAR);
378         Arrays.fill(CHARACTER_CODES, 0x0A66, 0x0A6F + 1, NAME_BODY_CHAR);
379         Arrays.fill(CHARACTER_CODES, 0x0AE6, 0x0AEF + 1, NAME_BODY_CHAR);
380         Arrays.fill(CHARACTER_CODES, 0x0B66, 0x0B6F + 1, NAME_BODY_CHAR);
381         Arrays.fill(CHARACTER_CODES, 0x0BE7, 0x0BEF + 1, NAME_BODY_CHAR);
382         Arrays.fill(CHARACTER_CODES, 0x0C66, 0x0C6F + 1, NAME_BODY_CHAR);
383         Arrays.fill(CHARACTER_CODES, 0x0CE6, 0x0CEF + 1, NAME_BODY_CHAR);
384         Arrays.fill(CHARACTER_CODES, 0x0D66, 0x0D6F + 1, NAME_BODY_CHAR);
385         Arrays.fill(CHARACTER_CODES, 0x0E50, 0x0E59 + 1, NAME_BODY_CHAR);
386         Arrays.fill(CHARACTER_CODES, 0x0ED0, 0x0ED9 + 1, NAME_BODY_CHAR);
387         Arrays.fill(CHARACTER_CODES, 0x0F20, 0x0F29 + 1, NAME_BODY_CHAR);
388         // Extender
389         CHARACTER_CODES[0x00B7] = NAME_BODY_CHAR;
390         CHARACTER_CODES[0x02D0] = NAME_BODY_CHAR;
391         CHARACTER_CODES[0x02D1] = NAME_BODY_CHAR;
392         CHARACTER_CODES[0x0387] = NAME_BODY_CHAR;
393         CHARACTER_CODES[0x0640] = NAME_BODY_CHAR;
394         CHARACTER_CODES[0x0E46] = NAME_BODY_CHAR;
395         CHARACTER_CODES[0x0EC6] = NAME_BODY_CHAR;
396         CHARACTER_CODES[0x3005] = NAME_BODY_CHAR;
397         Arrays.fill(CHARACTER_CODES, 0x3031, 0x3035 + 1, NAME_BODY_CHAR);
398         Arrays.fill(CHARACTER_CODES, 0x309D, 0x309E + 1, NAME_BODY_CHAR);
399         Arrays.fill(CHARACTER_CODES, 0x30FC, 0x30FE + 1, NAME_BODY_CHAR);
400 
401     }
402     
403     private final Writer writer; // destination of everything this class writes
404     private final ArrayStack elementNames; // names pushed by openElement; empty means no element is currently open
405     private final Set currentAttributes = new HashSet(); // attribute names already written for the element opened last; cleared by openElement
406     
407     boolean elementsWritten = false; // becomes true once openElement has accepted an element
408     boolean inElement = false; // true while a start tag is still open: set by openElement, reset by content
409     boolean prologWritten = false; // becomes true once startDocument has written the prolog
410     
411     public XmlWriter(final Writer writer) { // creates an instance that sends all of its output to the given writer
412         elementNames = new ArrayStack(); // starts out empty: no element has been opened yet
413         this.writer = writer;
414     }
415     
416     /**
417      * Starts a document by writing a prolog.
418      * Calling this method is optional.
419      * When writing a document fragment, it should <em>not</em> be called.
420      * @return this object
421      * @throws OperationNotAllowedException 
422      * if called after the first element has been written
423      * or once a prolog has already been written
424      */
425     public IXmlWriter startDocument() throws IOException {
426         if (elementsWritten) {
427             throw new OperationNotAllowedException("Document already started");
428         }
429         if (prologWritten) {
430             throw new OperationNotAllowedException("Only one prolog allowed");
431         }
432         writer.write("<?xml version='1.0'?>");
433         prologWritten = true;
434         return this;
435     }
436     
437     /**
438      * Begins a new element by writing <code>&lt;</code> followed by its name,
439      * first finishing the start tag of the previously opened element if that is still open.
440      * @param elementName the name of the element, not null
441      * @return this object
442      * @throws InvalidXmlException if the name is not valid for an xml element
443      * @throws OperationNotAllowedException if called after the first element has been closed
444      * @throws IOException if the underlying writer fails
445      */
446     public IXmlWriter openElement(final CharSequence elementName) throws IOException {
447         final boolean rootClosed = elementsWritten && elementNames.isEmpty();
448         if (rootClosed) {
449             throw new OperationNotAllowedException("Root element already closed. Cannot open new element.");
450         } else if (!isValidName(elementName)) {
451             throw new InvalidXmlException("'" + elementName + "' is not a valid element name");
452         }
453         elementsWritten = true;
454         if (inElement) {
455             writer.write('>'); // finish the start tag that is still open
456         }
457         writer.write('<');
458         rawWrite(elementName);
459         currentAttributes.clear(); // attribute names are tracked per element
460         elementNames.push(elementName);
461         inElement = true;
462         return this;
463     }
464     
465     /**
466      * Writes an attribute of an element.
467      * Note that this is only allowed directly after {@link #openElement(CharSequence)}
468      * or {@link #attribute}.
469      * 
470      * @param name the attribute name, not null
471      * @param value the attribute value, not null
472      * @return this object
473      * @throws InvalidXmlException if the name is not valid for an xml attribute 
474      * or if a value for the attribute has already been written
475      * @throws OperationNotAllowedException if called after {@link #content(CharSequence)} 
476      * or {@link #closeElement()} or before any call to {@link #openElement(CharSequence)}
477      */
478     public IXmlWriter attribute(CharSequence name, CharSequence value) throws IOException {
479         if (elementNames.isEmpty()) {
480             if (elementsWritten) {
481                 throw new OperationNotAllowedException("Root element has already been closed.");
482             } else {
483                 throw new OperationNotAllowedException("Close called before an element has been opened.");            
484             }
485         }
486         if (!isValidName(name)) {
487             throw new InvalidXmlException("'" + name + "' is not a valid attribute name.");
488         }
489         if (!inElement) {
490             throw new InvalidXmlException("Attributes can only be written in elements");
491         }
492         if (currentAttributes.contains(name)) {
493             throw new InvalidXmlException("Each attribute can only be written once");
494         }
495         writer.write(' ');
496         rawWrite(name);
497         writer.write('=');
498         writer.write('\'');
499         writeAttributeContent(value);
500         writer.write('\'');
501         currentAttributes.add(name);
502         return this;
503     }
504     
505     private void writeAttributeContent(CharSequence content) throws IOException { // escapes and writes an attribute value
506         writeEscaped(content, true); // true selects attribute mode, which additionally escapes ' and "
507     }
508 
509     /**
510      * Writes character content into the element that is currently open.
511      * Calling this method will automatically finish the start tag of that
512      * element if it is still open, so no further attributes can be added.
513      * Note that this method escapes the text and does not use CDATA.
514      * @param content the content to write
515      * @return this object
516      * @throws OperationNotAllowedException if called before any call to
517      * {@link #openElement} or after the first element has been closed
518      * @throws IOException if the underlying writer fails
519      */
520     public IXmlWriter content(CharSequence content) throws IOException {
521         final boolean nothingOpen = elementNames.isEmpty();
522         if (nothingOpen && elementsWritten) {
523             throw new OperationNotAllowedException("Root element has already been closed.");
524         }
525         if (nothingOpen) {
526             throw new OperationNotAllowedException("An element must be opened before content can be written.");
527         }
528         if (inElement) {
529             writer.write('>'); // the start tag is still open: finish it before the text
530         }
531         writeBodyContent(content);
532         inElement = false;
533         return this;
534     }
535     
536     private void writeBodyContent(final CharSequence content) throws IOException {
537         writeEscaped(content, false);
538     }
539 
540     private void writeEscaped(final CharSequence content, boolean isAttributeContent) throws IOException {
541         final int length = content.length();
542         for (int i=0;i<length;i++) {
543             char character = content.charAt(i);
544             if (character == '&') {
545                 writer.write("&amp;");
546             } else if (character == '<') {
547                 writer.write("&lt;");
548             } else if (character == '>') {
549                 writer.write("&gt;");
550             } else if (isAttributeContent && character == '\'') {
551                 writer.write("&apos;");
552             } else if (isAttributeContent && character == '\"') {
553                 writer.write("&quot;");
554             } else if (isOutOfRange(character)) {
555                 writer.write('?');
556             } else {
557                 writer.write(character);
558             }
559         }
560     }
561     
562     private boolean isOutOfRange(final char character) {
563         final boolean result = !ALLOWED_CHARACTERS[character];
564         return result;
565     }
566 
567     /**
568      * Closes the last element written.
569      * 
570      * @return this object
571      * @throws OperationNotAllowedException 
572      * if called before any call to {@link #openElement} 
573      * or after the first element has been closed
574      */
575     public IXmlWriter closeElement() throws IOException {
576         if (elementNames.isEmpty()) {
577             if (elementsWritten) {
578                 throw new OperationNotAllowedException("Root element has already been closed.");
579             } else {
580                 throw new OperationNotAllowedException("Close called before an element has been opened.");            
581             }
582         }
583         final CharSequence elementName = (CharSequence) elementNames.pop();
584         if (inElement) {
585             writer.write('/');
586             writer.write('>');
587         } else {
588             writer.write('<');
589             writer.write('/');
590             rawWrite(elementName);
591             writer.write('>');
592         }
593         writer.flush();
594         inElement = false;
595         return this;
596     }
597                                                          
598     
599     /**
600      * Closes all pending elements.
601      * When appropriate, resources are also flushed and closed.
602      * No exception is raised when called upon a document whose
603      * root element has already been closed.
604      * @return this object
605      * @throws OperationNotAllowedException 
606      * if called before any call to {@link #openElement} 
607      */
608     public IXmlWriter closeDocument() throws IOException {
609         if (elementNames.isEmpty()) {
610             if (!elementsWritten) {
611                 throw new OperationNotAllowedException("Close called before an element has been opened.");            
612             }
613         }
614         while(!elementNames.isEmpty()) {
615             closeElement();
616         }
617         writer.flush();
618         return this;
619     }
620     
621     private void rawWrite(final CharSequence sequence) throws IOException {
622         for (int i=0;i<sequence.length();i++) {
623             final char charAt = sequence.charAt(i);
624             writer.write(charAt);
625         }
626     }
627     
628     private boolean isValidName(final CharSequence sequence) {
629         boolean result = true;
630         final int length = sequence.length();
631         for (int i=0;i<length;i++) {
632             char character = sequence.charAt(i);
633             if (i==0) {
634                 if (!isValidNameStart(character)) {
635                     result = false;
636                     break;
637                 }
638             } else {
639                 if (!isValidNameBody(character)) {
640                     result = false;
641                     break;
642                 }
643             }
644         }
645         return result;
646     }
647     
648     private boolean isValidNameStart(final char character) {
649         final byte code = CHARACTER_CODES[character];
650         final boolean result = (code & NAME_START_MASK) > 0;
651         return result;
652     }
653     
654     private boolean isValidNameBody(final char character) {
655         final byte code = CHARACTER_CODES[character];
656         final boolean result = (code & NAME_MASK) > 0;
657         return result;
658     }
659 }