1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.rat.document.impl.guesser;
20
21 import java.io.IOException;
22 import java.io.Reader;
23 import java.util.Locale;
24
25 import org.apache.rat.api.Document;
26
27
28
29
30 public class BinaryGuesser {
31
32 private static boolean isBinaryDocument(Document document) {
33 boolean result = false;
34 Reader reader = null;
35 try
36 {
37 reader = document.reader();
38 result = isBinary(reader);
39 }
40 catch (IOException e)
41 {
42 result = false;
43 }
44 finally
45 {
46 try
47 {
48 if (reader != null)
49 {
50 reader.close();
51 }
52 }
53 catch (IOException e)
54 {
55
56 }
57 }
58 return result;
59 }
60
61
62
63
64 public static boolean isBinary(Reader in) {
65 boolean result = false;
66 char[] taste = new char[100];
67 try {
68 int bytesRead = in.read(taste);
69 if (bytesRead > 0) {
70 int highBytes = 0;
71 for (int i=0;i<bytesRead;i++) {
72 if (taste[i] > BinaryGuesser.NON_ASCII_THREASHOLD
73 || taste[i] <= BinaryGuesser.ASCII_CHAR_THREASHOLD) {
74 highBytes++;
75 }
76 }
77 if (highBytes * BinaryGuesser.HIGH_BYTES_RATIO
78 > bytesRead * BinaryGuesser.TOTAL_READ_RATIO) {
79 result = true;
80 }
81 }
82 } catch (IOException e) {
83
84 }
85 return result;
86 }
87
88 public static final boolean isBinaryData(final String name) {
89 return extensionMatches(name, DATA_EXTENSIONS);
90 }
91
92
93
94
95 public static final boolean isNonBinary(final String name) {
96 if (name == null) {return false;}
97 return extensionMatches(name.toUpperCase(Locale.US),
98 BinaryGuesser.NON_BINARY_EXTENSIONS);
99 }
100
101 public static final boolean isExecutable(final String name) {
102 return name.equals(BinaryGuesser.JAVA) || extensionMatches(name, EXE_EXTENSIONS)
103 || containsExtension(name, EXE_EXTENSIONS);
104 }
105
106 public static boolean containsExtension(final String name,
107 final String[] exts) {
108 boolean result = false;
109 for (int i = 0; !result && i < exts.length; i++) {
110 result = name.indexOf("." + exts[i] + ".") >= 0;
111 }
112 return result;
113 }
114
115 public static boolean extensionMatches(final String name,
116 final String[] exts) {
117 boolean result = false;
118 for (int i = 0; !result && i < exts.length; i++) {
119 result = name.endsWith("." + exts[i]);
120 }
121 return result;
122 }
123
124 public static boolean isBytecode(final String name) {
125 return BinaryGuesser.extensionMatches(name, BYTECODE_EXTENSIONS);
126 }
127
128 public static final boolean isImage(final String name) {
129 return BinaryGuesser.extensionMatches(name, IMAGE_EXTENSIONS);
130 }
131
132 public static final boolean isKeystore(final String name) {
133 return BinaryGuesser.extensionMatches(name, KEYSTORE_EXTENSIONS);
134 }
135
136
137
138
139 public static final boolean isBinary(final String name) {
140 if (name == null) {return false;}
141 String normalisedName = GuessUtils.normalise(name);
142 return BinaryGuesser.JAR_MANIFEST.equals(name) || BinaryGuesser.isImage(normalisedName)
143 || BinaryGuesser.isKeystore(normalisedName) || BinaryGuesser.isBytecode(normalisedName)
144 || BinaryGuesser.isBinaryData(normalisedName) || BinaryGuesser.isExecutable(normalisedName);
145 }
146
147 public static final String[] DATA_EXTENSIONS = {
148 "DAT", "DOC",
149 "NCB", "IDB",
150 "SUO", "XCF",
151 "RAJ", "CERT",
152 "KS", "TS",
153 "ODP",
154 };
155 public static final String[] EXE_EXTENSIONS = {
156 "EXE", "DLL",
157 "LIB", "SO",
158 "A", "EXP",
159 };
160 public static final String[] KEYSTORE_EXTENSIONS = {
161 "JKS", "KEYSTORE", "PEM", "CRL"
162 };
163 public static final String[] IMAGE_EXTENSIONS = {
164 "PNG", "PDF",
165 "GIF", "GIFF",
166 "TIF", "TIFF",
167 "JPG", "JPEG",
168 "ICO", "ICNS",
169 };
170 public static final String[] BYTECODE_EXTENSIONS = {
171 "CLASS", "PYD",
172 "OBJ", "PYC",
173 };
174
175
176
177
178 public static final String[] NON_BINARY_EXTENSIONS = {
179 "AART",
180 "AC",
181 "AM",
182 "BAT",
183 "C",
184 "CAT",
185 "CGI",
186 "CLASSPATH",
187 "CMD",
188 "CONFIG",
189 "CPP",
190 "CSS",
191 "CWIKI",
192 "DATA",
193 "DCL",
194 "DTD",
195 "EGRM",
196 "ENT",
197 "FT",
198 "FN",
199 "FV",
200 "GRM",
201 "G",
202 "H",
203 "HTACCESS",
204 "HTML",
205 "IHTML",
206 "IN",
207 "JAVA",
208 "JMX",
209 "JSP",
210 "JS",
211 "JUNIT",
212 "JX",
213 "MANIFEST",
214 "M4",
215 "MF",
216 "MF",
217 "META",
218 "MOD",
219 "N3",
220 "PEN",
221 "PL",
222 "PM",
223 "POD",
224 "POM",
225 "PROJECT",
226 "PROPERTIES",
227 "PY",
228 "RB",
229 "RDF",
230 "RNC",
231 "RNG",
232 "RNX",
233 "ROLES",
234 "RSS",
235 "SH",
236 "SQL",
237 "SVG",
238 "TLD",
239 "TXT",
240 "TYPES",
241 "VM",
242 "VSL",
243 "WSDD",
244 "WSDL",
245 "XARGS",
246 "XCAT",
247 "XCONF",
248 "XEGRM",
249 "XGRM",
250 "XLEX",
251 "XLOG",
252 "XMAP",
253 "XML",
254 "XROLES",
255 "XSAMPLES",
256 "XSD",
257 "XSL",
258 "XSLT",
259 "XSP",
260 "XUL",
261 "XWEB",
262 "XWELCOME",
263 };
264 public static final String JAR_MANIFEST = "MANIFEST.MF";
265 public static final String JAVA = "JAVA";
266 public static final int HIGH_BYTES_RATIO = 100;
267 public static final int TOTAL_READ_RATIO = 30;
268 public static final int NON_ASCII_THREASHOLD = 256;
269 public static final int ASCII_CHAR_THREASHOLD = 8;
270
271 public static final boolean isBinary(final Document document) {
272
273
274
275 final String name = document.getName();
276 boolean result = isBinary(name);
277 if (!result)
278 {
279
280 result = isBinaryDocument(document);
281 }
282 return result;
283 }
284
285
286
287 }