1 package com.randomnoun.common.db;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.io.InputStreamReader;
6 import java.io.Reader;
7 import java.text.ParseException;
8 import java.util.ArrayList;
9 import java.util.List;
10 import java.util.regex.Matcher;
11 import java.util.regex.Pattern;
12
13 /** A class to read an InputStream containing SQL statements (e.g. a MySQL input file) and split it into
14 * individual SQL statements.
15 *
16 * <p>Some MySQL command-line directives (e.g. 'DELIMITER') are processed whilst parsing.
17 */
18 public class SqlParser {
19
20 private static Pattern MYSQL_DELIMITER_COMMAND = Pattern.compile("delimiter\\s+(.*)$", Pattern.CASE_INSENSITIVE);
21
22 // could have a ParseState that contains things like the current delimiter, or current database
23 // (for 'use' commands), which can be modified by the InputStream, or the database vendor
24 // if we wanted to support escape sequences, which appear to be database-specific.
25
26 /** Convert an InputStream of SQL statements into a List of individual
27 * statements. Statements are delimited by ";" strings that occur outside of strings or comments.
28 * The delimiter string is not included in the returned list of statements.
29 *
30 * <p>The delimiter string may be changed using the 'delimiter str' command.
31 * This command may end with a newline instead of a delimiter.
32 *
33 * <p>Comments may also be returned in the result.
34 * <p>Comments are defined by '-- to-end-of-line' or '/* within slash-star star-slash **/' syntax.
35 * Comments that are created with '--' and that occur within a statement are returned before that statement
36 * has finished parsing.
37 *
38 * <p>NB: Does not handle escape sequences found within double or single quotes
39 * (i.e. escape sequences are handled by the underlying database, not by this Parser)
40 *
41 * @param is inputStream to parse
42 * @param includeComments include comment strings in result
43 *
44 * @throws IOException
45 * @throws ParseException unclosed /*-style comment or single/double-quoted string
46 */
47 public List<String> parseStatements(InputStream is, boolean includeComments) throws IOException, ParseException {
48 return parseStatements(new InputStreamReader(is), includeComments);
49 }
50
51 /** Convert an InputStream of SQL statements into a List of individual
52 * statements. Statements are delimited by ";" strings that occur outside of strings or comments.
53 * The delimiter string is not included in the returned list of statements.
54 *
55 * <p>The delimiter string may be changed using the 'delimiter str' command.
56 * This command may end with a newline instead of a delimiter.
57 *
58 * <p>Comments may also be returned in the result.
59 * <p>Comments are defined by '-- to-end-of-line' or '/* within slash-star star-slash **/' syntax.
60 * Comments that are created with '--' and that occur within a statement are returned before that statement
61 * has finished parsing.
62 *
63 * <p>NB: Does not handle escape sequences found within double or single quotes
64 * (i.e. escape sequences are handled by the underlying database, not by this Parser)
65 *
66 * @param reader Reader to parse
67 * @param includeComments include comment strings in result
68 *
69 * @throws IOException
70 * @throws ParseException unclosed /*-style comment or single/double-quoted string
71 */
72 public List<String> parseStatements(Reader r, boolean includeComments)
73 throws IOException, ParseException
74 {
75 final List<String> allSql = new ArrayList<String>();
76 consumeStatements(r, includeComments, new Consumer<String>() {
77 @Override
78 public void consume(String s) {
79 allSql.add(s);
80 }
81 });
82 return allSql;
83 }
84
85 public static interface Consumer<T> {
86 public void consume(T s);
87 }
88
89 /** convert a text file of SQL statements into a List of individual
90 * statements. Comments may also be returned in the result.
91 *
92 * <p>NB: Does not handle escape sequences found within double or single quotes
93 *
94 * @param includeComments include comment strings in result
95 *
96 * @throws IOException
97 * @throws ParseException unclosed /*-style comment or single/double-quoted string
98 */
99 public void consumeStatements(InputStream is, boolean includeComments, Consumer<String> consumer)
100 throws IOException, ParseException {
101 consumeStatements(new InputStreamReader(is), includeComments, consumer);
102 }
103
104 public void consumeStatements(Reader is, boolean includeComments, Consumer<String> consumer)
105 throws IOException, ParseException
106 {
107 // @TODO some error handling
108 // List allSql = new ArrayList<String>();
109
110 int state = 0; // command parsing [to states 0, 1, 2, 4, 7, 8]
111 // 1 // parsed double quote, in double quotes [to states 0, 1, 9]
112 // 2 // parsed - [to states 0, 3]
113 // 3 // parsed -- [ to states 0, 3]
114 // 4 // parsed / [ to states 0, 5]
115 // 5 // parsed /* [ to states 6, 5]
116 // 6 // parsed * from state 5 [ to states 0, 5]
117 // 7 // parse single quote, in single quotes [to states 0, 7, 10]
118 // 8 // parsed backslash in SQL, next character will be emitted without changing state [then to state 0]
119 // 9 // parsed backslash in double quote, next char emitted without changing state [then to state 1]
120 // 10 // parsed backslash in single quote, next char emitted without changing state [then to state 7]
121
122 String s = ""; // current statement
123 String snc = ""; // current statment without comments
124 String c = ""; // current comment
125 String delimiter = ";"; // default delimiter
126
127 int intch = is.read();
128 int delimIdx = 0; // number of delimiter characters read
129 while (intch!=-1) {
130 char ch = (char) intch;
131
132 if (state==0) {
133 if (ch == delimiter.charAt(delimIdx)) {
134 delimIdx++;
135 if (delimiter.length()==delimIdx) {
136 s = s.trim(); delimIdx = 0; // was s.trim() + ";"
137 // could check for all commands at http://dev.mysql.com/doc/refman/5.7/en/mysql-commands.html
138 // but let's just implement 'DELIMITER' for now
139 Matcher m = MYSQL_DELIMITER_COMMAND.matcher(s);
140 if (m.matches()) {
141 delimiter = m.group(1); // set the new delimiter
142 } else {
143 consumer.consume(s);
144 }
145 s="";
146 } else {
147 state = 0;
148 }
149 } else {
150 if (delimIdx > 0) {
151 // could push these back onto the inputStream in case the delimiter startsWith a " or -, but that seems a bit fiddly
152 s = s + delimiter.substring(0, delimIdx); delimIdx = 0;
153 }
154 switch(ch) {
155 case '"' : state = 1; s = s + ch; break;
156 case '-' : state = 2; break;
157 case '/' : state = 4; break;
158 case '\'' : state = 7; s = s + ch; break;
159 case '\r' :
160 case '\n' :
161 // if this is a delimiter command, process it
162 Matcher m = MYSQL_DELIMITER_COMMAND.matcher(s.trim());
163 if (m.matches()) {
164 delimiter = m.group(1); // set the new delimiter
165 s = ""; delimIdx = 0;
166 } else {
167 s = s + ch;
168 }
169 break;
170 case '\\': state = 8; s = s + ch; break;
171 default: s = s + ch;
172 }
173 }
174
175 } else if (state==1) {
176 switch(ch) {
177 case '"' : state = 0; s = s + ch; break;
178 case '\\': state = 9; s = s + ch; break;
179 default: s = s + ch;
180 }
181 } else if (state==2) {
182 switch(ch) {
183 case '-' : state = 3; break;
184 default: state = 0; s = s + "-" + ch;
185 }
186 } else if (state==3) {
187 switch(ch) {
188 case '\r' :
189 case '\n' :
190 state = 0;
191 if (includeComments) {
192 // include comments in the statements the comments appear in, unless this is a comment outside of a statement
193 s += "-- " + c.trim();
194 if (s.trim().startsWith("--")) {
195 consumer.consume(s.trim());
196 s = "";
197 } else {
198 s += "\n";
199 }
200 }
201 c = ""; break;
202 default :
203 c = c + ch;
204 }
205 } else if (state==4) {
206 switch(ch) {
207 case '*' : state = 5; break;
208 default: state = 0; s = s + "/" + ch;
209 }
210 } else if (state==5) {
211 switch(ch) {
212 case '*' : state = 6; break;
213 default: c = c + ch;
214 }
215 } else if (state==6) {
216 switch(ch) {
217 case '/' :
218 state = 0;
219 if (includeComments) {
220 // include comments in the statements the comments appear in, unless this is a comment outside of a statement
221 s += ("/* " + c.trim() + " */");
222 if (s.startsWith("/*")) {
223 consumer.consume(s);
224 s = "";
225 }
226 }
227 c=""; break;
228 default: c = c + "*" + ch;
229 }
230 } else if (state==7) {
231 switch(ch) {
232 case '\'' : state = 0; s = s + ch; break;
233 case '\\' : state = 10; s = s + ch; break;
234 default: s = s + ch;
235 }
236 } else if (state==8) {
237 // could check ch is printable here, but probably ok
238 s = s + ch;
239 state = 0;
240 } else if (state==9) {
241 s = s + ch;
242 state = 1;
243 } else if (state==10) {
244 s = s + ch;
245 state = 7;
246 }
247
248 intch = is.read();
249 }
250
251 if (state==5) {
252 // unclosed /*-style comment
253 // ignore for the time being
254 throw new ParseException("Unclosed /*-style comment before EOF", -1);
255 } else if (state==1) {
256 // unclosed quoted string
257 // ignore for the time being
258 throw new ParseException("Unclosed double quoted string before EOF", -1);
259 } else if (state==7) {
260 // unclosed quoted string
261 // ignore for the time being
262 throw new ParseException("Unclosed single quoted string before EOF", -1);
263 } else if (state==0) {
264 if (!s.trim().equals("")) {
265 // unterminated statement at end of InputStream; add to list
266 consumer.consume(s.trim());
267 }
268 }
269
270 // return allSql;
271 }
272
273
274
275 }