View Javadoc
1   package com.randomnoun.common.db;
2   
3   import java.io.IOException;
4   import java.io.InputStream;
5   import java.io.InputStreamReader;
6   import java.io.Reader;
7   import java.text.ParseException;
8   import java.util.ArrayList;
9   import java.util.List;
10  import java.util.regex.Matcher;
11  import java.util.regex.Pattern;
12  
13  /** A class to read an InputStream containing SQL statements (e.g. a MySQL input file) and split it into 
14   * individual SQL statements.
15   * 
16   * <p>Some MySQL command-line directives (e.g. 'DELIMITER') are processed whilst parsing. 
17   */
18  public class SqlParser {
19  	
20  	private static Pattern MYSQL_DELIMITER_COMMAND = Pattern.compile("delimiter\\s+(.*)$", Pattern.CASE_INSENSITIVE);
21  	
22  	// could have a ParseState that contains things like the current delimiter, or current database
23  	// (for 'use' commands), which can be modified by the InputStream, or the database vendor
24  	// if we wanted to support escape sequences, which appear to be database-specific.
25  	
26  	/** Convert an InputStream of SQL statements into a List of individual
27  	 * statements. Statements are delimited by ";" strings that occur outside of strings or comments.
28  	 * The delimiter string is not included in the returned list of statements.
29  	 * 
30  	 * <p>The delimiter string may be changed using the 'delimiter str' command. 
31  	 * This command may end with a newline instead of a delimiter.
32  	 * 
33  	 * <p>Comments may also be returned in the result.
34  	 * <p>Comments are defined by '-- to-end-of-line' or '/* within slash-star star-slash *&#42;/' syntax.
35  	 * Comments that are created with '--' and that occur within a statement are returned before that statement
36  	 * has finished parsing.
37  	 * 
38  	 * <p>NB: Does not handle escape sequences found within double or single quotes
39  	 * (i.e. escape sequences are handled by the underlying database, not by this Parser)
40  	 * 
41  	 * @param is inputStream to parse
42  	 * @param includeComments include comment strings in result
43  	 * 
44  	 * @throws IOException 
45  	 * @throws ParseException unclosed /*-style comment or single/double-quoted string
46  	 */
47  	public List<String> parseStatements(InputStream is, boolean includeComments) throws IOException, ParseException {
48  		return parseStatements(new InputStreamReader(is), includeComments);
49  	}
50  	
51  	/** Convert an InputStream of SQL statements into a List of individual
52  	 * statements. Statements are delimited by ";" strings that occur outside of strings or comments.
53  	 * The delimiter string is not included in the returned list of statements.
54  	 * 
55  	 * <p>The delimiter string may be changed using the 'delimiter str' command. 
56  	 * This command may end with a newline instead of a delimiter.
57  	 * 
58  	 * <p>Comments may also be returned in the result.
59  	 * <p>Comments are defined by '-- to-end-of-line' or '/* within slash-star star-slash *&#42;/' syntax.
60  	 * Comments that are created with '--' and that occur within a statement are returned before that statement
61  	 * has finished parsing.
62  	 * 
63  	 * <p>NB: Does not handle escape sequences found within double or single quotes
64  	 * (i.e. escape sequences are handled by the underlying database, not by this Parser)
65  	 * 
66  	 * @param reader Reader to parse
67  	 * @param includeComments include comment strings in result
68  	 * 
69  	 * @throws IOException 
70  	 * @throws ParseException unclosed /*-style comment or single/double-quoted string
71  	 */
72  	public List<String> parseStatements(Reader r, boolean includeComments) 
73  		throws IOException, ParseException 
74  	{
75  		final List<String> allSql = new ArrayList<String>();
76  		consumeStatements(r, includeComments, new Consumer<String>() {
77  			@Override
78  			public void consume(String s) {
79  				allSql.add(s);
80  			}
81  		});
82  		return allSql;
83  	}
84  
85  	public static interface Consumer<T> {
86  		public void consume(T s);
87  	}
88  	
89  	/** convert a text file of SQL statements into a List of individual
90  	 * statements. Comments may also be returned in the result.
91  	 * 
92  	 * <p>NB: Does not handle escape sequences found within double or single quotes
93  	 * 
94  	 * @param includeComments include comment strings in result
95  	 * 
96  	 * @throws IOException 
97  	 * @throws ParseException unclosed /*-style comment or single/double-quoted string
98  	 */
99  	public void consumeStatements(InputStream is, boolean includeComments, Consumer<String> consumer) 
100 			throws IOException, ParseException {
101 		consumeStatements(new InputStreamReader(is), includeComments, consumer);
102 	}
103 	
104 	public void consumeStatements(Reader is, boolean includeComments, Consumer<String> consumer) 
105 		throws IOException, ParseException 
106 	{
107 		// @TODO some error handling
108 		// List allSql = new ArrayList<String>();
109 		
110 		int state = 0; // command parsing [to states 0, 1, 2, 4, 7, 8]
111 		// 1 // parsed double quote, in double quotes [to states 0, 1, 9]
112 		// 2 // parsed - [to states 0, 3]
113 		// 3 // parsed -- [ to states 0, 3]
114 		// 4 // parsed /  [ to states 0, 5]
115 		// 5 // parsed /* [ to states 6, 5]
116 		// 6 // parsed * from state 5  [ to states 0, 5]
117 		// 7 // parse single quote, in single quotes [to states 0, 7, 10]
118 		// 8 // parsed backslash in SQL, next character will be emitted without changing state [then to state 0]
119 		// 9 // parsed backslash in double quote, next char emitted without changing state [then to state 1]
120 		// 10 // parsed backslash in single quote, next char emitted without changing state [then to state 7]
121 
122 		String s = "";   // current statement
123 		String snc = ""; // current statment without comments
124 		String c = "";   // current comment
125 		String delimiter = ";"; // default delimiter
126 
127 		int intch = is.read();
128 		int delimIdx = 0; // number of delimiter characters read
129 		while (intch!=-1) {
130 			char ch = (char) intch;
131 			
132 			if (state==0) {
133 				if (ch == delimiter.charAt(delimIdx)) {
134 					delimIdx++;
135 					if (delimiter.length()==delimIdx) {
136 						s = s.trim(); delimIdx = 0; // was s.trim() + ";"
137 						// could check for all commands at http://dev.mysql.com/doc/refman/5.7/en/mysql-commands.html
138 						// but let's just implement 'DELIMITER' for now
139 						Matcher m = MYSQL_DELIMITER_COMMAND.matcher(s);
140 						if (m.matches()) {
141 							delimiter = m.group(1); // set the new delimiter
142 						} else {
143 							consumer.consume(s); 
144 						}
145 						s="";
146 					} else {
147 						state = 0; 
148 					}
149 				} else {
150 					if (delimIdx > 0) {
151 						// could push these back onto the inputStream in case the delimiter startsWith a " or -, but that seems a bit fiddly
152 						s = s + delimiter.substring(0, delimIdx); delimIdx = 0; 
153 					}
154 					switch(ch) {
155 						case '"' : state = 1; s = s + ch; break;
156 						case '-' : state = 2; break;
157 						case '/' : state = 4; break;
158 						case '\'' : state = 7; s = s + ch; break;
159 						case '\r' :
160 						case '\n' :
161 							// if this is a delimiter command, process it
162 							Matcher m = MYSQL_DELIMITER_COMMAND.matcher(s.trim());
163 							if (m.matches()) {
164 								delimiter = m.group(1); // set the new delimiter
165 								s = ""; delimIdx = 0;
166 							} else {
167 								s = s + ch;
168 							}
169 							break;
170 						case '\\': state = 8; s = s + ch; break;							
171 						default: s = s + ch;
172 					}
173 				}
174 				
175 			} else if (state==1) {
176 				switch(ch) {
177 					case '"' : state = 0; s = s + ch; break;
178 					case '\\': state = 9; s = s + ch; break;
179 					default: s = s + ch;
180 				}
181 			} else if (state==2) {
182 				switch(ch) {
183 					case '-' : state = 3; break;
184 					default: state = 0; s = s + "-" + ch;
185 				}
186 			} else if (state==3) {
187 				switch(ch) {
188 					case '\r' : 
189 					case '\n' : 
190 						state = 0; 
191 						if (includeComments) {
192 							// include comments in the statements the comments appear in, unless this is a comment outside of a statement
193 							s += "-- " + c.trim();
194 							if (s.trim().startsWith("--")) {
195 								consumer.consume(s.trim());
196 								s = "";
197 							} else {
198 								s += "\n";
199 							}
200 						}
201 						c = ""; break;
202 					default :
203 						c = c + ch;
204 				}
205 			} else if (state==4) {
206 				switch(ch) {
207 					case '*' : state = 5; break;
208 					default: state = 0; s = s + "/" + ch;
209 				}
210 			} else if (state==5) {
211 				switch(ch) {
212 					case '*' : state = 6; break;
213 					default: c = c + ch;
214 				}
215 			} else if (state==6) {
216 				switch(ch) {
217 					case '/' : 
218 						state = 0; 
219 						if (includeComments) {
220 							// include comments in the statements the comments appear in, unless this is a comment outside of a statement
221 							s += ("/* " + c.trim() + " */");
222 							if (s.startsWith("/*")) {
223 								consumer.consume(s);
224 								s = "";
225 							}
226 						} 
227 						c=""; break;
228 					default: c = c + "*" + ch;
229 				}
230 			} else if (state==7) {
231 				switch(ch) {
232 					case '\'' : state = 0; s = s + ch; break;
233 					case '\\' : state = 10; s = s + ch; break;
234 					default: s = s + ch;
235 				}
236 			} else if (state==8) {
237 				// could check ch is printable here, but probably ok
238 				s = s + ch;
239 				state = 0;
240 			} else if (state==9) {
241 				s = s + ch;
242 				state = 1;
243 			} else if (state==10) {
244 				s = s + ch;
245 				state = 7;
246 			}
247 			
248 			intch = is.read();
249 		}
250 		
251 		if (state==5) {
252 			// unclosed /*-style comment
253 			// ignore for the time being
254 			throw new ParseException("Unclosed /*-style comment before EOF", -1);
255 		} else if (state==1) {
256 			// unclosed quoted string
257 			// ignore for the time being
258 			throw new ParseException("Unclosed double quoted string before EOF", -1);
259 		} else if (state==7) {
260 			// unclosed quoted string
261 			// ignore for the time being
262 			throw new ParseException("Unclosed single quoted string before EOF", -1);
263 		} else if (state==0) {
264 			if (!s.trim().equals("")) {
265 				// unterminated statement at end of InputStream; add to list
266 				consumer.consume(s.trim());
267 			}
268 		}
269 		
270 		// return allSql;
271 	}
272 
273 
274 
275 }