001package com.randomnoun.common.db; 002 003import java.io.IOException; 004import java.io.InputStream; 005import java.io.InputStreamReader; 006import java.io.Reader; 007import java.text.ParseException; 008import java.util.ArrayList; 009import java.util.List; 010import java.util.regex.Matcher; 011import java.util.regex.Pattern; 012 013/** A class to read an InputStream containing SQL statements (e.g. a MySQL input file) and split it into 014 * individual SQL statements. 015 * 016 * <p>Some MySQL command-line directives (e.g. 'DELIMITER') are processed whilst parsing. 017 */ 018public class SqlParser { 019 020 private static Pattern MYSQL_DELIMITER_COMMAND = Pattern.compile("delimiter\\s+(.*)$", Pattern.CASE_INSENSITIVE); 021 022 // could have a ParseState that contains things like the current delimiter, or current database 023 // (for 'use' commands), which can be modified by the InputStream, or the database vendor 024 // if we wanted to support escape sequences, which appear to be database-specific. 025 026 /** Convert an InputStream of SQL statements into a List of individual 027 * statements. Statements are delimited by ";" strings that occur outside of strings or comments. 028 * The delimiter string is not included in the returned list of statements. 029 * 030 * <p>The delimiter string may be changed using the 'delimiter str' command. 031 * This command may end with a newline instead of a delimiter. 032 * 033 * <p>Comments may also be returned in the result. 034 * <p>Comments are defined by '-- to-end-of-line' or '/* within slash-star star-slash **/' syntax. 035 * Comments that are created with '--' and that occur within a statement are returned before that statement 036 * has finished parsing. 037 * 038 * <p>NB: Does not handle escape sequences found within double or single quotes 039 * (i.e. escape sequences are handled by the underlying database, not by this Parser) 040 * 041 * @param is inputStream to parse 042 * @param includeComments include comment strings in result 043 * 044 * @throws IOException 045 * @throws ParseException unclosed /*-style comment or single/double-quoted string 046 */ 047 public List<String> parseStatements(InputStream is, boolean includeComments) throws IOException, ParseException { 048 return parseStatements(new InputStreamReader(is), includeComments); 049 } 050 051 /** Convert an InputStream of SQL statements into a List of individual 052 * statements. Statements are delimited by ";" strings that occur outside of strings or comments. 053 * The delimiter string is not included in the returned list of statements. 054 * 055 * <p>The delimiter string may be changed using the 'delimiter str' command. 056 * This command may end with a newline instead of a delimiter. 057 * 058 * <p>Comments may also be returned in the result. 059 * <p>Comments are defined by '-- to-end-of-line' or '/* within slash-star star-slash **/' syntax. 060 * Comments that are created with '--' and that occur within a statement are returned before that statement 061 * has finished parsing. 062 * 063 * <p>NB: Does not handle escape sequences found within double or single quotes 064 * (i.e. escape sequences are handled by the underlying database, not by this Parser) 065 * 066 * @param reader Reader to parse 067 * @param includeComments include comment strings in result 068 * 069 * @throws IOException 070 * @throws ParseException unclosed /*-style comment or single/double-quoted string 071 */ 072 public List<String> parseStatements(Reader r, boolean includeComments) 073 throws IOException, ParseException 074 { 075 final List<String> allSql = new ArrayList<String>(); 076 consumeStatements(r, includeComments, new Consumer<String>() { 077 @Override 078 public void consume(String s) { 079 allSql.add(s); 080 } 081 }); 082 return allSql; 083 } 084 085 public static interface Consumer<T> { 086 public void consume(T s); 087 } 088 089 /** convert a text file of SQL statements into a List of individual 090 * statements. Comments may also be returned in the result. 091 * 092 * <p>NB: Does not handle escape sequences found within double or single quotes 093 * 094 * @param includeComments include comment strings in result 095 * 096 * @throws IOException 097 * @throws ParseException unclosed /*-style comment or single/double-quoted string 098 */ 099 public void consumeStatements(InputStream is, boolean includeComments, Consumer<String> consumer) 100 throws IOException, ParseException { 101 consumeStatements(new InputStreamReader(is), includeComments, consumer); 102 } 103 104 public void consumeStatements(Reader is, boolean includeComments, Consumer<String> consumer) 105 throws IOException, ParseException 106 { 107 // @TODO some error handling 108 // List allSql = new ArrayList<String>(); 109 110 int state = 0; // command parsing [to states 0, 1, 2, 4, 7, 8] 111 // 1 // parsed double quote, in double quotes [to states 0, 1, 9] 112 // 2 // parsed - [to states 0, 3] 113 // 3 // parsed -- [ to states 0, 3] 114 // 4 // parsed / [ to states 0, 5] 115 // 5 // parsed /* [ to states 6, 5] 116 // 6 // parsed * from state 5 [ to states 0, 5] 117 // 7 // parse single quote, in single quotes [to states 0, 7, 10] 118 // 8 // parsed backslash in SQL, next character will be emitted without changing state [then to state 0] 119 // 9 // parsed backslash in double quote, next char emitted without changing state [then to state 1] 120 // 10 // parsed backslash in single quote, next char emitted without changing state [then to state 7] 121 122 String s = ""; // current statement 123 String snc = ""; // current statment without comments 124 String c = ""; // current comment 125 String delimiter = ";"; // default delimiter 126 127 int intch = is.read(); 128 int delimIdx = 0; // number of delimiter characters read 129 while (intch!=-1) { 130 char ch = (char) intch; 131 132 if (state==0) { 133 if (ch == delimiter.charAt(delimIdx)) { 134 delimIdx++; 135 if (delimiter.length()==delimIdx) { 136 s = s.trim(); delimIdx = 0; // was s.trim() + ";" 137 // could check for all commands at http://dev.mysql.com/doc/refman/5.7/en/mysql-commands.html 138 // but let's just implement 'DELIMITER' for now 139 Matcher m = MYSQL_DELIMITER_COMMAND.matcher(s); 140 if (m.matches()) { 141 delimiter = m.group(1); // set the new delimiter 142 } else { 143 consumer.consume(s); 144 } 145 s=""; 146 } else { 147 state = 0; 148 } 149 } else { 150 if (delimIdx > 0) { 151 // could push these back onto the inputStream in case the delimiter startsWith a " or -, but that seems a bit fiddly 152 s = s + delimiter.substring(0, delimIdx); delimIdx = 0; 153 } 154 switch(ch) { 155 case '"' : state = 1; s = s + ch; break; 156 case '-' : state = 2; break; 157 case '/' : state = 4; break; 158 case '\'' : state = 7; s = s + ch; break; 159 case '\r' : 160 case '\n' : 161 // if this is a delimiter command, process it 162 Matcher m = MYSQL_DELIMITER_COMMAND.matcher(s.trim()); 163 if (m.matches()) { 164 delimiter = m.group(1); // set the new delimiter 165 s = ""; delimIdx = 0; 166 } else { 167 s = s + ch; 168 } 169 break; 170 case '\\': state = 8; s = s + ch; break; 171 default: s = s + ch; 172 } 173 } 174 175 } else if (state==1) { 176 switch(ch) { 177 case '"' : state = 0; s = s + ch; break; 178 case '\\': state = 9; s = s + ch; break; 179 default: s = s + ch; 180 } 181 } else if (state==2) { 182 switch(ch) { 183 case '-' : state = 3; break; 184 default: state = 0; s = s + "-" + ch; 185 } 186 } else if (state==3) { 187 switch(ch) { 188 case '\r' : 189 case '\n' : 190 state = 0; 191 if (includeComments) { 192 // include comments in the statements the comments appear in, unless this is a comment outside of a statement 193 s += "-- " + c.trim(); 194 if (s.trim().startsWith("--")) { 195 consumer.consume(s.trim()); 196 s = ""; 197 } else { 198 s += "\n"; 199 } 200 } 201 c = ""; break; 202 default : 203 c = c + ch; 204 } 205 } else if (state==4) { 206 switch(ch) { 207 case '*' : state = 5; break; 208 default: state = 0; s = s + "/" + ch; 209 } 210 } else if (state==5) { 211 switch(ch) { 212 case '*' : state = 6; break; 213 default: c = c + ch; 214 } 215 } else if (state==6) { 216 switch(ch) { 217 case '/' : 218 state = 0; 219 if (includeComments) { 220 // include comments in the statements the comments appear in, unless this is a comment outside of a statement 221 s += ("/* " + c.trim() + " */"); 222 if (s.startsWith("/*")) { 223 consumer.consume(s); 224 s = ""; 225 } 226 } 227 c=""; break; 228 default: c = c + "*" + ch; 229 } 230 } else if (state==7) { 231 switch(ch) { 232 case '\'' : state = 0; s = s + ch; break; 233 case '\\' : state = 10; s = s + ch; break; 234 default: s = s + ch; 235 } 236 } else if (state==8) { 237 // could check ch is printable here, but probably ok 238 s = s + ch; 239 state = 0; 240 } else if (state==9) { 241 s = s + ch; 242 state = 1; 243 } else if (state==10) { 244 s = s + ch; 245 state = 7; 246 } 247 248 intch = is.read(); 249 } 250 251 if (state==5) { 252 // unclosed /*-style comment 253 // ignore for the time being 254 throw new ParseException("Unclosed /*-style comment before EOF", -1); 255 } else if (state==1) { 256 // unclosed quoted string 257 // ignore for the time being 258 throw new ParseException("Unclosed double quoted string before EOF", -1); 259 } else if (state==7) { 260 // unclosed quoted string 261 // ignore for the time being 262 throw new ParseException("Unclosed single quoted string before EOF", -1); 263 } else if (state==0) { 264 if (!s.trim().equals("")) { 265 // unterminated statement at end of InputStream; add to list 266 consumer.consume(s.trim()); 267 } 268 } 269 270 // return allSql; 271 } 272 273 274 275}