001package com.randomnoun.common.jessop; 002 003/* (c) 2016 randomnoun. All Rights Reserved. This work is licensed under a 004 * BSD Simplified License. ( http://www.randomnoun.com/bsd-simplified.html ) 005 */ 006 007import javax.script.ScriptException; 008 009import org.apache.log4j.Logger; 010 011// could have some kind of lineCountingPrintWriter, but let's just keep that in the JSB class 012// going to use unix EOLs for everything for now 013// so should I have a Lexer here as well ? hmm. skip it for now. 014 015public class Tokeniser { 016 017 Logger logger = Logger.getLogger(Tokeniser.class); 018 int state; // parse state 019 int charOffset; // character number (from start of file); starts at 0 020 int line; // source line number; starts at 1 021 int eline; // expression start line. Whenever we emit anything, reset the eline to line 022 String unclosed; // used in the error message if we hit EOF in an invalid state 023 StringBuilder sb; // output stringBuilder 024 StringBuilder esb; // expression (or directive) stringBuilder 025 026 JessopScriptEngine jse; // only used to access the registry of JessopScriptBuilders 027 JessopScriptBuilder jsb; // emit() methods are called on this during parsing 028 public Tokeniser(JessopScriptEngine jse, JessopScriptBuilder jsb) { 029 state = 0; 030 line = 1; eline = 1; 031 charOffset = 0; 032 this.jse = jse; 033 this.jsb = jsb; 034 sb = new StringBuilder(); 035 esb = new StringBuilder(); 036 } 037 public void setJessopScriptBuilder(JessopScriptBuilder jsb) { 038 // use this to switch languages within the tokeniser 039 this.jsb = jsb; 040 } 041 public void parseChar(char ch) throws ScriptException { 042 charOffset++; 043 // logger.debug("state " + state + " ch " + ch ); 044 switch (state) { 045 case 0: // initial state; parsing text to display 046 if (ch=='<') { 047 state = 1; 048 } else { 049 sb.append(ch); 050 } 051 break; 052 053 case 1: // parsed initial '<' 054 if (ch=='%') { // <% ... %> or <%= ... %> 055 if (sb.length()>0) { 056 jsb.emitText(eline, sb.toString()); 057 sb.setLength(0); 058 eline = line; 059 } 060 unclosed = "<%"; 061 state = 2; 062 } else if (ch=='<') { 063 // normal '<' followed by an possible initial '<' 064 sb.append(ch); 065 unclosed = null; 066 state = 1; 067 } else { 068 // just a normal tag 069 sb.append('<'); 070 sb.append(ch); 071 unclosed = null; 072 state = 0; 073 } 074 break; 075 076 case 2: // parsed initial '<%' 077 if (ch == '=') { // <%= ... %> 078 unclosed = "<%="; 079 state = 3; 080 } else if (ch=='@') { // <%@ ... %> declaration 081 unclosed = "<%@"; 082 state = 5; 083 } else if (ch=='!') { // <%! ... %> block 084 unclosed = "<%!"; 085 state = 6; 086 } else if (ch=='-') { // <%-- ... --%> block 087 unclosed = "<%--"; 088 state = 7; 089 } else { // <% ... %> NB: no space required after '<%' 090 esb.append(ch); 091 state = 4; 092 } 093 break; 094 095 case 3: 096 if (ch=='%') { 097 state = 13; // possibly closing % of <%= ... %> 098 } else { 099 esb.append(ch); 100 } 101 break; 102 103 case 4: 104 if (ch=='%') { 105 state = 14; // possibly closing % of <% ... %> 106 } else { 107 esb.append(ch); 108 } 109 break; 110 111 case 5: 112 if (ch=='"') { 113 state = 16; // start of directive attribute 114 esb.append(ch); 115 } else if (ch=='%') { // closing % of <%@ ... %> 116 state = 15; 117 } else { 118 esb.append(ch); 119 } 120 break; 121 122 case 6: 123 if (ch=='%') { 124 state = 16; // possibly closing % of <%! ... %> 125 } else { 126 esb.append(ch); 127 } 128 break; 129 130 case 7: 131 if (ch=='-') { 132 state = 8; // second '-' of starting <%-- ... --%> 133 } else { 134 // could say that this is in state 4; e.g. <%-someFunction%> 135 // but I'm going to chuck an exception 136 throw new ScriptException("'<%-' can only start a '<%--' comment block", null, line); // charOffset 137 } 138 break; 139 140 case 8: 141 if (ch=='-') { 142 state = 9; // possibly close '-' of <%-- ... --%> 143 } else { 144 // stay in state 8 145 // ignore comments 146 } 147 break; 148 149 case 9: 150 if (ch=='-') { 151 state = 10; // possibly closing '--' of <%-- ... --%> 152 } else { 153 state = 8; 154 // ignore comments 155 } 156 break; 157 158 case 10: 159 if (ch=='%') { // possibly closing '--%' of <%-- ... --%> 160 state = 11; 161 } else { 162 state = 8; 163 // ignore comments 164 } 165 166 case 11: 167 if (ch=='>') { // closing '--%>' of <%-- ... --%> 168 unclosed = null; 169 state = 0; 170 } else { 171 state = 8; 172 } 173 break; 174 175 case 13: 176 if (ch=='>') { // closing '%>' of <%= ... %> 177 jsb.emitExpression(eline, esb.toString()); 178 esb.setLength(0); 179 eline = line; 180 unclosed = null; 181 state = 0; 182 } else { 183 esb.append(ch); 184 state = 3; 185 } 186 break; 187 188 case 14: 189 if (ch=='>') { // closing '%>' of <% ... %> 190 jsb.emitScriptlet(eline, esb.toString()); 191 esb.setLength(0); 192 eline = line; 193 unclosed = null; 194 state = 0; 195 } else { 196 esb.append(ch); 197 state = 4; 198 } 199 break; 200 201 case 15: 202 if (ch=='>') { // closing '%>' of <%@ ... %> declaration 203 jsb.emitDeclaration(eline, esb.toString()); 204 esb.setLength(0); 205 eline = line; 206 unclosed = null; 207 state = 0; 208 } else { 209 esb.append(ch); 210 state = 5; 211 } 212 break; 213 214 case 16: 215 if (ch=='"') { // closing quote of <%@ ... %> declaration attribute 216 esb.append(ch); 217 state = 5; 218 } else { 219 esb.append(ch); 220 // stay in state 16 221 } 222 break; 223 } 224 225 if (ch=='\n') { line++; } 226 } 227 228 public void parseEndOfFile() throws ScriptException { 229 // emit anything that's left, raise exceptions if in invalid state 230 // logger.debug("state " + state + " EOF"); 231 if (state != 0 && unclosed != null) { 232 throw new ScriptException("unexpected EOF (unclosed '" + unclosed + "')", null, line); // charOffset 233 } 234 if (sb.length()>0) { 235 jsb.emitText(eline, sb.toString()); 236 sb.setLength(0); 237 eline = line; 238 } 239 } 240}