001package com.randomnoun.common.jessop;
002
003/* (c) 2016 randomnoun. All Rights Reserved. This work is licensed under a
004 * BSD Simplified License. ( http://www.randomnoun.com/bsd-simplified.html ) 
005 */
006
007import javax.script.ScriptException;
008
009import org.apache.log4j.Logger;
010
011// could have some kind of lineCountingPrintWriter, but let's just keep that in the JSB class
012// going to use unix EOLs for everything for now
013// so should I have a Lexer here as well ? hmm. skip it for now.
014
015public class Tokeniser {
016
017        Logger logger = Logger.getLogger(Tokeniser.class);
018        int state;         // parse state
019        int charOffset;    // character number (from start of file); starts at 0
020        int line;          // source line number; starts at 1
021        int eline;         // expression start line. Whenever we emit anything, reset the eline to line
022        String unclosed;   // used in the error message if we hit EOF in an invalid state
023        StringBuilder sb;  // output stringBuilder
024        StringBuilder esb; // expression (or directive) stringBuilder
025
026        JessopScriptEngine jse;  // only used to access the registry of JessopScriptBuilders
027        JessopScriptBuilder jsb; // emit() methods are called on this during parsing
028        public Tokeniser(JessopScriptEngine jse, JessopScriptBuilder jsb) {
029                state = 0;
030                line = 1; eline = 1; 
031                charOffset = 0;
032                this.jse = jse;
033                this.jsb = jsb;
034                sb = new StringBuilder();
035                esb = new StringBuilder();
036        }
037        public void setJessopScriptBuilder(JessopScriptBuilder jsb) {
038                // use this to switch languages within the tokeniser
039                this.jsb = jsb;
040        }
041        public void parseChar(char ch) throws ScriptException {
042                charOffset++;
043                // logger.debug("state " + state + " ch " + ch );
044                switch (state) {
045                        case 0:  // initial state; parsing text to display
046                                if (ch=='<') {
047                                        state = 1;
048                                } else {
049                                        sb.append(ch);
050                                }
051                                break;
052                                
053                        case 1:  // parsed initial '<'
054                                if (ch=='%') {  // <% ... %> or <%= ... %>
055                                        if (sb.length()>0) {
056                                                jsb.emitText(eline, sb.toString());
057                                                sb.setLength(0);
058                                                eline = line;
059                                        }
060                                        unclosed = "<%";
061                                        state = 2;
062                                } else if (ch=='<') {
063                                        // normal '<' followed by an possible initial '<'
064                                        sb.append(ch);
065                                        unclosed = null;
066                                        state = 1;
067                                } else {
068                                        // just a normal tag
069                                        sb.append('<');
070                                        sb.append(ch);
071                                        unclosed = null;
072                                        state = 0;
073                                }
074                                break;
075                                
076                        case 2: // parsed initial '<%'
077                                if (ch == '=') {  // <%= ... %>
078                                        unclosed = "<%=";
079                                        state = 3;
080                                } else if (ch=='@') { // <%@ ... %> declaration 
081                                        unclosed = "<%@";
082                                        state = 5;
083                                } else if (ch=='!') { // <%! ... %> block
084                                        unclosed = "<%!";
085                                        state = 6;
086                                } else if (ch=='-') { // <%-- ... --%> block
087                                        unclosed = "<%--";
088                                        state = 7;
089                                } else {        // <%  ... %>   NB: no space required after '<%'
090                                        esb.append(ch);
091                                        state = 4;
092                                }
093                                break;
094                                
095                        case 3:
096                                if (ch=='%') {
097                                        state = 13;  // possibly closing % of <%= ... %> 
098                                } else {
099                                        esb.append(ch);
100                                }
101                                break;
102                                
103                        case 4:
104                                if (ch=='%') {
105                                        state = 14;  // possibly closing % of <% ... %> 
106                                } else {
107                                        esb.append(ch);
108                                }
109                                break;
110                                
111                        case 5:
112                                if (ch=='"') {
113                                        state = 16;  // start of directive attribute
114                                        esb.append(ch);
115                                } else if (ch=='%') { // closing % of <%@ ... %>
116                                        state = 15;
117                                } else {
118                                        esb.append(ch);
119                                }
120                                break;
121
122                        case 6:
123                                if (ch=='%') {
124                                        state = 16;  // possibly closing % of <%! ... %> 
125                                } else {
126                                        esb.append(ch);
127                                }
128                                break;
129                                
130                        case 7:
131                                if (ch=='-') {
132                                        state = 8;   // second '-' of starting <%-- ... --%>
133                                } else {
134                                        // could say that this is in state 4; e.g. <%-someFunction%>
135                                        // but I'm going to chuck an exception
136                                        throw new ScriptException("'<%-' can only start a '<%--' comment block", null, line);  // charOffset
137                                }
138                                break;
139                                
140                        case 8:
141                                if (ch=='-') {
142                                        state = 9;   // possibly close '-' of <%-- ... --%>
143                                } else {
144                                        // stay in state 8
145                                        // ignore comments
146                                }
147                                break;
148                                
149                        case 9:
150                                if (ch=='-') {
151                                        state = 10;  // possibly closing '--' of <%-- ... --%>
152                                } else {
153                                        state = 8;
154                                        // ignore comments
155                                }
156                                break;
157                        
158                        case 10:
159                                if (ch=='%') {   // possibly closing '--%' of <%-- ... --%>
160                                        state = 11;
161                                } else {
162                                        state = 8;
163                                        // ignore comments
164                                }
165                                
166                        case 11:
167                                if (ch=='>') {   // closing '--%>' of <%-- ... --%>
168                                        unclosed = null;
169                                        state = 0;
170                                } else {
171                                        state = 8; 
172                                }
173                                break;
174                                
175                        case 13:
176                                if (ch=='>') {   // closing '%>' of <%= ... %>
177                                        jsb.emitExpression(eline, esb.toString());
178                                        esb.setLength(0);
179                                        eline = line;
180                                        unclosed = null;
181                                        state = 0;
182                                } else {
183                                        esb.append(ch);
184                                        state = 3; 
185                                }
186                                break;
187                        
188                        case 14:
189                                if (ch=='>') {   // closing '%>' of <% ... %>
190                                        jsb.emitScriptlet(eline, esb.toString());
191                                        esb.setLength(0);
192                                        eline = line;
193                                        unclosed = null;
194                                        state = 0;
195                                } else {
196                                        esb.append(ch);
197                                        state = 4; 
198                                }
199                                break;
200                                
201                        case 15:
202                                if (ch=='>') {   // closing '%>' of <%@ ... %> declaration
203                                        jsb.emitDeclaration(eline, esb.toString());
204                                        esb.setLength(0);
205                                        eline = line;
206                                        unclosed = null;
207                                        state = 0;
208                                } else {
209                                        esb.append(ch);
210                                        state = 5; 
211                                }
212                                break;
213                        
214                        case 16:
215                                if (ch=='"') {   // closing quote of <%@ ... %> declaration attribute
216                                        esb.append(ch);
217                                        state = 5;
218                                } else {
219                                        esb.append(ch);
220                                        // stay in state 16
221                                }
222                                break;
223                }
224                
225                if (ch=='\n') { line++; }                       
226        }
227        
228        public void parseEndOfFile() throws ScriptException {
229                // emit anything that's left, raise exceptions if in invalid state
230                // logger.debug("state " + state + " EOF");
231                if (state != 0 && unclosed != null) {
232                        throw new ScriptException("unexpected EOF (unclosed '" + unclosed + "')", null, line); // charOffset
233                }
234                if (sb.length()>0) {
235                        jsb.emitText(eline, sb.toString());
236                        sb.setLength(0);
237                        eline = line;
238                }
239        }
240}