001package org.hl7.fhir.dstu3.utils; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032 033 034import org.hl7.fhir.dstu3.model.ExpressionNode; 035import org.hl7.fhir.dstu3.model.ExpressionNode.SourceLocation; 036import org.hl7.fhir.exceptions.FHIRException; 037import org.hl7.fhir.utilities.Utilities; 038 039// shared lexer for concrete syntaxes 040// - FluentPath 041// - Mapping language 042 043public class FHIRLexer { 044 public class FHIRLexerException extends FHIRException { 045 046 public FHIRLexerException() { 047 super(); 048 } 049 050 public FHIRLexerException(String message, Throwable cause) { 051 super(message, cause); 052 } 053 054 public FHIRLexerException(String message) { 055 super(message); 056 } 057 058 public FHIRLexerException(Throwable cause) { 059 super(cause); 060 } 061 062 } 063 private String source; 064 private int cursor; 065 private int currentStart; 066 private String current; 067 private SourceLocation currentLocation; 068 private SourceLocation currentStartLocation; 069 private int id; 070 071 public FHIRLexer(String source) throws FHIRLexerException { 072 this.source = source; 073 currentLocation = new SourceLocation(1, 1); 074 next(); 075 } 076 public String getCurrent() { 077 return current; 078 } 079 public SourceLocation getCurrentLocation() { 080 return currentLocation; 081 } 082 083 public boolean isConstant(boolean incDoubleQuotes) { 084 return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 085 current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 086 current.equals("true") || current.equals("false") || current.equals("{}"); 087 } 088 089 public boolean isStringConstant() { 090 return current.charAt(0) == '\'' || current.charAt(0) == '"'; 091 } 092 093 public String take() throws FHIRLexerException { 094 String s = current; 095 next(); 096 return s; 097 } 098 099 public int takeInt() throws FHIRLexerException { 100 String s = current; 101 if (!Utilities.isInteger(s)) 102 throw error("Found "+current+" expecting an integer"); 103 next(); 104 return Integer.parseInt(s); 105 } 106 107 public boolean isToken() { 108 if (Utilities.noString(current)) 109 return false; 110 111 if (current.startsWith("$")) 112 return true; 113 114 if (current.equals("*") || current.equals("**")) 115 return true; 116 117 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 118 for (int i = 1; i < current.length(); i++) 119 if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') || 120 (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 121 return false; 122 return true; 123 } 124 return false; 125 } 126 127 public FHIRLexerException error(String msg) { 128 return error(msg, currentLocation.toString()); 129 } 130 131 public FHIRLexerException error(String msg, String location) { 132 return new FHIRLexerException("Error at "+location+": "+msg); 133 } 134 135 public void next() throws FHIRLexerException { 136 current = null; 137 boolean last13 = false; 138 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) { 139 if (source.charAt(cursor) == '\r') { 140 currentLocation.setLine(currentLocation.getLine() + 1); 141 currentLocation.setColumn(1); 142 last13 = true; 143 } else if (!last13 && (source.charAt(cursor) == '\n')) { 144 currentLocation.setLine(currentLocation.getLine() + 1); 145 currentLocation.setColumn(1); 146 last13 = false; 147 } else { 148 last13 = false; 149 currentLocation.setColumn(currentLocation.getColumn() + 1); 150 } 151 cursor++; 152 } 153 currentStart = cursor; 154 currentStartLocation = currentLocation; 155 if (cursor < source.length()) { 156 char ch = source.charAt(cursor); 157 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 158 cursor++; 159 if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-')) 160 cursor++; 161 current = source.substring(currentStart, cursor); 162 } else if (ch == '.' ) { 163 cursor++; 164 if (cursor < source.length() && (source.charAt(cursor) == '.')) 165 cursor++; 166 current = source.substring(currentStart, cursor); 167 } else if (ch >= '0' && ch <= '9') { 168 cursor++; 169 boolean dotted = false; 170 while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) { 171 if (source.charAt(cursor) == '.') 172 dotted = true; 173 cursor++; 174 } 175 if (source.charAt(cursor-1) == '.') 176 cursor--; 177 current = source.substring(currentStart, cursor); 178 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 179 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 180 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 181 cursor++; 182 current = source.substring(currentStart, cursor); 183 } else if (ch == '%') { 184 cursor++; 185 if (cursor < source.length() && (source.charAt(cursor) == '"')) { 186 cursor++; 187 while (cursor < source.length() && (source.charAt(cursor) != '"')) 188 cursor++; 189 cursor++; 190 } else 191 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 192 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-')) 193 cursor++; 194 current = source.substring(currentStart, cursor); 195 } else if (ch == '/') { 196 cursor++; 197 if (cursor < source.length() && (source.charAt(cursor) == '/')) { 198 cursor++; 199 while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 200 cursor++; 201 } 202 current = source.substring(currentStart, cursor); 203 } else if (ch == '$') { 204 cursor++; 205 while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')) 206 cursor++; 207 current = source.substring(currentStart, cursor); 208 } else if (ch == '{') { 209 cursor++; 210 ch = source.charAt(cursor); 211 if (ch == '}') 212 cursor++; 213 current = source.substring(currentStart, cursor); 214 } else if (ch == '"'){ 215 cursor++; 216 boolean escape = false; 217 while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) { 218 if (escape) 219 escape = false; 220 else 221 escape = (source.charAt(cursor) == '\\'); 222 cursor++; 223 } 224 if (cursor == source.length()) 225 throw error("Unterminated string"); 226 cursor++; 227 current = "\""+source.substring(currentStart+1, cursor-1)+"\""; 228 } else if (ch == '\''){ 229 cursor++; 230 char ech = ch; 231 boolean escape = false; 232 while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) { 233 if (escape) 234 escape = false; 235 else 236 escape = (source.charAt(cursor) == '\\'); 237 cursor++; 238 } 239 if (cursor == source.length()) 240 throw error("Unterminated string"); 241 cursor++; 242 current = source.substring(currentStart, cursor); 243 if (ech == '\'') 244 current = "\'"+current.substring(1, current.length() - 1)+"\'"; 245 } else if (ch == '@'){ 246 cursor++; 247 while (cursor < source.length() && isDateChar(source.charAt(cursor))) 248 cursor++; 249 current = source.substring(currentStart, cursor); 250 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 251 cursor++; 252 current = source.substring(currentStart, cursor); 253 } 254 } 255 } 256 257 258 private boolean isDateChar(char ch) { 259 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch); 260 } 261 public boolean isOp() { 262 return ExpressionNode.Operation.fromCode(current) != null; 263 } 264 public boolean done() { 265 return currentStart >= source.length(); 266 } 267 public int nextId() { 268 id++; 269 return id; 270 } 271 public SourceLocation getCurrentStartLocation() { 272 return currentStartLocation; 273 } 274 275 // special case use 276 public void setCurrent(String current) { 277 this.current = current; 278 } 279 280 public boolean hasComment() { 281 return !done() && current.startsWith("//"); 282 } 283 public boolean hasToken(String kw) { 284 return !done() && kw.equals(current); 285 } 286 public boolean hasToken(String... names) { 287 if (done()) 288 return false; 289 for (String s : names) 290 if (s.equals(current)) 291 return true; 292 return false; 293 } 294 295 public void token(String kw) throws FHIRLexerException { 296 if (!kw.equals(current)) 297 throw error("Found \""+current+"\" expecting \""+kw+"\""); 298 next(); 299 } 300 301 public String readConstant(String desc) throws FHIRLexerException { 302 if (!isStringConstant()) 303 throw error("Found "+current+" expecting \"["+desc+"]\""); 304 305 return processConstant(take()); 306 } 307 308 public String processConstant(String s) throws FHIRLexerException { 309 StringBuilder b = new StringBuilder(); 310 int i = 1; 311 while (i < s.length()-1) { 312 char ch = s.charAt(i); 313 if (ch == '\\') { 314 i++; 315 switch (s.charAt(i)) { 316 case 't': 317 b.append('\t'); 318 break; 319 case 'r': 320 b.append('\r'); 321 break; 322 case 'n': 323 b.append('\n'); 324 break; 325 case 'f': 326 b.append('\f'); 327 break; 328 case '\'': 329 b.append('\''); 330 break; 331 case '\\': 332 b.append('\\'); 333 break; 334 case '/': 335 b.append('\\'); 336 break; 337 case 'u': 338 i++; 339 int uc = Integer.parseInt(s.substring(i, i+4), 16); 340 b.append((char) uc); 341 i = i + 4; 342 break; 343 default: 344 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 345 } 346 } else { 347 b.append(ch); 348 i++; 349 } 350 } 351 return b.toString(); 352 353 } 354 public void skipToken(String token) throws FHIRLexerException { 355 if (getCurrent().equals(token)) 356 next(); 357 358 } 359 public String takeDottedToken() throws FHIRLexerException { 360 StringBuilder b = new StringBuilder(); 361 b.append(take()); 362 while (!done() && getCurrent().equals(".")) { 363 b.append(take()); 364 b.append(take()); 365 } 366 return b.toString(); 367 } 368 369 void skipComments() throws FHIRLexerException { 370 while (!done() && hasComment()) 371 next(); 372 } 373 374}