{ (* * fc_ast_lex.mll -- Lexer for functional C * * Ling Li, Xin Yu * Feb. 23, 2001, Caltech *) open Symbol open Fc_ast_parse open Fc_ast_type (* * File position. *) let current_line = ref 1 let current_schar = ref 0 (* * Advance a line. *) let set_next_line lexbuf = incr current_line; current_schar := Lexing.lexeme_end lexbuf (* * Get the position of the current lexeme. * We assume it is all on one line. *) let set_lexeme_position lexbuf = let line = !current_line in let schar = Lexing.lexeme_start lexbuf - !current_schar in let echar = Lexing.lexeme_end lexbuf - !current_schar in let file = Fc_ast_state.current_file () in let pos = file, line, schar, line, echar in Fc_ast_state.set_current_position pos; pos (* Extract octal number from string. (Example: "017" -> 15) *) let oct_of_string s = int_of_string ("0o" ^ s) (* Convert special char (string) *) let convert_char = function | "\\n" -> '\n' | "\\b" -> '\b' | "\\r" -> '\r' | "\\t" -> '\t' | "\\\\" -> '\\' | "\\\"" -> '"' | "\\'" -> '\'' | s -> (* '\\' ['0'-'7'] ['0'-'7']? ['0'-'7']? *) let ord = oct_of_string (String.sub s 1 (String.length s - 1)) in char_of_int ord } (* Regular expressions *) let white = [' ' '\t' '\r']+ (* '\r': ^M in DOS format file *) let newline = '\n' let alpha = ['A'-'Z' 'a'-'z'] let digit = ['0'-'9'] let xdigit = digit | ['a'-'f' 'A'-'F'] let alnum = alpha | digit (* Comments *) let c_comment = "/*" let comm_end = "*/" let cpp_comment = "//" [^ '\n']* (* Identifier *) let ident = ('_' | alpha) ('_' | alnum)* (* Numbers *) let dec = digit+ let oct = '0' ['0'-'7']* let hex = '0' ['x' 'X'] xdigit+ let exp = ['e' 'E'] ('+' | '-')? dec let float1 = (dec '.' digit* | '.' dec) exp? let float2 = dec exp (* Character Constants *) let single_char = '\'' [^ '\n' '\''] '\'' let special_char = "\\n" | "\\b" | "\\r" | "\\t" | "\\\\" | "\\\"" | "\\'" | ('\\' ['0'-'7'] ['0'-'7']? ['0'-'7']?) (* Special symbols *) let special_sym = [';' '+' '-' '*' '/' '%'] | ['=' '>' '<' '!' '|' '&' '^' '~'] | ['(' ')' '[' ']' '{' '}' ',' '?' ':' '.'] | "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "^=" | "|=" | "==" | "!=" | ">=" | "<=" | "<<" | ">>" | "++" | "--" | "||" | "&&" | "->" | "<<=" | ">>=" | "sizeof" (* Main lexer *) rule main = parse (* Ignore white space & comments*) | white | cpp_comment { main lexbuf } | newline { (* Keep track of the current line number *) set_next_line lexbuf; main lexbuf } | c_comment { (* Continue until match comm_end *) comment lexbuf } (* numbers *) | oct { let pos = set_lexeme_position lexbuf in TokInt (oct_of_string (Lexing.lexeme lexbuf), pos) } | dec | hex { let pos = set_lexeme_position lexbuf in TokInt (int_of_string (Lexing.lexeme lexbuf), pos) } | float1 | float2 { let pos = set_lexeme_position lexbuf in TokFloat (float_of_string (Lexing.lexeme lexbuf), pos) } (* Character constants *) | single_char { let pos = set_lexeme_position lexbuf in TokChar ((Lexing.lexeme lexbuf).[1], pos) } | '\'' special_char '\'' { let pos = set_lexeme_position lexbuf in let s = Lexing.lexeme lexbuf in TokChar (convert_char (String.sub s 1 (String.length s - 2)), pos) } (* String constants *) | '"' { let pos1 = set_lexeme_position lexbuf in let strn = string_lex lexbuf in let pos2 = set_lexeme_position lexbuf in TokString (strn, (Fc_ast_util.union_pos pos1 pos2)) } (* Identifiers, including keywords & built-in types *) | ident { let pos = set_lexeme_position lexbuf in (* Keywords & built-in types *) match Lexing.lexeme lexbuf with | "typedef" -> TokTypedef pos | "if" -> TokIf pos | "else" -> TokElse pos | "for" -> TokFor pos | "while" -> TokWhile pos | "return" -> TokReturn pos | "break" -> TokBreak pos | "struct" -> TokStruct pos | "char" -> TokTypeChar pos | "int" -> TokTypeInt pos | "float" -> TokTypeFloat pos | "bool" -> TokTypeBool pos | id -> (* Either a variable or a type name *) let sym = Symbol.add id in if Fc_ast_state.is_type sym then TokTypeId (sym, pos) else TokId (sym, pos) } (* Special symbols *) | special_sym { let pos = set_lexeme_position lexbuf in match Lexing.lexeme lexbuf with | ";" -> TokSemi pos | "+" -> TokPlus pos | "-" -> TokMinus pos | "*" -> TokStar pos | "/" -> TokSlash pos | "%" -> TokMod pos | "++" -> TokIncr pos | "--" -> TokDecr pos | "=" -> TokAssign pos | "+=" -> TokPlusEq pos | "-=" -> TokMinusEq pos | "*=" -> TokStarEq pos | "/=" -> TokSlashEq pos | "%=" -> TokModEq pos | "||" -> TokLOr pos | "&&" -> TokLAnd pos | "==" -> TokEq pos | "!=" -> TokNotEq pos | ">" -> TokGt pos | ">=" -> TokGe pos | "<" -> TokLt pos | "<=" -> TokLe pos | "!" -> TokNot pos | "|" -> TokBOr pos | "&" -> TokBAnd pos | "^" -> TokBXor pos | "~" -> TokBNot pos | "|=" -> TokBOrEq pos | "&=" -> TokBAndEq pos | "^=" -> TokBXorEq pos | "<<" -> TokLShift pos | ">>" -> TokRShift pos | "<<="-> TokLShiftEq pos | ">>="-> TokRShiftEq pos | "(" -> TokLeftParen pos | ")" -> TokRightParen pos | "[" -> TokLeftBrack pos | "]" -> TokRightBrack pos | "{" -> TokLeftBrace pos | "}" -> TokRightBrace pos | "," -> TokComma pos | "?" -> TokQues pos | ":" -> TokColon pos | "." -> TokDot pos | "->" -> TokArrow pos | "sizeof" -> TokSizeof pos | s -> raise (ParseError (pos, s ^ " not implemented")) } (* If all else fails, this is an error. *) | _ { let pos = set_lexeme_position lexbuf in let s = Lexing.lexeme lexbuf in raise (ParseError (pos, Printf.sprintf "illegal character '%s'" (String.escaped s))) } (* End of file. *) | eof { let pos = set_lexeme_position lexbuf in TokEof pos } and comment = parse | newline { set_next_line lexbuf; comment lexbuf } | comm_end { main lexbuf } | _ { comment lexbuf } | eof { let pos = set_lexeme_position lexbuf in raise (ParseError (pos, "unterminated comment")) } and string_lex = parse | '"' '"' { "\"" ^ string_lex lexbuf } | '"' { "" } | special_char { let s = Lexing.lexeme lexbuf in String.make 1 (convert_char s) ^ string_lex lexbuf } | _ { let s = Lexing.lexeme lexbuf in s ^ string_lex lexbuf }