The Sol Programming Language!
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tokenizer.lex 5.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. %{
  2. #define YYSTYPE void *
  3. #include "ast.h"
  4. #include "parser.tab.h"
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #include <stdio.h>
  8. void yyerror(YYLTYPE *, stmt_node **, char *);
  9. int yywrap(void);
  /*
   * Growable character buffer filled one character at a time.  In this file
   * it is referenced only by the (currently commented-out) STRING
   * start-condition rules.
   *
   *   str    - start of the heap buffer
   *   curptr - next write position (str + chars)
   *   cursz  - allocated size of the buffer, in bytes
   *   chars  - number of characters stored so far
   */
  char *str, *curptr;
  int cursz, chars;
  #define SZMUL 128 /* initial size and growth increment, in bytes */

  /*
   * Start a fresh, empty buffer of SZMUL bytes.
   *
   * Any buffer previously referenced by str is NOT freed here.
   * Aborts with a message on stderr if the allocation fails, instead of
   * leaving str NULL for str_putc to dereference.
   */
  void str_init(void) {
      str = malloc(SZMUL);
      if(!str) {
          fputs("str_init: out of memory\n", stderr);
          abort();
      }
      curptr = str;
      cursz = SZMUL;
      chars = 0;
  }

  /*
   * Append c to the buffer, growing it by SZMUL bytes as soon as it becomes
   * full so that there is always room for the next character.
   *
   * str_init() must have been called first.  Aborts with a message on
   * stderr if the buffer cannot be grown.
   */
  void str_putc(char c) {
      *curptr++ = c;
      chars++;
      if(chars >= cursz) {
          /* Keep the old pointer until realloc is known to have succeeded. */
          char *grown = realloc(str, cursz + SZMUL);
          if(!grown) {
              fputs("str_putc: out of memory\n", stderr);
              abort();
          }
          str = grown;
          /* The block may have moved; re-derive the write position. */
          curptr = str + chars;
          cursz += SZMUL;
      }
  }
  28. /* http://stackoverflow.com/questions/656703/how-does-flex-support-bison-location-exactly */
  29. /* Many thanks to hugomg and David Elson! */
  30. static void update_loc(YYLTYPE *yylloc, char *yytext){
  31. int curr_line;
  32. int curr_col;
  33. curr_line = yylloc->first_line = yylloc->last_line;
  34. curr_col = yylloc->first_column = yylloc->last_column;
  35. {char * s; for(s = yytext; *s != '\0'; s++){
  36. if(*s == '\n'){
  37. curr_line++;
  38. curr_col = 1;
  39. }else{
  40. curr_col++;
  41. }
  42. }}
  43. yylloc->last_line = curr_line;
  44. yylloc->last_column = curr_col-1;
  45. }
  46. #define YY_USER_ACTION update_loc(yylloc, yytext);
  47. %}
  /* Named patterns.  In the rules visible in this file only DIGIT is used
     by an active rule; HEXDIGIT appears only in the commented-out STRING
     rules, and ALPHA and IDENT are not referenced by name expansion except
     for the {IDENT} rule. */
DIGIT     [0-9]
HEXDIGIT  [0-9a-fA-F]
ALPHA     [a-zA-Z]
IDENT     [a-zA-Z_][a-zA-Z0-9_]*
  52. /* This is the right way to do it, but it keeps generating token $undefined.
  53. %x STRING
  54. \" { str_init(); BEGIN STRING; }
  55. <STRING>\\n { str_putc('\n'); }
  56. <STRING>\\t { str_putc('\t'); }
  57. <STRING>\\b { str_putc('\b'); }
  58. <STRING>\\r { str_putc('\r'); }
  59. <STRING>\\x{HEXDIGIT}{HEXDIGIT} { str_putc(strtol(yytext+2, NULL, 16)); }
  60. <STRING>\\\" { str_putc('"'); }
  61. <STRING>\" { str_putc('\0'); yylval = str; BEGIN 0; return STRING; }
  62. <STRING>. { str_putc(*yytext); }
  63. */
  64. %option bison-bridge bison-locations
  65. %%
  /* Literals.  Each semantic value is a freshly malloc'ed object stored
     through *yylval:
       FLOAT  -> double
       INT    -> long
       STRING -> an unsigned long byte count followed immediately by that
                 many raw bytes (the text between the quotes, copied as-is,
                 with no terminating NUL added).
     strtod/strtol are used rather than atof/atol because the latter have
     undefined behaviour when the value is out of range.  Allocation failure
     aborts instead of dereferencing NULL. */
{DIGIT}+"."{DIGIT}* {
    double *val = malloc(sizeof *val);
    if(!val) { fputs("lexer: out of memory\n", stderr); abort(); }
    *val = strtod(yytext, NULL);
    *yylval = val;
    return FLOAT;
}
{DIGIT}+ {
    long *val = malloc(sizeof *val);
    if(!val) { fputs("lexer: out of memory\n", stderr); abort(); }
    *val = strtol(yytext, NULL, 10);
    *yylval = val;
    return INT;
}
\"[^"]*\" |
\'[^']*\' {
    /* yyleng counts both delimiters, so the payload is yyleng - 2 bytes. */
    unsigned long len = yyleng - 2;
    unsigned long *lit = malloc(sizeof *lit + len * sizeof(char));
    if(!lit) { fputs("lexer: out of memory\n", stderr); abort(); }
    *lit = len;
    memcpy(((char *) lit) + sizeof *lit, yytext + 1, len);
    *yylval = lit;
    return STRING;
}
  /* Reserved words.  These rules come before the {IDENT} rule further down,
     which matches the same text; keep them above it. */
if        { return IF; }
then      { return THEN; }
else      { return ELSE; }
elseif    { return ELSEIF; }
while     { return WHILE; }
for       { return FOR; }
in        { return IN; }
do        { return DO; }
func      { return FUNC; }
macro     { return MACRO; }
lambda    { return LAMBDA; }
return    { return RETURN; }
break     { return BREAK; }
continue  { return CONTINUE; }
end       { return END; }
None      { return NONE; }
  /* Arithmetic, bitwise and logical operators.  The word forms (mod, and,
     or, not) return the same token as their symbolic spelling; a pattern
     followed by a bare | shares the action of the rule after it. */
"+"    { return PLUS; }
"-"    { return MINUS; }
"*"    { return STAR; }
"/"    { return SLASH; }
"%"    |
"mod"  { return PERCENT; }
"**"   { return DSTAR; }
"&"    { return BAND; }
"|"    { return BOR; }
"^"    { return BXOR; }
"~"    { return BNOT; }
"&&"   |
"and"  { return LAND; }
"||"   |
"or"   { return LOR; }
"!"    |
"not"  { return LNOT; }
  /* Boolean literals.  Both capitalisations are accepted and are returned as
     INT tokens whose value is a malloc'ed long: 1 for true, 0 for false.
     Allocation failure aborts instead of dereferencing NULL. */
"true"   |
"True"   {
    long *val = malloc(sizeof *val);
    if(!val) { fputs("lexer: out of memory\n", stderr); abort(); }
    *val = 1;
    *yylval = val;
    return INT;
}
"false"  |
"False"  {
    long *val = malloc(sizeof *val);
    if(!val) { fputs("lexer: out of memory\n", stderr); abort(); }
    *val = 0;
    *yylval = val;
    return INT;
}
  /* Assignment (plain and compound), comparison and shift operators. */
"="    { return ASSIGN; }
"+="   { return ASSIGNPLUS; }
"-="   { return ASSIGNMINUS; }
"*="   { return ASSIGNSTAR; }
"/="   { return ASSIGNSLASH; }
"**="  { return ASSIGNDSTAR; }
"&="   { return ASSIGNBAND; }
"|="   { return ASSIGNBOR; }
"^="   { return ASSIGNBXOR; }
"=="   { return EQUAL; }
"!="   { return NEQUAL; }
"<"    { return LESS; }
">"    { return GREATER; }
"<="   { return LESSEQ; }
">="   { return GREATEREQ; }
">>"   { return RSHIFT; }
"<<"   { return LSHIFT; }
  /* Brackets and punctuation.
     BLPAREN is the "breaking" paren: a "(" that begins a line (optionally
     after blanks/tabs) and is not allowed to introduce a call_expr.  It has
     to stay above the plain "(" rule.
     NOTE(review): the whitespace rule [ \t\n]+ swallows a newline together
     with the indentation after it, so an indented "(" appears to reach the
     BLPAREN rule only at the start of input or right after a token that
     itself ended in a newline (such as a comment) -- confirm this is the
     intended behaviour. */
"{"          { return LBRACE; }
"}"          { return RBRACE; }
"["          { return LBRACKET; }
"]"          { return RBRACKET; }
^[ \t]*"("   { return BLPAREN; }
"("          { return LPAREN; }
")"          { return RPAREN; }
"."          { return DOT; }
":"          { return COLON; }
";"          { return SEMICOLON; }
","          { return COMMA; }
"#"          { return POUND; }
"!!!"        { return TBANG; }
  /* Identifiers, comments and whitespace.
     IDENT's semantic value is a strdup'ed copy of the matched text;
     allocation failure aborts rather than handing the parser a NULL name.
     A comment runs from -- to the end of the line.  The newline is optional
     in the pattern so that a comment on the very last line of input, with no
     trailing newline, is still skipped instead of being lexed as two MINUS
     tokens followed by the comment text.
     NOTE(review): there is no catch-all rule, so characters matched by no
     rule fall through to flex's default action -- consider whether they
     should be reported as errors instead. */
{IDENT} {
    char *name = strdup(yytext);
    if(!name) { fputs("lexer: out of memory\n", stderr); abort(); }
    *yylval = (void *) name;
    return IDENT;
}
--[^\n]*\n?  { /* skip comment */ }
[ \t\n]+     { /* skip whitespace */ }
  140. %%
  /*
   * End-of-input hook for the scanner.  Always returns 1.
   */
  int yywrap(void)
  {
      const int no_further_input = 1;

      return no_further_input;
  }
  144. void yyerror(YYLTYPE *locp, stmt_node **prog, char *err) {
  145. fputs(err, stderr);
  146. fprintf(stderr, "\n(at lines %d-%d, cols %d-%d)\n", locp->first_line, locp->last_line, locp->first_column, locp->last_column);
  147. if(prog && *prog) {
  148. fprintf(stderr, "(while building a stmt of type %d)\n", (*prog)->type);
  149. }
  150. }
  151. stmt_node *sol_compile(const char *prgstr) {
  152. stmt_node *program = NULL;
  153. YY_BUFFER_STATE buf = yy_scan_string(prgstr);
  154. yyparse(&program);
  155. yy_delete_buffer(buf);
  156. return program;
  157. }
  158. stmt_node *sol_compile_buffer(const char *prgbuf, size_t sz) {
  159. stmt_node *program = NULL;
  160. YY_BUFFER_STATE buf = yy_scan_bytes(prgbuf, sz);
  161. yyparse(&program);
  162. yy_delete_buffer(buf);
  163. return program;
  164. }
  165. stmt_node *sol_compile_file(FILE *prgfile) {
  166. stmt_node *program = NULL;
  167. YY_BUFFER_STATE buf = yy_create_buffer(prgfile, YY_BUF_SIZE);
  168. yy_switch_to_buffer(buf);
  169. yyparse(&program);
  170. yy_delete_buffer(buf);
  171. return program;
  172. }