The Sol Programming Language!
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tokenizer.lex 6.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. %{
  2. #define YYSTYPE void *
  3. #include "ast.h"
  4. #include "parser.tab.h"
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #include <stdio.h>
  8. void yyerror(YYLTYPE *, stmt_node **, char *);
  9. int yywrap(void);
  /* Growable byte buffer filled by str_init()/str_putc():
   *   str    - start of the heap buffer
   *   curptr - next byte to be written (kept equal to str + chars)
   *   cursz  - current capacity of the buffer, in bytes
   *   chars  - number of bytes written so far */
  char *str, *curptr;
  int cursz, chars;
  /* The buffer starts at, and grows by, this many bytes. */
  #define SZMUL 128
  /* Set by sol_write_html(); while non-zero, write_html() prints every
   * matched token as an HTML span. */
  int writing_html = 0;

  /* Report an allocation failure for the string buffer and stop the program. */
  static void str_out_of_memory(const char *where) {
    fputs(where, stderr);
    fputs(": out of memory\n", stderr);
    abort();
  }

  /* Start a fresh, empty buffer of SZMUL bytes. A buffer left over from an
   * earlier call is not freed here. Aborts if the allocation fails. */
  void str_init(void) {
    str = malloc(SZMUL);
    if(str == NULL) {
      str_out_of_memory("str_init");
    }
    curptr = str;
    cursz = SZMUL;
    chars = 0;
  }

  /* Append one byte to the buffer, growing it by SZMUL bytes as soon as it
   * becomes full so that the next write always has room.
   * str_init() must have been called first. Aborts if growing fails. */
  void str_putc(char c) {
    *curptr++ = c;
    chars++;
    if(chars >= cursz) {
      /* Keep the old pointer until realloc is known to have succeeded. */
      char *grown = realloc(str, (size_t) cursz + SZMUL);
      if(grown == NULL) {
        str_out_of_memory("str_putc");
      }
      str = grown;
      /* The block may have moved, so re-derive the write position. */
      curptr = str + chars;
      cursz += SZMUL;
    }
  }
  29. /* http://stackoverflow.com/questions/656703/how-does-flex-support-bison-location-exactly */
  30. /* Many thanks to hugomg and David Elson! */
  31. static void update_loc(YYLTYPE *yylloc, char *yytext){
  32. int curr_line;
  33. int curr_col;
  34. curr_line = yylloc->first_line = yylloc->last_line;
  35. curr_col = yylloc->first_column = yylloc->last_column;
  36. {char * s; for(s = yytext; *s != '\0'; s++){
  37. if(*s == '\n'){
  38. curr_line++;
  39. curr_col = 1;
  40. }else{
  41. curr_col++;
  42. }
  43. }}
  44. yylloc->last_line = curr_line;
  45. yylloc->last_column = curr_col-1;
  46. }
  /* Font family names that write_html() picks from at random.
   * The order is significant only in that an index maps to a name. */
  char *FONTS[] = {
    /* Adobe / common desktop faces */
    "Adobe Courier", "Adobe Helvetica", "Adobe New Century Schoolbook",
    "Adobe Times", "Andale Mono", "Arial", "Arial Black",
    "C059", "Cantarell", "Century Schoolbook L", "Comic Sans MS",
    "Courier New", "cursor.pcf", "D050000L",
    /* DejaVu */
    "DejaVu Math TeX Gyre", "DejaVu Sans",
    "DejaVu Sans,DejaVu Sans Condensed", "DejaVu Sans,DejaVu Sans Light",
    "DejaVu Sans Mono", "DejaVu Serif",
    "DejaVu Serif,DejaVu Serif Condensed",
    /* Music, symbol and assorted faces */
    "Denemo", "Dingbats", "Emmentaler", "feta26", "Georgia",
    "GNU Unifont", "GNU Unifont CSUR", "GNU Unifont Sample",
    "Impact", "Misc Fixed", "Misc Fixed Wide",
    /* Nimbus */
    "Nimbus Mono L", "Nimbus Mono PS", "Nimbus Roman", "Nimbus Roman No9 L",
    "NimbusSans", "Nimbus Sans", "Nimbus Sans L", "Nimbus Sans Narrow",
    "P052", "Standard Symbols L", "Standard Symbols PS",
    "Times New Roman", "Trebuchet MS",
    /* Unifont */
    "Unifont", "Unifont CSUR", "Unifont Sample", "Unifont Upper",
    /* URW */
    "URW Bookman", "URW Bookman L", "URW Chancery L",
    "URW Gothic", "URW Gothic L", "URW Palladio L",
    "Verdana", "Webdings", "Z003",
  };
  107. static void write_html(char *yytext) {
  108. if(writing_html) {
  109. printf("<span style=\"font-family: %s;%s%s%s%s\">%s</span>",
  110. FONTS[rand() % (sizeof(FONTS) / sizeof(*FONTS))],
  111. rand() & 1 ? "font-weight: bold;" : "",
  112. rand() & 1 ? "font-style: italic;" : "",
  113. rand() & 1 ? "text-decoration: underline;" : "",
  114. rand() & 1 ? "font-variant: small-caps;" : "",
  115. yytext
  116. );
  117. }
  118. }
  119. #define YY_USER_ACTION update_loc(yylloc, yytext); write_html(yytext);
  120. %}
  /* Named character classes; IDENT is a letter or underscore followed by any number of letters, digits or underscores. */
DIGIT [0-9]
HEXDIGIT [0-9A-Fa-f]
ALPHA [A-Za-z]
IDENT ({ALPHA}|_)({ALPHA}|{DIGIT}|_)*
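/* This is the right way to do it, but it keeps generating token $undefined.
   (Probable cause, to be confirmed: flex #defines every start-condition name,
   so a start condition called STRING shadows the STRING token from
   parser.tab.h and "return STRING" hands bison the start-condition number.
   With bison-bridge the value also has to be stored through the pointer,
   as the live rules do. The rules themselves belong in the rules section.)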
  126. %x STRING
  127. \" { str_init(); BEGIN STRING; }
  128. <STRING>\\n { str_putc('\n'); }
  129. <STRING>\\t { str_putc('\t'); }
  130. <STRING>\\b { str_putc('\b'); }
  131. <STRING>\\r { str_putc('\r'); }
  132. <STRING>\\x{HEXDIGIT}{HEXDIGIT} { str_putc(strtol(yytext+2, NULL, 16)); }
  133. <STRING>\\\" { str_putc('"'); }
  134. <STRING>\" { str_putc('\0'); yylval = str; BEGIN 0; return STRING; }
  135. <STRING>. { str_putc(*yytext); }
  136. */
  137. %option bison-bridge bison-locations
  138. %%
  /* Token rules. flex takes the longest match and, between equally long
   * matches, the rule listed first, so the keyword rules must stay ahead of
   * {IDENT}. Payloads stored in *yylval are heap-allocated:
   *   FLOAT  - a double        INT   - a long (true/false become 1/0)
   *   IDENT  - a strdup'd copy of the name
   *   STRING - an unsigned long byte count followed by the bytes between the
   *            quotes (no terminator, no escape processing)
   * All other tokens carry no value.
   * A comment runs from "--" to the end of the line; the newline is optional
   * so that a comment on the last line of input is still skipped.
   * Characters that match no rule are reported through yyerror() instead of
   * being echoed to stdout by the default rule.
   * NOTE(review): [ \t\n]+ swallows a newline together with the indentation
   * after it, so BLPAREN presumably only sees an indented "(" right after a
   * comment line or at the very start of input -- confirm this is intended. */
{DIGIT}+"."{DIGIT}* { *yylval = malloc(sizeof(double)); *((double *) *yylval) = atof(yytext); return FLOAT; }
{DIGIT}+ { *yylval = malloc(sizeof(long)); *((long *) *yylval) = atol(yytext); return INT; }
\"[^"]*\" |
\'[^']*\' { *yylval = malloc(sizeof(unsigned long) + (yyleng - 2) * sizeof(char)); *((unsigned long *) *yylval) = yyleng - 2; memcpy(((char *) *yylval) + sizeof(unsigned long), yytext + 1, yyleng - 2); return STRING; }
if { return IF; }
then { return THEN; }
else { return ELSE; }
elseif { return ELSEIF; }
while { return WHILE; }
for { return FOR; }
in { return IN; }
do { return DO; }
func { return FUNC; }
macro { return MACRO; }
lambda { return LAMBDA; }
return { return RETURN; }
break { return BREAK; }
continue { return CONTINUE; }
end { return END; }
None { return NONE; }
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return STAR; }
"/" { return SLASH; }
"%" |
"mod" { return PERCENT; }
"**" { return DSTAR; }
"&" { return BAND; }
"|" { return BOR; }
"^" { return BXOR; }
"~" { return BNOT; }
"&&" |
"and" { return LAND; }
"||" |
"or" { return LOR; }
"!" |
"not" { return LNOT; }
"true" |
"True" { *yylval = malloc(sizeof(long)); *((long *) *yylval) = 1; return INT; }
"false" |
"False" { *yylval = malloc(sizeof(long)); *((long *) *yylval) = 0; return INT; }
"=" { return ASSIGN; }
"+=" { return ASSIGNPLUS; }
"-=" { return ASSIGNMINUS; }
"*=" { return ASSIGNSTAR; }
"/=" { return ASSIGNSLASH; }
"**=" { return ASSIGNDSTAR; }
"&=" { return ASSIGNBAND; }
"|=" { return ASSIGNBOR; }
"^=" { return ASSIGNBXOR; }
"==" { return EQUAL; }
"!=" { return NEQUAL; }
"<" { return LESS; }
">" { return GREATER; }
"<=" { return LESSEQ; }
">=" { return GREATEREQ; }
">>" { return RSHIFT; }
"<<" { return LSHIFT; }
"{" { return LBRACE; }
"}" { return RBRACE; }
"[" { return LBRACKET; }
"]" { return RBRACKET; }
^[ \t]*"(" { return BLPAREN; } /* "Breaking" paren, not allowed to introduce a call_expr */
"(" { return LPAREN; }
")" { return RPAREN; }
"." { return DOT; }
":" { return COLON; }
";" { return SEMICOLON; }
"," { return COMMA; }
"#" { return POUND; }
"!!!" { return TBANG; }
{IDENT} { *yylval = (void *) strdup(yytext); return IDENT; }
--[^\n]*\n? { /* Skip comments */ }
[ \t\n]+ { /* Skip whitespace */ }
. { yyerror(yylloc, NULL, "unrecognized character"); }
  213. %%
  /* End-of-input hook called by the scanner: always answers 1, i.e. there is
   * no further input to continue with. */
  int yywrap(void) {
    const int no_more_input = 1;
    return no_more_input;
  }
  217. void yyerror(YYLTYPE *locp, stmt_node **prog, char *err) {
  218. fputs(err, stderr);
  219. fprintf(stderr, "\n(at lines %d-%d, cols %d-%d)\n", locp->first_line, locp->last_line, locp->first_column, locp->last_column);
  220. if(prog && *prog) {
  221. fprintf(stderr, "(while building a stmt of type %d)\n", (*prog)->type);
  222. }
  223. }
  224. stmt_node *sol_compile(const char *prgstr) {
  225. stmt_node *program = NULL;
  226. YY_BUFFER_STATE buf = yy_scan_string(prgstr);
  227. yyparse(&program);
  228. yy_delete_buffer(buf);
  229. return program;
  230. }
  231. stmt_node *sol_compile_buffer(const char *prgbuf, size_t sz) {
  232. stmt_node *program = NULL;
  233. YY_BUFFER_STATE buf = yy_scan_bytes(prgbuf, sz);
  234. yyparse(&program);
  235. yy_delete_buffer(buf);
  236. return program;
  237. }
  238. stmt_node *sol_compile_file(FILE *prgfile) {
  239. stmt_node *program = NULL;
  240. YY_BUFFER_STATE buf = yy_create_buffer(prgfile, YY_BUF_SIZE);
  241. yy_switch_to_buffer(buf);
  242. yyparse(&program);
  243. yy_delete_buffer(buf);
  244. return program;
  245. }
  246. void sol_write_html(FILE *prgfile) {
  247. stmt_node *program = NULL;
  248. YY_BUFFER_STATE buf = yy_create_buffer(prgfile, YY_BUF_SIZE);
  249. writing_html = 1;
  250. printf("<html><head><title>Sol Source File</title></head><body style=\"white-space: pre-wrap;\">\n");
  251. yy_switch_to_buffer(buf);
  252. yyparse(&program);
  253. yy_delete_buffer(buf);
  254. //stmt_free(program);
  255. printf("</body></html>\n");
  256. }