Browse Source

Initial commit with functioning tokenizer.

long_to_char
Cameron Weinfurt 3 years ago
commit
c5714d295e
  1. 3
      .gitignore
  2. 18
      main.c
  3. 79
      token.c
  4. 21
      token.h

3
.gitignore

@ -0,0 +1,3 @@
as2-s20.html
in.txt
out.txt_sample

18
main.c

@ -0,0 +1,18 @@
#include <stdio.h>
#include <stdlib.h>
#include "token.h"
int main() {
char *line = NULL;
size_t line_len = 0;
getline(&line,&line_len,stdin);
input_str = line;
int token;
while(token = tokenize())
printf("%d\n", token);
free(line);
}

79
token.c

@ -0,0 +1,79 @@
#include "token.h"
// The first string is where the tokenizer reads from, while the second is set
// when the underlying data needs to be parsed more specifically, like for
// numbers.
char *input_str = NULL;
char *token_str = NULL;

// Returns non-zero when c is an ASCII decimal digit ('0'..'9').
static int is_dec_digit(char c) {
    return c >= '0' && c <= '9';
}

// Returns non-zero when c is an ASCII hexadecimal digit (0-9, A-F, a-f).
static int is_hex_digit(char c) {
    // Clearing bit 5 (mask 0xDF) folds ASCII lowercase letters onto uppercase.
    int folded = c & 0xDF;
    return is_dec_digit(c) || (folded >= 'A' && folded <= 'F');
}

// Scans the next token from input_str and advances input_str past it.
//
// Returns one of the TOK_* codes:
//   - TOK_NULL when input_str is NULL or the end of the string is reached.
//   - TOK_NUMHEX for "0x"/"0X" followed by at least one hex digit; token_str
//     points at the first digit after the prefix.
//   - TOK_NUMDEC for a run of decimal digits; token_str points at its start.
//   - TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_FSLASH, TOK_CARROT, TOK_LPAREN,
//     TOK_RPAREN for the corresponding single character.
//   - TOK_ERR for any other character; token_str points at it and input_str
//     is NOT advanced, so the caller must stop or skip the character itself.
//
// Spaces, tabs and newlines between tokens are skipped. token_str is reset to
// NULL on entry and stays NULL for tokens that carry no text.
int tokenize(void) {
    token_str = NULL;

    // No text means no tokens.
    if (!input_str) {
        return TOK_NULL;
    }

    for (;;) {
        switch (*input_str) {
        case ' ':
        case '\t':
        case '\n':
            // Discard whitespace.
            input_str++;
            break;

        case '0':
            // Check if hex number. The mask must be parenthesized: `==` binds
            // tighter than `&`. The && short-circuit keeps us from reading
            // past the terminator when the string ends right after the '0'.
            if ((input_str[1] & 0xDF) == 'X' && is_hex_digit(input_str[2])) {
                // Yes, munch hex digits and return.
                token_str = input_str + 2;
                input_str += 3;
                while (is_hex_digit(*input_str)) {
                    input_str++;
                }
                return TOK_NUMHEX;
            }
            // No, this is a decimal number.
            /* fallthrough */
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            // Decimal number. Munch digits.
            token_str = input_str;
            while (is_dec_digit(*input_str)) {
                input_str++;
            }
            return TOK_NUMDEC;

        // Operators and parentheses.
        case '+':
            input_str++;
            return TOK_PLUS;
        case '-':
            input_str++;
            return TOK_MINUS;
        case '*':
            input_str++;
            return TOK_STAR;
        case '/':
            input_str++;
            return TOK_FSLASH;
        case '^':
            input_str++;
            return TOK_CARROT;
        case '(':
            input_str++;
            return TOK_LPAREN;
        case ')':
            input_str++;
            return TOK_RPAREN;

        // If there is no more data, return the null token.
        case '\0':
            return TOK_NULL;

        // All other cases return an error token and don't advance the pointer.
        default:
            token_str = input_str;
            return TOK_ERR;
        }
    }
}

21
token.h

@ -0,0 +1,21 @@
// Public interface of the tokenizer: token codes, the shared scanning state,
// and character-classification helpers.
#ifndef TOKEN_H
#define TOKEN_H

#include <stddef.h>

// Token codes returned by tokenize(). TOK_NULL is zero so that a caller can
// tell "no more tokens" apart from every real token; TOK_ERR is negative and
// parenthesized so it stays a single operand wherever it is expanded.
#define TOK_NULL 0
#define TOK_ERR (-1)
#define TOK_NUMDEC 1
#define TOK_NUMHEX 2
#define TOK_NUMBIN 3
#define TOK_LPAREN 4
#define TOK_RPAREN 5
#define TOK_PLUS 6
#define TOK_MINUS 7
#define TOK_STAR 8
#define TOK_FSLASH 9
#define TOK_CARROT 10

// input_str: the position the tokenizer reads from (set by the caller).
// token_str: set by tokenize() to the text of tokens that need further
// parsing, such as numbers.
extern char *input_str, *token_str;

// Scans the next token from input_str and returns its TOK_* code.
int tokenize(void);

// Character classification for ASCII. Both macros evaluate their argument
// more than once, so do not pass expressions with side effects.
//
// ISDIGIT(c): non-zero when c is '0'..'9'.
#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
// ISHEX(c): non-zero when c is a hex digit (0-9, A-F, a-f). Masking with 0xDF
// clears bit 5, folding lowercase letters onto uppercase; the mask must be
// parenthesized because relational operators bind tighter than `&`.
#define ISHEX(c) (ISDIGIT(c) || (((c) & 0xDF) >= 'A' && ((c) & 0xDF) <= 'F'))

#endif // TOKEN_H
Loading…
Cancel
Save