LA FORET ROUGE

[2019-1 Compiler] LexAnalyzer

⏱ 4m | Categories: UNCATEGORIZED | Tags: KNU , CSE , COMPILER

The first assignment in my Compiler class was to write my own lexical analyzer. It would have been fast and easy if I had been familiar with regex.

I listed all these words one by one, but there must be an easier way than what I did…

 1char reserved[32][10] = {
 2    "asm","auto","break","case","char","const","continue","default", "do",\
 3    "double","else","extern","float","for","goto","if","int","long","register",\
 4    "return","short","signed","sizeof","static","struct","switch","typedef",\
 5    "union","unsigned","void","volatile","while"};
 6
 7char pre_dir[14][10] = {
 8    "define","error","import","undef","elif", "if", "include", "using",\
 9    "else", "ifdef", "line", "endif","ifndef", "pragma"};
10
/*
 * Operator spellings matched against a collected run of punctuation.
 * The row count (32) is implied by the initializer; the longest entries are
 * three characters, which is why each row is 4 bytes wide.
 */
char operator[][4] = {
    /* arithmetic, increment/decrement */
    "+", "-", "*", "/", "%", "++", "--",
    /* comparison */
    "==", "!=", ">", "<", ">=", "<=",
    /* logical and bitwise */
    "!", "&&", "||", "~", "&", "|", "^", "<<", ">>",
    /* compound assignment */
    "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>="
};

The main function starts here.

1int main(void) {
2    FILE *fp, *fo;
3    int i, j, k, predirFlag = 0;
4    char inputStr[255] = "";
5    char str[255] = "";

Declare all variables used in this programme.

  1    while (fgets(inputStr, sizeof(inputStr), fp)) {
  2        for (i = 0; i < strlen(inputStr); i++) {
  3
  4            if (inputStr[i] == ' ') i++;
  5            if (inputStr[i] == '\n') break;
  6
  7
  8            if (isalpha(inputStr[i])) {
  9                j = 0;
 10                concatStr(inputStr, str, &i, &j, 'a');
 11                isIdenResv(str);
 12                memset(str, '\0', sizeof(str));
 13            }
 14
 15
 16            else if (isdigit(inputStr[i])) {
 17                j = 0;
 18                concatStr(inputStr, str, &i, &j, 'n');
 19                insert_symtbl("numeric constant", str);
 20                memset(str, '\0', sizeof(str));
 21            }
 22
 23
 24            else if (ispunct(inputStr[i])) {
 25                if (inputStr[i] == '#') {
 26                    j = 0;
 27                    chtostr(inputStr[i++], str, "special character");
 28                    concatStr(inputStr, str, &i, &j, 'a');
 29                    for (k = 0; k < 14; k++) {
 30                        if (strcmp(pre_dir[k], str) == 0) {
 31                            insert_symtbl("preprocessor directives", str);
 32                            predirFlag = 1;
 33                            break;
 34                        }
 35                    }
 36                }
 37
 38                 else if (inputStr[i] == '\'') {
 39                    chtostr(inputStr[i++], str, "special character");
 40                    chtostr(inputStr[i++], str, "character constant");
 41                    chtostr(inputStr[i], str, "special character");
 42                }
 43
 44                else if (inputStr[i] == '"') {
 45                    chtostr(inputStr[i++], str, "special character");
 46                    j = 0;
 47                    concatStr(inputStr, str, &i, &j, 'c');
 48                    insert_symtbl("string constant", str);
 49                    memset(str, '\0', sizeof(str));
 50                    chtostr(inputStr[i], str, "special character");
 51                }
 52
 53                else if (strchr("+-*/%=!&|^", inputStr[i])!=NULL) {
 54                    j = 0;
 55                    concatStr(inputStr, str, &i, &j, 's');
 56
 57                    if ((strcmp(str, "+")==0 || strcmp(str, "-") == 0)&&(isdigit(inputStr[i+1]))) {
 58                        i++;
 59                        concatStr(inputStr, str, &i, &j, 'n');
 60                        insert_symtbl("numeric constant", str);
 61                        memset(str, '\0', sizeof(str));
 62                        continue;
 63                    }
 64
 65                    if (strcmp(str, "=") == 0) {
 66                        insert_symtbl("assign symbol", str); i++;
 67                        memset(str, '\0', sizeof(str));
 68                        continue;
 69                    }
 70
 71                    for (k = 0; k < 32; k++) {
 72                        if (strcmp(operator[k], str) == 0) {
 73                            insert_symtbl("operator", str);
 74                            break;
 75                        }
 76                    }
 77                }
 78
 79                else if(inputStr[i] == '<' || inputStr[i] == '>') {
 80                    j = 0;
 81                    concatStr(inputStr, str, &i, &j, 's');
 82                    for (k = 0; k < 32; k++) {
 83                        if (strcmp(operator[k], str) == 0 && predirFlag == 0) {
 84                            insert_symtbl("operator", str); i++;
 85                            break;
 86                        }
 87                    }
 88                    if (k == 32 && predirFlag == 1 && inputStr[i] == '>') predirFlag = 0;
 89                    if (k == 32) chtostr(inputStr[i], str, "separate symbol");
 90                }
 91
 92                else if (strchr(".,;:(){}", inputStr[i]) != NULL) {
 93                    chtostr(inputStr[i], str, "separate symbol");
 94                }
 95
 96                else {
 97                    chtostr(inputStr[i], str, "special character");
 98                }
 99
100                memset(str, '\0', sizeof(str));
101            }
102        }
103        memset(inputStr, '\0', sizeof(inputStr));
104    }
 1void isIdenResv(char token[]) {
 2    int i;
 3    for (i = 0; i < 32; i++) {
 4        if (strcmp(reserved[i], token)==0) {
 5            insert_symtbl("reserved words", token);
 6            return;
 7        }
 8    }
 9    insert_symtbl("identifier", token);
10    return;
11}
 1void concatStr(char src[], char dest[], int *srcIdx, int *destIdx, char op) {
 2    switch (op) {
 3        case 'a':
 4            while (isalnum(src[*srcIdx])) {
 5                dest[(*destIdx)++] += src[(*srcIdx)++];
 6            }
 7            (*srcIdx)--;
 8            break;
 9        case 'c':
10            while (src[*srcIdx] != '\"') {
11                dest[(*destIdx)++] += src[(*srcIdx)++];
12            }
13            break;
14        case 'n':
15            while (isdigit(src[*srcIdx]) || src[*srcIdx] == '.' || src[*srcIdx] == 'e' || src[*srcIdx] == '+' || src[*srcIdx] == '-') {
16                dest[(*destIdx)++] += src[(*srcIdx)++];
17            }
18            (*srcIdx)--;
19            break;
20        case 's':
21            while (ispunct(src[*srcIdx]) && src[*srcIdx]!=',' && src[*srcIdx]!=') {
22                dest[(*destIdx)++] += src[(*srcIdx)++];
23            }
24            (*srcIdx)--;
25            break;
26    }
27}
28
29
/*
 * Emit a single character as a token of the given type.
 *
 * src:  the character to emit.
 * dest: scratch buffer with room for at least 2 bytes; it holds the
 *       one-character string while insert_symtbl() runs and is reset to the
 *       empty string (both written bytes zeroed) before returning.
 * type: token type passed through to insert_symtbl().
 *
 * The string is built by direct assignment. `dest` is a pointer here (array
 * parameters decay), so sizeof(dest) would be the size of a pointer rather
 * than the size of the caller's buffer and must not be used as a length.
 */
void chtostr(char src, char dest[], char type[]) {
    dest[0] = src;
    dest[1] = '\0';

    insert_symtbl(type, dest);

    /* Clear exactly the bytes written above. */
    dest[0] = '\0';
    dest[1] = '\0';
}
 1void insert_symtbl(char type[], char token[]) {
 2    insert_tknlst("", token);
 3
 4    if (strstr(type, "constant")!=NULL) {
 5        insert_contbl(type, token);
 6    }
 7
 8    strcpy_s(symbolTable[idx_symtbl].type, sizeof(symbolTable[idx_symtbl].type), type);
 9    strcpy_s(symbolTable[idx_symtbl].token, sizeof(symbolTable[idx_symtbl].token), token);
10    idx_symtbl++;
11}
12
13void insert_tknlst(char type[], char token[]) {
14    strcpy_s(tokenList[idx_tknlst].type, sizeof(tokenList[idx_tknlst].type), type);
15    strcpy_s(tokenList[idx_tknlst].token, sizeof(tokenList[idx_tknlst].token), token);
16    idx_tknlst++;
17}
18
19void insert_contbl(char type[], char token[]) {
20    strcpy_s(constantTable[idx_contbl].type, sizeof(constantTable[idx_contbl].type), type);
21    strcpy_s(constantTable[idx_contbl].token, sizeof(constantTable[idx_contbl].token), token);
22    idx_contbl++;
23}

This is not perfect code. There are more cases to handle, and the code is not optimized either. But it made me realise why I have to learn more (including regex).

Comments

Link copied to clipboard!