The first assignment in my Compiler class was to write my own version of a lexical analyzer. It would have been quick and easy if I had been familiar with regex.
I listed all these words one by one, but there must be an easier way than the one I used…
/*
 * Keyword table used by the lexer: 32 entries, each at most 9 characters
 * plus the terminating NUL. isIdenResv() compares tokens against each row.
 */
char reserved[32][10] = {
    "asm", "auto", "break", "case", "char", "const", "continue", "default",
    "do", "double", "else", "extern", "float", "for", "goto", "if",
    "int", "long", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"
};
6
/*
 * Preprocessor directive names (without the leading '#'): 14 entries, each
 * at most 9 characters plus the terminating NUL.
 */
char pre_dir[14][10] = {
    "define", "error", "import", "undef", "elif", "if", "include", "using",
    "else", "ifdef", "line", "endif", "ifndef", "pragma"
};
10
/*
 * Operator spellings matched by the lexer: 32 entries, each at most
 * 3 characters plus the terminating NUL.
 */
char operator[][4] = {
    "+", "-", "*", "/", "%", "++", "--", "==",
    "!=", ">", "<", ">=", "<=", "!", "&&", "||",
    "~", "&", "|", "^", "<<", ">>", "+=", "-=",
    "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>="
};
The main function starts here.
1int main(void) {
2 FILE *fp, *fo;
3 int i, j, k, predirFlag = 0;
4 char inputStr[255] = "";
5 char str[255] = "";
The lines above declare all the variables used in this program.
1 while (fgets(inputStr, sizeof(inputStr), fp)) {
2 for (i = 0; i < strlen(inputStr); i++) {
3
4 if (inputStr[i] == ' ') i++;
5 if (inputStr[i] == '\n') break;
6
7
8 if (isalpha(inputStr[i])) {
9 j = 0;
10 concatStr(inputStr, str, &i, &j, 'a');
11 isIdenResv(str);
12 memset(str, '\0', sizeof(str));
13 }
14
15
16 else if (isdigit(inputStr[i])) {
17 j = 0;
18 concatStr(inputStr, str, &i, &j, 'n');
19 insert_symtbl("numeric constant", str);
20 memset(str, '\0', sizeof(str));
21 }
22
23
24 else if (ispunct(inputStr[i])) {
25 if (inputStr[i] == '#') {
26 j = 0;
27 chtostr(inputStr[i++], str, "special character");
28 concatStr(inputStr, str, &i, &j, 'a');
29 for (k = 0; k < 14; k++) {
30 if (strcmp(pre_dir[k], str) == 0) {
31 insert_symtbl("preprocessor directives", str);
32 predirFlag = 1;
33 break;
34 }
35 }
36 }
37
38 else if (inputStr[i] == '\'') {
39 chtostr(inputStr[i++], str, "special character");
40 chtostr(inputStr[i++], str, "character constant");
41 chtostr(inputStr[i], str, "special character");
42 }
43
44 else if (inputStr[i] == '"') {
45 chtostr(inputStr[i++], str, "special character");
46 j = 0;
47 concatStr(inputStr, str, &i, &j, 'c');
48 insert_symtbl("string constant", str);
49 memset(str, '\0', sizeof(str));
50 chtostr(inputStr[i], str, "special character");
51 }
52
53 else if (strchr("+-*/%=!&|^", inputStr[i])!=NULL) {
54 j = 0;
55 concatStr(inputStr, str, &i, &j, 's');
56
57 if ((strcmp(str, "+")==0 || strcmp(str, "-") == 0)&&(isdigit(inputStr[i+1]))) {
58 i++;
59 concatStr(inputStr, str, &i, &j, 'n');
60 insert_symtbl("numeric constant", str);
61 memset(str, '\0', sizeof(str));
62 continue;
63 }
64
65 if (strcmp(str, "=") == 0) {
66 insert_symtbl("assign symbol", str); i++;
67 memset(str, '\0', sizeof(str));
68 continue;
69 }
70
71 for (k = 0; k < 32; k++) {
72 if (strcmp(operator[k], str) == 0) {
73 insert_symtbl("operator", str);
74 break;
75 }
76 }
77 }
78
79 else if(inputStr[i] == '<' || inputStr[i] == '>') {
80 j = 0;
81 concatStr(inputStr, str, &i, &j, 's');
82 for (k = 0; k < 32; k++) {
83 if (strcmp(operator[k], str) == 0 && predirFlag == 0) {
84 insert_symtbl("operator", str); i++;
85 break;
86 }
87 }
88 if (k == 32 && predirFlag == 1 && inputStr[i] == '>') predirFlag = 0;
89 if (k == 32) chtostr(inputStr[i], str, "separate symbol");
90 }
91
92 else if (strchr(".,;:(){}", inputStr[i]) != NULL) {
93 chtostr(inputStr[i], str, "separate symbol");
94 }
95
96 else {
97 chtostr(inputStr[i], str, "special character");
98 }
99
100 memset(str, '\0', sizeof(str));
101 }
102 }
103 memset(inputStr, '\0', sizeof(inputStr));
104 }
1void isIdenResv(char token[]) {
2 int i;
3 for (i = 0; i < 32; i++) {
4 if (strcmp(reserved[i], token)==0) {
5 insert_symtbl("reserved words", token);
6 return;
7 }
8 }
9 insert_symtbl("identifier", token);
10 return;
11}
/*
 * Copy one run of characters from src into dest, advancing both indices.
 *
 * src:     NUL-terminated input line.
 * dest:    output buffer; must have room for the copied run plus a NUL.
 * srcIdx:  in/out read position in src.
 * destIdx: in/out write position in dest.
 * op:      which kind of run to collect:
 *   'a'  letters and digits;
 *   'c'  everything up to (not including) the next '"' or the end of src;
 *   'n'  digits, '.', 'e', and a '+'/'-' only directly after an 'e';
 *   's'  punctuation, stopping at ',' or at a quote character.
 *
 * For 'a', 'n' and 's' *srcIdx is left on the LAST character consumed (the
 * caller's loop increment then moves past it); for 'c' it is left on the
 * terminating '"' (or NUL). dest is always NUL-terminated at *destIdx on
 * return. Any other op copies nothing.
 */
void concatStr(char src[], char dest[], int *srcIdx, int *destIdx, char op) {
    switch (op) {
    case 'a':
        /* Cast to unsigned char: <ctype.h> is undefined for negative values. */
        while (isalnum((unsigned char)src[*srcIdx])) {
            dest[(*destIdx)++] = src[(*srcIdx)++];
        }
        (*srcIdx)--;
        break;
    case 'c':
        /* Also stop at NUL so an unterminated literal cannot run off src. */
        while (src[*srcIdx] != '\0' && src[*srcIdx] != '"') {
            dest[(*destIdx)++] = src[(*srcIdx)++];
        }
        break;
    case 'n':
        for (;;) {
            char c = src[*srcIdx];
            /* A sign belongs to the number only as part of an exponent;
             * otherwise "1+2" would be swallowed as a single constant. */
            int exponent_sign = (c == '+' || c == '-') && *destIdx > 0
                                && dest[*destIdx - 1] == 'e';
            if (!(isdigit((unsigned char)c) || c == '.' || c == 'e' || exponent_sign)) {
                break;
            }
            dest[(*destIdx)++] = src[(*srcIdx)++];
        }
        (*srcIdx)--;
        break;
    case 's':
        /* The quote literal in the original condition was unterminated;
         * both quote characters are excluded so an operator run never
         * absorbs the start of a character or string literal. */
        while (ispunct((unsigned char)src[*srcIdx]) && src[*srcIdx] != ','
               && src[*srcIdx] != '\'' && src[*srcIdx] != '"') {
            dest[(*destIdx)++] = src[(*srcIdx)++];
        }
        (*srcIdx)--;
        break;
    default:
        break;
    }
    /* Terminate explicitly instead of relying on dest being pre-zeroed. */
    dest[*destIdx] = '\0';
}
28
29
/*
 * Record a single character as a token of the given type.
 *
 * src:  the character to record.
 * dest: scratch buffer with room for at least two chars; it holds the
 *       one-character string during the insert_symtbl() call and is an
 *       empty string again on return.
 * type: token type label passed through to insert_symtbl().
 *
 * dest is an array parameter, i.e. a pointer, so sizeof(dest) is the size
 * of a pointer rather than of the caller's buffer. The two bytes used are
 * therefore written and cleared explicitly.
 */
void chtostr(char src, char dest[], char type[]) {
    dest[0] = src;
    dest[1] = '\0';
    insert_symtbl(type, dest);
    dest[0] = '\0';
}
1void insert_symtbl(char type[], char token[]) {
2 insert_tknlst("", token);
3
4 if (strstr(type, "constant")!=NULL) {
5 insert_contbl(type, token);
6 }
7
8 strcpy_s(symbolTable[idx_symtbl].type, sizeof(symbolTable[idx_symtbl].type), type);
9 strcpy_s(symbolTable[idx_symtbl].token, sizeof(symbolTable[idx_symtbl].token), token);
10 idx_symtbl++;
11}
12
13void insert_tknlst(char type[], char token[]) {
14 strcpy_s(tokenList[idx_tknlst].type, sizeof(tokenList[idx_tknlst].type), type);
15 strcpy_s(tokenList[idx_tknlst].token, sizeof(tokenList[idx_tknlst].token), token);
16 idx_tknlst++;
17}
18
19void insert_contbl(char type[], char token[]) {
20 strcpy_s(constantTable[idx_contbl].type, sizeof(constantTable[idx_contbl].type), type);
21 strcpy_s(constantTable[idx_contbl].token, sizeof(constantTable[idx_contbl].token), token);
22 idx_contbl++;
23}
This is not perfect code. There are more cases to handle, and the code is not optimized. But it made me realise why I have to learn more (including regex).
Comments