- Published on
[2019-1 Compiler] LexAnalyzer
- Authors
- Name
- 신주용
First assignment in Compiler class was making my own version of lexical analyzer. It would be fast and easy if I was familiar with regex.
I assigned all these words one by one, but there must be easy way then I did...
char reserved[32][10] = {
"asm","auto","break","case","char","const","continue","default", "do",\
"double","else","extern","float","for","goto","if","int","long","register",\
"return","short","signed","sizeof","static","struct","switch","typedef",\
"union","unsigned","void","volatile","while"};
char pre_dir[14][10] = {
"define","error","import","undef","elif", "if", "include", "using",\
"else", "ifdef", "line", "endif","ifndef", "pragma"};
char operator[][4] = {
"+", "-", "*", "/", "%", "++", "--","==", "!=", ">", "<", ">=", "<=",\
"!", "&&", "||", "~", "&", "|", "^","<<", ">>", "+=", "-=", "*=", "/=",\
"%=", "&=", "|=", "^=", "<<=", ">>="};
Main function starts from here.
int main(void) {
FILE *fp, *fo;
int i, j, k, predirFlag = 0;
char inputStr[255] = "";
char str[255] = "";
Declare all variables used in this programme.
while (fgets(inputStr, sizeof(inputStr), fp)) {
for (i = 0; i < strlen(inputStr); i++) {
if (inputStr[i] == ' ') i++;
if (inputStr[i] == '\n') break;
if (isalpha(inputStr[i])) {
j = 0;
concatStr(inputStr, str, &i, &j, 'a');
isIdenResv(str);
memset(str, '\0', sizeof(str));
}
else if (isdigit(inputStr[i])) {
j = 0;
concatStr(inputStr, str, &i, &j, 'n');
insert_symtbl("numeric constant", str);
memset(str, '\0', sizeof(str));
}
else if (ispunct(inputStr[i])) {
if (inputStr[i] == '#') {
j = 0;
chtostr(inputStr[i++], str, "special character");
concatStr(inputStr, str, &i, &j, 'a');
for (k = 0; k < 14; k++) {
if (strcmp(pre_dir[k], str) == 0) {
insert_symtbl("preprocessor directives", str);
predirFlag = 1;
break;
}
}
}
else if (inputStr[i] == '\'') {
chtostr(inputStr[i++], str, "special character");
chtostr(inputStr[i++], str, "character constant");
chtostr(inputStr[i], str, "special character");
}
else if (inputStr[i] == '"') {
chtostr(inputStr[i++], str, "special character");
j = 0;
concatStr(inputStr, str, &i, &j, 'c');
insert_symtbl("string constant", str);
memset(str, '\0', sizeof(str));
chtostr(inputStr[i], str, "special character");
}
else if (strchr("+-*/%=!&|^", inputStr[i])!=NULL) {
j = 0;
concatStr(inputStr, str, &i, &j, 's');
if ((strcmp(str, "+")==0 || strcmp(str, "-") == 0)&&(isdigit(inputStr[i+1]))) {
i++;
concatStr(inputStr, str, &i, &j, 'n');
insert_symtbl("numeric constant", str);
memset(str, '\0', sizeof(str));
continue;
}
if (strcmp(str, "=") == 0) {
insert_symtbl("assign symbol", str); i++;
memset(str, '\0', sizeof(str));
continue;
}
for (k = 0; k < 32; k++) {
if (strcmp(operator[k], str) == 0) {
insert_symtbl("operator", str);
break;
}
}
}
else if(inputStr[i] == '<' || inputStr[i] == '>') {
j = 0;
concatStr(inputStr, str, &i, &j, 's');
for (k = 0; k < 32; k++) {
if (strcmp(operator[k], str) == 0 && predirFlag == 0) {
insert_symtbl("operator", str); i++;
break;
}
}
if (k == 32 && predirFlag == 1 && inputStr[i] == '>') predirFlag = 0;
if (k == 32) chtostr(inputStr[i], str, "separate symbol");
}
else if (strchr(".,;:(){}", inputStr[i]) != NULL) {
chtostr(inputStr[i], str, "separate symbol");
}
else {
chtostr(inputStr[i], str, "special character");
}
memset(str, '\0', sizeof(str));
}
}
memset(inputStr, '\0', sizeof(inputStr));
}
void isIdenResv(char token[]) {
int i;
for (i = 0; i < 32; i++) {
if (strcmp(reserved[i], token)==0) {
insert_symtbl("reserved words", token);
return;
}
}
insert_symtbl("identifier", token);
return;
}
void concatStr(char src[], char dest[], int *srcIdx, int *destIdx, char op) {
switch (op) {
case 'a':
while (isalnum(src[*srcIdx])) {
dest[(*destIdx)++] += src[(*srcIdx)++];
}
(*srcIdx)--;
break;
case 'c':
while (src[*srcIdx] != '\"') {
dest[(*destIdx)++] += src[(*srcIdx)++];
}
break;
case 'n':
while (isdigit(src[*srcIdx]) || src[*srcIdx] == '.' || src[*srcIdx] == 'e' || src[*srcIdx] == '+' || src[*srcIdx] == '-') {
dest[(*destIdx)++] += src[(*srcIdx)++];
}
(*srcIdx)--;
break;
case 's':
while (ispunct(src[*srcIdx]) && src[*srcIdx]!=',' && src[*srcIdx]!=') {
dest[(*destIdx)++] += src[(*srcIdx)++];
}
(*srcIdx)--;
break;
}
}
void chtostr(char src, char dest[], char type[]) {
sprintf_s(dest, sizeof(dest), "%c", src);
insert_symtbl(type, dest);
memset(dest, '\0', sizeof(dest));
}
void insert_symtbl(char type[], char token[]) {
insert_tknlst("", token);
if (strstr(type, "constant")!=NULL) {
insert_contbl(type, token);
}
strcpy_s(symbolTable[idx_symtbl].type, sizeof(symbolTable[idx_symtbl].type), type);
strcpy_s(symbolTable[idx_symtbl].token, sizeof(symbolTable[idx_symtbl].token), token);
idx_symtbl++;
}
void insert_tknlst(char type[], char token[]) {
strcpy_s(tokenList[idx_tknlst].type, sizeof(tokenList[idx_tknlst].type), type);
strcpy_s(tokenList[idx_tknlst].token, sizeof(tokenList[idx_tknlst].token), token);
idx_tknlst++;
}
void insert_contbl(char type[], char token[]) {
strcpy_s(constantTable[idx_contbl].type, sizeof(constantTable[idx_contbl].type), type);
strcpy_s(constantTable[idx_contbl].token, sizeof(constantTable[idx_contbl].token), token);
idx_contbl++;
}
This is not a perfect code. There's more cases to handle and the code is also not optimized. But it makes me realised why I have to learn more (including regex).