Parsing Programm with C

 Programming language class에서 제작한 문법에 따른 parser이다. Robert W. sebesta의 Concepts of Programming Languages 교재에 나온 코드를 참고하였다.

간단한 arithmetic 연산을 수행하고, IDENT와 CONST, OP를 구분하여 lexeme으로 받아들이고, 선언된 변수에 대해 symbol table을 만들어 값을 저장하는 코드이다.

연산의 경우, stmt를 IDENT = expr으로 나누고, expr-term-factor- lex 순으로 함수가 호출되며 연산을 진행한다. 연산자 우선순위가 높은 것이 나중에 파싱 되어야 하므로 expr가 Add/Sub, term이 Mult/Div를 연산하고, factor는 left parenthesis에 대해 expr을 다시 호출하거나 IDENT, CONST 값을 lex를 호출하여 얻는다.

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <string.h>

int charClass;
char lexeme[100]; // token_string
char nextChar;
int lexLen;
int token;
int nextToken; //token_type
FILE* in_fp, * fopen(); //txt file input

/* Function declarations */
void addChar();
void getChar();
void getNonBlank();
int lookup(char ch);
int lex();

void stmt();
void expr();
void term();
void factor();
void error();

/* Character classes */
#define LETTER 0 // IDENT
#define DIGIT 1 // CONST
#define UNKNOWN 99

/* Token codes */
#define INT_LIT 10 // CONST
#define IDENT 11 // IDENT
#define ASSIGN_OP 20 // :=
#define ADD_OP 21 // +
#define SUB_OP 22 // -
#define MULT_OP 23 // *
#define DIV_OP 24 // /
#define LEFT_PAREN 25 // (
#define RIGHT_PAREN 26 // )
#define NEW_LINE 27 // ;

/****************************************************************/

int operatortop = 0; //top pointer for operator stack
int operandtop = 0; // top pointer for operand stack
int CONSTcount = 0; // number of CONST
int IDENTcount = 0; // number of IDENT
int OPcount = 0; // number of OP

char operatorStack[20]; // OP stack : + - * /
int constStack[20]; // CONST stack
char* operandStack[20]; // IDENT stack

typedef struct IDENTIFIER { // IDENT value and name store.
    int value; // IDENT value
    char* name; // IDENT name
}IDENTIFIER;

IDENTIFIER* IDstore[20]; // Symbol table for IDENT
int IDtop = 0; // top pointer for symbol table IDstore
int buff = 0; // buffer for operation values
int parseResult = 0; // switch index for result

/****************************************************************/
//main function

int main(int argc, char*argv[]) {
    /* Open the input data file and process its contents */
    FILE* fp = fopen(argv[1], "r"); // execute by command >>program.exe input.txt
    in_fp = fp;
    if (in_fp == NULL)
        printf("ERROR - cannot open file \n");
    else {
        getChar();
        do {
            stmt();
        } while (!feof(in_fp));
    }
    printf("Result ==> ");
    for(int i=0;i<IDtop;i++){
        printf("%s: %d; ",IDstore[i]->name, IDstore[i]->value);
    } //print out the result of parseing.
    printf("\n");
    for(int i=0;i<IDtop;i++){
        free(IDstore[i]); // Free each allocated struct
    }
    for (int i = 0; i < operandtop; i++) {
        free(operandStack[i]); // Free each allocated string
    }
    printf("Parsing complete. Press Enter to continue...\n");
    getchar();  // Wait for Enter key

    return 0;
}

/****************************************************************/
//stmt() function : parse the input program into <stmt> -> <ident><Assign_op><expr>

void stmt() {
    //printf("Enter <stmt>\n");
    parseResult = 0; // switch value for parse result

    IDENTcount = 0; CONSTcount = 0; OPcount = 0; buff = 0;operandtop = 0; operatortop = 0; // initialize the count & top values when new line started.
    do {
        lex();
    } while (nextToken!=ASSIGN_OP);

    IDENTIFIER* ID = (IDENTIFIER*)malloc(sizeof(IDENTIFIER)); // new IDENT
    ID->name = operandStack[operandtop-1];
    expr();
    ID->value = atoi(operandStack[--operandtop]);
    if (IDtop < 20) IDstore[IDtop++] = ID; // add it to symbol table
    printf("ID: %d; CONST: %d; OP: %d;\n", IDENTcount, CONSTcount, OPcount);

    switch (parseResult) {
    case 0:
        printf("OK\n");
        break;
    case 1:
        printf("WARNING\n");
        break;
    case 2:
        printf("ERROR\n");
        break;
    }
    //printf("Exit <stmt>\n\n");
}

/****************************************************************/
//expr() function : parses strings with add/sub operation : <expr> -> <term>{ (+| -) < term > }

void expr() {
    //printf("Enter <expr>\n");
    /* Parse the first term */
    term();
    /* As long as the next token is + or -, get
    the next token and parse the next term */
    while (nextToken == ADD_OP || nextToken == SUB_OP) {
        lex();
        term();
        if (operatortop > 0 && operatorStack[operatortop-1] == '+') {
            if (!((operandStack[operandtop - 2][0] >= 48 )&& (operandStack[operandtop - 2][0] <= 57))) { // check the operand is IDENT or CONST
                int i = 0;
                for (i = 0; i < IDtop; i++) { // if it's IDENT, find it's value in Symbol table
                    if (strcmp(IDstore[i]->name, operandStack[operandtop - 2]) == 0) {
                        sprintf(operandStack[operandtop - 2], "%d", IDstore[i]->value); // if the IDENT found in symbol table, get it's value as string.
                        break;
                    }
                }
            }
            if (!(operandStack[operandtop-1][0] >= 48 && operandStack[operandtop-1][0] <= 57)) {
                int i = 0;
                for (i = 0; i < IDtop; i++) {
                    if (strcmp(IDstore[i]->name, operandStack[operandtop - 1]) == 0) {
                        sprintf(operandStack[operandtop - 1], "%d", IDstore[i]->value);
                        break;
                    }
                }
            }
            int midbuff = 0;
            midbuff = atoi(operandStack[operandtop - 2]) + atoi(operandStack[operandtop - 1]);
            operandtop -= 2;
            operatortop -= 1;
            sprintf(operandStack[operandtop++], "%d", midbuff);
        }
        if (operatortop > 0 && operatorStack[operatortop-1] == '-') {
            if (!(operandStack[operandtop - 2][0] >= 48 && operandStack[operandtop - 2][0] <= 57)) {
                int i = 0;
                for (i = 0; i < IDtop; i++) {
                    if (strcmp(IDstore[i]->name, operandStack[operandtop - 2]) == 0) {
                        sprintf(operandStack[operandtop - 2], "%d", IDstore[i]->value);
                        break;
                    }
                }
            }
            if (!(operandStack[operandtop-1][0] >= 48 && operandStack[operandtop-1][0] <= 57)) {
                int i = 0;
                for (i = 0; i < IDtop; i++) {
                    if (strcmp(IDstore[i]->name, operandStack[operandtop-1]) == 0) {
                        sprintf(operandStack[operandtop-1], "%d", IDstore[i]->value);
                        break;
                    }
                }
            }
            int midbuff = 0;
            midbuff = atoi(operandStack[operandtop - 2]) - atoi(operandStack[operandtop - 1]);
            operandtop -= 2;
            operatortop -= 1;
            sprintf(operandStack[operandtop++], "%d", midbuff);
        }
        //printf("%d\n", buff);
        //term();
    }
    //printf("Exit <expr>\n");
} /* End of function expr */

/****************************************************************/
//term() function : parse string with mult/div operation : <term> -> <factor> {(* | /) <factor>)

void term() {
    //printf("Enter <term>\n");
    /* Parse the first factor */
    factor();
    /* As long as the next token is * or /, get the
    next token and parse the next factor */
    while (nextToken == MULT_OP || nextToken == DIV_OP) {
        lex();
        //factor();
        if (operatortop > 0 && operatorStack[operatortop-1] == '*') {
            if (!(operandStack[operandtop - 2][0] >= 48 && operandStack[operandtop -2][0] <= 57)) { // check the operand is IDENT or CONST
                for (int i = 0; i < IDtop; i++) { // if it's IDENT, find it's value in Symbol table
                    if (strcmp(IDstore[i]->name, operandStack[operandtop - 2]) == 0) {
                        sprintf(operandStack[operandtop - 2], "%d", IDstore[i]->value);
                        break;
                    }
                }
            }
            if (!(operandStack[operandtop-1][0] >= 48 && operandStack[operandtop-1][0] <= 57)) {
                for (int i = 0; i < IDtop; i++) {
                    if (strcmp(IDstore[i]->name, operandStack[operandtop-1]) == 0) {
                        sprintf(operandStack[operandtop-1], "%d", IDstore[i]->value);
                        break;
                    }
                }
            }
            int midbuff = 0;
            midbuff = atoi(operandStack[operandtop - 2]) * atoi(operandStack[operandtop-1]);
            operandtop -= 2;
            operatortop -= 1;
            sprintf(operandStack[operandtop++], "%d", midbuff);
        }
        if (operatortop > 0 && operatorStack[operatortop-1] == '/') {
            if (!(operandStack[operandtop - 2][0] >= 48 && operandStack[operandtop - 2][0] <= 57)) {
                for (int i = 0; i < IDtop; i++) {
                    if (strcmp(IDstore[i]->name, operandStack[operandtop - 2]) == 0) {
                        sprintf(operandStack[operandtop - 2], "%d", IDstore[i]->value);
                        break;
                    }
                }
            }
            if (!(operandStack[operandtop-1][0] >= 48 && operandStack[operandtop-1][0] <= 57)) {
                for (int i = 0; i < IDtop; i++) {
                    if (strcmp(IDstore[i]->name, operandStack[operandtop-1]) == 0) {
                        sprintf(operandStack[operandtop-1], "%d", IDstore[i]->value);
                        break;
                    }
                }
            }
            int midbuff = 0;
            midbuff = atoi(operandStack[operandtop - 2]) / atoi(operandStack[operandtop-1]);
            operandtop -= 2;
            operatortop -= 1;
            sprintf(operandStack[operandtop++], "%d", midbuff);
        }
        //printf("%d\n", buff);
        factor();
    }
    //printf("Exit <term>\n");
} /* End of function term */

/****************************************************************/
//factor() function : Parse string with parenthesis or IDENT or CONST : <factor> -> ID | CONST | ( <expr> )

void factor() {
    //printf("Enter <factor>\n");
    /* Determine which RHS */
    if (nextToken == LEFT_PAREN) { // if parenthesis comes after an operator, have to detect forward
        //printf("Enter <left parenthesis>\n");
        lex();
        expr();
        if (nextToken == RIGHT_PAREN) {
            //printf("Enter <right parenthesis>\n"); // lex() is done in outside of parenthesis
        }
        else
            error();
    }

    lex();
    if (nextToken == IDENT || nextToken == INT_LIT)
        /* Get the next token */
        lex();
    /* If the RHS is ( <expr>), call lex to pass over the left parenthesis, call expr, and check for the right parenthesis */

    else {
        if (nextToken == LEFT_PAREN) {
            //printf("Enter <left parenthesis>\n");
            lex();
            expr();
            if (nextToken == RIGHT_PAREN) {
                //printf("Enter <right parenthesis>\n");
                lex();
            }
            else
                error();
        }
        else if (nextToken == RIGHT_PAREN) {
            //printf("Exit <factor>\n");
            return;
        }
        else if (nextToken == NEW_LINE) {
            //printf("Exit <factor>\n");
            return;
        }
        else if (nextToken == MULT_OP || nextToken == DIV_OP) {
            //printf("Exit <factor>\n");
            return;
        }
        else if (nextToken == ADD_OP || nextToken == SUB_OP) {
            //printf("Exit <factor>\n");
            return;
        }
        else error();
    }
    //printf("Exit <factor>\n");
} /* End of function factor */

/*****************************************************/
//lex() function : Lexical analyzer, read the character from input file, and put it into lexeme array

int lex() {
    //printf("Enter <lex>\n");
    lexLen = 0;
    getNonBlank();
    switch (charClass) {
    case LETTER: // parse IDENT
        addChar();
        getChar();
        while (charClass == LETTER || charClass == DIGIT) {
            addChar();
            getChar();
        }
        nextToken = IDENT;
        operandStack[operandtop++] = _strdup(lexeme);
        IDENTcount++;
        break;

    case DIGIT: // parse CONST
        addChar();
        getChar();
        while (charClass == DIGIT) {
            addChar();
            getChar();
        }
        nextToken = INT_LIT;
        operandStack[operandtop++] = _strdup(lexeme);
        CONSTcount++;
        break;

    case UNKNOWN: // parse OP & parenthesis
        lookup(nextChar);
        getChar();
        break;

    case EOF: // if reached EOF. shouldn't be reached normally.
        nextToken = EOF;
        lexeme[0] = 'E';
        lexeme[1] = 'O';
        lexeme[2] = 'F';
        lexeme[3] = 0;
        error();
        break;
    }

    //prinf("Exit <lex>, next token is %s\n", lexeme);
    printf("%s ", lexeme);
    if (nextToken == NEW_LINE) printf("\n");
    return nextToken;
} /* End of function lex */

/*****************************************************/
//lookup() function : lookup operators and parentheses and return the token

int lookup(char ch) {
    switch (ch) {
    case '(':
        addChar();
        nextToken = LEFT_PAREN;
        break;
    case ')':
        addChar();
        nextToken = RIGHT_PAREN;
        break;
    case '+':
        addChar();
        nextToken = ADD_OP;
        operatorStack[operatortop++] = '+';
        OPcount++;
        break;
    case '-':
        addChar();
        nextToken = SUB_OP;
        operatorStack[operatortop++] = '-';
        OPcount++;
        break;
    case '*':
        addChar();
        nextToken = MULT_OP;
        operatorStack[operatortop++] = '*';
        OPcount++;
        break;
    case '/':
        addChar();
        nextToken = DIV_OP;
        operatorStack[operatortop++] = '/';
        OPcount++;
        break;
    case ':':
        addChar();
        nextChar = getc(in_fp); // Assign_Op -> :=
        addChar();
        nextToken = ASSIGN_OP;
        break;
    case ';':
        addChar();
        nextToken = NEW_LINE;
        break;
    default:
        addChar();
        nextToken = EOF;
        break;
    }
    return nextToken;
}

/*****************************************************/
/* addChar - a function to add nextChar to lexeme */

void addChar() {
    if (lexLen <= 98) {
        lexeme[lexLen++] = nextChar;
        lexeme[lexLen] = 0;
    }
    else
        printf("Error - lexeme is too long \n");
}

/*****************************************************/
/* getChar - a function to get the next character of
input and determine its character class */

void getChar() {
    if ((nextChar = getc(in_fp)) != EOF) {
        if (isalpha(nextChar))
            charClass = LETTER;
        else if (isdigit(nextChar))
            charClass = DIGIT;
        else charClass = UNKNOWN;
    }
    else
        charClass = EOF;
}

/*****************************************************/
/* getNonBlank - a function to call getChar until it returns a non-whitespace character */

void getNonBlank() {
    while (isspace(nextChar))
        getChar();
}

/*****************************************************/
// error() function : gives error message and pause the programm.

void error() {
    printf("Wrong input detected!\n");
    printf("Press Enter to continue...\n");
    getchar();  // Wait for Enter key
    return;
}

댓글

이 블로그의 인기 게시물

IIKH Class from Timothy Budd's introduction to OOP

Compiler 9 - Efficient Code generation

Software Engineering 10 - V&V, SOLID principle