490 阿裏雲技術社區[雲棲]

一個簡單的C語言詞法分析與語法分析器

詞法分析
可識別內容:
標識符：id
數字：num
關鍵字：int,char,if,else,while,do,for
標號：, ， . ， ;
算術運算符號：=，+，-，*，/，&，!，|，&&，||
關係運算符：<，<=，>=，>，==，!=
注釋：//

內碼定義：
單個符號，如{，+，*，> 等，均使用其ascii碼做內碼，占雙或多個字節的符號（包括保留字，標號，數字，運算符等）為其取名如下：
enum { END=0,INT,CHAR,IF,ELSE,WHILE=5,
DO,FOR,ARGAND,ARGOR,NUM=10,
ID,LESSEQUAL,EQUAL,GREATEQUAL,NOTEQUAL=15 };
其中NUM代表數字，ID代表標識符.

測試程序1-1的詞法分析結果如下：

123 {
11 X
61 =
10 12
43 +
43 +
11 b
47 /
10 13
45 -
10 5
42 *
10 4
42 *
10 9
59 ;
11 Y
61 =

10 4
42 *
10 1024
59 ;
3 if
40 (
11 X
14 >=
11 Y
41 )
123 {
3 if 40 (
11 i
13 ==
11 i

41 )
123 {
11 X
61 =
11 Y
125 }
125 }
59 ;
59 ;
59 ;
59 ;
5 while
40 (
11 X
60 <

11 Y
41 )
123 {
11 X
61 =
11 X
43 +
10 1
59 ;
125 }
125 }

語法分析
C語言子集，可支持
語句塊，語句，條件語句，While循環語句，賦值語句，基本算術表達式等。例如：
{
// Comment Supported : This is only a Test ^_^

X = 12 + b / 13 - 5 * 4 * 9; // A AssignmentStatement
Y = 4 * 1024;
if( X >= Y){
if( i == i){   // This is nested if Statement
X=Y;
}
}
;;;;      // This is Null Statement
while( X < Y){   // This is while Statement
X = X +1;
}
}
測試程序1-1

支持錯誤檢測，如將上面例子中X = 12 + b / 13 - 5 * 4 * 9;
故意修改為：X = 12 ++ b / 13 - 5 * 4 * 9; 則會出現如下錯誤提示，指示了出錯行數和行內偏移位置：

規則如下：
<StatementBlock> ::= '{'<StatementSequence>'}'
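<StatementSequence> ::= {<NullStatement>|<CommonStatement>|<VariantStatement>}
<NullStatement> ::= ';'
<CommonStatement> ::= <AssignmentStatement>';'
<VariantStatement> ::= <ConditionStatement>|<LoopWhileStatement>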

<AssignmentStatement> ::= ID=<Expression>
<ConditionStatement> ::= if(<Condition>)<StatementBlock>
<LoopWhileStatement> ::= while(<Condition>)<StatementBlock>

<Condition> ::= <Expression><RelationOperator><Expression>
<Expression> ::= <Item>{+<Item>|-<Item>}
<Item> ::= <Factor>{*<Factor>|/<Factor>}
<Factor> ::= ID|NUM|(<Expression>)
<RelationOperator> ::= <|<=|>=|>|==|!=

//非終結符的英文定義
void StatementBlock(); //語句塊
void StatementSequence(); //語句串

// XxxxxStatement() 為三類語句
void NullStatement(); //空語句--僅僅含有一個;號
void CommonStatement(); //語句
void VariantStatement(); //變種語句--包括 if(){},while(){},它們都不以;結尾

// 下面的屬於CommonStatement
void AssignmentStatement(); //賦值語句

// 下面兩種屬於VariantStatement
void ConditionStatement(); //條件語句
void LoopWhileStatement(); //while循環語句

void Condition();   //條件
void Expression();   //表達式
void Item();    //項
void Factor();    //因子
void RelationOperator();  //關係運算符

不能支持的主要方面：函數調用的識別，逗號表達式，for循環，switch語句。

詞法分析：

// LexAly.cpp :C子集詞法分析程序

/**//*

支持內容:

標識符:id

關鍵字: int,char,if,else,while,do,for

標號: ,, ., ;

算術運算符號: =,+,-,&,!,|,&&,||

全局字符串:

instr 記錄待解析的字符串

string 存貯當前被解析到的ID

接口:

gettoken();

Sample:

輸入:

instr =

for(i=0;i<10;i++){

j=i+10;

printf("%d",j);

}

輸出:

for

(

……

}

注意:

要記得處理程序中未顯示出來的符號，如空白(' ')，回車('\n')，製表符('\t')

*/

#include "stdafx.h"

#include <ctype.h>

#include <stdlib.h>

#include <string.h>

#include "Constant.h"

extern void grammar_check();

// 注意：這裏累計數目(最大值)不能大於32 (10 0000B)TOKEN

//enum {END=0,INT=1,CHAR,IF,ELSE,WHILE,DO,FOR,ARGAND,ARGOR,NUM,ID} ;

/*
 * Spelling of each named token, indexed by its token code (END=0 through
 * NOTEQUAL=15). NUM and ID have no fixed spelling, so their rows are empty;
 * their text is taken from the global `string` instead. Row 16 is an empty
 * trailing entry that corresponds to no token code.
 */
char index[][20] = {
    {"END OF File"}, /*  0 END        */
    {"int"},         /*  1 INT        */
    {"char"},        /*  2 CHAR       */
    {"if"},          /*  3 IF         */
    {"else"},        /*  4 ELSE       */
    {"while"},       /*  5 WHILE      */
    {"do"},          /*  6 DO         */
    {"for"},         /*  7 FOR        */
    {"&&"},          /*  8 ARGAND     */
    {"||"},          /*  9 ARGOR      */
    {""},            /* 10 NUM        */
    {""},            /* 11 ID         */
    {"<="},          /* 12 LESSEQUAL  */
    {"=="},          /* 13 EQUAL      */
    {">="},          /* 14 GREATEQUAL */
    {"!="},          /* 15 NOTEQUAL   */
    {""}             /* 16 (unused)   */
};

/* Source text being analysed, stored as a NUL-terminated string. */
char input[10000] = {0};

/* Read cursor of the lexer; starts at the beginning of input. */
char *instr = input;

/* Constant pointer to the first character of input. */
char *const start_of_instr = input;

// string holds the text (identifier, number, ...) most recently read by gettoken

// sym holds the code of the token most recently returned by gettoken

// current_line holds the current line number

char string[MAX_INDENT];

int sym;

int current_line = 1;

// NOTE(review): presumably the position at which the current line begins;
// it is not assigned anywhere in the visible code -- confirm against its users.
int start_pos_of_current_line;

char *strstart; // helper used while recognising NUM and ID tokens

// Returns the code of the next token and records it in the global sym.
int gettoken();

// Scanner that gettoken() delegates to.
int _gettoken();

// Prints a lexical-error message with the current line number and exits.
void error(char *cur);

// Copies one line of the input buffer into in_buf and returns in_buf.
char* getlinestring(int line,char *in_buf);

// Increments current_line and returns the new value.
int nextline();

// Returns current_line.
int getline();

// Returns the read cursor instr converted to int.
int getcurrentpos();

int nextline() ...{ return ++current_line; }

int getline() ...{ return current_line; }

int getcurrentpos() ...{ return (int)instr; }

char* getlinestring(int line,char *in_buf)

...{

char * t = input;

int i = 1;

while(*t != 0 && i < line )...{

if( *t==' ' ) i++;

t++;

}

int len = 0;

while ( *t != ' ' )...{

in_buf[len] = *t;

len++;

t++;

}

in_buf[len] = 0;

return in_buf;

}

/*
 * Report a lexical error at the current line and terminate the program.
 *
 * cur  not used by this function (presumably the offending input position;
 *      kept so existing callers keep compiling).
 *
 * Does not return: the process exits with a failure status so that callers
 * of the program can tell the analysis failed.
 */
void error(char *cur)
{
    (void)cur;

    printf("Spell Error found at line %d\n", getline());

    exit(EXIT_FAILURE);
}

//語法分析

int main_grammar(char *filename)

...{

int i;

FILE *f;

if(!(f=fopen(filename,"r")))...{

printf("Fail to open source file %s! ",filename);

exit(0);

}

int k=0;

char c;

while((c=fgetc(f))!=EOF)

...{

input[k]=c;

k++;

}

input[k] = 0;

//打印出程序

printf("%s ",start_of_instr);

//開始語法檢查

grammar_check();

printf("Success! ");

return 0;

}

// 詞法分析

int main_spell(char *filename)

...{

int i;

FILE *f;

if(!(f=fopen(filename,"r")))...{

printf("Fail to open source file %s! ",filename);

exit(0);

}

int k=0;

char c;

while((c=fgetc(f))!=EOF)

...{

input[k]=c;

k++;

}

input[k] = 0;

printf("%s ",start_of_instr);

while((i=gettoken())!=END)

...{

if(i == ID)...{

printf("%d %s ",i,string);

continue;

}

if(i == NUM)...{

printf("%d %s ",i,string);

continue;

}

if(i<20)...{

printf("%d %s ",i,index[i]);

}else...{

printf("%d %c ",i,i);

}

return 0;

}

int gettoken()

...{

int i= (sym = _gettoken());

#if 0

if(i == ID)...{

printf("%s",string);

}

if(i == NUM)...{

printf("%s",string);

}

if(i<20)...{

printf("%s",index[i]);

}else...{

printf("%c",i);

}

#endif

return sym;

}

int _gettoken()

...{

char *cp = instr;

for(;;)...{

if( *instr == 0)

return END;

/**//*

if( 可能讀入的字符 > 當前可用緩衝區大小 )

擴展緩衝區

//int,char,if,else,while,do,for

switch ( *instr )

...{

case 'i':

if( instr[1] == 'f' && notda(instr[2]) )

...{

instr+=2;return IF;

}

if( instr[1] == 'n' && instr[2] == 't' && notda(instr[3]) )

...{

instr+=3; return INT;

}

// not a keyword. but an id.

strstart = instr;

instr++;

goto id_label;

case 'c':

if( instr[1] == 'h' && instr[2] == 'a' && instr[3] == 'r' && notda(instr[4]) )

...{instr+=4;return CHAR; }

strstart = instr;

instr++;

goto id_label;

break;

case 'e':

if( instr[1] == 'l' && instr[2] == 's' && instr[3] == 'e' && notda(instr[4]) )

...{instr+=4;return ELSE; }

strstart = instr;

instr++;

goto id_label;

break;

case 'w':

if( instr[1] == 'h' && instr[2] == 'i' && instr[3] == 'l' && instr[4] == 'e' && notda(instr[5]) )

...{instr+=5;return WHILE; }

strstart = instr;

instr++;

goto id_label;

case 'd':

if( instr[1] == 'o' && notda(instr[4]) )

...{instr+=2;return DO; }

strstart = instr;

instr++;

goto id_label;

case 'f':

if( instr[1] == 'o' && instr[2] == 'r' && notda(instr[3]) )

...{instr+=3;return FOR; }

strstart = instr;

instr++;

goto id_label;

// deal with IDs.

// EXCLUDE:i,c,d,e,w,f

case 'a': ; case 'b': ;

case 'g': ; case 'h': ;

case 'j': ; case 'k': ; case 'l': ;

case 'm': ; case 'n': ; case 'o': ;

case 'p': ; case 'q': ; case 'r': ;

case 's': ; case 't': ; case 'u': ;

case 'v': ; case 'x': ;

case 'y': ; case 'z': ;

case 'A': ; case 'B': ;

case 'C': ; case 'D': ; case 'E': ;

case 'F': ; case 'G': ; case 'H': ;

case 'I': ; case 'J': ; case 'K': ;

case 'L': ; case 'M': ; case 'N': ;

case 'O': ; case 'P': ; case 'Q': ;

case 'R': ; case 'S': ; case 'T': ;

case 'U': ; case 'V': ; case 'W': ;

case 'X': ; case 'Y': ; case 'Z': ;

strstart = instr;

instr++;

goto id_label;

case '0': ;

case '1': ; case '2': ; case '3': ;

case '4': ; case '5': ; case '6': ;

case '7': ; case '8': ; case '9': ;

strstart = instr;

instr++;

goto num_label;

最後更新：2017-04-01 13:37:07

一個簡單的C語言詞法分析與語法分析器

上一篇： PostgreSQL VS Oracle OLTP 的測試方法 - 2

下一篇： OceanBase如何解決支付寶數據庫的高一致性

相關內容

熱門內容

最新內容