简单词法分析器实现
編寫分析器有兩種方法,一種是通過DFA對單詞進行識別,二是通過直接編敲代碼進行識別。
#include<stdio.h> #include<string.h> #include<stdlib.h> //用指針而不是二維數組表示。這樣就不用指定字符串長度,僅僅是不能改動指針所指向字符串的內容 char *key[]={"int","char","float","double","void","const","for","if","else","then","while","switch","break","main","return"}; char buffer[20];//存儲當前識別出的單詞 char *identifier[50]; char *num[50]; int ident_num;//標志符數 int number;//數字數 int judgement_num(char c) {if (c >= '0' && c <= '9') return 0;else if (c == '.') return 1;return -1; } int judge(char c){if(c=='_') return 2;else if(c>='0'&&c<='9') return 1;else if(c>='a'&&c<='z'||c>='A'&&c<='Z') return 0;return -1; } //next_i,next_j分別代表著狀態轉換表move的當前狀態與接收到的字符 //width代表每一狀態可能遇到的狀態數,length代表終態集大小 int DFA(int begin,int move[],int width,int final[],int length,int(*judge)(char)){int len=0,next_i,next_j,tmp;char next_char;memset(buffer,0,sizeof(buffer));next_char=getchar();next_i=begin;while((next_j=judge(next_char))!=-1){tmp=next_i;next_i=move[next_i*width+next_j];if(next_i==-1){printf("move[%d][%d]has not next state\n",tmp,next_j);return -1;}buffer[len++]=next_char;next_char=getchar();}ungetc(next_char,stdin);buffer[len]='\0';for(int i=0;i<length;i++){if(next_i==final[i]) return 0;}return -1; }void is_comment(){char next_char;char tmp[5];memset(tmp,0,sizeof(tmp));tmp[0]=getchar();next_char=getchar();if(next_char=='*'){while((next_char=getchar())!=EOF){if(next_char=='*'&&getchar()=='/') return;}printf("The comment is error\n");}else if(next_char=='/'){while((next_char=getchar())!='\n');}else if(next_char=='='){printf("(/=,_)\n");}else{printf("(/,_)\n");ungetc(next_char,stdin);} } void is_other(){char next_char,c=getchar();if(c=='+'||c=='-'||c=='*'||c=='%'){next_char=getchar();if(next_char=='=') printf("(%c=,_)\n",c);else {printf("(%c,_)\n",c);ungetc(next_char,stdin);}}else if(c=='>'||c=='<'){next_char=getchar();if(next_char=='=') printf("(rlop,%c=)\n",c);else {printf("(rlop,%c)\n",c);ungetc(next_char,stdin);}}else if(c=='!'){next_char=getchar();if(next_char=='=') printf("(!=,_)\n");else {printf("(not,_)\n");ungetc(next_char,stdin);}}else if(c=='|'){next_char=getchar();if (next_char == '|')printf("(or,||)\n");else {ungetc(next_char, stdin);}}else if(c=='&'){next_char=getchar();if (next_char == '&')printf("(and,&&)\n");else {ungetc(next_char, stdin);}}else if(c=='='||c=='('||c==')'||c=='['||c==']'||c==';'||c==','||c=='{'||c=='}'){printf("(%c,_)\n",c);} } void is_digit(){int begin=0;int move[]={1,-1,1,2,2,-1};int final[]={1,2};int result=-1;result=DFA(begin,move,2,final,2,judgement_num);if(result==-1){printf("digit DFA error\n");exit(-1);}else if(result==0){//printf("%s\n",buffer);for(int i=0;i<number;i++){if(strcmp(buffer,num[i])==0){printf("(num,%s)\n",buffer);return;}}num[number]=(char*)malloc(sizeof(buffer));strcpy(num[number++],buffer);printf("(num,%s)\n",buffer);return;} } void is_letter(){int i;int begin=0;int move[] ={1,-1,1,1,1,1};int final[]={1,2};int result=-1;result=DFA(begin,move,3,final,1,judge);if(result==-1){printf("letter DFA error\n");exit(-1);}else if(result==0){int len=sizeof(key)/sizeof(char *);//如為keywordfor(i=0;i<len;i++){if(strcmp(buffer,key[i])==0){printf("(key,%s)\n",key[i]);return;}}//如為標志符for(i=0;i<ident_num;i++){//在當前標志符表中已經存在if(strcmp(buffer,identifier[i])==0){printf("(%id,%s)\n",identifier[i]);return;}}//如不存在,則寫入identifier[ident_num]=(char*)malloc(sizeof(buffer));strcmp(identifier[ident_num++],buffer);printf("(id,%s)\n",buffer);return;} }void init() {ident_num=0;number=0; } void work() {char c;while((c=getchar())!=EOF){ungetc(c,stdin);if(judge(c)==2||judge(c)==0) is_letter();else if(judge(c)==1) is_digit();else if(c=='/') is_comment();else is_other();} }int main() {freopen("in.txt","r",stdin);init();work();return 0; }
本程序採用DFA對單詞進行識別。
DFA的實現方法。大概思想和書上一致,在程序中,則是用二維數組代表狀態轉換矩陣,用一維數組表示終態。
一個詞法編輯要實現的功能主要包含下面幾點:
可以識別標識符、keyword、數字和運算符,對凝視進行過濾。同一時候還能識別出程序錯誤。
使用說明:
本程序的輸入由當前文件夾下的in.txt文件讀取輸入,輸出為一系列二元式#include<stdio.h> #include<string.h> #include<stdlib.h> //用指針而不是二維數組表示。這樣就不用指定字符串長度,僅僅是不能改動指針所指向字符串的內容 char *key[]={"int","char","float","double","void","const","for","if","else","then","while","switch","break","main","return"}; char buffer[20];//存儲當前識別出的單詞 char *identifier[50]; char *num[50]; int ident_num;//標志符數 int number;//數字數 int judgement_num(char c) {if (c >= '0' && c <= '9') return 0;else if (c == '.') return 1;return -1; } int judge(char c){if(c=='_') return 2;else if(c>='0'&&c<='9') return 1;else if(c>='a'&&c<='z'||c>='A'&&c<='Z') return 0;return -1; } //next_i,next_j分別代表著狀態轉換表move的當前狀態與接收到的字符 //width代表每一狀態可能遇到的狀態數,length代表終態集大小 int DFA(int begin,int move[],int width,int final[],int length,int(*judge)(char)){int len=0,next_i,next_j,tmp;char next_char;memset(buffer,0,sizeof(buffer));next_char=getchar();next_i=begin;while((next_j=judge(next_char))!=-1){tmp=next_i;next_i=move[next_i*width+next_j];if(next_i==-1){printf("move[%d][%d]has not next state\n",tmp,next_j);return -1;}buffer[len++]=next_char;next_char=getchar();}ungetc(next_char,stdin);buffer[len]='\0';for(int i=0;i<length;i++){if(next_i==final[i]) return 0;}return -1; }void is_comment(){char next_char;char tmp[5];memset(tmp,0,sizeof(tmp));tmp[0]=getchar();next_char=getchar();if(next_char=='*'){while((next_char=getchar())!=EOF){if(next_char=='*'&&getchar()=='/') return;}printf("The comment is error\n");}else if(next_char=='/'){while((next_char=getchar())!='\n');}else if(next_char=='='){printf("(/=,_)\n");}else{printf("(/,_)\n");ungetc(next_char,stdin);} } void is_other(){char next_char,c=getchar();if(c=='+'||c=='-'||c=='*'||c=='%'){next_char=getchar();if(next_char=='=') printf("(%c=,_)\n",c);else {printf("(%c,_)\n",c);ungetc(next_char,stdin);}}else if(c=='>'||c=='<'){next_char=getchar();if(next_char=='=') printf("(rlop,%c=)\n",c);else {printf("(rlop,%c)\n",c);ungetc(next_char,stdin);}}else if(c=='!'){next_char=getchar();if(next_char=='=') printf("(!=,_)\n");else {printf("(not,_)\n");ungetc(next_char,stdin);}}else if(c=='|'){next_char=getchar();if (next_char == '|')printf("(or,||)\n");else {ungetc(next_char, stdin);}}else if(c=='&'){next_char=getchar();if (next_char == '&')printf("(and,&&)\n");else {ungetc(next_char, stdin);}}else if(c=='='||c=='('||c==')'||c=='['||c==']'||c==';'||c==','||c=='{'||c=='}'){printf("(%c,_)\n",c);} } void is_digit(){int begin=0;int move[]={1,-1,1,2,2,-1};int final[]={1,2};int result=-1;result=DFA(begin,move,2,final,2,judgement_num);if(result==-1){printf("digit DFA error\n");exit(-1);}else if(result==0){//printf("%s\n",buffer);for(int i=0;i<number;i++){if(strcmp(buffer,num[i])==0){printf("(num,%s)\n",buffer);return;}}num[number]=(char*)malloc(sizeof(buffer));strcpy(num[number++],buffer);printf("(num,%s)\n",buffer);return;} } void is_letter(){int i;int begin=0;int move[] ={1,-1,1,1,1,1};int final[]={1,2};int result=-1;result=DFA(begin,move,3,final,1,judge);if(result==-1){printf("letter DFA error\n");exit(-1);}else if(result==0){int len=sizeof(key)/sizeof(char *);//如為keywordfor(i=0;i<len;i++){if(strcmp(buffer,key[i])==0){printf("(key,%s)\n",key[i]);return;}}//如為標志符for(i=0;i<ident_num;i++){//在當前標志符表中已經存在if(strcmp(buffer,identifier[i])==0){printf("(%id,%s)\n",identifier[i]);return;}}//如不存在,則寫入identifier[ident_num]=(char*)malloc(sizeof(buffer));strcmp(identifier[ident_num++],buffer);printf("(id,%s)\n",buffer);return;} }void init() {ident_num=0;number=0; } void work() {char c;while((c=getchar())!=EOF){ungetc(c,stdin);if(judge(c)==2||judge(c)==0) is_letter();else if(judge(c)==1) is_digit();else if(c=='/') is_comment();else is_other();} }int main() {freopen("in.txt","r",stdin);init();work();return 0; }
總結
- 上一篇: 21、Java并发性和多线程-Java中
- 下一篇: 2D缩放