当前位置: 首页 > news >正文

编译原理-词法分析器(DFA,C语言描述,可分析C/C++词法)

 

 

 

 

 

“单词”分类说明

标识符(Identifier):变量名和函数名(字母或下划线开头);

关键字(Keyword):系统保留字;

运算符(Operator): + - * / % = == != < <= > >= 等;

分隔符(Separator): , ; . ' " ( ) [ ] { } // /* */ #等;

常量(Constant): 字符串或字符常量;

注释(Note): 注释不参与编译。

 

 

 

程序清单:

/*

     ========================================================

                        词法分析器V1.0

         (DFA,C语言描述,可分析C/C++词法)

                       Author:Estrong

                          2012.05.10 

    ========================================================

*/

 

/*

说明:

    标识符(Identifier):变量名和函数名(字母或下划线开头);

    关键字(Keyword):具有特定的功能系统保留字,不能移作他用;

    运算符(Operator):   + - * / % = == != < <= > >= 等;

    分隔符(Separator):  , ; . ' " ( ) [ ] { } // /米 米/ #等;

    常量(Constant):字符串或字符常量;

    注释(Note): 注释不参与编译。

*/

#include "stdio.h"

#include "stdlib.h"

#include "string.h"

 

/* Size in bytes of the token buffer declared in LexAnalyse. */
#define MAX_STR 256

/* Number of entries in the Keywords table (loop bound in IsKeyword). */
#define MAX_KEYWORDS 62

/* Number of entries in the Operators table (loop bound in IsOperator). */
#define MAX_OPERATORS 12

/* Number of entries in the Separators table (loop bound in IsSeparator). */
#define MAX_SEPARATORS 15

 

/* DFA states used by LexAnalyse while scanning one token */

#define STA_START 1       /*   START:      no token in progress yet        */

#define STA_IDorKEYWORD 2 /*   IDENTIFIER: inside an identifier or keyword */

#define STA_NUMBER 3      /*   NUMBER:     inside a numeric literal        */

#define STA_NOTE 4        /*   NOTE:       inside a comment                */

#define STA_CONSTANT 5    /*   CONSTANT:   inside a quoted constant        */

#define STA_DONE  6       /*   DONE:       token finished                  */

 

/* Token categories passed to OutputOneDone */

#define TYPE_KEYWORD 1    /*   KEYWORD:    reserved word   */

#define TYPE_IDENTIFIER 2 /*   IDENTIFIER: identifier      */

#define TYPE_NUMBER 3     /*   NUMBER:     number          */

#define TYPE_NOTE 4       /*   NOTE:       comment         */

#define TYPE_CONSTANT 5   /*   CONSTANT:   quoted constant */

#define TYPE_OPERATOR 6   /*   OPERATOR:   operator        */

#define TYPE_SEPARATOR 7  /*   SEPARATOR:  separator       */

#define TYPE_ERROR 8      /*   ERROR:      malformed token */

#define TYPE_UNKNOWN 9    /*   UNKNOWN:    unrecognised    */

#define TYPE_ENDFILE 10   /*   ENDFILE:    end of file     */

 

/* Operator spellings. IsOperator only compares the first character of each entry. */
char *Operators[MAX_OPERATORS] = {"+","-","*","/","%","=","==","!=","<","<=",">",">="};

/* Separator spellings. IsSeparator only compares the first character of each entry. */
char *Separators[MAX_SEPARATORS] ={",",";",".","\'","\"","(",")","[","]","{","}","//","/*","*/","#"};

/* Reserved words; IsKeyword compares a whole token against each entry with strcmp. */
char *Keywords[MAX_KEYWORDS] = {"include","define","auto","bool","break","case","catch","char","class",

                                "const","const_cast","continue","default","delete","do","double",

                                "dynamic_cast","else","enum","explicit","extern","false","float","for",

                                "friend","goto","if","inline","int","long","mutable","namespace","new",

                                "operator","private","protected","public","register","reinterpret_cast",

                                "return","short","signed","sizeof","static","static_cast","struct",

                                "switch","template","this","throw","true","try","typedef","typeid",

                                "typename","union","unsigned","using","virtual","void","volatile","while"};

 

/*
 * Report whether c can start an operator.
 * Returns 1 when c equals the first character of any Operators entry,
 * otherwise 0.
 */
int IsOperator(char c)
{
    int idx = 0;

    while (idx < MAX_OPERATORS) {
        if (c == Operators[idx][0]) {
            return 1;
        }
        idx++;
    }
    return 0;
}

 

/*
 * Report whether c can start a separator.
 * Returns 1 when c equals the first character of any Separators entry,
 * otherwise 0.
 */
int IsSeparator(char c)
{
    char **entry;
    char **end = Separators + MAX_SEPARATORS;

    for (entry = Separators; entry != end; ++entry) {
        if ((*entry)[0] == c) {
            return 1;
        }
    }
    return 0;
}

 

/*
 * Report whether str is a reserved word.
 * Returns 1 when str is exactly equal to an entry of Keywords, otherwise 0.
 */
int IsKeyword(char *str)
{
    int found = 0;
    int k;

    /* Stop scanning as soon as one entry matches. */
    for (k = 0; k < MAX_KEYWORDS && !found; k++) {
        found = (strcmp(Keywords[k], str) == 0);
    }
    return found;
}

 

/* DONE一次输出一次 */

void OutputOneDone(FILE *outf,int type,char *str)

{

    if(IsKeyword(str)==1) type=TYPE_KEYWORD;

    switch(type)

    {

        case TYPE_KEYWORD:    fprintf(outf,"       KEYWORD:     ");break;

        case TYPE_IDENTIFIER: fprintf(outf,"       IDENTIFIER:  ");break;

        case TYPE_NUMBER:     fprintf(outf,"       NUMBER:      ");break;

        case TYPE_NOTE:       fprintf(outf,"       NOTE:        ");break;

        case TYPE_CONSTANT:   fprintf(outf,"       CONSTANT:    ");break;

        case TYPE_OPERATOR:   fprintf(outf,"       OPERATOR:    ");break;

        case TYPE_SEPARATOR:  fprintf(outf,"       SEPARATOR:   ");break;

        case TYPE_ERROR:      fprintf(outf,"       ERROR:       ");break;

        case TYPE_UNKNOWN:    fprintf(outf,"       UNKNOWN:     ");break;

        default:break;

    }

    fprintf(outf,"%s\n",str);

}

 

/* DFA词法分析函数 */

void LexAnalyse(FILE *inf,FILE *outf)

{

   char c;

   char str[MAX_STR];/* 过程字符串 */

   int i;

   int line_no=1;/* 行号 */

   int state,type;

   char flag_limit_one_line;/* 标志为 / * 注释 范围是一行 */

   char flag_had_got_dot;/* 用于限定小数中只能有一个小数点 */

   fprintf(outf,"Line %d--------------------------------------\n",line_no);

   while(!feof(inf))

   {

        i=0;

        state=STA_START;

        flag_limit_one_line=0;

        flag_had_got_dot=0;

        while(state!=STA_DONE)

        {

            c=fgetc(inf);

            switch(state)

            {

                case STA_START:

                                if( c==' ' || c=='\t');

                                else if(c=='\n')

                                {

                                    line_no++;

                                    fprintf(outf,"Line %d--------------------------------------\n",line_no);

                                }

                                else if( (c>='a' && c<='z') || (c>='A' && c<='Z') || c=='_')

                                {

                                    state=STA_IDorKEYWORD;

                                    type=TYPE_IDENTIFIER;

                                    str[i]=c; i++;

                                }

                                else if(c>='0' && c<='9')

                                {

                                    state=STA_NUMBER;

                                    type=TYPE_NUMBER;

                                    str[i]=c; i++;

                                }

                                else if(c=='/')

                                {

                                    str[i]=c; i++;

                                    c=fgetc(inf);

                                    if(c=='/')   /*   //注释    */

                                    {

                                        flag_limit_one_line=1;

                                        state=STA_NOTE;

                                        type=TYPE_NOTE;

                                        str[i]=c; i++;

 

                                    }

                                    else if(c=='*')       /*   / * 注释开始    */

                                    {

                                        state=STA_NOTE;

                                        type=TYPE_NOTE;

                                        str[i]=c; i++;

                                    }

                                    else

                                    {

                                        state=STA_DONE;

                                        type=TYPE_OPERATOR;

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        i=1;/* str[1]='\0'; */

                                    }

                                }

                                else if(c=='<' || c=='>')

                                {

                                    state=STA_DONE;

                                    type=TYPE_OPERATOR;

                                    str[0]=c;

                                    c=fgetc(inf);

                                    if(c=='=')

                                    {

                                        str[1]='=';i=2;/* str[2]='\0'    */

                                    }

                                    else/* 暂未考虑<<,>>位移运算符的识别 */

                                    {

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        i=1;/* str[1]='\0'; */

                                    }

                                }

                                else if(c=='!')

                                {

                                    state=STA_DONE;

                                    str[0]=c;

                                    c=fgetc(inf);

                                    if(c=='=')  /*    是!= */

                                    {

                                        type=TYPE_OPERATOR;

                                        str[1]='=';i=2;/* str[2]='\0'    */

                                    }

                                    else        /*    非!= */

                                    {

                                        type=TYPE_UNKNOWN;

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        i=1;/* str[1]='\0'; */

                                    }

                                }

                                else if(c=='\"' || c=='\'')

                                {

                                    state=STA_CONSTANT;

                                    type=TYPE_CONSTANT;

                                    str[0]=c; i=1;

                                }

                                else if(IsOperator(c))

                                {

                                    state=STA_DONE;

                                    type=TYPE_OPERATOR;

                                    str[0]=c; i=1;/* str[1]='\0'; */

                                }

                                else if(IsSeparator(c))

                                {

                                    state=STA_DONE;

                                    type=TYPE_SEPARATOR;

                                    str[0]=c; i=1;/* str[1]='\0'; */

                                }

                                else if(c==EOF)

                                {

                                    state=STA_DONE;

                                    type=TYPE_ENDFILE;

                                }

                                else

                                {

                                    state=STA_DONE;

                                    type=TYPE_UNKNOWN;

                                }

                                break;/* case STA */

                case STA_IDorKEYWORD:

                                if((c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9') || c=='_' )

                                {

                                    str[i]=c; i++;

                                }

                                else if(c=='.')

                                {

                                    str[i]=c; i++;

                                    c=fgetc(inf);

                                    if((c>='a' && c<='z') || (c>='A' && c<='Z') || c=='_')

                                    {

                                        str[i]=c; i++;

                                        /* type=TYPE_IDENTIFIER;     */

                                    }

                                    else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c))

                                    {

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        state=STA_DONE;

                                        type=TYPE_ERROR;         /*  如 date. 或 date.1 标记为ERROR类型 */

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                        type=TYPE_ERROR;

                                    }

                                }

                                else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c))

                                {

                                    state=STA_DONE;

                                    fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                }

                                else

                                {

                                    state=STA_DONE;

                                    type=TYPE_ERROR;

                                }

                                break;/* case STA */

                case STA_NUMBER:

                                if(c>='0' && c<='9')

                                {

                                    str[i]=c; i++;

                                }

                                else if(c=='.')   /* 小数识别  */

                                {

                                    str[i]=c; i++;

                                    c=fgetc(inf);

                                    if(flag_had_got_dot==0)

                                    {

                                    if(c>='0' && c<='9')

                                    {

                                        str[i]=c; i++;

                                        /* type=TYPE_NUMBER; */

                                        flag_had_got_dot=1;

                                    }

                                    else /* if(c==' ' || c=='\t' || c=='\n' || c>='0' && c<='9' || ...) */

                                    {

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        state=STA_DONE;

                                        type=TYPE_ERROR;         /*  如 date. 或 date.1 标记为ERROR类型 */

                                    }

                                    }

                                    else

                                    {

                                        type=TYPE_ERROR;

                                        str[i]=c; i++;

                                    }

                                }

                                else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c))

                                {

                                    state=STA_DONE;

                                    fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                }

                                else

                                {

                                    str[i]=c; i++;

                                    type=TYPE_ERROR;

                                }

                                break;/* case STA */

                case STA_NOTE:

                                if(flag_limit_one_line==1)   /*      是/ * 注释, 限定一行 */

                                {

                                    if(c=='\n')

                                    {

                                        state=STA_DONE;

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                    }

                                }

                                else /*      是  / *   注释 */

                                {

                                    if(feof(inf))

                                    {

                                        state=STA_DONE;

                                        type=TYPE_ERROR;

                                    }

                                    else if(c=='\n')

                                    {

                                        line_no++;

                                        str[i]=c; i++;

                                    }

                                    else if(c=='*')

                                    {

                                        str[i]=c; i++;

                                        c=fgetc(inf);

                                        if(c=='/')

                                        {

                                            state=STA_DONE;

                                            str[i]=c; i++;

                                        }

                                        else

                                        {

                                            if(feof(inf))

                                            {

                                                state=STA_DONE;

                                                type=TYPE_ERROR;

                                            }

                                            fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        }

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                    }

                                }

                                break;/* case STA */

                case STA_CONSTANT:

                                    if(feof(inf))

                                    {

                                        state=STA_DONE;

                                        type=TYPE_ERROR;

                                    }

                                    else if(c=='\n')

                                    {

                                        line_no++;

                                        str[i]=c; i++;

                                    }

                                    else if(c=='\"' || c=='\'')

                                    {

                                        state=STA_DONE;

                                        str[i]=c; i++;

                                    }

                                    else if(c=='\\')

                                    {

                                        str[i]=c; i++;

                                        c=fgetc(inf);

                                        str[i]=c; i++;

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                    }

                                    break;/* case STA */

                case STA_DONE:  break;/* case STA */

                default:        break;/* case STA */

            }

        }/* state=STA_DONE */

        str[i]='\0';

        OutputOneDone(outf,type,str);/* DONE一次输出一次 */

   }/* feof(inf) */

}

 

/*
 * Program entry point.
 *
 * Opens "input.c" for reading and "output.c" for writing, runs LexAnalyse
 * over the input, appends an end-of-file marker line and closes both files.
 * Exits with status 1 when either file cannot be opened, returns 0 otherwise.
 */
int main(void)
{
    FILE *input,*output;

    if((input=fopen("input.c","r"))==NULL)
    {
        printf("Cannot find the file!\nStrike any key to exit!\n");
        system("pause");
        exit(1);
    }

    /* The output file may also fail to open (read-only directory, ...). */
    if((output=fopen("output.c","w"))==NULL)
    {
        printf("Cannot create the output file!\nStrike any key to exit!\n");
        fclose(input);
        system("pause");
        exit(1);
    }

    LexAnalyse(input,output);
    fprintf(output,"----------------------------------------------END OF FILE!\n");
    fclose(input);
    fclose(output);

    /* The message names the file that is actually written. */
    printf("Lexical Analyzer has finished the analyzation!\nFor more information please see the file output.c.\n");
    system("pause");
    return 0;
}

 

 

转载于:https://www.cnblogs.com/Estrong/archive/2012/11/05/2744548.html

相关文章:

  • SQL 表操作
  • Qt学习之路(44): QSortFilterProxyModel
  • UIimage图片在程序Documents目录下的存取
  • Qt学习之路(43): QDirModel
  • java “==”和“ equals”以及instanceof的区别
  • Qt学习之路(42): QStringListModel
  • The Clocks
  • 发布app store流程
  • Qt学习之路(41): QTableWidget
  • Qt学习之路(40): QTreeWidget
  • 配置EM遇到的问题
  • Qt学习之路(38): model-view架构
  • MAGENTO EVENT OBSERVER
  • Qt学习之路(39): QListWidget
  • iTextSharp 使用详解(转)
  • 【Linux系统编程】快速查找errno错误码信息
  • CSS进阶篇--用CSS开启硬件加速来提高网站性能
  • Git同步原始仓库到Fork仓库中
  • HTML中设置input等文本框为不可操作
  • HTTP传输编码增加了传输量,只为解决这一个问题 | 实用 HTTP
  • JAVA SE 6 GC调优笔记
  • Mac 鼠须管 Rime 输入法 安装五笔输入法 教程
  • Python - 闭包Closure
  • React16时代,该用什么姿势写 React ?
  • Terraform入门 - 1. 安装Terraform
  • 闭包--闭包作用之保存(一)
  • 仿天猫超市收藏抛物线动画工具库
  • 计算机在识别图像时“看到”了什么?
  • 如何使用 JavaScript 解析 URL
  • 入门到放弃node系列之Hello Word篇
  • 算法-插入排序
  • C# - 为值类型重定义相等性
  • 容器镜像
  • ​力扣解法汇总946-验证栈序列
  • #{} 和 ${}区别
  • (2.2w字)前端单元测试之Jest详解篇
  • (C#)获取字符编码的类
  • (done) NLP “bag-of-words“ 方法 (带有二元分类和多元分类两个例子)词袋模型、BoW
  • (NSDate) 时间 (time )比较
  • (pytorch进阶之路)CLIP模型 实现图像多模态检索任务
  • (安卓)跳转应用市场APP详情页的方式
  • (力扣)1314.矩阵区域和
  • (六)库存超卖案例实战——使用mysql分布式锁解决“超卖”问题
  • (一)认识微服务
  • ..thread“main“ com.fasterxml.jackson.databind.JsonMappingException: Jackson version is too old 2.3.1
  • .MSSQLSERVER 导入导出 命令集--堪称经典,值得借鉴!
  • .NET 2.0中新增的一些TryGet,TryParse等方法
  • .Net Remoting(分离服务程序实现) - Part.3
  • .Net Web项目创建比较不错的参考文章
  • .net 微服务 服务保护 自动重试 Polly
  • .NET/C# 使用 ConditionalWeakTable 附加字段(CLR 版本的附加属性,也可用用来当作弱引用字典 WeakDictionary)
  • .net中应用SQL缓存(实例使用)
  • .php结尾的域名,【php】php正则截取url中域名后的内容
  • @manytomany 保存后数据被删除_[Windows] 数据恢复软件RStudio v8.14.179675 便携特别版...
  • [ 蓝桥杯Web真题 ]-布局切换