编译原理头歌实验:词法分析程序设计与实现(C语言版)
我们需要对五种单词符号进行识别分析,这里将单词符号分为三大块进行识别。首先判断当前字符是否为字母:若是,则读入完整的单词并与已定义好的关键字表逐项比较,从而判断它是关键字还是标识符;然后是数字的识别;最后是其他字符的判断,它们通过逐一定义好的分支进行识别,这样所有的字符便都被识别出来了。标识符和关键字的判断:if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
编译原理头歌实验:词法分析程序设计与实现(C语言版)
1.实验描述
任务描述
本关任务:加深对词法分析器的工作过程的理解;加强对词法分析方法的掌握;能够采用一种编程语言实现简单的词法分析程序;能够使用自己编写的分析程序对简单的程序段进行词法分析。
编程要求
根据提示,在右侧编辑器中补充标识符、数字及其他字符符号的识别代码,然后点击评测运行程序,系统会自动进行结果对比。
测试说明
平台会对你编写的代码进行测试:
测试输入:
using namespace std; int main() {
int year;
cout << "hello" << endl;
return 0; }
2.实验操作提示
2.1定义目标语言的可用符号表和构词规则。
我们需要对五种单词符号进行识别分析,这里将单词符号分为三大块进行识别。首先判断当前字符是否为字母:若是,则读入完整的单词并与已定义好的关键字表逐项比较,从而判断它是关键字还是标识符;然后是数字的识别;最后是其他字符的判断,它们通过逐一定义好的分支进行识别,这样所有的字符便都被识别出来了。标识符和关键字的判断:
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) //可能是标示符或者关键字
标识符与关键字的区别通过与关键字表逐项比较(strcmp)得出:
if (strcmp(token, rwtab1[n]) == 0)
{
syn = 2;
break;
}
else if (strcmp(token, rwtab[n]) == 0) {
syn = 1;
break;
}
}
对于数字的识别:
else if ((ch >= '0' && ch <= '9')) //数字
其他字符的识别,它们通过switch分支被逐一定义并识别:
else switch (ch) //其他字符
{
case'<':m = 0; token[m++] = ch;
ch = prog[p++];
if (ch == '>')
{
syn = 4;
token[m++] = ch;
}
else if (ch == '=')
{
syn = 4;
token[m++] = ch;
}
else
{
syn = 4;
p--;
}
break;
case'>':m = 0; token[m++] = ch;
ch = prog[p++];
if (ch == '=')
{
syn = 4;
token[m++] = ch;
}
else
{
syn = 4;
p--;
}
break;
case':':m = 0; token[m++] = ch;
ch = prog[p++];
if (ch == '=')
{
syn = 4;
token[m++] = ch;
}
else
{
syn = 4;
p--;
}
break;
case'*':syn = 4; token[0] = ch; break;
case'/':syn = 4; token[0] = ch; break;
case'+':syn = 4; token[0] = ch; break;
case'-':syn = 4; token[0] = ch; break;
case'=':syn = 4; token[0] = ch; break;
case';':syn = 5; token[0] = ch; break;
case',':syn = 5; token[0] = ch; break;
case'(':syn = 5; token[0] = ch; break;
case')':syn = 5; token[0] = ch; break;
case'{':syn = 5; token[0] = ch; break;
case'}':syn = 5; token[0] = ch; break;
case'#':syn = 0; token[0] = ch; break;
case'\n':syn = -2; break;
default: syn = -1; break;
}
}
依次读入源程序符号,对源程序进行单词切分和识别,直到源程序结束。
字符的输入我们使用cin.get() 获取,并切分保存在 prog中:
p = 0;
row = 1;
cout << "Please input string:" << endl;
do
{
cin.get(ch);
prog[p++] = ch;
} while (ch != '#');
p = 0;
对正确的单词,按照它的种别以<种别码,值>的形式保存在符号表中;
对不正确的单词,做出错误处理。
单词识别后,我们按上述第3、4条规则(正确的单词输出其种别码和值,不正确的单词报告出错行号)对返回的结果进行输出:
{
scaner();
switch (syn)
{
case 0: break;
case 3: cout << "(" << syn << "," << sum << ")" << endl; break;
case -1: cout << "Error in row " << row << "!" << endl; break;
case -2: row = row++; break;
default: cout << "(" << syn << "," << token << ")" << endl; break;
}
} while (syn != 0);
3.实验代码展示
#include <stdio.h>
#include <string.h>
#include <iostream>
using namespace std;
// prog:  NUL-terminated source text to scan (filled by the caller).
// token: text of the most recently scanned token (empty for numbers and errors).
char prog[1000], token[20];
// Most recently consumed source character.
char ch;
// syn: category of the last token (1 keyword, 2 identifier, 3 number,
//      4 operator, 5 delimiter, 0 end of input, -1 unrecognized character).
// p:   read position in prog.      m:   number of characters stored in token.
// n:   scratch loop index.         row: current source line, starting at 1.
// sum: value of the last number token.
int syn, p, m = 0, n, row = 1, sum = 0;
// Reserved words; an identifier-shaped token found here gets category 1.
const char* rwtab[10] = { "if","int","for","while","do","return","break","continue", "using", "namespace" };
// Secondary name table; scaner() does not consult it.
const char* rwtab1[8] = { "main","a","b","c","d","e","f","g" };

// True for ASCII letters.
static bool scaner_is_letter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// True for ASCII decimal digits.
static bool scaner_is_digit(char c)
{
    return c >= '0' && c <= '9';
}

// Scans the next token of prog starting at position p.
//
// On return, syn holds the token category, token holds its text (or sum holds
// its value for numbers), and p points just past the token. Whitespace is
// skipped first; every '\n' skipped increments row. When prog is exhausted,
// or a '#' is read, syn is set to 0.
//
// Lookahead is done by peeking at prog[p] and consuming only on a match, so
// no character is ever pushed back or dropped.
void scaner()
{
    const int length = static_cast<int>(strlen(prog));            // computed once per call
    const int token_capacity = static_cast<int>(sizeof(token)) - 1; // keep room for the NUL
    const int keyword_count = static_cast<int>(sizeof(rwtab) / sizeof(rwtab[0]));

    // Clear the token buffer so it is always NUL-terminated.
    for (n = 0; n < static_cast<int>(sizeof(token)); n++)
        token[n] = '\0';
    m = 0;

    // Skip blanks, tabs and newlines, counting lines as we go.
    while (p < length && (prog[p] == ' ' || prog[p] == '\t' || prog[p] == '\n'))
    {
        if (prog[p] == '\n')
            row++;
        p++;
    }
    if (p >= length)
    {
        syn = 0;
        return;
    }

    ch = prog[p++];

    if (scaner_is_letter(ch))
    {
        // Identifier or keyword: a letter followed by letters and digits.
        token[m++] = ch;
        while (p < length && (scaner_is_letter(prog[p]) || scaner_is_digit(prog[p])))
        {
            ch = prog[p++];
            // Over-long names are consumed in full but stored truncated, so
            // the fixed-size token buffer is never overrun.
            if (m < token_capacity)
                token[m++] = ch;
        }
        syn = 2;
        for (n = 0; n < keyword_count; n++)
        {
            if (strcmp(token, rwtab[n]) == 0)
            {
                syn = 1;
                break;
            }
        }
    }
    else if (scaner_is_digit(ch))
    {
        // Unsigned decimal integer. NOTE: no overflow check on sum.
        sum = ch - '0';
        while (p < length && scaner_is_digit(prog[p]))
        {
            ch = prog[p++];
            sum = sum * 10 + (ch - '0');
        }
        syn = 3;
    }
    else
    {
        switch (ch)
        {
        case '<':
            // "<", "<<", "<=" or "<>".
            token[m++] = ch;
            if (p < length && (prog[p] == '<' || prog[p] == '=' || prog[p] == '>'))
                token[m++] = prog[p++];
            syn = 4;
            break;
        case '>':
        case ':':
            // ">" / ">=" and ":" / ":=".
            token[m++] = ch;
            if (p < length && prog[p] == '=')
                token[m++] = prog[p++];
            syn = 4;
            break;
        case '/':
            // "/" is an operator; "//" is reported as a delimiter.
            token[m++] = ch;
            syn = 4;
            if (p < length && prog[p] == '/')
            {
                token[m++] = prog[p++];
                syn = 5;
            }
            break;
        case '*':
        case '+':
        case '-':
        case '=':
            token[m++] = ch;
            syn = 4;
            break;
        case ';':
        case ',':
        case '(':
        case ')':
        case '{':
        case '}':
        case '"':
            token[m++] = ch;
            syn = 5;
            break;
        case '#':
            // End-of-program marker.
            token[m++] = ch;
            syn = 0;
            break;
        default:
            syn = -1;
            break;
        }
    }
}
int main()
{
// 输入
p = 0;
cout << "Please input string:" << endl;
do
{
cin.get(ch);
prog[p++] = ch;
} while (ch != '#');
prog[p] = '\0';
p = 0;
// 输出
do
{
scaner();
switch (syn)
{
case 0:
break;
case 3:
cout << "(" << syn << "," << sum << ")" << endl;
break;
case -1:
cout << "Error in row " << row << "!" << endl;
break;
default:
cout << "(" << syn << "," << token << ")" << endl;
break;
}
} while (syn != 0);
return 0;
}

这个词法分析器通过逐字符读取输入,根据字符的类型和上下文规则,识别出关键字、标识符、数字、运算符和分隔符。它能正确处理简单的 C++ 代码段,并输出每个记号的类型和内容。程序设计清晰,逻辑分明,适合学习词法分析的基本原理。
希望这个讲解能帮助你深入理解代码的实现过程!如果有疑问,欢迎评论区交流!
更多推荐


所有评论(0)