NEMU | KuangjuX(狂且)

Get Started 起步

Description 描述

PA讲述的是一个"先驱创造计算机"的故事。

本实践课程所要设计的“NEMU”一款经过简化的 x86 全系统模拟器。

PA0

寄存器结构体

在框架的源码中，为了考察我们寄存器的结构，作者故意地使用了错误的数据结构来描述寄存器。使用结构体来描述寄存器的内部结构，会错误地计算寄存器的地址空间。因此我们首先需要将结构体改为联合体来描述寄存器的地址。

在include\cpu\reg.h文件里修改寄存器的描述

typedef struct {
	union {
		union{
			uint32_t _32;
			uint16_t _16;
			uint8_t _8[2];
		}gpr[8];

		struct 
		{
			uint32_t eax, ecx, edx, ebx, esp, ebp, esi, edi;
		};
		
	};
	//store 
	swaddr_t eip;

} CPU_state;

PA1

单步执行

完成这个操作很简单，因为框架已经实现了cpu_exec()函数模拟cpu的调用，我们只需读取命令行参数并且进行函数的调用就可以完成。

static int cmd_si(char *args){
	//initliza the step
	int step = 0;
	if(args == NULL)step=1;
	else
	{
		sscanf(args,"%d",&step);

	}
	cpu_exec(step);
	return 0;
	
}

打印寄存器

本题提供两个参数，‘r’表示打印寄存器状态，‘w’表示打印监视点，因为监视点需要和后面的任务有关，这里暂时先不讲。打印寄存器很简单，只需要调用寄存器的状态并将其打印出来即可，我们定义一个printRegisters函数：

void printRegisters(){
	printf("eax: 0x%-10x  %-10d\n", cpu.eax, cpu.eax);
	printf("edx: 0x%-10x  %-10d\n", cpu.edx, cpu.edx);
	printf("ecx: 0x%-10x  %-10d\n", cpu.ecx, cpu.ecx);
	printf("ebx: 0x%-10x  %-10d\n", cpu.ebx, cpu.ebx);
	printf("ebp: 0x%-10x  %-10d\n", cpu.ebp, cpu.ebp);
	printf("esi: 0x%-10x  %-10d\n", cpu.esi, cpu.esi);
	printf("esp: 0x%-10x  %-10d\n", cpu.esp, cpu.esp);
	printf("eip: 0x%-10x  %-10d\n", cpu.eip, cpu.eip);

}

扫描内存

扫描内存提供两个参数选项，扫描的地址和地址附近字节的内存扫描。因此我们只需要对命令行的两个参数分别进行处理就可以了。扫描的地址有简单和复杂两种状态。简单状态即输入16进制的数，复杂则可以输入表达式，复杂状态我们需要调用下一问中的表达式求值的函数进行解析。

表达式只能是16进制数：

static int cmd_x(char* args){
	if(args == NULL) return 0;
	uint32_t num = 0, addr;
	while(args[0] == ' ')++args;	//trim
	while('0' <= args[0] && args[0] <= '9') num = (num << 3) + (num << 1) + (args[0] & 15), ++args;
	//get num
	while(args[0] == ' ')++args; //trim
	//get address
	if(args[0] == '0' && args[1] == 'x'){
		args = args + 2;
		addr = read_address(args);

		while(num) {
			printf("address 0x%x:", addr);
			int i;
			for(i = 0;i < 4; i++)printf(" 0x%x", swaddr_read(addr + i, 1));
			printf("\n");
			addr += 4;
			--num;
		}
		return 0;
	}else{
		printf("\033[1;31mInvalid expression\n\033[0m");
		return 0;
	}

}

首先清除空格，然后读入数字并将数字求值，然后再读入地址将地址进行求值，我们写了一个解析地址的函数read_address()进行解析。

//read address
uint32_t read_address(char *args){
	uint32_t address;
	address = 0;
	while(('0' <= args[0] && args[0] <= '9') || ('a' <= args[0] && args[0] <= 'f') || ('A' <= args[0] && args[0] <= 'F')){
				if('0' <= args[0] && args[0] <= '9') 
                    address = (address<<4)+((args[0]-'0'));
				if('a' <= args[0] && args[0] <= 'f') 
                    address = (address<<4)+((args[0]-'a')+9);
				if('A' <= args[0] && args[0] <= 'F') a
                    ddress = (address<<4)+((args[0]-'A')+9);
				++args;
			}
    return address;
}

首先判断是否输入的是否为16进制数，如果不是则返回，否则则解析地址。将数字和地址解析完成后就可以使用内置的swaddr_read函数进行读取地址信息。

数学表达式求值

在本题中，我们主要需要为表达式添加规则，使用正则表达式为表达式每个可能出现的字符添加规则，并为其制定优先级来对其进行词法分析，最后再对其进行递归求值。

首先完成字符的正则表达式的匹配：

static struct rule {
	char *regex;
	int token_type;
} rules[] = {

	/* TODO: Add more rules.
	 * Pay attention to the precedence level of different rules.
	 */

	{" +",	NOTYPE},				//spaces
	{"\\+", PLUS},					//plus
	{"-", MINUS},					//minus
	{"\\*", STAR},					//star
	{"/", DIV},						//div
	{"==", EQ},						//eq
	{"!=", NOTEQ},					//noteq
	{"&&", AND},					//and
	{"\\|\\|", OR},					//or
	{"!", NOT},						//not
	{"\\(", LB},					//lb
	{"\\)", RB},					//rb
	{"0[xX][0-9a-zA-Z]+", HEX},		//hex
	{"[0-9]+", DEC},				//dec
	{"\\$[a-z]+", REG}				//reg
};

在代码中我们为可能出现的字符进行了正则表达式的匹配。然后我们需要将识别出的token信息加入tokens数组中。在for循环中，用regexec()函数匹配目标文本字符串和前面定义的rules[i]中的正则表达式比较，pmatch.rm_so == 0表示匹配串在目标串的第一个位置，pmatch.rm_eo表示结束位置，position和substr_len表示读取完后的位置和读取长度。成功识别得到该字符或者字符串的对应规则后，用switch语句将表达式中每一个部分用对应的数字表示type，复制到tokens[nr_token].str中，用strncpy函数，末尾加上\0表示字符串。

static bool make_token(char *e) {
	int position = 0;
	int i;
	regmatch_t pmatch;
	
	nr_token = 0;

	while(e[position] != '\0') {
		/* Try all rules one by one. */
		for(i = 0; i < NR_REGEX; i ++) {
			if(regexec(&re[i], e + position, 1, &pmatch, 0) == 0 && pmatch.rm_so == 0) {
				//char *substr_start = e + position;
				int substr_len = pmatch.rm_eo;
				//Log("match rules[%d] = \"%s\" at position %d with len %d: %.*s", i, rules[i].regex, position, substr_len, substr_len, substr_start);
				position += substr_len;

				/* TODO: Now a new token is recognized with rules[i]. Add codes
				 * to record the token in the array `tokens'. For certain types
				 * of tokens, some extra actions should be performed.
				 */

				switch(rules[i].token_type) {
					case NOTYPE:
						break;											//It's blank!
					case HEX:case DEC:case REG:
						strncpy(tokens[nr_token].str, e + position - substr_len, substr_len);//regs or number
						tokens[nr_token].str[substr_len] = '\0';		//add '\0', it's very important
						//WARNING: 64 may be a little small...
					default:
						if(rules[i].token_type == MINUS) {	//solve neg
							if(nr_token == 0) tokens[nr_token++].type = NEG;
							else if(PLUS <= tokens[nr_token - 1].type && tokens[nr_token - 1].type <= LB) {
								tokens[nr_token++].type = NEG;
							} else tokens[nr_token++].type = MINUS;
						} else if(rules[i].token_type == STAR) { //solve pointer
							if(nr_token == 0) tokens[nr_token++].type = POINTER;
							else if(PLUS <= tokens[nr_token - 1].type && tokens[nr_token - 1].type <= LB) {
								tokens[nr_token++].type = POINTER;
							} else tokens[nr_token++].type = STAR;
						} else {
							tokens[nr_token++].type = rules[i].token_type;	//other	
						}
						break;
					//panic("please implement me");
				}
				break;
			}
		}

		if(i == NR_REGEX) {
			printf("no match at position %d\n%s\n%*.s^\n", position, e, position, "");
			return false;
		}
	}

	return true; 
}

然后我们需要需要将符号匹配的函数补全，函数提供了三个参数，表达式左侧指针l，右侧指针r，以及表示匹配结果的指针sucess，我们需要从左至右遍历token数组，检查其符号是否匹配，遍历的时候如果有左括号咋cnt加1，有右括号则将cnt减1，如果最终cnt不等于0则表示符号不匹配。如果i不等于r且cnt等于0时在表达式中有类似()()这种括号，需要将flag设置为0，否则则设为1。

bool check_parentheses(int l, int r, bool *success) {//Check the parentheses, use stack.
	*success = true;
	if(l > r) return *success = false;
	int cnt = 0, flag = 1, i;		//A simple stack
	for(i = l;i <= r; i++){
		if(tokens[i].type == LB) ++cnt;
		if(tokens[i].type == RB) --cnt;
		if(cnt < 0)	return *success = false;//Bad
		if(i != r && cnt == 0) flag = 0;
	}
	if(cnt != 0) return *success = false;
	return flag;
}

最后我们需要将eval函数填充完，在expr函数中我们调用了make_token函数和eval函数，make_token来判断表达式的格式是否正确，而在eval中我们则需要对表达式进行求值，同check_parentheses函数一样，eval也接受同样三个参数。如果传入的参数l>r则直接将success设为false返回，如果l==r，则需要判断表达式是否为十进制数，十六进制数或者寄存器，若为这其中一种，则返回，否则表达式无意义，返回success为false。

当l>r的时候，则需要先调用check_parentheses函数进行符号判断，如果符号不匹配则直接将success设为false返回。接着判断flag是否为1（即没有类似()()这种括号），为1的情况下则清除左右括号进行递归。否则需要遍历表达式将括号删掉，并且解决优先级的问题。

我们遍历所有的tokens，将符号优先级进行比较，找到优先级最低的符号，设置为now，将now的左边和now的右边分开方便递归。由于取反，取负，指针解引用都是对单侧进行操作且优先级最高，所以需要单独拿出来进行讨论，而其他符号则是对now指针的双侧进行操作，所以需要分别将两侧表达式求值进行递归处理。

最后eval函数及expr函数代码如下:

uint32_t eval(int l, int r, bool *success) {
	*success = true;
	if(l > r) return *success = false;// Bad Expression !!
	if(l == r){				//It's a number or reg, otherwise bad expression
		uint32_t tmp;
		if(tokens[l].type == HEX) {
			sscanf(tokens[l].str, "%x", &tmp);
			return tmp;
		} else if(tokens[l].type == DEC) {
			sscanf(tokens[l].str, "%d", &tmp);
			return tmp;
		} else if(tokens[l].type == REG) {	//read register
			if(strcmp(tokens[l].str + 1, "eax") == 0) return cpu.eax;
			if(strcmp(tokens[l].str + 1, "ecx") == 0) return cpu.ecx;
			if(strcmp(tokens[l].str + 1, "edx") == 0) return cpu.edx;
			if(strcmp(tokens[l].str + 1, "ebx") == 0) return cpu.ebx;
			if(strcmp(tokens[l].str + 1, "esp") == 0) return cpu.esp;
			if(strcmp(tokens[l].str + 1, "ebp") == 0) return cpu.ebp;
			if(strcmp(tokens[l].str + 1, "esi") == 0) return cpu.esi;
			if(strcmp(tokens[l].str + 1, "edi") == 0) return cpu.edi;
			if(strcmp(tokens[l].str + 1, "eip") == 0) return cpu.eip;
			return *success = false; 
		}
		return *success = false;
	}
	bool flag = check_parentheses(l, r, success);
	if(!success) return 0;						//Bad
	if(flag) return eval(l + 1, r - 1, success);//OK, remove parentheses
	//Now we should find the dominant token
	int now = -1, type = 0x3f3f3f3f, cnt = 0, i;
	for(i = l; i <= r; i++) {
		if(tokens[i].type == LB) ++cnt;
		if(tokens[i].type == RB) --cnt;
		if(cnt != 0) continue;	//In mathched parentheses, pass
		if(PLUS <= tokens[i].type && tokens[i].type <= POINTER) {
			if(type >= PRE[tokens[i].type]) type = PRE[tokens[i].type], now = i;
		}
	}
	assert(now != -1);
	uint32_t a, b;
	//solve '!'
	if(tokens[now].type >= NOT) {
		//if type>=not, which means other token has been solved
		//so the first token must be NOT or NEG or POINTER
		b = eval(l + 1, r, success);
		if(!(*success)) return *success = false;
		if(tokens[l].type == NOT) return !b;
		if(tokens[l].type == NEG) return -b;
		if(tokens[l].type == POINTER) return swaddr_read(b, 1);
		return *success = false;
	}
	a = eval(l, now - 1, success);
	if(!(*success))return *success = false;
	b = eval(now + 1, r ,success);
	if(!(*success))return *success = false;
	if(tokens[now].type == PLUS) return a + b;
	if(tokens[now].type == STAR) return a * b;
	if(tokens[now].type == DIV) return a / b;	
	if(tokens[now].type == MINUS) return a - b;
	if(tokens[now].type == EQ) return a == b;
	if(tokens[now].type == NOTEQ) return a != b;
	if(tokens[now].type == AND) return a && b;
	if(tokens[now].type == OR) return a || b;
	return 0;
}

uint32_t expr(char *e, bool *success) {
	if(!make_token(e)) {
		*success = false;
		return 0;
	}
	/* TODO: Insert codes to evaluate the expression. */
	//panic("please implement me");
	//Calculate the value
	return eval(0, nr_token - 1, success);//call eval to calculate the value of expression e
}

监视点

本题较简单，即用一个链表结构维护一个监视点池。即需要实现几个函数，初始化监视点池、新申请一个监视点，释放一个监视点，向监视点插入表达式等方法。

初始化监视点池即将监视点头结点设为空，空闲节点设为整个监视点池

void init_wp_pool() {
	int i;
	for(i = 0; i < NR_WP; i ++) {
		wp_pool[i].NO = i;
		wp_pool[i].next = &wp_pool[i + 1];
	}
	wp_pool[NR_WP - 1].next = NULL;

	head = NULL;
	free_ = wp_pool;
}

新申请一个监视点需要将head节点指向空闲节点，再将空闲节点指向其next节点

WP* new_wp() {
	assert(free_ != NULL);
	WP* ans = free_;
	free_ = ans->next;
	ans->next = NULL;
	return ans;
}

移除节点则需要将这个节点从链表中移除出去。

int removeNode(int id) {
	if(head == NULL) return 0;
	if(head->NO == id){
		WP* tmp = head;
		head = head->next;
		wp_free(tmp);
		return 1;
	}
	WP* now = head;
	while(now->next != NULL) {
		if(now->next->NO == id) {
			WP* tmp = now->next;
			now->next = now->next->next;
			wp_free(tmp);
			return 1;
		}
		now = now->next;
	}
	return 0;
}

在ui.c的函数中只需要读入参数，调用这些维护监视点池的函数即可。