MIT 6.S081 | 0x0a Mmap
You should implement enough
mmap
andmunmap
functionality to make themmaptest
test program work. Ifmmaptest
doesn’t use ammap
feature, you don’t need to implement that feature.
本节实验严重参考了 Millgon’s blog 的做法
分析
之前都没怎么接触过这俩函数,于是问了下通义千问。
函数解释
mmap()
和 munmap()
是在Unix和类Unix操作系统(如Linux)中用于内存映射的系统调用。这些函数允许用户空间程序直接访问内核管理的内存或文件,提供了一种高效的数据访问机制。
mmap()
mmap()
函数的主要作用是将一个文件或其它对象映射到进程的地址空间中,这样文件的内容就可以像访问普通内存一样进行读写。当一个文件被映射后,对映射区域的任何修改都会反映到原始文件上,除非使用了某种特殊的映射标志来改变这种行为。
mmap()
的基本原型如下:
void *;
参数解释:
addr
:建议的映射起始地址,通常设为NULL
让内核选择合适的地址。length
:要映射的字节数。prot
:页面保护标志,指定映射区域的权限,例如读、写等。flags
:映射类型和特殊标志,例如MAP_SHARED
(共享映射)或MAP_PRIVATE
(私有拷贝映射)。fd
:文件描述符,通常是通过open()
打开的文件。offset
:文件的偏移量,映射的起点位置。
munmap()
munmap()
函数的作用是取消之前通过mmap()
创建的内存映射。一旦调用munmap()
,对原来映射区域的访问将不再有效,且对该区域的任何引用都会导致段错误(Segmentation fault)。
munmap()
的基本原型如下:
int ;
参数解释:
addr
:映射区域的起始地址,这是mmap()
返回的地址。length
:映射区域的长度,与mmap()
中的length
相同。
这两个函数提供了非常强大的功能,可以用于实现高效的文件I/O、进程间的共享内存通信以及动态链接库的加载等场景。然而,由于它们直接操作内核资源,因此在使用时需要特别小心,确保正确的地址和长度被传递给函数,避免内存泄漏或其他潜在的错误。
实现思路

mmap()
是将一个文件映射到进程的地址空间,看一下进程在内存中的布局,这个映射的起始地址可以选为 TRAPEFRAME
之前的空闲区域。内存的映射和懒加载有点像 COW 那节实验做的事,mmap()
时只需要记录一下映射的信息,然后在缺页异常时再去加载文件内容。进程数据也需要记录一下这个映射信息,方便后续的各种操作。总结一下,需要做以下几个事情:
mmap()
:在proc
结构体中增加一个vma
数组,用于存储虚拟内存区域的信息。在sysfile.c
中实现sys_mmap()
函数,用于处理mmap()
系统调用。在trap.c
中处理缺页异常,实现懒加载。munmap()
:在sysfile.c
中实现sys_munmap()
函数,用于处理munmap()
系统调用。- 在
allocproc()
和freeproc()
中初始化和释放vma
。在fork()
中复制vma
。
代码
diff --git a/Makefile b/Makefile
index 9b83591..5f6af9c 100644
$U/_grind\
$U/_wc\
$U/_zombie\
+ $U/_mmaptest\
diff --git a/kernel/defs.h b/kernel/defs.h
index a3c962b..14605aa 100644
struct sleeplock;
struct stat;
struct superblock;
+struct vma;
// bio.c
void binit(void);
int fetchaddr(uint64, uint64*);
void syscall();
+// sysfile.c
+int vmatryalloc(uint64 va);
+void vmaunmap(pagetable_t pagetable, uint64 va, uint64 sz, struct vma *v);
+
// trap.c
extern uint ticks;
void trapinit(void);
diff --git a/kernel/memlayout.h b/kernel/memlayout.h
index 776f98c..61c030c 100644
// TRAPFRAME (p->trapframe, used by the trampoline)
// TRAMPOLINE (the same page as in the kernel)
#define TRAPFRAME (TRAMPOLINE - PGSIZE)
+
+// Used for virtual memory areas
+#define VMAEND TRAPFRAME
diff --git a/kernel/param.h b/kernel/param.h
index 6624bff..3b99e82 100644
#define NPROC 64 // maximum number of processes
#define NCPU 8 // maximum number of CPUs
+#define NVMA 16 // maximum number of virtual memory areas
#define NOFILE 16 // open files per process
#define NFILE 100 // open files per system
#define NINODE 50 // maximum number of active i-nodes
diff --git a/kernel/proc.c b/kernel/proc.c
index 959b778..efd03c7 100644
p->context.ra = (uint64)forkret;
p->context.sp = p->kstack + PGSIZE;
+ for (int i = 0; i < NVMA; i++) {
+ p->vma[i].valid = 0;
+ }
+
return p;
}
if(p->trapframe)
kfree((void*)p->trapframe);
p->trapframe = 0;
+ for (int i = 0; i < NVMA; i++) {
+ struct vma *v = &p->vma[i];
+ vmaunmap(p->pagetable, v->addr, v->sz, v);
+ }
if(p->pagetable)
proc_freepagetable(p->pagetable, p->sz);
p->pagetable = 0;
np->ofile[i] = filedup(p->ofile[i]);
np->cwd = idup(p->cwd);
+ for (i = 0; i < NVMA; i++) {
+ struct vma *v = &p->vma[i];
+ if (v->valid) {
+ np->vma[i] = *v;
+ filedup(v->f);
+ }
+ }
+
safestrcpy(np->name, p->name, sizeof(p->name));
pid = np->pid;
diff --git a/kernel/proc.h b/kernel/proc.h
index d021857..e1ff755 100644
/* 280 */ uint64 t6;
};
+// Virtual memory areas
+struct vma {
+ int valid;
+
+ uint64 addr;
+ int sz;
+ struct file *f;
+ int prot;
+ int flags;
+ int offset;
+};
+
enum procstate { UNUSED, USED, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
// Per-process state
struct file *ofile[NOFILE]; // Open files
struct inode *cwd; // Current directory
char name[16]; // Process name (debugging)
+ struct vma vma[NVMA]; // Virual memory areas
};
diff --git a/kernel/riscv.h b/kernel/riscv.h
index 20a01db..3b14dca 100644
#define PTE_W (1L << 2)
#define PTE_X (1L << 3)
#define PTE_U (1L << 4) // user can access
+#define PTE_G (1L << 5) // global mapping
+#define PTE_A (1L << 6) // accessed
+#define PTE_D (1L << 7) // dirty
// shift a physical address to the right place for a PTE.
#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10)
diff --git a/kernel/syscall.c b/kernel/syscall.c
index ed65409..4fb9baa 100644
extern uint64 sys_link(void);
extern uint64 sys_mkdir(void);
extern uint64 sys_close(void);
+extern uint64 sys_mmap(void);
+extern uint64 sys_munmap(void);
// An array mapping syscall numbers from syscall.h
// to the function that handles the system call.
[SYS_link] sys_link,
[SYS_mkdir] sys_mkdir,
[SYS_close] sys_close,
+[SYS_mmap] sys_mmap,
+[SYS_munmap] sys_munmap,
};
void
diff --git a/kernel/syscall.h b/kernel/syscall.h
index bc5f356..e7b18d6 100644
#define SYS_link 19
#define SYS_mkdir 20
#define SYS_close 21
+#define SYS_mmap 22
+#define SYS_munmap 23
diff --git a/kernel/sysfile.c b/kernel/sysfile.c
index 16b668c..849bdd2 100644
#include "sleeplock.h"
#include "file.h"
#include "fcntl.h"
+#include "memlayout.h"
// Fetch the nth word-sized system call argument as a file descriptor
// and return both the descriptor and the corresponding struct file.
}
return 0;
}
+
+uint64
+sys_mmap(void)
+{
+ uint64 addr;
+ int sz, prot, flags, fd, offset;
+ struct file *f;
+ struct proc *p = myproc();
+
+ argaddr(0, &addr);
+ argint(1, &sz);
+ argint(2, &prot);
+ argint(3, &flags);
+ if (argfd(4, &fd, &f) < 0)
+ return -1;
+ argint(5, &offset);
+
+ if ((!f->readable && (prot & PROT_READ)) || (!f->writable && (prot & PROT_WRITE) && !(flags & MAP_PRIVATE)))
+ return -1;
+
+ sz = PGROUNDUP(sz);
+
+ struct vma* v = 0;
+
+ uint64 vmaend = VMAEND;
+
+ for (int i = 0; i < NVMA; i++) {
+ if (!p->vma[i].valid) {
+ v = &p->vma[i];
+ v->valid = 1;
+ break;
+ }
+ if (p->vma[i].addr < vmaend) {
+ vmaend = PGROUNDDOWN(p->vma[i].addr);
+ }
+ }
+
+ if (v == 0)
+ panic("mmap: no free space.");
+
+
+ v->addr = vmaend - sz;
+ v->sz = sz;
+ v->prot = prot;
+ v->flags = flags;
+ v->f = f;
+ v->offset = offset;
+
+ filedup(f);
+
+ return v->addr;
+}
+
+uint64
+sys_munmap(void)
+{
+ uint64 addr;
+ int sz;
+
+ argaddr(0, &addr);
+ argint(1, &sz);
+
+ struct proc *p = myproc();
+ struct vma *v = 0;
+ for (int i = 0; i < NVMA; i++) {
+ if (p->vma[i].valid && p->vma[i].addr <= addr && addr < p->vma[i].addr + p->vma[i].sz) {
+ v = &p->vma[i];
+ break;
+ }
+ }
+
+ if (v == 0) {
+ return -1;
+ }
+
+ if (addr > v->addr && addr + sz < v->addr + v->sz) {
+ return -1;
+ }
+
+ uint64 aaddr = addr;
+ if (addr > v->addr) {
+ aaddr = PGROUNDUP(addr);
+ }
+
+ int nbytes = sz - (aaddr - addr); // bytes to unmap
+ if (nbytes < 0) {
+ nbytes = 0;
+ }
+
+ vmaunmap(p->pagetable, aaddr, nbytes, v);
+
+ if (addr <= v->addr && addr + sz > v->addr) {
+ v->offset += addr + sz - v->addr;
+ v->addr = addr + sz;
+ }
+ v->sz -= sz;
+
+ if (v->sz <= 0) {
+ fileclose(v->f);
+ v->valid = 0;
+ }
+
+ return 0;
+}
+
+int
+vmatryalloc(uint64 va) {
+ struct proc *p = myproc();
+ struct vma *v = 0;
+
+ for (int i = 0; i < NVMA; i++) {
+ if (p->vma[i].valid && p->vma[i].addr <= va && va < p->vma[i].addr + p->vma[i].sz) {
+ v = &p->vma[i];
+ break;
+ }
+ }
+
+ if (v == 0) {
+ printf("vmatryalloc(): cannot find vma contain specific addr %p\n", va);
+ return -1;
+ }
+
+ void *pa = kalloc();
+ if (pa == 0)
+ panic("vmatryalloc(): kalloc");
+ memset(pa, 0, PGSIZE);
+
+ begin_op();
+ ilock(v->f->ip);
+ readi(v->f->ip, 0, (uint64)pa, v->offset + PGROUNDDOWN(va - v->addr), PGSIZE);
+ iunlock(v->f->ip);
+ end_op();
+
+ if (mappages(p->pagetable, va, PGSIZE, (uint64)pa, PTE_R | PTE_W | PTE_U) < 0)
+ panic("vmatryalloc(): mappages");
+
+ return 0;
+}
+
+void
+vmaunmap(pagetable_t pagetable, uint64 va, uint64 nbytes, struct vma *v)
+{
+ uint64 a;
+ pte_t *pte;
+
+ if((va % PGSIZE) != 0)
+ panic("vmaunmap: not aligned");
+
+ for(a = va; a < va + nbytes; a += PGSIZE){
+ if((pte = walk(pagetable, a, 0)) == 0)
+ continue;
+ if(PTE_FLAGS(*pte) == PTE_V)
+ panic("uvmunmap: not a leaf");
+ if(*pte & PTE_V) {
+ uint64 pa = PTE2PA(*pte);
+ if ((*pte & PTE_D) && (v->flags & MAP_SHARED)) {
+ begin_op();
+ ilock(v->f->ip);
+ uint64 off = a - v->addr;
+ if (off < 0) {
+ // the first page is not full
+ writei(v->f->ip, 0, pa + (-off), v->offset, PGSIZE + off);
+ } else if (off + PGSIZE > v->sz) {
+ // the last page is not full
+ writei(v->f->ip, 0, pa, v->offset + off, v->sz - off);
+ } else {
+ // full page
+ writei(v->f->ip, 0, pa, v->offset + off, PGSIZE);
+ }
+ iunlock(v->f->ip);
+ end_op();
+ }
+ kfree((void*)pa);
+ *pte = 0;
+ }
+ }
+}
\ No newline at end of file
diff --git a/kernel/trap.c b/kernel/trap.c
index 512c850..263726e 100644
syscall();
} else if((which_dev = devintr()) != 0){
// ok
+ } else if(r_scause() == 13 || r_scause() == 15) {
+ if (vmatryalloc(r_stval()) != 0) {
+ printf("usertrap(): vma lazy allocation error!\n");
+ setkilled(p);
+ }
} else {
printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
printf(" sepc=%p stval=%p\n", r_sepc(), r_stval());
diff --git a/user/user.h b/user/user.h
index 4d398d5..eaf7e4e 100644
char* sbrk(int);
int sleep(int);
int uptime(void);
+void* mmap(const void*, int, int, int, int, int);
+int munmap(const void*, int);
// ulib.c
int stat(const char*, struct stat*);
diff --git a/user/usys.pl b/user/usys.pl
index 01e426e..d23b9cc 100755
entry("sbrk");
entry("sleep");
entry("uptime");
+entry("mmap");
+entry("munmap");