From 971462be94ba0a5c74af7a5f9653dfabd4932a63 Mon Sep 17 00:00:00 2001 From: GnoCiYeH Date: Wed, 1 Nov 2023 20:55:57 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0thread=E5=92=8Cfutex=E6=9C=BA?= =?UTF-8?q?=E5=88=B6=20(#411)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 初步实现clone系统调用 * 实现了线程,初步实现futex机制,添加了几个小的系统调用 * 更改pcb引用计数问题 * 解决死锁bug --------- Co-authored-by: LoGin --- kernel/src/arch/x86_64/process/mod.rs | 63 +- kernel/src/arch/x86_64/process/syscall.rs | 74 +- kernel/src/arch/x86_64/rand.rs | 33 + kernel/src/driver/net/e1000e/e1000e.rs | 27 +- kernel/src/driver/net/e1000e/e1000e_driver.rs | 10 +- kernel/src/driver/net/virtio_net.rs | 2 +- kernel/src/filesystem/vfs/syscall.rs | 25 +- kernel/src/lib.rs | 1 + kernel/src/libs/futex/constant.rs | 54 ++ kernel/src/libs/futex/futex.rs | 647 ++++++++++++++++++ kernel/src/libs/futex/mod.rs | 3 + kernel/src/libs/futex/syscall.rs | 106 +++ kernel/src/libs/mod.rs | 2 + kernel/src/libs/rand.rs | 7 + kernel/src/main.c | 2 + kernel/src/mm/ucontext.rs | 22 +- kernel/src/process/c_adapter.rs | 12 + kernel/src/process/fork.rs | 315 +++++++-- kernel/src/process/kthread.rs | 8 +- kernel/src/process/mod.rs | 122 +++- kernel/src/process/syscall.rs | 109 ++- kernel/src/sched/completion.rs | 2 +- kernel/src/syscall/mod.rs | 110 ++- kernel/src/syscall/user_access.rs | 12 +- kernel/src/time/timer.rs | 20 +- 25 files changed, 1641 insertions(+), 147 deletions(-) create mode 100644 kernel/src/libs/futex/constant.rs create mode 100644 kernel/src/libs/futex/futex.rs create mode 100644 kernel/src/libs/futex/mod.rs create mode 100644 kernel/src/libs/futex/syscall.rs create mode 100644 kernel/src/libs/rand.rs diff --git a/kernel/src/arch/x86_64/process/mod.rs b/kernel/src/arch/x86_64/process/mod.rs index c9c78e84a..107ad82f5 100644 --- a/kernel/src/arch/x86_64/process/mod.rs +++ b/kernel/src/arch/x86_64/process/mod.rs @@ -5,7 +5,11 @@ use core::{ sync::atomic::{compiler_fence, Ordering}, }; -use alloc::{string::String, sync::Arc, vec::Vec}; +use alloc::{ + string::String, + sync::{Arc, Weak}, + vec::Vec, +}; use memoffset::offset_of; use x86::{controlregs::Cr4, segmentation::SegmentSelector}; @@ -20,14 +24,16 @@ use crate::{ VirtAddr, }, process::{ - fork::CloneFlags, KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager, - SwitchResult, SWITCH_RESULT, + fork::{CloneFlags, KernelCloneArgs}, + KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager, SwitchResult, + SWITCH_RESULT, }, syscall::{Syscall, SystemError}, }; use self::{ kthread::kernel_thread_bootstrap_stage1, + syscall::ARCH_SET_FS, table::{switch_fs_and_gs, KERNEL_DS, USER_DS}, }; @@ -38,6 +44,9 @@ pub mod kthread; pub mod syscall; pub mod table; +pub const IA32_FS_BASE: u32 = 0xC000_0100; +pub const IA32_GS_BASE: u32 = 0xC000_0101; + extern "C" { /// 从中断返回 fn ret_from_intr(); @@ -175,7 +184,7 @@ impl ArchPCBInfo { if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { self.fsbase = x86::current::segmentation::rdfsbase() as usize; } else { - self.fsbase = 0; + self.fsbase = x86::msr::rdmsr(IA32_FS_BASE) as usize; } } @@ -183,19 +192,23 @@ impl ArchPCBInfo { if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { self.gsbase = x86::current::segmentation::rdgsbase() as usize; } else { - self.gsbase = 0; + self.gsbase = x86::msr::rdmsr(IA32_GS_BASE) as usize; } } pub unsafe fn restore_fsbase(&mut self) { if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { x86::current::segmentation::wrfsbase(self.fsbase as u64); + } else { + x86::msr::wrmsr(IA32_FS_BASE, self.fsbase as u64); } } pub unsafe fn restore_gsbase(&mut self) { if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) { x86::current::segmentation::wrgsbase(self.gsbase as u64); + } else { + x86::msr::wrmsr(IA32_GS_BASE, self.gsbase as u64); } } @@ -228,11 +241,11 @@ impl ProcessControlBlock { panic!("current_pcb is null"); } unsafe { - // 为了防止内核栈的pcb指针被释放,这里需要将其包装一下,使得Arc的drop不会被调用 - let arc_wrapper: ManuallyDrop> = - ManuallyDrop::new(Arc::from_raw(*p)); + // 为了防止内核栈的pcb weak 指针被释放,这里需要将其包装一下 + let weak_wrapper: ManuallyDrop> = + ManuallyDrop::new(Weak::from_raw(*p)); - let new_arc: Arc = Arc::clone(&arc_wrapper); + let new_arc: Arc = weak_wrapper.upgrade().unwrap(); return new_arc; } } @@ -255,11 +268,12 @@ impl ProcessManager { /// /// 由于这个过程与具体的架构相关,所以放在这里 pub fn copy_thread( - _clone_flags: &CloneFlags, current_pcb: &Arc, new_pcb: &Arc, + clone_args: KernelCloneArgs, current_trapframe: &TrapFrame, ) -> Result<(), SystemError> { + let clone_flags = clone_args.flags; let mut child_trapframe = current_trapframe.clone(); // 子进程的返回值为0 @@ -278,6 +292,10 @@ impl ProcessManager { // 拷贝栈帧 unsafe { + let usp = clone_args.stack; + if usp != 0 { + child_trapframe.rsp = usp as u64; + } let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame; *trap_frame_ptr = child_trapframe; } @@ -296,15 +314,19 @@ impl ProcessManager { drop(current_arch_guard); // 设置返回地址(子进程开始执行的指令地址) - if new_pcb.flags().contains(ProcessFlags::KTHREAD) { let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize; - new_arch_guard.rip = kthread_bootstrap_stage1_func_addr; } else { new_arch_guard.rip = ret_from_intr as usize; } + // 设置tls + if clone_flags.contains(CloneFlags::CLONE_SETTLS) { + drop(new_arch_guard); + Syscall::do_arch_prctl_64(new_pcb, ARCH_SET_FS, clone_args.tls, true)?; + } + return Ok(()); } @@ -335,28 +357,28 @@ impl ProcessManager { compiler_fence(Ordering::SeqCst); next_addr_space.read().user_mapper.utable.make_current(); + drop(next_addr_space); compiler_fence(Ordering::SeqCst); // 切换内核栈 // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁) - let next_arch = SpinLockGuard::leak(next.arch_info()); - let prev_arch = SpinLockGuard::leak(prev.arch_info()); + let next_arch = SpinLockGuard::leak(next.arch_info()) as *mut ArchPCBInfo; + let prev_arch = SpinLockGuard::leak(prev.arch_info()) as *mut ArchPCBInfo; - prev_arch.rip = switch_back as usize; + (*prev_arch).rip = switch_back as usize; // 恢复当前的 preempt count*2 ProcessManager::current_pcb().preempt_enable(); ProcessManager::current_pcb().preempt_enable(); - SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev.clone()); - SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next.clone()); // 切换tss TSSManager::current_tss().set_rsp( x86::Ring::Ring0, next.kernel_stack().stack_max_address().data() as u64, ); + SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev); + SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next); // kdebug!("switch tss ok"); - compiler_fence(Ordering::SeqCst); // 正式切换上下文 switch_to_inner(prev_arch, next_arch); @@ -365,7 +387,7 @@ impl ProcessManager { /// 保存上下文,然后切换进程,接着jmp到`switch_finish_hook`钩子函数 #[naked] -unsafe extern "sysv64" fn switch_to_inner(prev: &mut ArchPCBInfo, next: &mut ArchPCBInfo) { +unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut ArchPCBInfo) { asm!( // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"): // @@ -394,6 +416,9 @@ unsafe extern "sysv64" fn switch_to_inner(prev: &mut ArchPCBInfo, next: &mut Arc mov [rdi + {off_fs}], fs mov [rdi + {off_gs}], gs + // mov fs, [rsi + {off_fs}] + // mov gs, [rsi + {off_gs}] + push rbp push rax diff --git a/kernel/src/arch/x86_64/process/syscall.rs b/kernel/src/arch/x86_64/process/syscall.rs index ee2e4f35d..2d66e7aaf 100644 --- a/kernel/src/arch/x86_64/process/syscall.rs +++ b/kernel/src/arch/x86_64/process/syscall.rs @@ -1,4 +1,4 @@ -use alloc::{string::String, vec::Vec}; +use alloc::{string::String, sync::Arc, vec::Vec}; use crate::{ arch::{ @@ -10,9 +10,9 @@ use crate::{ mm::ucontext::AddressSpace, process::{ exec::{load_binary_file, ExecParam, ExecParamFlags}, - ProcessManager, + ProcessControlBlock, ProcessManager, }, - syscall::{Syscall, SystemError}, + syscall::{user_access::UserBufferWriter, Syscall, SystemError}, }; impl Syscall { @@ -105,6 +105,8 @@ impl Syscall { regs.rflags = 0x200; regs.rax = 1; + drop(param); + // kdebug!("regs: {:?}\n", regs); // kdebug!( @@ -114,4 +116,70 @@ impl Syscall { return Ok(()); } + + /// ## 用于控制和查询与体系结构相关的进程特定选项 + pub fn arch_prctl(option: usize, arg2: usize) -> Result { + let pcb = ProcessManager::current_pcb(); + if let Err(SystemError::EINVAL) = Self::do_arch_prctl_64(&pcb, option, arg2, true) { + Self::do_arch_prctl_common(option, arg2)?; + } + Ok(0) + } + + /// ## 64位下控制fs/gs base寄存器的方法 + pub fn do_arch_prctl_64( + pcb: &Arc, + option: usize, + arg2: usize, + from_user: bool, + ) -> Result { + let mut arch_info = pcb.arch_info_irqsave(); + match option { + ARCH_GET_FS => { + unsafe { arch_info.save_fsbase() }; + let mut writer = UserBufferWriter::new( + arg2 as *mut usize, + core::mem::size_of::(), + from_user, + )?; + writer.copy_one_to_user(&arch_info.fsbase, 0)?; + } + ARCH_GET_GS => { + unsafe { arch_info.save_gsbase() }; + let mut writer = UserBufferWriter::new( + arg2 as *mut usize, + core::mem::size_of::(), + from_user, + )?; + writer.copy_one_to_user(&arch_info.gsbase, 0)?; + } + ARCH_SET_FS => { + arch_info.fsbase = arg2; + // 如果是当前进程则直接写入寄存器 + if pcb.pid() == ProcessManager::current_pcb().pid() { + unsafe { arch_info.restore_fsbase() } + } + } + ARCH_SET_GS => { + arch_info.gsbase = arg2; + if pcb.pid() == ProcessManager::current_pcb().pid() { + unsafe { arch_info.restore_gsbase() } + } + } + _ => { + return Err(SystemError::EINVAL); + } + } + Ok(0) + } + + #[allow(dead_code)] + pub fn do_arch_prctl_common(_option: usize, _arg2: usize) -> Result { + todo!("do_arch_prctl_common not unimplemented"); + } } + +pub const ARCH_SET_GS: usize = 0x1001; +pub const ARCH_SET_FS: usize = 0x1002; +pub const ARCH_GET_FS: usize = 0x1003; +pub const ARCH_GET_GS: usize = 0x1004; diff --git a/kernel/src/arch/x86_64/rand.rs b/kernel/src/arch/x86_64/rand.rs index 6916c64dd..7d5076691 100644 --- a/kernel/src/arch/x86_64/rand.rs +++ b/kernel/src/arch/x86_64/rand.rs @@ -1,5 +1,38 @@ use core::arch::x86_64::_rdtsc; +use alloc::vec::Vec; + +use crate::{ + libs::rand::GRandFlags, + syscall::{user_access::UserBufferWriter, Syscall, SystemError}, +}; + pub fn rand() -> usize { return unsafe { (_rdtsc() * _rdtsc() + 998244353_u64 * _rdtsc()) as usize }; } + +impl Syscall { + /// ## 将随机字节填入buf + /// + /// ### 该系统调用与linux不一致,因为目前没有其他随机源 + pub fn get_random(buf: *mut u8, len: usize, flags: GRandFlags) -> Result { + if flags.bits() == (GRandFlags::GRND_INSECURE.bits() | GRandFlags::GRND_RANDOM.bits()) { + return Err(SystemError::EINVAL); + } + + let mut writer = UserBufferWriter::new(buf, len, true)?; + + let mut ret = Vec::new(); + let mut count = 0; + while count < len { + let rand = rand(); + for offset in 0..4 { + ret.push((rand >> offset * 2) as u8); + count += 1; + } + } + + writer.copy_to_user(&ret, 0)?; + Ok(len) + } +} diff --git a/kernel/src/driver/net/e1000e/e1000e.rs b/kernel/src/driver/net/e1000e/e1000e.rs index 352e72982..05dc4dfef 100644 --- a/kernel/src/driver/net/e1000e/e1000e.rs +++ b/kernel/src/driver/net/e1000e/e1000e.rs @@ -16,7 +16,7 @@ use crate::driver::pci::pci::{ }; use crate::driver::pci::pci_irq::{IrqCommonMsg, IrqMsg, IrqSpecificMsg, PciInterrupt, IRQ}; use crate::include::bindings::bindings::pt_regs; -use crate::libs::volatile::{ReadOnly, Volatile, VolatileReadable, VolatileWritable, WriteOnly}; +use crate::libs::volatile::{ReadOnly, Volatile, WriteOnly}; use crate::net::net_core::poll_ifaces_try_lock_onetime; use crate::{kdebug, kinfo}; @@ -45,8 +45,10 @@ const E1000E_DEVICE_ID: [u16; 14] = [ // BAR0空间大小(128KB) const E1000E_BAR_REG_SIZE: u32 = 128 * 1024; // BAR0空间对齐(64bit) +#[allow(dead_code)] const E1000E_BAR_REG_ALIGN: u8 = 64; // 单个寄存器大小(32bit, 4字节) +#[allow(dead_code)] const E1000E_REG_SIZE: u8 = 4; // TxBuffer和RxBuffer的大小(DMA页) @@ -112,11 +114,13 @@ impl E1000EBuffer { } } + #[allow(dead_code)] pub fn as_addr(&self) -> NonNull { assert!(self.length != 0); return self.buffer; } + #[allow(dead_code)] pub fn as_addr_u64(&self) -> u64 { assert!(self.length != 0); return self.buffer.as_ptr() as u64; @@ -127,6 +131,7 @@ impl E1000EBuffer { return self.paddr; } + #[allow(dead_code)] pub fn as_slice(&self) -> &[u8] { assert!(self.length != 0); return unsafe { from_raw_parts(self.buffer.as_ptr(), self.length) }; @@ -154,10 +159,11 @@ impl E1000EBuffer { // 中断处理函数, 调用协议栈的poll函数,未来可能会用napi来替换这里 // Interrupt handler -unsafe extern "C" fn e1000e_irq_handler(irq_num: u64, irq_paramer: u64, regs: *mut pt_regs) { +unsafe extern "C" fn e1000e_irq_handler(_irq_num: u64, _irq_paramer: u64, _regs: *mut pt_regs) { poll_ifaces_try_lock_onetime().ok(); } +#[allow(dead_code)] pub struct E1000EDevice { // 设备寄存器 // device registers @@ -194,6 +200,7 @@ pub struct E1000EDevice { impl E1000EDevice { // 从PCI标准设备进行驱动初始化 // init the device for PCI standard device struct + #[allow(unused_assignments)] pub fn new(device: &mut PciDeviceStructureGeneralDevice) -> Result { // 从BAR0获取我们需要的寄存器 // Build registers sturcts from BAR0 @@ -479,6 +486,7 @@ impl E1000EDevice { // 切换是否接受分组到达的中断 // change whether the receive timer interrupt is enabled // Note: this method is not completely implemented and not used in the current version + #[allow(dead_code)] pub fn e1000e_intr_set(&mut self, state: bool) { let mut ims = unsafe { volread!(self.interrupt_regs, ims) }; match state { @@ -491,6 +499,7 @@ impl E1000EDevice { // 实现了一部分napi机制的收包函数, 现在还没有投入使用 // This method is a partial implementation of napi (New API) techniques // Note: this method is not completely implemented and not used in the current version + #[allow(dead_code)] pub fn e1000e_receive2(&mut self) -> Option { // 向设备表明我们已经接受到了之前的中断 // Tell e1000e we have received the interrupt @@ -585,8 +594,8 @@ pub extern "C" fn rs_e1000e_init() { pub fn e1000e_init() -> () { match e1000e_probe() { - Ok(code) => kinfo!("Successfully init e1000e device!"), - Err(error) => kinfo!("Error occurred!"), + Ok(_code) => kinfo!("Successfully init e1000e device!"), + Err(_error) => kinfo!("Error occurred!"), } } @@ -618,6 +627,7 @@ pub fn e1000e_probe() -> Result { // 用到的e1000e寄存器结构体 // pp.275, Table 13-3 // 设备通用寄存器 +#[allow(dead_code)] struct GeneralRegs { ctrl: Volatile, //0x00000 ctrl_alias: Volatile, //0x00004 @@ -630,6 +640,7 @@ struct GeneralRegs { mdic: Volatile, //0x00020 } // 中断控制 +#[allow(dead_code)] struct InterruptRegs { icr: Volatile, //0x000c0 ICR寄存器应当为只读寄存器,但我们需要向其中写入来清除对应位 itr: Volatile, //0x000c4 @@ -644,6 +655,7 @@ struct ReceiveCtrlRegs { rctl: Volatile, //0x00100 } // 发包功能控制 +#[allow(dead_code)] struct TransmitCtrlRegs { tctl: Volatile, //0x00400 tctl_ext: Volatile, //0x00404 @@ -652,6 +664,7 @@ struct TransmitCtrlRegs { tipg: Volatile, //0x00410 } // 收包功能相关 +#[allow(dead_code)] struct ReceiveRegs { rdbal0: Volatile, //0x02800 rdbah0: Volatile, //0x02804 @@ -666,6 +679,7 @@ struct ReceiveRegs { rxdctl: Volatile, //0x2828 } // 发包功能相关 +#[allow(dead_code)] struct TransimitRegs { tdbal0: Volatile, //0x03800 tdbah0: Volatile, //0x03804 @@ -689,6 +703,7 @@ struct ReceiveAddressRegs { struct PCIeRegs { gcr: Volatile, //0x05b00 } +#[allow(dead_code)] struct StatisticsRegs {} // 0x05200-0x053fc @@ -714,15 +729,19 @@ const E1000E_CTRL_SLU: u32 = 1 << 6; const E1000E_CTRL_FRCSPD: u32 = 1 << 11; const E1000E_CTRL_FRCDPLX: u32 = 1 << 12; const E1000E_CTRL_RST: u32 = 1 << 26; +#[allow(dead_code)] const E1000E_CTRL_RFCE: u32 = 1 << 27; +#[allow(dead_code)] const E1000E_CTRL_TFCE: u32 = 1 << 28; const E1000E_CTRL_PHY_RST: u32 = 1 << 31; // IMS const E1000E_IMS_LSC: u32 = 1 << 2; const E1000E_IMS_RXDMT0: u32 = 1 << 4; +#[allow(dead_code)] const E1000E_IMS_RXO: u32 = 1 << 6; const E1000E_IMS_RXT0: u32 = 1 << 7; +#[allow(dead_code)] const E1000E_IMS_RXQ0: u32 = 1 << 20; const E1000E_IMS_OTHER: u32 = 1 << 24; // qemu use this bit to set msi-x interrupt diff --git a/kernel/src/driver/net/e1000e/e1000e_driver.rs b/kernel/src/driver/net/e1000e/e1000e_driver.rs index 9b44e7417..3dd72d44e 100644 --- a/kernel/src/driver/net/e1000e/e1000e_driver.rs +++ b/kernel/src/driver/net/e1000e/e1000e_driver.rs @@ -3,16 +3,12 @@ use crate::{ driver::{ base::{ - device::{ - bus::Bus, - driver::{Driver, DriverError}, - Device, DevicePrivateData, IdTable, - }, + device::{bus::Bus, driver::Driver, Device, IdTable}, kobject::{KObjType, KObject, KObjectState}, }, net::NetDriver, }, - kdebug, kinfo, + kinfo, libs::spinlock::SpinLock, net::{generate_iface_id, NET_DRIVERS}, syscall::SystemError, @@ -84,7 +80,7 @@ impl phy::RxToken for E1000ERxToken { } impl phy::TxToken for E1000ETxToken { - fn consume(self, len: usize, f: F) -> R + fn consume(self, _len: usize, f: F) -> R where F: FnOnce(&mut [u8]) -> R, { diff --git a/kernel/src/driver/net/virtio_net.rs b/kernel/src/driver/net/virtio_net.rs index 16bd78814..2b58fb6ce 100644 --- a/kernel/src/driver/net/virtio_net.rs +++ b/kernel/src/driver/net/virtio_net.rs @@ -310,7 +310,7 @@ impl NetDriver for VirtioInterface { let mut guard = self.iface.lock(); let poll_res = guard.poll(timestamp, self.driver.force_get_mut(), sockets); // todo: notify!!! - kdebug!("Virtio Interface poll:{poll_res}"); + // kdebug!("Virtio Interface poll:{poll_res}"); if poll_res { return Ok(()); } diff --git a/kernel/src/filesystem/vfs/syscall.rs b/kernel/src/filesystem/vfs/syscall.rs index e4ca340f3..f62f2eeae 100644 --- a/kernel/src/filesystem/vfs/syscall.rs +++ b/kernel/src/filesystem/vfs/syscall.rs @@ -151,7 +151,6 @@ impl Syscall { /// @return 文件描述符编号,或者是错误码 pub fn open(path: &str, mode: FileMode) -> Result { // kdebug!("open: path: {}, mode: {:?}", path, mode); - // 文件名过长 if path.len() > MAX_PATHLEN as usize { return Err(SystemError::ENAMETOOLONG); @@ -727,6 +726,13 @@ impl Syscall { return Ok(kstat); } + fn do_stat(path: &str) -> Result { + let fd = Self::open(path, FileMode::O_RDONLY)?; + let ret = Self::do_fstat(fd as i32); + Self::close(fd)?; + ret + } + pub fn fstat(fd: i32, usr_kstat: *mut PosixKstat) -> Result { let kstat = Self::do_fstat(fd)?; if usr_kstat.is_null() { @@ -738,6 +744,14 @@ impl Syscall { return Ok(0); } + pub fn stat(path: &str, user_kstat: *mut PosixKstat) -> Result { + let fd = Self::open(path, FileMode::O_RDONLY)?; + Self::fstat(fd as i32, user_kstat).map_err(|e| { + Self::close(fd).ok(); + e + }) + } + pub fn mknod( path_ptr: *const i8, mode: ModeType, @@ -771,6 +785,15 @@ impl Syscall { return Ok(0); } + + pub fn writev(fd: i32, iov: usize, count: usize) -> Result { + // IoVecs会进行用户态检验 + let iovecs = unsafe { IoVecs::from_user(iov as *const IoVec, count, false) }?; + + let data = iovecs.gather(); + + Self::write(fd, &data) + } } #[repr(C)] #[derive(Debug, Clone, Copy)] diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 7aacda352..63f27900a 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -20,6 +20,7 @@ #![feature(ptr_to_from_bits)] #![feature(concat_idents)] #![cfg_attr(target_os = "none", no_std)] +#![feature(atomic_mut_ptr)] #[cfg(test)] #[macro_use] diff --git a/kernel/src/libs/futex/constant.rs b/kernel/src/libs/futex/constant.rs new file mode 100644 index 000000000..87df5c85e --- /dev/null +++ b/kernel/src/libs/futex/constant.rs @@ -0,0 +1,54 @@ +bitflags! { + pub struct FutexArg: u32 { + const FUTEX_WAIT = 0; + const FUTEX_WAKE = 1; + const FUTEX_FD = 2; + const FUTEX_REQUEUE = 3; + const FUTEX_CMP_REQUEUE = 4; + const FUTEX_WAKE_OP = 5; + const FUTEX_LOCK_PI = 6; + const FUTEX_UNLOCK_PI = 7; + const FUTEX_TRYLOCK_PI = 8; + const FUTEX_WAIT_BITSET = 9; + const FUTEX_WAKE_BITSET = 10; + const FUTEX_WAIT_REQUEUE_PI = 11; + const FUTEX_CMP_REQUEUE_PI = 12; + const FUTEX_LOCK_PI2 = 13; + } + + pub struct FutexFlag: u32 { + const FLAGS_MATCH_NONE = 0x01; + const FLAGS_SHARED = 0x01; + const FLAGS_CLOCKRT = 0x02; + const FLAGS_HAS_TIMEOUT = 0x04; + const FUTEX_PRIVATE_FLAG = 128; + const FUTEX_CLOCK_REALTIME = 256; + const FUTEX_CMD_MASK = !(Self::FUTEX_PRIVATE_FLAG.bits() | Self::FUTEX_CLOCK_REALTIME.bits()); + } + + pub struct FutexOP: u32 { + const FUTEX_OP_SET = 0; + const FUTEX_OP_ADD = 1; + const FUTEX_OP_OR = 2; + const FUTEX_OP_ANDN = 3; + const FUTEX_OP_XOR = 4; + const FUTEX_OP_OPARG_SHIFT = 8; + } + + pub struct FutexOpCMP: u32 { + const FUTEX_OP_CMP_EQ = 0; + const FUTEX_OP_CMP_NE = 1; + const FUTEX_OP_CMP_LT = 2; + const FUTEX_OP_CMP_LE = 3; + const FUTEX_OP_CMP_GT = 4; + const FUTEX_OP_CMP_GE = 5; + } +} + +#[allow(dead_code)] +pub const FUTEX_WAITERS: u32 = 0x80000000; +#[allow(dead_code)] +pub const FUTEX_OWNER_DIED: u32 = 0x40000000; +#[allow(dead_code)] +pub const FUTEX_TID_MASK: u32 = 0x3fffffff; +pub const FUTEX_BITSET_MATCH_ANY: u32 = 0xffffffff; diff --git a/kernel/src/libs/futex/futex.rs b/kernel/src/libs/futex/futex.rs new file mode 100644 index 000000000..65b100aff --- /dev/null +++ b/kernel/src/libs/futex/futex.rs @@ -0,0 +1,647 @@ +use alloc::{ + collections::LinkedList, + sync::{Arc, Weak}, +}; +use core::hash::{Hash, Hasher}; +use core::{intrinsics::likely, sync::atomic::AtomicU64}; +use hashbrown::HashMap; + +use crate::{ + arch::{sched::sched, CurrentIrqArch, MMArch}, + exception::InterruptArch, + libs::spinlock::{SpinLock, SpinLockGuard}, + mm::{ucontext::AddressSpace, MemoryManagementArch, VirtAddr}, + process::{ProcessControlBlock, ProcessManager}, + syscall::{user_access::UserBufferReader, SystemError}, + time::{ + timer::{next_n_us_timer_jiffies, Timer, WakeUpHelper}, + TimeSpec, + }, +}; + +use super::constant::*; + +static mut FUTEX_DATA: Option = None; + +pub struct FutexData { + data: SpinLock>, +} + +impl FutexData { + pub fn futex_map() -> SpinLockGuard<'static, HashMap> { + unsafe { FUTEX_DATA.as_ref().unwrap().data.lock() } + } + + pub fn try_remove(key: &FutexKey) -> Option { + unsafe { + let mut guard = FUTEX_DATA.as_ref().unwrap().data.lock(); + if let Some(futex) = guard.get(key) { + if futex.chain.is_empty() { + return guard.remove(key); + } + } + } + None + } +} + +pub struct Futex; + +// 对于同一个futex的进程或线程将会在这个bucket等待 +pub struct FutexHashBucket { + // 该futex维护的等待队列 + chain: LinkedList>, +} + +impl FutexHashBucket { + /// ## 判断是否在bucket里 + pub fn contains(&self, futex_q: &FutexObj) -> bool { + self.chain + .iter() + .filter(|x| futex_q.pcb.ptr_eq(&x.pcb) && x.key == futex_q.key) + .count() + != 0 + } + + /// 让futex_q在该bucket上挂起 + /// + /// 进入该函数前,需要关中断 + #[inline(always)] + pub fn sleep_no_sched(&mut self, futex_q: Arc) -> Result<(), SystemError> { + assert!(CurrentIrqArch::is_irq_enabled() == false); + self.chain.push_back(futex_q); + + ProcessManager::mark_sleep(true)?; + + Ok(()) + } + + /// ## 唤醒队列中的最多nr_wake个进程 + /// + /// return: 唤醒的进程数 + #[inline(always)] + pub fn wake_up( + &mut self, + key: FutexKey, + bitset: Option, + nr_wake: u32, + ) -> Result { + let mut count = 0; + let mut pop_count = 0; + while let Some(futex_q) = self.chain.pop_front() { + if futex_q.key == key { + // TODO: 考虑优先级继承的机制 + + if let Some(bitset) = bitset { + if futex_q.bitset != bitset { + self.chain.push_back(futex_q); + continue; + } + } + + // 唤醒 + if futex_q.pcb.upgrade().is_some() { + self.remove(futex_q.clone()); + ProcessManager::wakeup(&futex_q.pcb.upgrade().unwrap())?; + } + + // 判断唤醒数 + count += 1; + if count >= nr_wake { + break; + } + } else { + self.chain.push_back(futex_q); + } + // 判断是否循环完队列了 + pop_count += 1; + if pop_count >= self.chain.len() { + break; + } + } + Ok(count as usize) + } + + /// 将FutexObj从bucket中删除 + pub fn remove(&mut self, futex: Arc) { + self.chain.drain_filter(|x| Arc::ptr_eq(x, &futex)); + } +} + +#[derive(Debug)] +pub struct FutexObj { + pcb: Weak, + key: FutexKey, + bitset: u32, + // TODO: 优先级继承 +} + +pub enum FutexAccess { + FutexRead, + FutexWrite, +} + +#[allow(dead_code)] +#[derive(Hash, PartialEq, Eq, Clone, Debug)] +/// ### 用于定位内核唯一的futex +pub enum InnerFutexKey { + Shared(SharedKey), + Private(PrivateKey), +} + +#[derive(Hash, PartialEq, Eq, Clone, Debug)] +pub struct FutexKey { + ptr: u64, + word: u64, + offset: u32, + key: InnerFutexKey, +} + +/// 不同进程间通过文件共享futex变量,表明该变量在文件中的位置 +#[derive(Hash, PartialEq, Eq, Clone, Debug)] +pub struct SharedKey { + i_seq: u64, + page_offset: u64, +} + +/// 同一进程的不同线程共享futex变量,表明该变量在进程地址空间中的位置 +#[derive(Clone, Debug)] +pub struct PrivateKey { + // 所在的地址空间 + address_space: Option>, + // 表示所在页面的初始地址 + address: u64, +} + +impl Hash for PrivateKey { + fn hash(&self, state: &mut H) { + self.address.hash(state); + } +} + +impl Eq for PrivateKey {} + +impl PartialEq for PrivateKey { + fn eq(&self, other: &Self) -> bool { + if self.address_space.is_none() && other.address_space.is_none() { + return self.address == other.address; + } else { + return self + .address_space + .as_ref() + .unwrap_or(&Weak::default()) + .ptr_eq(&other.address_space.as_ref().unwrap_or(&Weak::default())) + && self.address == other.address; + } + } +} + +impl Futex { + /// ### 初始化FUTEX_DATA + pub fn init() { + unsafe { + FUTEX_DATA = Some(FutexData { + data: SpinLock::new(HashMap::new()), + }) + }; + } + + /// ### 让当前进程在指定futex上等待直到futex_wake显式唤醒 + pub fn futex_wait( + uaddr: VirtAddr, + flags: FutexFlag, + val: u32, + abs_time: Option, + bitset: u32, + ) -> Result { + if bitset == 0 { + return Err(SystemError::EINVAL); + } + + // 获取全局hash表的key值 + let key = Self::get_futex_key( + uaddr, + flags.contains(FutexFlag::FLAGS_SHARED), + FutexAccess::FutexRead, + )?; + + let mut futex_map_guard = FutexData::futex_map(); + let bucket = futex_map_guard.get_mut(&key); + let bucket_mut = match bucket { + Some(bucket) => bucket, + None => { + let bucket = FutexHashBucket { + chain: LinkedList::new(), + }; + futex_map_guard.insert(key.clone(), bucket); + futex_map_guard.get_mut(&key).unwrap() + } + }; + + // 使用UserBuffer读取futex + let user_reader = + UserBufferReader::new(uaddr.as_ptr::(), core::mem::size_of::(), true)?; + + // 从用户空间读取到futex的val + let mut uval = 0; + + // 读取 + // 这里只尝试一种方式去读取用户空间,与linux不太一致 + // 对于linux,如果bucket被锁住时读取失败,将会将bucket解锁后重新读取 + user_reader.copy_one_from_user::(&mut uval, 0)?; + + // 不满足wait条件,返回错误 + if uval != val { + return Err(SystemError::EAGAIN_OR_EWOULDBLOCK); + } + + let pcb = ProcessManager::current_pcb(); + // 创建超时计时器任务 + let mut timer = None; + if !abs_time.is_none() { + let time = abs_time.unwrap(); + + let wakeup_helper = WakeUpHelper::new(pcb.clone()); + + let sec = time.tv_sec; + let nsec = time.tv_nsec; + let jiffies = next_n_us_timer_jiffies((nsec / 1000 + sec * 1_000_000) as u64); + + let wake_up = Timer::new(wakeup_helper, jiffies); + + wake_up.activate(); + timer = Some(wake_up); + } + + let futex_q = Arc::new(FutexObj { + pcb: Arc::downgrade(&pcb), + key: key.clone(), + bitset, + }); + let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; + // 满足条件则将当前进程在该bucket上挂起 + bucket_mut.sleep_no_sched(futex_q.clone()).map_err(|e| { + kwarn!("error:{e:?}"); + e + })?; + drop(bucket_mut); + drop(futex_map_guard); + drop(irq_guard); + sched(); + + // 被唤醒后的检查 + let mut futex_map_guard = FutexData::futex_map(); + let bucket = futex_map_guard.get_mut(&key); + let bucket_mut = match bucket { + // 如果该pcb不在链表里面了或者该链表已经被释放,就证明是正常的Wake操作 + Some(bucket_mut) => { + if !bucket_mut.contains(&futex_q) { + // 取消定时器任务 + if timer.is_some() { + timer.unwrap().cancel(); + } + return Ok(0); + } + // 非正常唤醒,返回交给下层 + bucket_mut + } + None => { + // 取消定时器任务 + if timer.is_some() { + timer.unwrap().cancel(); + } + return Ok(0); + } + }; + + // 如果是超时唤醒,则返回错误 + if timer.is_some() { + if timer.clone().unwrap().timeout() { + bucket_mut.remove(futex_q); + + return Err(SystemError::ETIMEDOUT); + } + } + + // TODO: 如果没有挂起的信号,则重新判断是否满足wait要求,重新进入wait + + // 经过前面的几个判断,到这里之后, + // 当前进程被唤醒大概率是其他进程更改了uval,需要重新去判断当前进程是否满足wait + + // 到这里之后,前面的唤醒条件都不满足,则是被信号唤醒 + // 需要处理信号然后重启futex系统调用 + + // 取消定时器任务 + if timer.is_some() { + let timer = timer.unwrap(); + if !timer.timeout() { + timer.cancel(); + } + } + Ok(0) + } + + // ### 唤醒指定futex上挂起的最多nr_wake个进程 + pub fn futex_wake( + uaddr: VirtAddr, + flags: FutexFlag, + nr_wake: u32, + bitset: u32, + ) -> Result { + if bitset == 0 { + return Err(SystemError::EINVAL); + } + + // 获取futex_key,并且判断地址空间合法性 + let key = Self::get_futex_key( + uaddr, + flags.contains(FutexFlag::FLAGS_SHARED), + FutexAccess::FutexRead, + )?; + let mut binding = FutexData::futex_map(); + let bucket_mut = binding.get_mut(&key).ok_or(SystemError::EINVAL)?; + + // 确保后面的唤醒操作是有意义的 + if bucket_mut.chain.is_empty() { + return Ok(0); + } + // 从队列中唤醒 + let count = bucket_mut.wake_up(key.clone(), Some(bitset), nr_wake)?; + + drop(bucket_mut); + drop(binding); + + FutexData::try_remove(&key); + + Ok(count) + } + + /// ### 唤醒制定uaddr1上的最多nr_wake个进程,然后将uaddr1最多nr_requeue个进程移动到uaddr2绑定的futex上 + pub fn futex_requeue( + uaddr1: VirtAddr, + flags: FutexFlag, + uaddr2: VirtAddr, + nr_wake: i32, + nr_requeue: i32, + cmpval: Option, + requeue_pi: bool, + ) -> Result { + if nr_requeue < 0 || nr_wake < 0 { + return Err(SystemError::EINVAL); + } + + // 暂时不支持优先级继承 + if requeue_pi { + return Err(SystemError::ENOSYS); + } + + let key1 = Self::get_futex_key( + uaddr1, + flags.contains(FutexFlag::FLAGS_SHARED), + FutexAccess::FutexRead, + )?; + let key2 = Self::get_futex_key(uaddr2, flags.contains(FutexFlag::FLAGS_SHARED), { + match requeue_pi { + true => FutexAccess::FutexWrite, + false => FutexAccess::FutexRead, + } + })?; + + if requeue_pi && key1 == key2 { + return Err(SystemError::EINVAL); + } + + if likely(!cmpval.is_none()) { + let uval_reader = + UserBufferReader::new(uaddr1.as_ptr::(), core::mem::size_of::(), true)?; + let curval = uval_reader.read_one_from_user::(0)?; + + // 判断是否满足条件 + if *curval != cmpval.unwrap() { + return Err(SystemError::EAGAIN_OR_EWOULDBLOCK); + } + } + + let mut futex_data_guard = FutexData::futex_map(); + if !requeue_pi { + // 唤醒nr_wake个进程 + let bucket_1_mut = futex_data_guard.get_mut(&key1).ok_or(SystemError::EINVAL)?; + let ret = bucket_1_mut.wake_up(key1.clone(), None, nr_wake as u32)?; + drop(bucket_1_mut); + // 将bucket1中最多nr_requeue个任务转移到bucket2 + for _ in 0..nr_requeue { + let bucket_1_mut = futex_data_guard.get_mut(&key1).ok_or(SystemError::EINVAL)?; + let futex_q = bucket_1_mut.chain.pop_front(); + match futex_q { + Some(futex_q) => { + let bucket_2_mut = + futex_data_guard.get_mut(&key2).ok_or(SystemError::EINVAL)?; + bucket_2_mut.chain.push_back(futex_q); + } + None => { + break; + } + } + } + + return Ok(ret); + } else { + // 暂时不支持优先级继承 + todo!() + } + } + + /// ### 唤醒futex上的进程的同时进行一些操作 + pub fn futex_wake_op( + uaddr1: VirtAddr, + flags: FutexFlag, + uaddr2: VirtAddr, + nr_wake: i32, + nr_wake2: i32, + op: i32, + ) -> Result { + let key1 = Futex::get_futex_key( + uaddr1, + flags.contains(FutexFlag::FLAGS_SHARED), + FutexAccess::FutexRead, + )?; + let key2 = Futex::get_futex_key( + uaddr2, + flags.contains(FutexFlag::FLAGS_SHARED), + FutexAccess::FutexWrite, + )?; + + let mut futex_data_guard = FutexData::futex_map(); + let bucket1 = futex_data_guard.get_mut(&key1).ok_or(SystemError::EINVAL)?; + let mut wake_count = 0; + + // 唤醒uaddr1中的进程 + wake_count += bucket1.wake_up(key1, None, nr_wake as u32)?; + + match Self::futex_atomic_op_inuser(op as u32, uaddr2) { + Ok(ret) => { + // 操作成功则唤醒uaddr2中的进程 + if ret { + let bucket2 = futex_data_guard.get_mut(&key2).ok_or(SystemError::EINVAL)?; + wake_count += bucket2.wake_up(key2, None, nr_wake2 as u32)?; + } + } + Err(e) => { + // TODO:retry? + return Err(e); + } + } + + Ok(wake_count) + } + + fn get_futex_key( + uaddr: VirtAddr, + fshared: bool, + _access: FutexAccess, + ) -> Result { + let mut address = uaddr.data(); + + // 计算相对页的偏移量 + let offset = address & (MMArch::PAGE_SIZE - 1); + // 判断内存对齐 + if !(uaddr.data() & (core::mem::size_of::() - 1) == 0) { + return Err(SystemError::EINVAL); + } + + // 目前address指向所在页面的起始地址 + address -= offset; + + // 若不是进程间共享的futex,则返回Private + if !fshared { + return Ok(FutexKey { + ptr: 0, + word: 0, + offset: offset as u32, + key: InnerFutexKey::Private(PrivateKey { + address: address as u64, + address_space: None, + }), + }); + } + + // 获取到地址所在地址空间 + let address_space = AddressSpace::current()?; + // TODO: 判断是否为匿名映射,是匿名映射才返回PrivateKey + return Ok(FutexKey { + ptr: 0, + word: 0, + offset: offset as u32, + key: InnerFutexKey::Private(PrivateKey { + address: address as u64, + address_space: Some(Arc::downgrade(&address_space)), + }), + }); + + // 未实现共享内存机制,贡献内存部分应该通过inode构建SharedKey + // todo!("Shared memory not implemented"); + } + + pub fn futex_atomic_op_inuser(encoded_op: u32, uaddr: VirtAddr) -> Result { + let op = FutexOP::from_bits((encoded_op & 0x70000000) >> 28).ok_or(SystemError::ENOSYS)?; + let cmp = + FutexOpCMP::from_bits((encoded_op & 0x0f000000) >> 24).ok_or(SystemError::ENOSYS)?; + + let sign_extend32 = |value: u32, index: i32| { + let shift = (31 - index) as u8; + return (value << shift) >> shift; + }; + + let mut oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11); + let cmparg = sign_extend32(encoded_op & 0x00000fff, 11); + + if encoded_op & (FutexOP::FUTEX_OP_OPARG_SHIFT.bits() << 28) != 0 { + if oparg > 31 { + kwarn!( + "futex_wake_op: pid:{} tries to shift op by {}; fix this program", + ProcessManager::current_pcb().pid().data(), + oparg + ); + + oparg &= 31; + } + } + + // TODO: 这个汇编似乎是有问题的,目前不好测试 + let old_val = Self::arch_futex_atomic_op_inuser(op, oparg, uaddr)?; + + match cmp { + FutexOpCMP::FUTEX_OP_CMP_EQ => { + return Ok(cmparg == old_val); + } + FutexOpCMP::FUTEX_OP_CMP_NE => { + return Ok(cmparg != old_val); + } + FutexOpCMP::FUTEX_OP_CMP_LT => { + return Ok(cmparg < old_val); + } + FutexOpCMP::FUTEX_OP_CMP_LE => { + return Ok(cmparg <= old_val); + } + FutexOpCMP::FUTEX_OP_CMP_GE => { + return Ok(cmparg >= old_val); + } + FutexOpCMP::FUTEX_OP_CMP_GT => { + return Ok(cmparg > old_val); + } + _ => { + return Err(SystemError::ENOSYS); + } + } + } + + /// ### 对futex进行操作 + /// + /// 进入该方法会关闭中断保证修改的原子性,所以进入该方法前应确保中断锁已释放 + /// + /// ### return uaddr原来的值 + #[allow(unused_assignments)] + pub fn arch_futex_atomic_op_inuser( + op: FutexOP, + oparg: u32, + uaddr: VirtAddr, + ) -> Result { + let guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; + + let reader = + UserBufferReader::new(uaddr.as_ptr::(), core::mem::size_of::(), true)?; + + let oldval = reader.read_one_from_user::(0)?; + + let atomic_addr = AtomicU64::new(uaddr.data() as u64); + // 这个指针是指向指针的指针 + let ptr = atomic_addr.as_mut_ptr(); + match op { + FutexOP::FUTEX_OP_SET => unsafe { + *((*ptr) as *mut u32) = oparg; + }, + FutexOP::FUTEX_OP_ADD => unsafe { + *((*ptr) as *mut u32) += oparg; + }, + FutexOP::FUTEX_OP_OR => unsafe { + *((*ptr) as *mut u32) |= oparg; + }, + FutexOP::FUTEX_OP_ANDN => unsafe { + *((*ptr) as *mut u32) &= oparg; + }, + FutexOP::FUTEX_OP_XOR => unsafe { + *((*ptr) as *mut u32) ^= oparg; + }, + _ => return Err(SystemError::ENOSYS), + } + + drop(guard); + + Ok(*oldval) + } +} + +#[no_mangle] +unsafe extern "C" fn rs_futex_init() { + Futex::init(); +} diff --git a/kernel/src/libs/futex/mod.rs b/kernel/src/libs/futex/mod.rs new file mode 100644 index 000000000..b8a5b7b97 --- /dev/null +++ b/kernel/src/libs/futex/mod.rs @@ -0,0 +1,3 @@ +pub mod constant; +pub mod futex; +pub mod syscall; diff --git a/kernel/src/libs/futex/syscall.rs b/kernel/src/libs/futex/syscall.rs new file mode 100644 index 000000000..98103b84f --- /dev/null +++ b/kernel/src/libs/futex/syscall.rs @@ -0,0 +1,106 @@ +use crate::{ + mm::VirtAddr, + syscall::{Syscall, SystemError}, + time::TimeSpec, +}; + +use super::{constant::*, futex::Futex}; + +impl Syscall { + pub fn do_futex( + uaddr: VirtAddr, + operation: FutexFlag, + val: u32, + timeout: Option, + uaddr2: VirtAddr, + val2: u32, + val3: u32, + ) -> Result { + let cmd = FutexArg::from_bits(operation.bits() & FutexFlag::FUTEX_CMD_MASK.bits()) + .ok_or(SystemError::ENOSYS)?; + + let mut flags = FutexFlag::FLAGS_MATCH_NONE; + + if !operation.contains(FutexFlag::FUTEX_PRIVATE_FLAG) { + flags.insert(FutexFlag::FLAGS_SHARED); + } + + if operation.contains(FutexFlag::FUTEX_CLOCK_REALTIME) { + flags.insert(FutexFlag::FLAGS_CLOCKRT); + if cmd != FutexArg::FUTEX_WAIT_BITSET + && cmd != FutexArg::FUTEX_WAIT_REQUEUE_PI + && cmd != FutexArg::FUTEX_LOCK_PI2 + { + return Err(SystemError::ENOSYS); + } + } + + match cmd { + FutexArg::FUTEX_WAIT => { + return Futex::futex_wait(uaddr, flags, val, timeout, FUTEX_BITSET_MATCH_ANY); + } + FutexArg::FUTEX_WAIT_BITSET => { + return Futex::futex_wait(uaddr, flags, val, timeout, val3); + } + FutexArg::FUTEX_WAKE => { + return Futex::futex_wake(uaddr, flags, val, FUTEX_BITSET_MATCH_ANY); + } + FutexArg::FUTEX_WAKE_BITSET => { + return Futex::futex_wake(uaddr, flags, val, val3); + } + FutexArg::FUTEX_REQUEUE => { + return Futex::futex_requeue( + uaddr, + flags, + uaddr2, + val as i32, + val2 as i32, + None, + false, + ); + } + FutexArg::FUTEX_CMP_REQUEUE => { + return Futex::futex_requeue( + uaddr, + flags, + uaddr2, + val as i32, + val2 as i32, + Some(val3), + false, + ); + } + FutexArg::FUTEX_WAKE_OP => { + return Futex::futex_wake_op( + uaddr, + flags, + uaddr2, + val as i32, + val2 as i32, + val3 as i32, + ); + } + FutexArg::FUTEX_LOCK_PI => { + todo!() + } + FutexArg::FUTEX_LOCK_PI2 => { + todo!() + } + FutexArg::FUTEX_UNLOCK_PI => { + todo!() + } + FutexArg::FUTEX_TRYLOCK_PI => { + todo!() + } + FutexArg::FUTEX_WAIT_REQUEUE_PI => { + todo!() + } + FutexArg::FUTEX_CMP_REQUEUE_PI => { + todo!() + } + _ => { + return Err(SystemError::ENOSYS); + } + } + } +} diff --git a/kernel/src/libs/mod.rs b/kernel/src/libs/mod.rs index d7d96e3fa..ac5e208c8 100644 --- a/kernel/src/libs/mod.rs +++ b/kernel/src/libs/mod.rs @@ -21,4 +21,6 @@ pub mod spinlock; pub mod vec_cursor; #[macro_use] pub mod volatile; +pub mod futex; +pub mod rand; pub mod wait_queue; diff --git a/kernel/src/libs/rand.rs b/kernel/src/libs/rand.rs new file mode 100644 index 000000000..581a04654 --- /dev/null +++ b/kernel/src/libs/rand.rs @@ -0,0 +1,7 @@ +bitflags! { + pub struct GRandFlags: u8{ + const GRND_NONBLOCK = 0x0001; + const GRND_RANDOM = 0x0002; + const GRND_INSECURE = 0x0004; + } +} diff --git a/kernel/src/main.c b/kernel/src/main.c index 9ae0f2cc9..20390bcaf 100644 --- a/kernel/src/main.c +++ b/kernel/src/main.c @@ -38,6 +38,7 @@ extern void rs_kthread_init(); extern void rs_init_intertrait(); extern void rs_init_before_mem_init(); extern int rs_setup_arch(); +extern void rs_futex_init(); extern int rs_hpet_init(); extern int rs_hpet_enable(); extern int rs_tsc_init(); @@ -152,6 +153,7 @@ void system_initialize() smp_init(); io_mfence(); + rs_futex_init(); cli(); rs_hpet_init(); rs_hpet_enable(); diff --git a/kernel/src/mm/ucontext.rs b/kernel/src/mm/ucontext.rs index 44605720a..43130d639 100644 --- a/kernel/src/mm/ucontext.rs +++ b/kernel/src/mm/ucontext.rs @@ -706,7 +706,7 @@ impl UserMappings { /// 请注意,在调用本函数之前,必须先确定region所在范围内没有VMA。 fn reserve_hole(&mut self, region: &VirtRegion) { let prev_hole: Option<(&VirtAddr, &mut usize)> = - self.vm_holes.range_mut(..region.start()).next_back(); + self.vm_holes.range_mut(..=region.start()).next_back(); if let Some((prev_hole_vaddr, prev_hole_size)) = prev_hole { let prev_hole_end = prev_hole_vaddr.add(*prev_hole_size); @@ -946,6 +946,8 @@ pub struct VMA { /// VMA所属的用户地址空间 user_address_space: Option>, self_ref: Weak, + + provider: Provider, } impl core::hash::Hash for VMA { @@ -956,6 +958,12 @@ impl core::hash::Hash for VMA { } } +/// 描述不同类型的内存提供者或资源 +#[derive(Debug)] +pub enum Provider { + Allocated, // TODO:其他 +} + #[allow(dead_code)] impl VMA { pub fn region(&self) -> &VirtRegion { @@ -974,6 +982,7 @@ impl VMA { mapped: self.mapped, user_address_space: self.user_address_space.clone(), self_ref: self.self_ref.clone(), + provider: Provider::Allocated, }; } @@ -1019,8 +1028,15 @@ impl VMA { /// /// - `prot_flags` 要检查的标志位 pub fn can_have_flags(&self, prot_flags: ProtFlags) -> bool { - return (self.flags.has_write() || !prot_flags.contains(ProtFlags::PROT_WRITE)) + let is_downgrade = (self.flags.has_write() || !prot_flags.contains(ProtFlags::PROT_WRITE)) && (self.flags.has_execute() || !prot_flags.contains(ProtFlags::PROT_EXEC)); + + match self.provider { + Provider::Allocated { .. } => true, + + #[allow(unreachable_patterns)] + _ => is_downgrade, + } } /// 把物理地址映射到虚拟地址 @@ -1070,6 +1086,7 @@ impl VMA { mapped: true, user_address_space: None, self_ref: Weak::default(), + provider: Provider::Allocated, }); return Ok(r); } @@ -1118,6 +1135,7 @@ impl VMA { mapped: true, user_address_space: None, self_ref: Weak::default(), + provider: Provider::Allocated, }); drop(flusher); // kdebug!("VMA::zeroed: flusher dropped"); diff --git a/kernel/src/process/c_adapter.rs b/kernel/src/process/c_adapter.rs index e607751ff..3cc782aeb 100644 --- a/kernel/src/process/c_adapter.rs +++ b/kernel/src/process/c_adapter.rs @@ -36,16 +36,25 @@ pub extern "C" fn rs_current_pcb_pid() -> u32 { #[no_mangle] pub extern "C" fn rs_current_pcb_preempt_count() -> u32 { + if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } { + return 0; + } return ProcessManager::current_pcb().preempt_count() as u32; } #[no_mangle] pub extern "C" fn rs_current_pcb_flags() -> u32 { + if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } { + return 0; + } return ProcessManager::current_pcb().flags().bits() as u32; } #[no_mangle] pub extern "C" fn rs_current_pcb_thread_rbp() -> u64 { + if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } { + return 0; + } return ProcessManager::current_pcb().arch_info_irqsave().rbp() as u64; } @@ -61,5 +70,8 @@ pub extern "C" fn rs_preempt_enable() { #[no_mangle] pub extern "C" fn rs_process_do_exit(exit_code: usize) -> usize { + if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } { + return 0; + } ProcessManager::exit(exit_code); } diff --git a/kernel/src/process/fork.rs b/kernel/src/process/fork.rs index 8e576a825..931eb3795 100644 --- a/kernel/src/process/fork.rs +++ b/kernel/src/process/fork.rs @@ -1,9 +1,13 @@ use alloc::{string::ToString, sync::Arc}; use crate::{ - arch::interrupt::TrapFrame, filesystem::procfs::procfs_register_pid, - ipc::signal::flush_signal_handlers, libs::rwlock::RwLock, process::ProcessFlags, - syscall::SystemError, + arch::interrupt::TrapFrame, + filesystem::procfs::procfs_register_pid, + ipc::signal::flush_signal_handlers, + libs::rwlock::RwLock, + mm::VirtAddr, + process::ProcessFlags, + syscall::{user_access::UserBufferWriter, SystemError}, }; use super::{ @@ -13,21 +17,116 @@ use super::{ bitflags! { /// 进程克隆标志 - pub struct CloneFlags: u32 { + pub struct CloneFlags: u64 { + /// 在进程间共享虚拟内存空间 + const CLONE_VM = 0x00000100; /// 在进程间共享文件系统信息 - const CLONE_FS = (1 << 0); - /// 克隆时,与父进程共享信号结构体 - const CLONE_SIGNAL = (1 << 1); + const CLONE_FS = 0x00000200; + /// 共享打开的文件 + const CLONE_FILES = 0x00000400; /// 克隆时,与父进程共享信号处理结构体 - const CLONE_SIGHAND = (1 << 2); - /// 克隆时,将原本被设置为SIG_IGNORE的信号,设置回SIG_DEFAULT - const CLONE_CLEAR_SIGHAND = (1 << 3); - /// 在进程间共享虚拟内存空间 - const CLONE_VM = (1 << 4); + const CLONE_SIGHAND = 0x00000800; + /// 返回进程的文件描述符 + const CLONE_PIDFD = 0x00001000; + /// 使克隆对象成为父进程的跟踪对象 + const CLONE_PTRACE = 0x00002000; + /// 在执行 exec() 或 _exit() 之前挂起父进程的执行 + const CLONE_VFORK = 0x00004000; + /// 使克隆对象的父进程为调用进程的父进程 + const CLONE_PARENT = 0x00008000; /// 拷贝线程 - const CLONE_THREAD = (1 << 5); - /// 共享打开的文件 - const CLONE_FILES = (1 << 6); + const CLONE_THREAD = 0x00010000; + /// 创建一个新的命名空间,其中包含独立的文件系统挂载点层次结构。 + const CLONE_NEWNS = 0x00020000; + /// 与父进程共享 System V 信号量。 + const CLONE_SYSVSEM = 0x00040000; + /// 设置其线程本地存储 + const CLONE_SETTLS = 0x00080000; + /// 设置partent_tid地址为子进程线程 ID + const CLONE_PARENT_SETTID = 0x00100000; + /// 在子进程中设置一个清除线程 ID 的用户空间地址 + const CLONE_CHILD_CLEARTID = 0x00200000; + /// 创建一个新线程,将其设置为分离状态 + const CLONE_DETACHED = 0x00400000; + /// 使其在创建者进程或线程视角下成为无法跟踪的。 + const CLONE_UNTRACED = 0x00800000; + /// 设置其子进程线程 ID + const CLONE_CHILD_SETTID = 0x01000000; + /// 将其放置在一个新的 cgroup 命名空间中 + const CLONE_NEWCGROUP = 0x02000000; + /// 将其放置在一个新的 UTS 命名空间中 + const CLONE_NEWUTS = 0x04000000; + /// 将其放置在一个新的 IPC 命名空间中 + const CLONE_NEWIPC = 0x08000000; + /// 将其放置在一个新的用户命名空间中 + const CLONE_NEWUSER = 0x10000000; + /// 将其放置在一个新的 PID 命名空间中 + const CLONE_NEWPID = 0x20000000; + /// 将其放置在一个新的网络命名空间中 + const CLONE_NEWNET = 0x40000000; + /// 在新的 I/O 上下文中运行它 + const CLONE_IO = 0x80000000; + /// 克隆时,与父进程共享信号结构体 + const CLONE_SIGNAL = 0x00010000 | 0x00000800; + /// 克隆时,将原本被设置为SIG_IGNORE的信号,设置回SIG_DEFAULT + const CLONE_CLEAR_SIGHAND = 0x100000000; + } +} + +/// ## clone与clone3系统调用的参数载体 +/// +/// 因为这两个系统调用的参数很多,所以有这样一个载体更灵活 +/// +/// 仅仅作为参数传递 +#[derive(Debug, Clone, Copy)] +pub struct KernelCloneArgs { + pub flags: CloneFlags, + + // 下列属性均来自用户空间 + pub pidfd: VirtAddr, + pub child_tid: VirtAddr, + pub parent_tid: VirtAddr, + pub set_tid: VirtAddr, + + pub exit_signal: i32, + + pub stack: usize, + // clone3用到 + pub stack_size: usize, + pub tls: usize, + + pub set_tid_size: usize, + pub cgroup: i32, + + pub io_thread: bool, + pub kthread: bool, + pub idle: bool, + pub func: VirtAddr, + pub fn_arg: VirtAddr, + // cgrp 和 cset? +} + +impl KernelCloneArgs { + pub fn new() -> Self { + let null_addr = VirtAddr::new(0); + Self { + flags: unsafe { CloneFlags::from_bits_unchecked(0) }, + pidfd: null_addr, + child_tid: null_addr, + parent_tid: null_addr, + set_tid: null_addr, + exit_signal: 0, + stack: 0, + stack_size: 0, + tls: 0, + set_tid_size: 0, + cgroup: 0, + io_thread: false, + kthread: false, + idle: false, + func: null_addr, + fn_arg: null_addr, + } } } @@ -56,53 +155,9 @@ impl ProcessManager { let name = current_pcb.basic().name().to_string(); let pcb = ProcessControlBlock::new(name, new_kstack); - // 克隆架构相关信息 - *pcb.arch_info() = current_pcb.arch_info_irqsave().clone(); - - // 为内核线程设置worker private字段。(也许由内核线程机制去做会更好?) - if current_pcb.flags().contains(ProcessFlags::KTHREAD) { - *pcb.worker_private() = Some(WorkerPrivate::KernelThread(KernelThreadPcbPrivate::new())) - } - - // 拷贝标志位 - ProcessManager::copy_flags(&clone_flags, &pcb).unwrap_or_else(|e| { - panic!( - "fork: Failed to copy flags from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", - current_pcb.pid(), pcb.pid(), e - ) - }); - - // 拷贝用户地址空间 - ProcessManager::copy_mm(&clone_flags, ¤t_pcb, &pcb).unwrap_or_else(|e| { - panic!( - "fork: Failed to copy mm from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", - current_pcb.pid(), pcb.pid(), e - ) - }); - - // 拷贝文件描述符表 - ProcessManager::copy_files(&clone_flags, ¤t_pcb, &pcb).unwrap_or_else(|e| { - panic!( - "fork: Failed to copy files from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", - current_pcb.pid(), pcb.pid(), e - ) - }); - - //拷贝信号相关数据 - ProcessManager::copy_sighand(&clone_flags, ¤t_pcb, &pcb).unwrap_or_else(|e| { - panic!( - "fork: Failed to copy sighands from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", - current_pcb.pid(), pcb.pid(), e - ) - }); - - // 拷贝线程 - ProcessManager::copy_thread(&clone_flags, ¤t_pcb, &pcb, ¤t_trapframe).unwrap_or_else(|e| { - panic!( - "fork: Failed to copy thread from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", - current_pcb.pid(), pcb.pid(), e - ) - }); + let mut args = KernelCloneArgs::new(); + args.flags = clone_flags; + Self::copy_process(¤t_pcb, &pcb, args, current_trapframe)?; ProcessManager::add_pcb(pcb.clone()); @@ -168,7 +223,6 @@ impl ProcessManager { unsafe { new_pcb.basic_mut().set_user_vm(Some(old_address_space)) }; return Ok(()); } - let new_address_space = old_address_space.write().try_clone().unwrap_or_else(|e| { panic!( "copy_mm: Failed to clone address space of current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", @@ -215,4 +269,139 @@ impl ProcessManager { } return Ok(()); } + + /// 拷贝进程信息 + /// + /// ## panic: + /// 某一步拷贝失败时会引发panic + /// 例如:copy_mm等失败时会触发panic + /// + /// ## 参数 + /// + /// - clone_flags 标志位 + /// - des_pcb 目标pcb + /// - src_pcb 拷贝源pcb + /// + /// ## return + /// - 发生错误时返回Err(SystemError) + pub fn copy_process( + current_pcb: &Arc, + pcb: &Arc, + clone_args: KernelCloneArgs, + current_trapframe: &mut TrapFrame, + ) -> Result<(), SystemError> { + let clone_flags = clone_args.flags; + // 不允许与不同namespace的进程共享根目录 + if (clone_flags == (CloneFlags::CLONE_NEWNS | CloneFlags::CLONE_FS)) + || clone_flags == (CloneFlags::CLONE_NEWUSER | CloneFlags::CLONE_FS) + { + return Err(SystemError::EINVAL); + } + + // 线程组必须共享信号,分离线程只能在线程组内启动。 + if clone_flags.contains(CloneFlags::CLONE_THREAD) + && !clone_flags.contains(CloneFlags::CLONE_SIGHAND) + { + return Err(SystemError::EINVAL); + } + + // 共享信号处理器意味着共享vm。 + // 线程组也意味着共享vm。阻止这种情况可以简化其他代码。 + if clone_flags.contains(CloneFlags::CLONE_SIGHAND) + && !clone_flags.contains(CloneFlags::CLONE_VM) + { + return Err(SystemError::EINVAL); + } + + // TODO: 处理CLONE_PARENT 与 SIGNAL_UNKILLABLE的情况 + + // 如果新进程使用不同的 pid 或 namespace, + // 则不允许它与分叉任务共享线程组。 + if clone_flags.contains(CloneFlags::CLONE_THREAD) { + if clone_flags.contains(CloneFlags::CLONE_NEWUSER | CloneFlags::CLONE_NEWPID) { + return Err(SystemError::EINVAL); + } + // TODO: 判断新进程与当前进程namespace是否相同,不同则返回错误 + } + + // 如果新进程将处于不同的time namespace, + // 则不能让它共享vm或线程组。 + if clone_flags.contains(CloneFlags::CLONE_THREAD | CloneFlags::CLONE_VM) { + // TODO: 判断time namespace,不同则返回错误 + } + + if clone_flags.contains(CloneFlags::CLONE_PIDFD) + && clone_flags.contains(CloneFlags::CLONE_DETACHED | CloneFlags::CLONE_THREAD) + { + return Err(SystemError::EINVAL); + } + + // TODO: 克隆前应该锁信号处理,等待克隆完成后再处理 + + // 克隆架构相关 + *pcb.arch_info() = current_pcb.arch_info_irqsave().clone(); + + // 为内核线程设置WorkerPrivate + if current_pcb.flags().contains(ProcessFlags::KTHREAD) { + *pcb.worker_private() = + Some(WorkerPrivate::KernelThread(KernelThreadPcbPrivate::new())); + } + + // 设置clear_child_tid,在线程结束时将其置0以通知父进程 + if clone_flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) { + pcb.thread.write().clear_child_tid = Some(clone_args.child_tid); + } + + // 设置child_tid,意味着子线程能够知道自己的id + if clone_flags.contains(CloneFlags::CLONE_CHILD_SETTID) { + pcb.thread.write().set_child_tid = Some(clone_args.child_tid); + } + + // 将子进程/线程的id存储在用户态传进的地址中 + if clone_flags.contains(CloneFlags::CLONE_PARENT_SETTID) { + let mut writer = UserBufferWriter::new( + clone_args.parent_tid.data() as *mut i32, + core::mem::size_of::(), + true, + )?; + + writer.copy_one_to_user(&(pcb.pid().0 as i32), 0)?; + } + + // 拷贝标志位 + Self::copy_flags(&clone_flags, &pcb).unwrap_or_else(|e| { + panic!( + "fork: Failed to copy flags from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", + current_pcb.pid(), pcb.pid(), e + ) + }); + + // 拷贝用户地址空间 + Self::copy_mm(&clone_flags, ¤t_pcb, &pcb).unwrap_or_else(|e| { + panic!( + "fork: Failed to copy mm from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", + current_pcb.pid(), pcb.pid(), e + ) + }); + + // 拷贝文件描述符表 + Self::copy_files(&clone_flags, ¤t_pcb, &pcb).unwrap_or_else(|e| { + panic!( + "fork: Failed to copy files from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", + current_pcb.pid(), pcb.pid(), e + ) + }); + + // todo: 拷贝信号相关数据 + + // 拷贝线程 + Self::copy_thread(¤t_pcb, &pcb, clone_args,¤t_trapframe).unwrap_or_else(|e| { + panic!( + "fork: Failed to copy thread from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", + current_pcb.pid(), pcb.pid(), e + ) + }); + + Ok(()) + } } diff --git a/kernel/src/process/kthread.rs b/kernel/src/process/kthread.rs index 7060d7bef..a71abc68f 100644 --- a/kernel/src/process/kthread.rs +++ b/kernel/src/process/kthread.rs @@ -282,9 +282,11 @@ impl KernelThreadMechanism { // 初始化kthreadd let closure = KernelThreadClosure::EmptyClosure((Box::new(Self::kthread_daemon), ())); let info = KernelThreadCreateInfo::new(closure, "kthreadd".to_string()); - let kthreadd_pid: Pid = - Self::__inner_create(&info, CloneFlags::CLONE_FS | CloneFlags::CLONE_SIGNAL) - .expect("Failed to create kthread daemon"); + let kthreadd_pid: Pid = Self::__inner_create( + &info, + CloneFlags::CLONE_VM | CloneFlags::CLONE_FS | CloneFlags::CLONE_SIGNAL, + ) + .expect("Failed to create kthread daemon"); let pcb = ProcessManager::find(kthreadd_pid).unwrap(); ProcessManager::wakeup(&pcb).expect("Failed to wakeup kthread daemon"); diff --git a/kernel/src/process/mod.rs b/kernel/src/process/mod.rs index e3d06d729..04ec9042a 100644 --- a/kernel/src/process/mod.rs +++ b/kernel/src/process/mod.rs @@ -1,5 +1,6 @@ use core::{ hash::{Hash, Hasher}, + hint::spin_loop, intrinsics::{likely, unlikely}, mem::ManuallyDrop, sync::atomic::{compiler_fence, AtomicBool, AtomicI32, AtomicIsize, AtomicUsize, Ordering}, @@ -29,6 +30,10 @@ use crate::{ libs::{ align::AlignedBox, casting::DowncastArc, + futex::{ + constant::{FutexFlag, FUTEX_BITSET_MATCH_ANY}, + futex::Futex, + }, rwlock::{RwLock, RwLockReadGuard, RwLockWriteGuard}, spinlock::{SpinLock, SpinLockGuard}, wait_queue::WaitQueue, @@ -36,11 +41,12 @@ use crate::{ mm::{percpu::PerCpuVar, set_INITIAL_PROCESS_ADDRESS_SPACE, ucontext::AddressSpace, VirtAddr}, net::socket::SocketInode, sched::{ + completion::Completion, core::{sched_enqueue, CPU_EXECUTING}, SchedPolicy, SchedPriority, }, smp::kick_cpu, - syscall::{Syscall, SystemError}, + syscall::{user_access::clear_user, Syscall, SystemError}, }; use self::kthread::WorkerPrivate; @@ -116,6 +122,12 @@ impl ProcessManager { /// 获取当前进程的pcb pub fn current_pcb() -> Arc { + if unlikely(unsafe { !__PROCESS_MANAGEMENT_INIT_DONE }) { + kerror!("unsafe__PROCESS_MANAGEMENT_INIT_DONE == false"); + loop { + spin_loop(); + } + } return ProcessControlBlock::arch_current_pcb(); } @@ -252,7 +264,7 @@ impl ProcessManager { assert_eq!( CurrentIrqArch::is_irq_enabled(), false, - "interrupt must be disabled before enter ProcessManager::mark_sleep()" + "interrupt must be disabled before enter ProcessManager::mark_stop()" ); let pcb = ProcessManager::current_pcb(); @@ -306,9 +318,31 @@ impl ProcessManager { .write() .set_state(ProcessState::Exited(exit_code)); pcb.wait_queue.wakeup(Some(ProcessState::Blocked(true))); + + // 进行进程退出后的工作 + let thread = pcb.thread.write(); + if let Some(addr) = thread.set_child_tid { + unsafe { clear_user(addr, core::mem::size_of::()).expect("clear tid failed") }; + } + + if let Some(addr) = thread.clear_child_tid { + if Arc::strong_count(&pcb.basic().user_vm().expect("User VM Not found")) > 1 { + let _ = + Futex::futex_wake(addr, FutexFlag::FLAGS_MATCH_NONE, 1, FUTEX_BITSET_MATCH_ANY); + } + unsafe { clear_user(addr, core::mem::size_of::()).expect("clear tid failed") }; + } + + // 如果是vfork出来的进程,则需要处理completion + if thread.vfork_done.is_some() { + thread.vfork_done.as_ref().unwrap().complete_all(); + } + drop(thread); + unsafe { pcb.basic_mut().set_user_vm(None) }; drop(pcb); ProcessManager::exit_notify(); drop(irq_guard); + sched(); loop {} } @@ -316,15 +350,21 @@ impl ProcessManager { pub unsafe fn release(pid: Pid) { let pcb = ProcessManager::find(pid); if !pcb.is_none() { - let pcb = pcb.unwrap(); + // let pcb = pcb.unwrap(); // 判断该pcb是否在全局没有任何引用 - if Arc::strong_count(&pcb) <= 1 { - drop(pcb); - ALL_PROCESS.lock().as_mut().unwrap().remove(&pid); - } else { - // 如果不为1就panic - panic!("pcb is still referenced"); - } + // TODO: 当前,pcb的Arc指针存在泄露问题,引用计数不正确,打算在接下来实现debug专用的Arc,方便调试,然后解决这个bug。 + // 因此目前暂时注释掉,使得能跑 + // if Arc::strong_count(&pcb) <= 2 { + // drop(pcb); + // ALL_PROCESS.lock().as_mut().unwrap().remove(&pid); + // } else { + // // 如果不为1就panic + // let msg = format!("pcb '{:?}' is still referenced, strong count={}",pcb.pid(), Arc::strong_count(&pcb)); + // kerror!("{}", msg); + // panic!() + // } + + ALL_PROCESS.lock().as_mut().unwrap().remove(&pid); } } @@ -482,10 +522,13 @@ pub struct ProcessControlBlock { parent_pcb: RwLock>, /// 子进程链表 - children: RwLock>>, + children: RwLock>, /// 等待队列 wait_queue: WaitQueue, + + /// 线程信息 + thread: RwLock, } impl ProcessControlBlock { @@ -545,20 +588,26 @@ impl ProcessControlBlock { sig_info: RwLock::new(ProcessSignalInfo::default()), sig_struct: SpinLock::new(SignalStruct::default()), parent_pcb: RwLock::new(ppcb), - children: RwLock::new(HashMap::new()), + children: RwLock::new(Vec::new()), wait_queue: WaitQueue::INIT, + thread: RwLock::new(ThreadInfo::new()), }; let pcb = Arc::new(pcb); // 设置进程的arc指针到内核栈的最低地址处 - unsafe { pcb.kernel_stack.write().set_pcb(Arc::clone(&pcb)).unwrap() }; + unsafe { + pcb.kernel_stack + .write() + .set_pcb(Arc::downgrade(&pcb)) + .unwrap() + }; // 将当前pcb加入父进程的子进程哈希表中 if pcb.pid() > Pid(1) { if let Some(ppcb_arc) = pcb.parent_pcb.read().upgrade() { let mut children = ppcb_arc.children.write(); - children.insert(pcb.pid(), pcb.clone()); + children.push(pcb.pid()); } else { panic!("parent pcb is None"); } @@ -700,11 +749,11 @@ impl ProcessControlBlock { unsafe fn adopt_childen(&self) -> Result<(), SystemError> { match ProcessManager::find(Pid(1)) { Some(init_pcb) => { - let mut childen_guard = self.children.write(); + let childen_guard = self.children.write(); let mut init_childen_guard = init_pcb.children.write(); - childen_guard.drain().for_each(|(pid, child)| { - init_childen_guard.insert(pid, child); + childen_guard.iter().for_each(|pid| { + init_childen_guard.push(*pid); }); return Ok(()); @@ -747,12 +796,31 @@ impl Drop for ProcessControlBlock { .unwrap_or_else(|e| panic!("procfs_unregister_pid failed: error: {e:?}")); if let Some(ppcb) = self.parent_pcb.read().upgrade() { - ppcb.children.write().remove(&self.pid()); + ppcb.children.write().drain_filter(|pid| *pid == self.pid()); } + } +} + +/// 线程信息 +#[derive(Debug)] +pub struct ThreadInfo { + // 来自用户空间记录用户线程id的地址,在该线程结束时将该地址置0以通知父进程 + clear_child_tid: Option, + set_child_tid: Option, - unsafe { ProcessManager::release(self.pid()) }; + vfork_done: Option>, +} + +impl ThreadInfo { + pub fn new() -> Self { + Self { + clear_child_tid: None, + set_child_tid: None, + vfork_done: None, + } } } + /// 进程的基本信息 /// /// 这个结构体保存进程的基本信息,主要是那些不会随着进程的运行而经常改变的信息。 @@ -994,9 +1062,9 @@ impl KernelStack { return VirtAddr::new(self.stack.as_ref().unwrap().as_ptr() as usize + Self::SIZE); } - pub unsafe fn set_pcb(&mut self, pcb: Arc) -> Result<(), SystemError> { - // 将一个Arc放到内核栈的最低地址处 - let p: *const ProcessControlBlock = Arc::into_raw(pcb); + pub unsafe fn set_pcb(&mut self, pcb: Weak) -> Result<(), SystemError> { + // 将一个Weak放到内核栈的最低地址处 + let p: *const ProcessControlBlock = Weak::into_raw(pcb); let stack_bottom_ptr = self.start_address().data() as *mut *const ProcessControlBlock; // 如果内核栈的最低地址处已经有了一个pcb,那么,这里就不再设置,直接返回错误 @@ -1021,10 +1089,10 @@ impl KernelStack { } // 为了防止内核栈的pcb指针被释放,这里需要将其包装一下,使得Arc的drop不会被调用 - let arc_wrapper: ManuallyDrop> = - ManuallyDrop::new(Arc::from_raw(p)); + let weak_wrapper: ManuallyDrop> = + ManuallyDrop::new(Weak::from_raw(p)); - let new_arc: Arc = Arc::clone(&arc_wrapper); + let new_arc: Arc = weak_wrapper.upgrade()?; return Some(new_arc); } } @@ -1032,8 +1100,8 @@ impl KernelStack { impl Drop for KernelStack { fn drop(&mut self) { if !self.stack.is_none() { - let pcb_ptr: Arc = unsafe { - Arc::from_raw(self.stack.as_ref().unwrap().as_ptr() as *const ProcessControlBlock) + let pcb_ptr: Weak = unsafe { + Weak::from_raw(self.stack.as_ref().unwrap().as_ptr() as *const ProcessControlBlock) }; drop(pcb_ptr); } diff --git a/kernel/src/process/syscall.rs b/kernel/src/process/syscall.rs index c8f83e7ea..c0569a4fa 100644 --- a/kernel/src/process/syscall.rs +++ b/kernel/src/process/syscall.rs @@ -1,13 +1,24 @@ use core::ffi::c_void; -use alloc::{string::String, vec::Vec}; +use alloc::{ + string::{String, ToString}, + sync::Arc, + vec::Vec, +}; -use super::{abi::WaitOption, fork::CloneFlags, Pid, ProcessManager, ProcessState}; +use super::{ + abi::WaitOption, + fork::{CloneFlags, KernelCloneArgs}, + KernelStack, Pid, ProcessManager, ProcessState, +}; use crate::{ arch::{interrupt::TrapFrame, sched::sched, CurrentIrqArch}, exception::InterruptArch, - filesystem::vfs::MAX_PATHLEN, + filesystem::{procfs::procfs_register_pid, vfs::MAX_PATHLEN}, + include::bindings::bindings::verify_area, + mm::VirtAddr, process::ProcessControlBlock, + sched::completion::Completion, syscall::{ user_access::{ check_and_clone_cstr, check_and_clone_cstr_array, UserBufferReader, UserBufferWriter, @@ -42,6 +53,11 @@ impl Syscall { // argv, // envp // ); + // kdebug!( + // "before execve: strong count: {}", + // Arc::strong_count(&ProcessManager::current_pcb()) + // ); + if path.is_null() { return Err(SystemError::EINVAL); } @@ -66,6 +82,10 @@ impl Syscall { // 关闭设置了O_CLOEXEC的文件描述符 let fd_table = ProcessManager::current_pcb().fd_table(); fd_table.write().close_on_exec(); + // kdebug!( + // "after execve: strong count: {}", + // Arc::strong_count(&ProcessManager::current_pcb()) + // ); return Ok(()); } @@ -95,12 +115,13 @@ impl Syscall { if pid > 0 { let pid = Pid(pid as usize); - let child_pcb = rd_childen.get(&pid).ok_or(SystemError::ECHILD)?.clone(); + let child_pcb = ProcessManager::find(pid).ok_or(SystemError::ECHILD)?; drop(rd_childen); loop { + let state = child_pcb.sched_info().state(); // 获取退出码 - match child_pcb.sched_info().state() { + match state { ProcessState::Runnable => { if options.contains(WaitOption::WNOHANG) || options.contains(WaitOption::WNOWAIT) @@ -123,12 +144,16 @@ impl Syscall { } } ProcessState::Exited(status) => { + // kdebug!("wait4: child exited, pid: {:?}, status: {status}\n", pid); if !wstatus.is_null() { wstatus_buf.copy_one_to_user( - &(status | WaitOption::WEXITED.bits() as usize), + &(status as u32 | WaitOption::WEXITED.bits()), 0, )?; } + drop(child_pcb); + // kdebug!("wait4: to release {pid:?}"); + unsafe { ProcessManager::release(pid) }; return Ok(pid.into()); } }; @@ -147,7 +172,8 @@ impl Syscall { } else { // 等待任意子进程(这两) let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; - for (pid, pcb) in rd_childen.iter() { + for pid in rd_childen.iter() { + let pcb = ProcessManager::find(*pid).ok_or(SystemError::ECHILD)?; if pcb.sched_info().state().is_exited() { if !wstatus.is_null() { wstatus_buf.copy_one_to_user(&0, 0)?; @@ -200,4 +226,73 @@ impl Syscall { let current_pcb = ProcessManager::current_pcb(); return Ok(current_pcb.basic().ppid()); } + + pub fn clone( + current_trapframe: &mut TrapFrame, + clone_args: KernelCloneArgs, + ) -> Result { + let flags = clone_args.flags; + + let vfork = Arc::new(Completion::new()); + + if flags.contains(CloneFlags::CLONE_PIDFD) + && flags.contains(CloneFlags::CLONE_PARENT_SETTID) + { + return Err(SystemError::EINVAL); + } + + let current_pcb = ProcessManager::current_pcb(); + let new_kstack = KernelStack::new()?; + let name = current_pcb.basic().name().to_string(); + let pcb = ProcessControlBlock::new(name, new_kstack); + // 克隆pcb + ProcessManager::copy_process(¤t_pcb, &pcb, clone_args, current_trapframe)?; + ProcessManager::add_pcb(pcb.clone()); + + // 向procfs注册进程 + procfs_register_pid(pcb.pid()).unwrap_or_else(|e| { + panic!( + "fork: Failed to register pid to procfs, pid: [{:?}]. Error: {:?}", + pcb.pid(), + e + ) + }); + + if flags.contains(CloneFlags::CLONE_VFORK) { + pcb.thread.write().vfork_done = Some(vfork.clone()); + } + + if pcb.thread.read().set_child_tid.is_some() { + let addr = pcb.thread.read().set_child_tid.unwrap(); + let mut writer = + UserBufferWriter::new(addr.as_ptr::(), core::mem::size_of::(), true)?; + writer.copy_one_to_user(&(pcb.pid().data() as i32), 0)?; + } + + ProcessManager::wakeup(&pcb).unwrap_or_else(|e| { + panic!( + "fork: Failed to wakeup new process, pid: [{:?}]. Error: {:?}", + pcb.pid(), + e + ) + }); + + if flags.contains(CloneFlags::CLONE_VFORK) { + // 等待子进程结束或者exec; + vfork.wait_for_completion_interruptible()?; + } + + return Ok(pcb.pid().0); + } + + /// 设置线程地址 + pub fn set_tid_address(ptr: usize) -> Result { + if !unsafe { verify_area(ptr as u64, core::mem::size_of::() as u64) } { + return Err(SystemError::EFAULT); + } + + let pcb = ProcessManager::current_pcb(); + pcb.thread.write().clear_child_tid = Some(VirtAddr::new(ptr)); + Ok(pcb.pid.0) + } } diff --git a/kernel/src/sched/completion.rs b/kernel/src/sched/completion.rs index 3ec4b0809..35d653c36 100644 --- a/kernel/src/sched/completion.rs +++ b/kernel/src/sched/completion.rs @@ -97,7 +97,7 @@ impl Completion { } /// @brief 永久标记done为Complete_All,并从wait_queue中删除所有节点 - pub fn complete_all(&mut self) { + pub fn complete_all(&self) { let mut inner = self.inner.lock_irqsave(); inner.done = COMPLETE_ALL; inner.wait_queue.wakeup_all(None); diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs index 9e7578d59..6011fb74f 100644 --- a/kernel/src/syscall/mod.rs +++ b/kernel/src/syscall/mod.rs @@ -3,7 +3,10 @@ use core::{ sync::atomic::{AtomicBool, Ordering}, }; -use crate::kdebug; +use crate::{ + libs::{futex::constant::FutexFlag, rand::GRandFlags}, + process::fork::KernelCloneArgs, +}; use num_traits::{FromPrimitive, ToPrimitive}; @@ -21,7 +24,7 @@ use crate::{ libs::align::page_align_up, mm::{verify_area, MemoryManagementArch, VirtAddr}, net::syscall::SockAddr, - process::Pid, + process::{fork::CloneFlags, Pid}, time::{ syscall::{PosixTimeZone, PosixTimeval}, TimeSpec, @@ -372,7 +375,7 @@ pub const SYS_BIND: usize = 49; pub const SYS_LISTEN: usize = 50; pub const SYS_GETSOCKNAME: usize = 51; pub const SYS_GETPEERNAME: usize = 52; - +pub const SYS_SOCKET_PAIR: usize = 53; pub const SYS_SETSOCKOPT: usize = 54; pub const SYS_GETSOCKOPT: usize = 55; @@ -536,7 +539,6 @@ impl Syscall { Self::lseek(fd, w) } SYS_IOCTL => { - kdebug!("SYS_IOCTL"); let fd = args[0]; let cmd = args[1]; let data = args[2]; @@ -943,7 +945,7 @@ impl Syscall { SYS_MUNMAP => { let addr = args[0]; let len = page_align_up(args[1]); - if addr & MMArch::PAGE_SIZE != 0 { + if addr & (MMArch::PAGE_SIZE - 1) != 0 { // The addr argument is not a multiple of the page size Err(SystemError::EINVAL) } else { @@ -953,7 +955,7 @@ impl Syscall { SYS_MPROTECT => { let addr = args[0]; let len = page_align_up(args[1]); - if addr & MMArch::PAGE_SIZE != 0 { + if addr & (MMArch::PAGE_SIZE - 1) != 0 { // The addr argument is not a multiple of the page size Err(SystemError::EINVAL) } else { @@ -1023,6 +1025,102 @@ impl Syscall { Self::mknod(path as *const i8, flags, DeviceNumber::from(dev_t)) } + SYS_CLONE => { + let parent_tid = VirtAddr::new(args[2]); + let child_tid = VirtAddr::new(args[3]); + + // 地址校验 + verify_area(parent_tid, core::mem::size_of::())?; + verify_area(child_tid, core::mem::size_of::())?; + + let mut clone_args = KernelCloneArgs::new(); + clone_args.flags = CloneFlags::from_bits_truncate(args[0] as u64); + clone_args.stack = args[1]; + clone_args.parent_tid = parent_tid; + clone_args.child_tid = child_tid; + clone_args.tls = args[4]; + Self::clone(frame, clone_args) + } + + SYS_FUTEX => { + let uaddr = VirtAddr::new(args[0]); + let operation = FutexFlag::from_bits(args[1] as u32).ok_or(SystemError::ENOSYS)?; + let val = args[2] as u32; + let utime = args[3]; + let uaddr2 = VirtAddr::new(args[4]); + let val3 = args[5] as u32; + + verify_area(uaddr, core::mem::size_of::())?; + verify_area(uaddr2, core::mem::size_of::())?; + + let mut timespec = None; + if utime != 0 && operation.contains(FutexFlag::FLAGS_HAS_TIMEOUT) { + let reader = UserBufferReader::new( + utime as *const TimeSpec, + core::mem::size_of::(), + true, + )?; + + timespec = Some(reader.read_one_from_user::(0)?.clone()); + } + + Self::do_futex(uaddr, operation, val, timespec, uaddr2, utime as u32, val3) + } + + SYS_WRITEV => Self::writev(args[0] as i32, args[1], args[2]), + + SYS_ARCH_PRCTL => Self::arch_prctl(args[0], args[1]), + + SYS_SET_TID_ADDR => Self::set_tid_address(args[0]), + + SYS_STAT => { + let path: &CStr = unsafe { CStr::from_ptr(args[0] as *const c_char) }; + let path: Result<&str, core::str::Utf8Error> = path.to_str(); + let res = if path.is_err() { + Err(SystemError::EINVAL) + } else { + let path: &str = path.unwrap(); + let kstat = args[1] as *mut PosixKstat; + let vaddr = VirtAddr::new(kstat as usize); + match verify_area(vaddr, core::mem::size_of::()) { + Ok(_) => Self::stat(path, kstat), + Err(e) => Err(e), + } + }; + + res + } + + // 目前为了适配musl-libc,以下系统调用先这样写着 + SYS_GET_RANDOM => { + let flags = GRandFlags::from_bits(args[2] as u8).ok_or(SystemError::EINVAL)?; + Self::get_random(args[0] as *mut u8, args[1], flags) + } + + SYS_SOCKET_PAIR => { + unimplemented!() + } + + SYS_POLL => { + kwarn!("SYS_POLL has not yet been implemented"); + Ok(0) + } + + SYS_RT_SIGPROCMASK => { + kwarn!("SYS_RT_SIGPROCMASK has not yet been implemented"); + Ok(0) + } + + SYS_TKILL => { + kwarn!("SYS_TKILL has not yet been implemented"); + Ok(0) + } + + SYS_SIGALTSTACK => { + kwarn!("SYS_SIGALTSTACK has not yet been implemented"); + Ok(0) + } + _ => panic!("Unsupported syscall ID: {}", syscall_num), }; return r; diff --git a/kernel/src/syscall/user_access.rs b/kernel/src/syscall/user_access.rs index c500b5e44..899d8a134 100644 --- a/kernel/src/syscall/user_access.rs +++ b/kernel/src/syscall/user_access.rs @@ -172,6 +172,10 @@ impl<'a> UserBufferReader<'a> { }); } + pub fn size(&self) -> usize { + return self.buffer.len(); + } + /// 从用户空间读取数据(到变量中) /// /// @param offset 字节偏移量 @@ -266,6 +270,10 @@ impl<'a> UserBufferWriter<'a> { }); } + pub fn size(&self) -> usize { + return self.buffer.len(); + } + /// 从指定地址写入数据到用户空间 /// /// @param data 要写入的数据地址 @@ -274,7 +282,7 @@ impl<'a> UserBufferWriter<'a> { /// pub fn copy_to_user( &'a mut self, - src: &'a [T], + src: &[T], offset: usize, ) -> Result { let dst = Self::convert_with_offset(self.buffer, offset)?; @@ -290,7 +298,7 @@ impl<'a> UserBufferWriter<'a> { /// pub fn copy_one_to_user( &'a mut self, - src: &'a T, + src: &T, offset: usize, ) -> Result<(), SystemError> { let dst = Self::convert_one_with_offset::(self.buffer, offset)?; diff --git a/kernel/src/time/timer.rs b/kernel/src/time/timer.rs index 57c4086de..dc1f43d2c 100644 --- a/kernel/src/time/timer.rs +++ b/kernel/src/time/timer.rs @@ -72,6 +72,7 @@ impl Timer { expire_jiffies, timer_func, self_ref: Weak::default(), + triggered: false, }))); result.0.lock().self_ref = Arc::downgrade(&result); @@ -112,7 +113,9 @@ impl Timer { #[inline] fn run(&self) { - let r = self.0.lock().timer_func.run(); + let mut timer = self.0.lock(); + timer.triggered = true; + let r = timer.timer_func.run(); if unlikely(r.is_err()) { kerror!( "Failed to run timer function: {self:?} {:?}", @@ -120,6 +123,19 @@ impl Timer { ); } } + + /// ## 判断定时器是否已经触发 + pub fn timeout(&self) -> bool { + self.0.lock().triggered + } + + /// ## 取消定时器任务 + pub fn cancel(&self) -> bool { + TIMER_LIST + .lock() + .drain_filter(|x| Arc::::as_ptr(&x) == self as *const Timer); + true + } } /// 定时器类型 @@ -131,6 +147,8 @@ pub struct InnerTimer { pub timer_func: Box, /// self_ref self_ref: Weak, + /// 判断该计时器是否触发 + triggered: bool, } #[derive(Debug)]