From c48de3e45e6b2308e24a54f83cd464051addd69e Mon Sep 17 00:00:00 2001 From: Haitao Shan Date: Wed, 24 Jan 2018 14:58:39 -0800 Subject: [PATCH 1/2] IOCTL_GPA_PROT implementation v1. --- core/ept2.c | 6 +- core/gpa_space.c | 126 ++++++++++++++++++++++++++++++ core/hax.c | 1 + core/include/ept2.h | 3 +- core/include/hax_core_interface.h | 1 + core/include/memory.h | 24 ++++++ core/include/vm.h | 3 +- core/memory.c | 9 +++ core/vcpu.c | 9 ++- include/hax_interface.h | 14 ++++ include/windows/hax_windows.h | 12 +-- windows/hax_entry.c | 15 ++++ windows/hax_entry.h | 4 + 13 files changed, 217 insertions(+), 10 deletions(-) diff --git a/core/ept2.c b/core/ept2.c index 8bf00a1f..5d60f150 100644 --- a/core/ept2.c +++ b/core/ept2.c @@ -70,7 +70,8 @@ void ept_handle_mapping_changed(hax_gpa_space_listener *listener, } int ept_handle_access_violation(hax_gpa_space *gpa_space, hax_ept_tree *tree, - exit_qualification_t qual, uint64 gpa) + exit_qualification_t qual, uint64 gpa, + uint64 *fault_gfn) { uint combined_perm; uint64 gfn; @@ -102,6 +103,9 @@ int ept_handle_access_violation(hax_gpa_space *gpa_space, hax_ept_tree *tree, return 0; } + if (gpa_space_chunk_protected(gpa_space, gfn, fault_gfn)) + return -EPERM; + // The faulting GPA maps to RAM/ROM is_rom = slot->flags & HAX_MEMSLOT_READONLY; offset_within_slot = gpa - (slot->base_gfn << PG_ORDER_4K); diff --git a/core/gpa_space.c b/core/gpa_space.c index 96e98b11..49abade1 100644 --- a/core/gpa_space.c +++ b/core/gpa_space.c @@ -33,6 +33,7 @@ #include "../include/hax.h" #include "include/paging.h" #include "../include/hax_host_mem.h" +#include "ept2.h" int gpa_space_init(hax_gpa_space *gpa_space) { @@ -59,6 +60,13 @@ int gpa_space_init(hax_gpa_space *gpa_space) return ret; } +static uint64 gpa_space_prot_bitmap_size(uint64 npages) +{ + uint64 bitmap_size = (npages + 7)/8; + bitmap_size += 8; + return bitmap_size; +} + void gpa_space_free(hax_gpa_space *gpa_space) { hax_gpa_space_listener *listener, *tmp; @@ -75,6 +83,9 @@ void gpa_space_free(hax_gpa_space *gpa_space) hax_gpa_space_listener, entry) { hax_list_del(&listener->entry); } + if (gpa_space->prot_bitmap.bitmap) + hax_vfree(gpa_space->prot_bitmap.bitmap, + gpa_space_prot_bitmap_size(gpa_space->prot_bitmap.max_gpfn)); } void gpa_space_add_listener(hax_gpa_space *gpa_space, @@ -346,3 +357,118 @@ uint64 gpa_space_get_pfn(hax_gpa_space *gpa_space, uint64 gfn, uint8 *flags) return pfn; } + +int gpa_space_adjust_prot_bitmap(hax_gpa_space *gpa_space, uint64 max_gpfn) +{ + prot_bitmap *pb = &gpa_space->prot_bitmap; + uint8 *bmold = pb->bitmap, *bmnew = NULL; + + /* Bitmap size only grows until it is destroyed */ + if (max_gpfn <= pb->max_gpfn) + return 0; + + bmnew = hax_vmalloc(gpa_space_prot_bitmap_size(max_gpfn), HAX_MEM_NONPAGE); + if (!bmnew) { + hax_error("%s: Not enought memory for new protection bitmap\n", + __func__); + return -ENOMEM; + } + pb->bitmap = bmnew; + if (bmold) { + memcpy(bmnew, bmold, gpa_space_prot_bitmap_size(pb->max_gpfn)); + hax_vfree(bmold, gpa_space_prot_bitmap_size(pb->max_gpfn)); + } + pb->max_gpfn = max_gpfn; + return 0; +} + +static void gpa_space_set_prot_bitmap(uint64 start, uint64 nbits, + uint8 *bitmap, bool set) +{ + uint64 i = 0; + uint64 start_index = start / 8; + uint64 start_bit = start % 8; + uint64 end_index = (start + nbits) / 8; + uint64 end_bit = (start + nbits) % 8; + + if (start_index == end_index) { + for (i = start; i < start + nbits; i++) + if (set) + hax_test_and_set_bit(i, (uint64 *)bitmap); + else + hax_test_and_clear_bit(i, (uint64 *)bitmap); + 
return; + } + + for (i = start; i < (start_index + 1) * 8; i++) + if (set) + hax_test_and_set_bit(i, (uint64 *)bitmap); + else + hax_test_and_clear_bit(i, (uint64 *)bitmap); + + for (i = end_index * 8; i < start + nbits; i++) + if (set) + hax_test_and_set_bit(i, (uint64 *)bitmap); + else + hax_test_and_clear_bit(i, (uint64 *)bitmap); + + for (i = start_index + 1; i < end_index; i++) + if (set) + bitmap[i] = 0xFF; + else + bitmap[i] = 0; +} + +int gpa_space_test_prot_bitmap(struct hax_gpa_space *gpa_space, uint64 gfn) +{ + struct prot_bitmap *pbm = &gpa_space->prot_bitmap; + + if (!pbm) + return 0; + + if (gfn >= pbm->max_gpfn) + return 0; + + return hax_test_bit(gfn, (uint64 *)pbm->bitmap); +} + +int gpa_space_chunk_protected(struct hax_gpa_space *gpa_space, uint64 gfn, + uint64 *fault_gfn) +{ + uint64 __gfn = gfn / HAX_CHUNK_NR_PAGES * HAX_CHUNK_NR_PAGES; + for (gfn = __gfn; gfn < __gfn + HAX_CHUNK_NR_PAGES; gfn++) + if (gpa_space_test_prot_bitmap(gpa_space, gfn)) { + *fault_gfn = gfn; + return 1; + } + + return 0; +} + +int gpa_space_protect_range(struct hax_gpa_space *gpa_space, + struct hax_ept_tree *ept_tree, + uint64 start_gpa, uint64 len, int8 flags) +{ + uint64 gfn; + uint npages; + hax_memslot *slot; + + if (len == 0) { + hax_error("%s: len = 0\n", __func__); + return -EINVAL; + } + + /* Did not support specific prot on r/w/e now */ + if (flags != 0 && (flags & HAX_GPA_PROT_MASK) != HAX_GPA_PROT_ALL) + return -EINVAL; + + gfn = start_gpa >> PG_ORDER_4K; + npages = (len + PAGE_SIZE_4K - 1) >> PG_ORDER_4K; + + gpa_space_set_prot_bitmap(gfn, npages, gpa_space->prot_bitmap.bitmap, !flags); + + if (!flags) + ept_tree_invalidate_entries(ept_tree, gfn, npages); + + return 0; +} diff --git a/core/hax.c b/core/hax.c index 244538d9..00bca3ce 100644 --- a/core/hax.c +++ b/core/hax.c @@ -347,6 +347,7 @@ int hax_get_capability(void *buf, int bufLeng, int *outLength) cap->winfo |= HAX_CAP_64BIT_SETRAM; #endif cap->winfo |= HAX_CAP_TUNNEL_PAGE; + cap->winfo |= HAX_CAP_GPA_PROTECTION; if (cpu_data->vmx_info._ept_cap) { cap->winfo |= HAX_CAP_EPT; } diff --git a/core/include/ept2.h b/core/include/ept2.h index 4d248ff3..42788e91 100644 --- a/core/include/ept2.h +++ b/core/include/ept2.h @@ -249,7 +249,8 @@ void ept_handle_mapping_changed(hax_gpa_space_listener *listener, // present, but the access violates the permissions it allows. // -ENOMEM: Memory allocation/mapping error. int ept_handle_access_violation(hax_gpa_space *gpa_space, hax_ept_tree *tree, - exit_qualification_t qual, uint64 gpa); + exit_qualification_t qual, uint64 gpa, + uint64 *fault_gfn); // Handles an EPT misconfiguration caught by hardware while it tries to // translate a GPA. 
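
Note on the bitmap logic introduced in gpa_space.c: the range-set and chunk-test paths are easier to follow with a small standalone model. The sketch below is illustrative only; the helper names (prot_bitmap_set_range, prot_bitmap_test, chunk_protected) are hypothetical, it uses plain non-atomic byte operations instead of hax_test_and_set_bit()/hax_test_and_clear_bit(), and CHUNK_NR_PAGES simply mirrors HAX_CHUNK_NR_PAGES (2MB / 4KB = 512).

#include <stdint.h>
#include <stdio.h>

#define CHUNK_NR_PAGES 512              /* 2MB chunk / 4KB page, as HAX_CHUNK_NR_PAGES */

/* Mark or clear one protection bit per GFN in [start, start + nbits). */
static void prot_bitmap_set_range(uint8_t *bm, uint64_t start, uint64_t nbits, int set)
{
    for (uint64_t i = start; i < start + nbits; i++) {
        if (set)
            bm[i / 8] |= (uint8_t)(1 << (i % 8));
        else
            bm[i / 8] &= (uint8_t)~(1 << (i % 8));
    }
}

static int prot_bitmap_test(const uint8_t *bm, uint64_t gfn)
{
    return (bm[gfn / 8] >> (gfn % 8)) & 1;
}

/* Chunk-level check mirroring gpa_space_chunk_protected(): an EPT violation is
 * reported as protected if any page in the faulting GFN's 2MB chunk is marked,
 * because EPT mappings are populated at chunk granularity. */
static int chunk_protected(const uint8_t *bm, uint64_t gfn, uint64_t *fault_gfn)
{
    uint64_t base = gfn / CHUNK_NR_PAGES * CHUNK_NR_PAGES;
    for (uint64_t i = base; i < base + CHUNK_NR_PAGES; i++) {
        if (prot_bitmap_test(bm, i)) {
            *fault_gfn = i;
            return 1;
        }
    }
    return 0;
}

int main(void)
{
    uint8_t bm[4096] = { 0 };           /* covers 32768 GFNs = 128MB of guest RAM */
    uint64_t fault_gfn = 0;
    int hit;

    prot_bitmap_set_range(bm, 0x1000, 16, 1);    /* protect 16 pages at GFN 0x1000 */
    printf("GFN 0x1005 protected: %d\n", prot_bitmap_test(bm, 0x1005));

    /* GFN 0x1100 itself is clear, but it shares the 2MB chunk [0x1000, 0x1200),
     * so the chunk-level check still reports a hit and returns GFN 0x1000. */
    hit = chunk_protected(bm, 0x1100, &fault_gfn);
    printf("chunk hit: %d, fault_gfn=0x%llx\n", hit, (unsigned long long)fault_gfn);
    return 0;
}
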
diff --git a/core/include/hax_core_interface.h b/core/include/hax_core_interface.h index 9c9ed82a..21a77a87 100644 --- a/core/include/hax_core_interface.h +++ b/core/include/hax_core_interface.h @@ -59,6 +59,7 @@ int hax_vm_set_ram2(struct vm_t *vm, struct hax_set_ram_info2 *info); int hax_vm_free_all_ram(struct vm_t *vm); int in_pmem_range(struct hax_vcpu_mem *pmem, uint64_t va); int hax_vm_add_ramblock(struct vm_t *vm, uint64_t start_uva, uint64_t size); +int hax_vm_gpa_prot(struct vm_t *vm, struct hax_gpa_prot_info *info); void * get_vm_host(struct vm_t *vm); int set_vm_host(struct vm_t *vm, void *vm_host); diff --git a/core/include/memory.h b/core/include/memory.h index 5e350f10..958205f3 100644 --- a/core/include/memory.h +++ b/core/include/memory.h @@ -36,6 +36,7 @@ #define HAX_CHUNK_SHIFT 21 #define HAX_CHUNK_SIZE (1U << HAX_CHUNK_SHIFT) // 2MB +#define HAX_CHUNK_NR_PAGES (HAX_CHUNK_SIZE/PAGE_SIZE_4K) typedef struct hax_chunk { hax_memdesc_user memdesc; @@ -80,12 +81,20 @@ typedef struct hax_memslot { // Used only by memslot_set_mapping(), not by any hax_memslot #define HAX_MEMSLOT_INVALID 0x80 +typedef struct prot_bitmap { + // R/W/E Protection Bitmap + uint8 *bitmap; + // Last gpfn + uint64 max_gpfn; +} prot_bitmap; + typedef struct hax_gpa_space { // TODO: Add a lock to prevent concurrent accesses to |ramblock_list| and // |memslot_list| hax_list_head ramblock_list; hax_list_head memslot_list; hax_list_head listener_list; + prot_bitmap prot_bitmap; } hax_gpa_space; typedef struct hax_gpa_space_listener hax_gpa_space_listener; @@ -298,6 +307,21 @@ void gpa_space_unmap_page(hax_gpa_space *gpa_space, hax_kmap_user *kmap); // MMIO. uint64 gpa_space_get_pfn(hax_gpa_space *gpa_space, uint64 gfn, uint8 *flags); +int gpa_space_protect_range(struct hax_gpa_space *gpa_space, + struct hax_ept_tree *ept_tree, + uint64 start_gpa, uint64 len, int8 flags); + +// Adjust gpa protection bitmap size. Once a bigger gfn is met, allocate +// a new bitmap and copy the old bitmap contents. +// |gpa_space|: The GPA space of the guest. +// |max_gpfn|: max gfn that the bitmap can hold. +int gpa_space_adjust_prot_bitmap(struct hax_gpa_space *gpa_space, + uint64 max_gpfn); + +int gpa_space_test_prot_bitmap(struct hax_gpa_space *gpa_space, uint64 gfn); +int gpa_space_chunk_protected(struct hax_gpa_space *gpa_space, uint64 gfn, + uint64 *fault_gfn); + // Allocates a |hax_chunk| for the given UVA range, and pins the corresponding // host page frames in RAM. // |base_uva|: The start of the UVA range. Should be page-aligned. 
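
A quick sizing note for the prot_bitmap added to hax_gpa_space above: gpa_space_prot_bitmap_size() rounds the per-GFN bit count up to whole bytes and pads by 8 bytes, presumably so the 64-bit word accesses in hax_test_and_set_bit()/hax_test_and_clear_bit() stay in bounds for the last partial byte. The sketch below reproduces the arithmetic for a few guest RAM sizes; the RAM figures and the helper name are illustrative, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Same formula as gpa_space_prot_bitmap_size(): one bit per 4KB GFN,
 * rounded up to bytes, plus 8 bytes of slack for 64-bit word access. */
static uint64_t prot_bitmap_size(uint64_t npages)
{
    return (npages + 7) / 8 + 8;
}

int main(void)
{
    const uint64_t ram_mb[] = { 128, 2048, 8192 };   /* example guest RAM sizes */

    for (int i = 0; i < 3; i++) {
        uint64_t npages = ram_mb[i] * 1024 * 1024 / 4096;
        printf("%5llu MB guest => %8llu GFNs => %llu-byte bitmap\n",
               (unsigned long long)ram_mb[i],
               (unsigned long long)npages,
               (unsigned long long)prot_bitmap_size(npages));
    }
    return 0;
}

Even an 8GB guest needs only a ~256KB bitmap, which is why gpa_space_adjust_prot_bitmap() can afford to reallocate and copy eagerly from handle_set_ram() whenever a larger max GFN is seen.
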
diff --git a/core/include/vm.h b/core/include/vm.h index 1aaad461..64d21bdd 100644 --- a/core/include/vm.h +++ b/core/include/vm.h @@ -109,7 +109,8 @@ enum exit_status { HAX_EXIT_HLT, HAX_EXIT_STATECHANGE, HAX_EXIT_PAUSED, - HAX_EXIT_FAST_MMIO + HAX_EXIT_FAST_MMIO, + HAX_EXIT_GPAPROT }; enum run_flag { diff --git a/core/memory.c b/core/memory.c index a5550d61..c724feef 100644 --- a/core/memory.c +++ b/core/memory.c @@ -261,6 +261,7 @@ static int handle_set_ram(struct vm_t *vm, uint64 start_gpa, uint64 size, gpa_space = &vm->gpa_space; start_gfn = start_gpa >> PG_ORDER_4K; npages = size >> PG_ORDER_4K; + gpa_space_adjust_prot_bitmap(gpa_space, start_gfn + npages); ret = memslot_set_mapping(gpa_space, start_gfn, npages, start_uva, flags); if (ret) { hax_error("%s: memslot_set_mapping() failed: ret=%d, start_gfn=0x%llx," @@ -376,6 +377,14 @@ int hax_vm_set_ram2(struct vm_t *vm, struct hax_set_ram_info2 *info) } #endif // CONFIG_HAX_EPT2 +int hax_vm_gpa_prot(struct vm_t *vm, struct hax_gpa_prot_info *info) +{ + uint8_t flags = info->flags; + + return gpa_space_protect_range(&vm->gpa_space, &vm->ept_tree, + info->pa_start, info->size, info->flags); +} + int hax_vcpu_setup_hax_tunnel(struct vcpu_t *cv, struct hax_tunnel_info *info) { int ret = -ENOMEM; diff --git a/core/vcpu.c b/core/vcpu.c index 03945591..e902a167 100644 --- a/core/vcpu.c +++ b/core/vcpu.c @@ -3958,6 +3958,7 @@ static int exit_ept_violation(struct vcpu_t *vcpu, struct hax_tunnel *htun) paddr_t gpa; struct decode dec; int ret = 0; + uint64 fault_gfn; htun->_exit_reason = vmx(vcpu, exit_reason).basic_reason; @@ -3972,7 +3973,13 @@ static int exit_ept_violation(struct vcpu_t *vcpu, struct hax_tunnel *htun) #ifdef CONFIG_HAX_EPT2 ret = ept_handle_access_violation(&vcpu->vm->gpa_space, &vcpu->vm->ept_tree, - *qual, gpa); + *qual, gpa, &fault_gfn); + if (ret == -EPERM) { + htun->gpaprot.access = (qual->raw >> 3) & 7; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->_exit_status = HAX_EXIT_GPAPROT; + return HAX_EXIT; + } if (ret == -EACCES) { /* * For some reason, during boot-up, Chrome OS guests make hundreds of diff --git a/include/hax_interface.h b/include/hax_interface.h index a4d5b942..4451f650 100644 --- a/include/hax_interface.h +++ b/include/hax_interface.h @@ -128,6 +128,11 @@ struct hax_tunnel { struct { paddr_t gla; } mmio; + struct { + paddr_t gpa; + uint8_t access; + uint8_t pad[7]; + } gpaprot; struct { paddr_t dummy; } state; @@ -169,6 +174,7 @@ struct hax_module_version { #define HAX_CAP_64BIT_RAMBLOCK (1 << 3) #define HAX_CAP_64BIT_SETRAM (1 << 4) #define HAX_CAP_TUNNEL_PAGE (1 << 5) +#define HAX_CAP_GPA_PROTECTION (1 << 6) struct hax_capabilityinfo { /* @@ -236,6 +242,14 @@ struct hax_set_ram_info2 { uint64_t reserved2; } PACKED; +#define HAX_GPA_PROT_MASK 0x7 // one bit each for r/w/e +#define HAX_GPA_PROT_ALL 0x7 // disable r/w/e all +struct hax_gpa_prot_info { + uint64_t pa_start; + uint64_t size; + uint64_t flags; +} PACKED; + /* This interface is support only after API version 2 */ struct hax_qemu_version { /* Current API version in QEMU*/ diff --git a/include/windows/hax_windows.h b/include/windows/hax_windows.h index 15e57bc0..d7f925e3 100644 --- a/include/windows/hax_windows.h +++ b/include/windows/hax_windows.h @@ -116,10 +116,10 @@ static inline void hax_mutex_free(hax_mutex lock) } /* Return true if the bit is set already */ -static int hax_test_and_set_bit(int bit, uint64_t *memory) +static int hax_test_and_set_bit(uint64 bit, uint64_t *memory) { long *base = (long *)memory; - long nr_long; + 
uint64 nr_long; long bitoffset_in_long; long bits_per_long = sizeof(long) * 8; @@ -139,10 +139,10 @@ static int hax_test_and_set_bit(int bit, uint64_t *memory) * Return true if the bit is cleared already * Notice that InterlockedBitTestAndReset return original value in that bit */ -static int hax_test_and_clear_bit(int bit, uint64_t *memory) +static int hax_test_and_clear_bit(uint64 bit, uint64_t *memory) { long * base = (long *)memory; - long nr_long; + uint64 nr_long; long bitoffset_in_long; long bits_per_long = sizeof(long) * 8; @@ -159,9 +159,9 @@ static int hax_test_and_clear_bit(int bit, uint64_t *memory) } /* Don't care for the big endian situation */ -static bool hax_test_bit(int bit, uint64_t *memory) +static bool hax_test_bit(uint64 bit, uint64_t *memory) { - int byte = bit / 8; + uint64 byte = bit / 8; unsigned char *p; int offset = bit % 8; diff --git a/windows/hax_entry.c b/windows/hax_entry.c index 572a39b1..5fa56bd4 100644 --- a/windows/hax_entry.c +++ b/windows/hax_entry.c @@ -581,6 +581,21 @@ NTSTATUS HaxVmControl(PDEVICE_OBJECT DeviceObject, struct hax_vm_windows *ext, hax_vm_set_qemuversion(cvm, info); break; } + case HAX_VM_IOCTL_GPA_PROT: { + struct hax_gpa_prot_info *info; + int res; + if (inBufLength < sizeof(struct hax_gpa_prot_info)) { + ret = STATUS_INVALID_PARAMETER; + goto done; + } + info = (struct hax_gpa_prot_info *)inBuf; + res = hax_vm_gpa_prot(cvm, info); + if (res) { + ret = res == -EINVAL ? STATUS_INVALID_PARAMETER + : STATUS_UNSUCCESSFUL; + } + break; + } default: ret = STATUS_INVALID_PARAMETER; break; diff --git a/windows/hax_entry.h b/windows/hax_entry.h index 5be9eadd..a9934ad1 100644 --- a/windows/hax_entry.h +++ b/windows/hax_entry.h @@ -161,4 +161,8 @@ extern PDRIVER_OBJECT HaxDriverObject; #define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION \ CTL_CODE(HAX_DEVICE_TYPE, 0x910, METHOD_BUFFERED, FILE_ANY_ACCESS) +/* API version 3.0 */ +#define HAX_VM_IOCTL_GPA_PROT \ + CTL_CODE(HAX_DEVICE_TYPE, 0x915, METHOD_BUFFERED, FILE_ANY_ACCESS) + #endif // HAX_WINDOWS_HAX_ENTRY_H_ From c835f4fbcbbfda790229a5bebbe8f5a0de8d43c4 Mon Sep 17 00:00:00 2001 From: Haitao Shan Date: Wed, 21 Mar 2018 15:31:53 -0700 Subject: [PATCH 2/2] Protection of HAXM's accessing guest GPA. --- core/gpa_space.c | 51 +++++++---- core/include/memory.h | 13 ++- core/include/page_walker.h | 2 +- core/include/vtlb.h | 18 ++-- core/page_walker.c | 47 +++++++--- core/ramblock.c | 1 + core/vcpu.c | 173 +++++++++++++++++++++++++++++-------- core/vtlb.c | 133 ++++++++++++++++++---------- include/hax.h | 4 +- 9 files changed, 318 insertions(+), 124 deletions(-) diff --git a/core/gpa_space.c b/core/gpa_space.c index 49abade1..b8c521d4 100644 --- a/core/gpa_space.c +++ b/core/gpa_space.c @@ -120,9 +120,9 @@ void gpa_space_remove_listener(hax_gpa_space *gpa_space, // hax_unmap_user_pages(). 
static int gpa_space_map_range(hax_gpa_space *gpa_space, uint64 start_gpa, int len, uint8 **buf, hax_kmap_user *kmap, - bool *writable) + bool *writable, uint64 *fault_gfn) { - uint64 gfn; + uint64 gfn, i; uint delta, size, npages; hax_memslot *slot; hax_ramblock *block; @@ -144,6 +144,14 @@ static int gpa_space_map_range(hax_gpa_space *gpa_space, uint64 start_gpa, delta = (uint) (start_gpa - (gfn << PG_ORDER_4K)); size = (uint) len + delta; npages = (size + PAGE_SIZE_4K - 1) >> PG_ORDER_4K; + + // Check gpa protection bitmap + for (i = gfn; i < gfn + npages;) + if (gpa_space_chunk_protected(gpa_space, i, fault_gfn)) + return -EPERM; + else + i = (i/HAX_CHUNK_NR_PAGES + 1)*HAX_CHUNK_NR_PAGES; + slot = memslot_find(gpa_space, gfn); if (!slot) { hax_error("%s: start_gpa=0x%llx is reserved for MMIO\n", __func__, @@ -194,7 +202,7 @@ static int gpa_space_map_range(hax_gpa_space *gpa_space, uint64 start_gpa, } int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, - uint8 *data) + uint8 *data, uint64 *fault_gfn) { uint8 *buf; hax_kmap_user kmap; @@ -205,10 +213,12 @@ int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, return -EINVAL; } - ret = gpa_space_map_range(gpa_space, start_gpa, len, &buf, &kmap, NULL); + ret = gpa_space_map_range(gpa_space, start_gpa, len, + &buf, &kmap, NULL, fault_gfn); if (ret < 0) { - hax_error("%s: gpa_space_map_range() failed: start_gpa=0x%llx," - " len=%d\n", __func__, start_gpa, len); + if (ret != -EPERM) + hax_error("%s: gpa_space_map_range() failed: start_gpa=0x%llx," + " len=%d\n", __func__, start_gpa, len); return ret; } @@ -232,7 +242,7 @@ int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, } int gpa_space_write_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, - uint8 *data) + uint8 *data, uint64 *fault_gfn) { uint8 *buf; hax_kmap_user kmap; @@ -245,10 +255,11 @@ int gpa_space_write_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, } ret = gpa_space_map_range(gpa_space, start_gpa, len, &buf, &kmap, - &writable); + &writable, fault_gfn); if (ret < 0) { - hax_error("%s: gpa_space_map_range() failed: start_gpa=0x%llx," - " len=%d\n", __func__, start_gpa, len); + if (ret != -EPERM) + hax_error("%s: gpa_space_map_range() failed: start_gpa=0x%llx," + " len=%d\n", __func__, start_gpa, len); return ret; } if (!writable) { @@ -276,24 +287,26 @@ int gpa_space_write_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, return nbytes; } -void * gpa_space_map_page(hax_gpa_space *gpa_space, uint64 gfn, - hax_kmap_user *kmap, bool *writable) +int gpa_space_map_page(hax_gpa_space *gpa_space, uint64 gfn, + hax_kmap_user *kmap, bool *writable, + void **kva, uint64 *fault_gfn) { uint8 *buf; int ret; - void *kva; assert(gpa_space != NULL); assert(kmap != NULL); ret = gpa_space_map_range(gpa_space, gfn << PG_ORDER_4K, PAGE_SIZE_4K, &buf, - kmap, writable); + kmap, writable, fault_gfn); if (ret < PAGE_SIZE_4K) { - hax_error("%s: gpa_space_map_range() returned %d\n", __func__, ret); - return NULL; + if (ret != -EPERM) + hax_error("%s: gpa_space_map_range() returned %d\n", __func__, ret); + *kva = NULL; + return ret; } - kva = (void *) buf; - assert(kva != NULL); - return kva; + *kva = (void *) buf; + assert(*kva != NULL); + return 0; } void gpa_space_unmap_page(hax_gpa_space *gpa_space, hax_kmap_user *kmap) diff --git a/core/include/memory.h b/core/include/memory.h index 958205f3..1ede31a1 100644 --- a/core/include/memory.h +++ b/core/include/memory.h @@ -254,13 +254,15 @@ void 
gpa_space_remove_listener(hax_gpa_space *gpa_space, // |len|: The number of bytes to copy. // |data|: The destination buffer to copy the bytes into, whose size must be at // least |len| bytes. +// |fault_gpn|: The faulting gpn as a result of gpa range protection. // Returns the number of bytes actually copied, or one of the following error // codes: // -EINVAL: Invalid input, e.g. |data| is NULL, or the GPA range specified by // |start_gpa| and |len| touches an MMIO region. // -ENOMEM: Unable to map the requested guest page frames into KVA space. +// -EPARM: Fault occurred due to violation of gpa range protection. int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, - uint8 *data); + uint8 *data, uint64 *fault_gfn); // Copies the given number of bytes from the given buffer to guest RAM. // |gpa_space|: The |hax_gpa_space| of the guest. @@ -270,6 +272,7 @@ int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, // |len|: The number of bytes to copy. // |data|: The source buffer to copy the bytes from, whose size must be at least // |len| bytes. +// |fault_gpn|: The faulting gpn as a result of gpa range protection. // Returns the number of bytes actually copied, or one of the following error // codes: // -EINVAL: Invalid input, e.g. |data| is NULL, or the GPA range specified by @@ -277,8 +280,9 @@ int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, // -ENOMEM: Unable to map the requested guest page frames into KVA space. // -EACCES: The GPA range specified by |start_gpa| and |len| touches a ROM // region. +// -EPARM: Fault occurred due to violation of gpa range protection. int gpa_space_write_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, - uint8 *data); + uint8 *data, uint64 *fault_gfn); // Maps the given guest page frame into KVA space, stores the KVA mapping in the // given buffer, and returns the KVA. The caller must destroy the KVA mapping @@ -291,8 +295,9 @@ int gpa_space_write_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, // page frame is writable (i.e. maps to RAM). Can be NULL if the // caller only wants to read from the page. // Returns NULL on error. -void * gpa_space_map_page(hax_gpa_space *gpa_space, uint64 gfn, - hax_kmap_user *kmap, bool *writable); +int gpa_space_map_page(hax_gpa_space *gpa_space, uint64 gfn, + hax_kmap_user *kmap, bool *writable, + void **kva, uint64 *fault_gfn); // Destroys the KVA mapping previously created by gpa_space_map_page(). 
void gpa_space_unmap_page(hax_gpa_space *gpa_space, hax_kmap_user *kmap); diff --git a/core/include/page_walker.h b/core/include/page_walker.h index 4dd48762..8903114d 100644 --- a/core/include/page_walker.h +++ b/core/include/page_walker.h @@ -65,6 +65,6 @@ typedef uint64 ADDRESS; uint32 pw_perform_page_walk(IN struct vcpu_t *vcpu, IN uint64 virt_addr, IN uint32 access, OUT uint64 *gpa_out, OUT uint *order, IN bool set_ad_bits, - IN bool is_fetch); + IN bool is_fetch, OUT uint64 *fault_gfn); #endif // HAX_CORE_PAGE_WALKER_H_ diff --git a/core/include/vtlb.h b/core/include/vtlb.h index 211f570d..da1d218b 100644 --- a/core/include/vtlb.h +++ b/core/include/vtlb.h @@ -43,7 +43,8 @@ enum { TF_WRITE = 0x00000002, // Fault due to write TF_USER = 0x00000004, // Fault due to user mode TF_RSVD = 0x00000008, // Fault due to reserved bit violation - TF_EXEC = 0x00000010 // Fault due to exec protection + TF_EXEC = 0x00000010, // Fault due to exec protection + TF_GPA_PROT= 0x00000020 // Fault due to gpa space protection }; #define EXECUTION_DISABLE_MASK 0x8000000000000000ULL @@ -100,16 +101,17 @@ void vcpu_invalidate_tlb_addr(struct vcpu_t *vcpu, vaddr_t va); uint vcpu_vtlb_alloc(struct vcpu_t *vcpu); void vcpu_vtlb_free(struct vcpu_t *vcpu); -bool handle_vtlb(struct vcpu_t *vcpu); +int handle_vtlb(struct vcpu_t *vcpu, uint64 *fault_gfn); uint vcpu_translate(struct vcpu_t *vcpu, vaddr_t va, uint access, paddr_t *pa, - uint64 *len, bool update); + uint64 *len, bool update, uint64 *fault_gfn); -uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, - uint32 dst_buflen, uint32 size, uint flag); -uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, +int vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, + uint32 dst_buflen, uint32 size, uint flag, + uint32 *cnt_read, uint64 *fault_gfn); +int vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, uint32 dst_buflen, const void *src, uint32 size, - uint flag); + uint flag, uint32 *cnt_write, uint64 *fault_gfn); #ifdef CONFIG_HAX_EPT2 /* @@ -127,7 +129,7 @@ uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, * -ENOMEM: Memory allocation/mapping error. 
*/ int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, - int len); + int len, uint64 *fault_gfn); #endif // CONFIG_HAX_EPT2 void hax_inject_page_fault(struct vcpu_t *vcpu, mword error_code); diff --git a/core/page_walker.c b/core/page_walker.c index 661abd63..91136776 100644 --- a/core/page_walker.c +++ b/core/page_walker.c @@ -561,8 +561,9 @@ static void pw_update_ad_bits( uint32 pw_perform_page_walk( IN struct vcpu_t *vcpu, IN uint64 virt_addr, IN uint32 access, OUT uint64 *gpa_out, OUT uint *order, IN bool set_ad_bits, - IN bool is_fetch) + IN bool is_fetch, OUT uint64 *fault_gfn) { + int ret; uint32 retval = TF_OK; uint64 efer_value = vcpu->state->_efer; bool is_nxe = ((efer_value & IA32_EFER_XD) != 0); @@ -617,9 +618,16 @@ uint32 pw_perform_page_walk( if (is_lme) { pml4t_gpa = first_table; #ifdef CONFIG_HAX_EPT2 - pml4t_hva = gpa_space_map_page(&vcpu->vm->gpa_space, - pml4t_gpa >> PG_ORDER_4K, - &pml4t_kmap, NULL); + ret = gpa_space_map_page(&vcpu->vm->gpa_space, + pml4t_gpa >> PG_ORDER_4K, + &pml4t_kmap, NULL, + &pml4t_hva, fault_gfn); + if (ret < 0) { + retval = TF_FAILED; + if (ret == -EPERM) + retval |= TF_GPA_PROT; + goto out; + } #else // !CONFIG_HAX_EPT2 #if (!defined(__MACH__) && !defined(_WIN64)) pml4t_hva = hax_map_gpfn(vcpu->vm, pml4t_gpa >> 12, is_kernel, cr3, @@ -653,9 +661,16 @@ uint32 pw_perform_page_walk( } #ifdef CONFIG_HAX_EPT2 - pdpt_hva = gpa_space_map_page(&vcpu->vm->gpa_space, + ret = gpa_space_map_page(&vcpu->vm->gpa_space, pdpt_gpa >> PG_ORDER_4K, - &pdpt_kmap, NULL); + &pdpt_kmap, NULL, + &pdpt_hva, fault_gfn); + if (ret < 0) { + retval = TF_FAILED; + if (ret == -EPERM) + retval |= TF_GPA_PROT; + goto out; + } #else // !CONFIG_HAX_EPT2 #if (!defined(__MACH__) && !defined(_WIN64)) pdpt_hva = hax_map_gpfn(vcpu->vm, pdpt_gpa >> 12, is_kernel, cr3, 1); @@ -729,8 +744,14 @@ uint32 pw_perform_page_walk( pd_gpa = is_pae ? 
pw_retrieve_phys_addr(&pdpte_val, is_pae) : first_table; #ifdef CONFIG_HAX_EPT2 - pd_hva = gpa_space_map_page(&vcpu->vm->gpa_space, pd_gpa >> PG_ORDER_4K, - &pd_kmap, NULL); + ret = gpa_space_map_page(&vcpu->vm->gpa_space, pd_gpa >> PG_ORDER_4K, + &pd_kmap, NULL, &pd_hva, fault_gfn); + if (ret < 0) { + retval = TF_FAILED; + if (ret == -EPERM) + retval |= TF_GPA_PROT; + goto out; + } #else // !CONFIG_HAX_EPT2 #if (!defined(__MACH__) && !defined(_WIN64)) pd_hva = hax_map_gpfn(vcpu->vm, pd_gpa >> 12, is_kernel, cr3, 2); @@ -807,8 +828,14 @@ uint32 pw_perform_page_walk( *order = PG_ORDER_4K; pt_gpa = pw_retrieve_phys_addr(&pde_val, is_pae); #ifdef CONFIG_HAX_EPT2 - pt_hva = gpa_space_map_page(&vcpu->vm->gpa_space, pt_gpa >> 12, &pt_kmap, - NULL); + ret = gpa_space_map_page(&vcpu->vm->gpa_space, pt_gpa >> 12, &pt_kmap, + NULL, &pt_hva, fault_gfn); + if (ret < 0) { + retval = TF_FAILED; + if (ret == -EPERM) + retval |= TF_GPA_PROT; + goto out; + } #else // !CONFIG_HAX_EPT2 #if (!defined(__MACH__) && !defined(_WIN64)) pt_hva = hax_map_gpfn(vcpu->vm, pt_gpa >> 12, is_kernel, cr3, 1); diff --git a/core/ramblock.c b/core/ramblock.c index 4fb1b4ba..d1294414 100644 --- a/core/ramblock.c +++ b/core/ramblock.c @@ -354,6 +354,7 @@ hax_chunk * ramblock_get_chunk(hax_ramblock *block, uint64 uva_offset, hax_error("%s: Failed to allocate chunk: ret=%d, index=%llu," " base_uva=0x%llx, size=0x%llx, was_clear=%d\n", __func__, ret, chunk_index, chunk_base_uva, chunk_size, was_clear); + DbgBreakPoint(); return NULL; } assert(chunk != NULL); diff --git a/core/vcpu.c b/core/vcpu.c index e902a167..9d4d73b3 100644 --- a/core/vcpu.c +++ b/core/vcpu.c @@ -1404,12 +1404,14 @@ static int write_low_bits(uint64 *pdst, uint64 src, uint8 size) return 0; } -static void handle_mmio_post(struct vcpu_t *vcpu, struct hax_fastmmio *hft) +static int handle_mmio_post(struct vcpu_t *vcpu, struct hax_fastmmio *hft, + uint64 *fault_gfn) { struct vcpu_state_t *state = vcpu->state; + int ret; if (hft->direction) - return; + return 0; if (vcpu->post_mmio.op == VCPU_POST_MMIO_WRITE_REG) { uint64 value; @@ -1438,29 +1440,45 @@ static void handle_mmio_post(struct vcpu_t *vcpu, struct hax_fastmmio *hft) hft->size); } else if (vcpu->post_mmio.op == VCPU_POST_MMIO_WRITE_MEM) { // Assume little-endian - if (!vcpu_write_guest_virtual(vcpu, vcpu->post_mmio.va, hft->size, - (uint8 *)&hft->value, hft->size, 0)) { - hax_panic_vcpu(vcpu, "Error writing %u bytes to guest RAM " + uint32 cnt_write; + if (!(ret = vcpu_write_guest_virtual(vcpu, vcpu->post_mmio.va, + hft->size, + (uint8 *)&hft->value, hft->size, + 0, &cnt_write, fault_gfn))) { + if (ret != -EPERM) + hax_panic_vcpu(vcpu, "Error writing %u bytes to guest RAM " "(va=0x%llx, value=0x%llx)\n", hft->size, vcpu->post_mmio.va, hft->value); + return ret; } } else { hax_warning("Unknown post-MMIO operation %d\n", vcpu->post_mmio.op); } + return 0; } -static void handle_io_post(struct vcpu_t *vcpu, struct hax_tunnel *htun) +static int handle_io_post(struct vcpu_t *vcpu, struct hax_tunnel *htun) { int size; struct vcpu_state_t *state = vcpu->state; if (htun->io._direction == HAX_IO_OUT) - return; + return 0; if (htun->io._flags == 1) { + int ret; + uint32 cnt_write; + uint64 fault_gfn; size = htun->io._count * htun->io._size; - if (!vcpu_write_guest_virtual(vcpu, htun->io._vaddr, IOS_MAX_BUFFER, - (void *)vcpu->io_buf, size, 0)) { + if (!(ret = vcpu_write_guest_virtual(vcpu, htun->io._vaddr, + IOS_MAX_BUFFER, (void *)vcpu->io_buf, + size, 0, &cnt_write, &fault_gfn))) { + if (ret == -EPERM) { + 
htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return ret; + } hax_panic_vcpu(vcpu, "Unexpected page fault, kill the VM!\n"); dump_vmcs(vcpu); } @@ -1483,12 +1501,14 @@ static void handle_io_post(struct vcpu_t *vcpu, struct hax_tunnel *htun) } } } + return 0; } int vcpu_execute(struct vcpu_t *vcpu) { struct hax_tunnel *htun = vcpu->tunnel; - int err = 0; + int err = 0, ret = 0; + uint64 fault_gfn; hax_mutex_lock(vcpu->tmutex); hax_debug("vcpu begin to run....\n"); @@ -1502,10 +1522,20 @@ int vcpu_execute(struct vcpu_t *vcpu) hax_debug("vcpu begin to run....in PE\n"); if (htun->_exit_status == HAX_EXIT_IO) { - handle_io_post(vcpu, htun); + ret = handle_io_post(vcpu, htun); + if (ret == -EPERM) + goto out; } if (htun->_exit_status == HAX_EXIT_FAST_MMIO) { - handle_mmio_post(vcpu, (struct hax_fastmmio *)vcpu->io_buf); + ret = handle_mmio_post(vcpu, (struct hax_fastmmio *)vcpu->io_buf, + &fault_gfn); + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + goto out; + } + } err = cpu_vmx_execute(vcpu, htun); vcpu_is_panic(vcpu); @@ -1690,9 +1720,9 @@ void vcpu_vmwrite_all(struct vcpu_t *vcpu, int force_tlb_flush) // a) The guest is running in EPT mode (see IASDM Vol. 3C 26.3.2.4), and // b) Preemption is enabled for the current CPU. // Returns 0 on success, < 0 on error. -static int vcpu_prepare_pae_pdpt(struct vcpu_t *vcpu) +static int vcpu_prepare_pae_pdpt(struct vcpu_t *vcpu, struct hax_tunnel *htun) { - uint64 cr3 = vcpu->state->_cr3; + uint64 cr3 = vcpu->state->_cr3, fault_gfn; int pdpt_size = (int)sizeof(vcpu->pae_pdptes); #ifdef CONFIG_HAX_EPT2 // CR3 is the GPA of the page-directory-pointer table. According to IASDM @@ -1706,7 +1736,13 @@ static int vcpu_prepare_pae_pdpt(struct vcpu_t *vcpu) // simply disabling IRQs). Therefore, it is not safe to call this function // with preemption disabled. ret = gpa_space_read_data(&vcpu->vm->gpa_space, gpa, pdpt_size, - (uint8 *)vcpu->pae_pdptes); + (uint8 *)vcpu->pae_pdptes, &fault_gfn); + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return ret; + } // The PAE PDPT cannot span two page frames if (ret != pdpt_size) { hax_error("%s: Failed to read PAE PDPT: cr3=0x%llx, ret=%d\n", __func__, @@ -2013,7 +2049,8 @@ static bool is_mmio_address(struct vcpu_t *vcpu, paddr_t gpa) } // Returns 0 on success, < 0 on error, > 0 if HAX_EXIT_MMIO is necessary. -static int vcpu_simple_decode(struct vcpu_t *vcpu, struct decode *dc) +static int vcpu_simple_decode(struct vcpu_t *vcpu, struct decode *dc, + uint64 *fault_gfn) { uint64 cs_base = vcpu->state->_cs.base; uint64 rip = vcpu->state->_rip; @@ -2037,6 +2074,7 @@ static int vcpu_simple_decode(struct vcpu_t *vcpu, struct decode *dc) int use_16bit_operands; uint8 operand_size; bool has_esc = false; // Whether opcode begins with 0f (escape opcode byte) + int ret = 0; if (!qemu_support_fastmmio(vcpu)) { hax_warning("vcpu_simple_decode: QEMU does not support fast MMIO!\n"); @@ -2050,7 +2088,10 @@ static int vcpu_simple_decode(struct vcpu_t *vcpu, struct decode *dc) // limit and privilege checks va = is_64bit_mode ? 
rip : cs_base + rip; #ifdef CONFIG_HAX_EPT2 - if (mmio_fetch_instruction(vcpu, va, instr, INSTR_MAX_LEN)) { + ret = mmio_fetch_instruction(vcpu, va, instr, INSTR_MAX_LEN, fault_gfn); + if (ret) { + if (ret == -EPERM) + return ret; hax_panic_vcpu(vcpu, "%s: mmio_fetch_instruction() failed: vcpu_id=%u," " gva=0x%llx (CS:IP=0x%llx:0x%llx), mmio_gpa=0x%llx\n", __func__, vcpu->vcpu_id, va, cs_base, rip, dc->gpa); @@ -2330,8 +2371,16 @@ static int vcpu_simple_decode(struct vcpu_t *vcpu, struct decode *dc) } src_pa = dst_pa = 0xffffffffffffffffULL; // TODO: Can vcpu_translate() fail? - vcpu_translate(vcpu, src_va, 0, &src_pa, NULL, true); - vcpu_translate(vcpu, dst_va, 0, &dst_pa, NULL, true); + ret = vcpu_translate(vcpu, src_va, 0, &src_pa, NULL, true, + fault_gfn); + if (ret == TF_FAILED | TF_GPA_PROT) { + return -EPERM; + } + ret = vcpu_translate(vcpu, dst_va, 0, &dst_pa, NULL, true, + fault_gfn); + if (ret == TF_FAILED | TF_GPA_PROT) { + return -EPERM; + } is_src_mmio = src_pa == dc->gpa || is_mmio_address(vcpu, src_pa); is_dst_mmio = dst_pa == dc->gpa || is_mmio_address(vcpu, dst_pa); if (is_src_mmio && is_dst_mmio) { @@ -2480,8 +2529,13 @@ static int hax_setup_fastmmio(struct vcpu_t *vcpu, struct hax_tunnel *htun, break; } case OPCODE_MOVS_MEM_TO_IOMEM: { + uint32 cnt_read; + uint64 fault_gfn; // Source operand (saved in dec->va) is a non-I/O GVA - if (!vcpu_read_guest_virtual(vcpu, dec->va, buf, 8, dec->size, 0)) { + if (!vcpu_read_guest_virtual(vcpu, dec->va, buf, 8, dec->size, 0, + &cnt_read, &fault_gfn)) { + // hax_simple_decode should have detect protection fault + // and we do not do it twice here. hax_panic_vcpu(vcpu, "Error reading %u bytes from guest RAM" " (va=0x%llx, DS:RSI=0x%llx:0x%llx)\n", dec->size, dec->va, vcpu->state->_ds.base, @@ -2618,7 +2672,7 @@ static int exit_exc_nmi(struct vcpu_t *vcpu, struct hax_tunnel *htun) { struct vcpu_state_t *state = vcpu->state; interruption_info_t exit_intr_info; - uint64 cr0; + uint64 cr0, fault_gfn; exit_intr_info.raw = vmx(vcpu, exit_intr_info).raw; htun->_exit_reason = vmx(vcpu, exit_reason).basic_reason; @@ -2631,22 +2685,41 @@ static int exit_exc_nmi(struct vcpu_t *vcpu, struct hax_tunnel *htun) } case EXC_PAGEFAULT: { if (vtlb_active(vcpu)) { - if (handle_vtlb(vcpu)) + int ret; + if ((ret = handle_vtlb(vcpu, &fault_gfn)) > 0) return HAX_RESUME; - + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return HAX_EXIT; + } paddr_t pa; struct decode dec; - int ret; vaddr_t cr2 = vmx(vcpu, exit_qualification).address; + uint64 fault_gfn; - ret = vcpu_simple_decode(vcpu, &dec); + ret = vcpu_simple_decode(vcpu, &dec, &fault_gfn); if (ret < 0) { + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 0x1; + return HAX_EXIT; + } // vcpu_simple_decode() has called hax_panic_vcpu() return HAX_RESUME; } else if (ret > 0) { handle_mem_fault(vcpu, htun); } else { - vcpu_translate(vcpu, cr2, 0, &pa, (uint64_t *)NULL, 0); + ret = vcpu_translate(vcpu, cr2, 0, &pa, (uint64_t *)NULL, + 0, &fault_gfn); + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 0x1; + return HAX_EXIT; + } dec.gpa = pa & 0xffffffff; if (hax_setup_fastmmio(vcpu, htun, &dec)) { // hax_setup_fastmmio() has called hax_panic_vcpu() @@ -3168,13 +3241,16 @@ static int exit_cr_access(struct vcpu_t *vcpu, struct hax_tunnel *htun) // Vol. 
3A 4.1.2, Figure 4-1) and needs to load its PDPTE // registers, or already in PAE mode and needs to reload those // registers - int ret = vcpu_prepare_pae_pdpt(vcpu); - if (ret) { - hax_panic_vcpu(vcpu, "vCPU #%u failed to (re)load PDPT for" + int ret = vcpu_prepare_pae_pdpt(vcpu, htun); + switch (ret) { + case HAX_EXIT: + return ret; + default: + hax_panic_vcpu(vcpu, "vCPU #%u failed to (re)load PDPT for" " EPT+PAE mode: ret=%d\n", vcpu->vcpu_id, ret); - dump_vmcs(vcpu); - return HAX_RESUME; + dump_vmcs(vcpu); + return HAX_RESUME; } } @@ -3331,6 +3407,9 @@ static int handle_string_io(struct vcpu_t *vcpu, exit_qualification_t *qual, struct vcpu_state_t *state = vcpu->state; uint real_size, count, required_size; vaddr_t start, rindex; + int ret; + uint32 cnt_read, cnt_write; + uint64 fault_gfn; htun->io._flags = 1; @@ -3368,14 +3447,30 @@ static int handle_string_io(struct vcpu_t *vcpu, exit_qualification_t *qual, } if (qual->io.direction == HAX_IO_OUT) { - if (!vcpu_read_guest_virtual(vcpu, start, vcpu->io_buf, IOS_MAX_BUFFER, - real_size, 0)) + if (!(ret = vcpu_read_guest_virtual(vcpu, start, vcpu->io_buf, + IOS_MAX_BUFFER, real_size, 0, + &cnt_read, &fault_gfn))) { + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return HAX_EXIT; + } return HAX_RESUME; + } } else { // HACK: Just ensure the buffer is mapped in the kernel. - if (!vcpu_write_guest_virtual(vcpu, start, IOS_MAX_BUFFER, vcpu->io_buf, - real_size, 0)) + if (!(ret = vcpu_write_guest_virtual(vcpu, start, IOS_MAX_BUFFER, + vcpu->io_buf, real_size, 0, + &cnt_write, &fault_gfn))) { + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return HAX_EXIT; + } return HAX_RESUME; + } } if (required_size <= IOS_MAX_BUFFER) { @@ -4005,8 +4100,14 @@ static int exit_ept_violation(struct vcpu_t *vcpu, struct hax_tunnel *htun) mmio_handler: #endif - ret = vcpu_simple_decode(vcpu, &dec); + ret = vcpu_simple_decode(vcpu, &dec, &fault_gfn); if (ret < 0) { + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return HAX_EXIT; + } // vcpu_simple_decode() has called hax_panic_vcpu() return HAX_RESUME; } else if (ret > 0) { diff --git a/core/vtlb.c b/core/vtlb.c index f2b859ef..bf5f1549 100644 --- a/core/vtlb.c +++ b/core/vtlb.c @@ -72,7 +72,7 @@ static pagemode_t vcpu_get_pagemode(struct vcpu_t *vcpu); static pte64_t * vtlb_get_pde(hax_mmu_t *mmu, vaddr_t va, bool is_shadow); static uint32 vcpu_mmu_walk(struct vcpu_t *vcpu, vaddr_t va, uint32 access, paddr_t *pa, uint *order, uint64 *flags, - bool update, bool prefetch); + bool update, bool prefetch, uint64 *fault_gfn); static void vtlb_update_pde(pte64_t *pde, pte64_t *shadow_pde, struct hax_page *page) @@ -413,7 +413,8 @@ void vtlb_invalidate(hax_mmu_t *mmu) } static uint vtlb_handle_page_fault(struct vcpu_t *vcpu, pagemode_t guest_mode, - paddr_t pdir, vaddr_t va, uint32 access) + paddr_t pdir, vaddr_t va, uint32 access, + uint64 *fault_gfn) { uint r; paddr_t gpa; @@ -474,7 +475,7 @@ static uint vtlb_handle_page_fault(struct vcpu_t *vcpu, pagemode_t guest_mode, } case PM_2LVL: { r = vcpu_mmu_walk(vcpu, va, access, &gpa, &tlb.guest_order, - &tlb.flags, true, /*true*/false); + &tlb.flags, true, /*true*/false, fault_gfn); break; } default: { @@ -538,8 +539,9 @@ uint64 vtlb_get_cr3(struct vcpu_t *vcpu) */ static uint32 
vcpu_mmu_walk(struct vcpu_t *vcpu, vaddr_t va, uint32 access, paddr_t *pa, uint *order, uint64 *flags, - bool update, bool prefetch) + bool update, bool prefetch, uint64 *fault_gfn) { + int ret; uint lvl, idx; void *pte_va; #ifdef CONFIG_HAX_EPT2 @@ -585,9 +587,14 @@ static uint32 vcpu_mmu_walk(struct vcpu_t *vcpu, vaddr_t va, uint32 access, // Fetch the page table entry. idx = pte32_get_idx(lvl, va); #ifdef CONFIG_HAX_EPT2 - pte_va = gpa_space_map_page(&vcpu->vm->gpa_space, - gpt_base >> PG_ORDER_4K, &pte_kmap, - &writable); + ret = gpa_space_map_page(&vcpu->vm->gpa_space, + gpt_base >> PG_ORDER_4K, &pte_kmap, + &writable, &pte_va, fault_gfn); + if (ret < 0) { + if (ret == -EPERM) + return TF_FAILED | TF_GPA_PROT; + return TF_FAILED; + } #else // !CONFIG_HAX_EPT2 #if (!defined(__MACH__) && !defined(_WIN64)) pte_va = hax_map_gpfn(vcpu->vm, gpt_base >> 12, is_kernel, g_cr3, lvl); @@ -778,14 +785,15 @@ static uint32 vcpu_mmu_walk(struct vcpu_t *vcpu, vaddr_t va, uint32 access, return TF_OK; } -bool handle_vtlb(struct vcpu_t *vcpu) +int handle_vtlb(struct vcpu_t *vcpu, uint64 *fault_gfn) { uint32 access = vmx(vcpu, exit_exception_error_code); pagemode_t mode = vcpu_get_pagemode(vcpu); paddr_t pdir = vcpu->state->_cr3 & (mode == PM_PAE ? ~0x1fULL : ~0xfffULL); vaddr_t cr2 = vmx(vcpu, exit_qualification).address; - uint32 ret = vtlb_handle_page_fault(vcpu, mode, pdir, cr2, access); + uint32 ret = vtlb_handle_page_fault(vcpu, mode, pdir, cr2, access, + fault_gfn); hax_debug("handle vtlb fault @%llx\n", cr2); if (ret == 0) { @@ -798,6 +806,9 @@ bool handle_vtlb(struct vcpu_t *vcpu) return 0; } + if (ret == (TF_FAILED | TF_GPA_PROT)) + return -EPERM; + // Otherwise, inject PF into guest access = ret & (vcpu->state->_efer & IA32_EFER_XD ? 0x1f : 0x0f); vcpu->state->_cr2 = cr2; @@ -845,35 +856,45 @@ static inline void * mmio_map_guest_virtual_page_fast(struct vcpu_t *vcpu, return vcpu->mmio_fetch.kva; } -static void * mmio_map_guest_virtual_page_slow(struct vcpu_t *vcpu, uint64 gva, - hax_kmap_user *kmap) +static int mmio_map_guest_virtual_page_slow(struct vcpu_t *vcpu, uint64 gva, + hax_kmap_user *kmap, + void **addr, uint64 *fault_gfn) { uint64 gva_aligned = gva & pgmask(PG_ORDER_4K); uint64 gpa; uint ret; void *kva; - ret = vcpu_translate(vcpu, gva_aligned, 0, &gpa, NULL, true); + ret = vcpu_translate(vcpu, gva_aligned, 0, &gpa, NULL, true, fault_gfn); if (ret) { - hax_error("%s: vcpu_translate() returned 0x%x: vcpu_id=%u," + if (ret != -EPERM) + hax_error("%s: vcpu_translate() returned 0x%x: vcpu_id=%u," " gva=0x%llx\n", __func__, ret, vcpu->vcpu_id, gva); // TODO: Inject a guest page fault? 
- return NULL; + *addr = NULL; + return ret; } hax_debug("%s: gva=0x%llx => gpa=0x%llx, vcpu_id=0x%u\n", __func__, gva_aligned, gpa, vcpu->vcpu_id); - kva = gpa_space_map_page(&vcpu->vm->gpa_space, gpa >> PG_ORDER_4K, kmap, - NULL); + ret = gpa_space_map_page(&vcpu->vm->gpa_space, gpa >> PG_ORDER_4K, kmap, + NULL, &kva, fault_gfn); + if (ret == -EPERM) { + *addr = NULL; + return ret; + } if (!kva) { hax_error("%s: gpa_space_map_page() failed: vcpu_id=%u, gva=0x%llx," " gpa=0x%llx\n", __func__, vcpu->vcpu_id, gva, gpa); - return NULL; + *addr = NULL; + return ret; } - return kva; + *addr = kva; + return 0; } -int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, int len) +int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, + int len, uint64 *fault_gfn) { uint64 end_gva; uint8 *src_buf; @@ -885,12 +906,16 @@ int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, int len) assert(len > 0 && len <= 15); end_gva = gva + (uint)len - 1; if ((gva >> PG_ORDER_4K) != (end_gva >> PG_ORDER_4K)) { - uint32 ret; + int ret; + uint32 cnt_read; hax_info("%s: GVA range spans two pages: gva=0x%llx, len=%d\n", __func__, gva, len); - ret = vcpu_read_guest_virtual(vcpu, gva, buf, (uint)len, (uint)len, 0); + ret = vcpu_read_guest_virtual(vcpu, gva, buf, (uint)len, (uint)len, 0, + &cnt_read, fault_gfn); if (!ret) { + if (ret == -EPERM) + return ret; hax_error("%s: vcpu_read_guest_virtual() failed: vcpu_id=%u," " gva=0x%llx, len=%d\n", __func__, vcpu->vcpu_id, gva, len); @@ -901,8 +926,11 @@ int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, int len) src_buf = mmio_map_guest_virtual_page_fast(vcpu, gva, len); if (!src_buf) { - src_buf = mmio_map_guest_virtual_page_slow(vcpu, gva, - &vcpu->mmio_fetch.kmap); + int ret = mmio_map_guest_virtual_page_slow(vcpu, gva, + &vcpu->mmio_fetch.kmap, + &src_buf, fault_gfn); + if (ret == -EPERM) + return ret; if (!src_buf) { return -ENOMEM; } @@ -929,12 +957,14 @@ int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, int len) * If flag is 2, the memory read is for internal use. It does not update the * guest page tables. It returns the number of bytes read. 
*/ -uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, - uint32 dst_buflen, uint32 size, uint flag) +int vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, + uint32 dst_buflen, uint32 size, uint flag, + uint32 *cnt_read, uint64 *fault_gfn) { // TBD: use guest CPL for access checks char *dstp = dst; uint32 offset = 0; + int ret = 0; #ifdef CONFIG_HAX_EPT2 int len2; #else // !CONFIG_HAX_EPT2 @@ -957,10 +987,15 @@ uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, while (offset < size) { paddr_t gpa; uint64 len = size - offset; - uint r = vcpu_translate(vcpu, addr + offset, 0, &gpa, &len, flag != 2); + uint r = vcpu_translate(vcpu, addr + offset, 0, &gpa, &len, flag != 2, + fault_gfn); if (r != 0) { - if (flag != 0) - return offset; // Number of bytes successfully read + if (r == (TF_FAILED | TF_GPA_PROT)) + return -EPERM; + if (flag != 0) { + *cnt_read = offset; + return true; // Number of bytes successfully read + } if (r & TF_GP2HP) { hax_error("read_guest_virtual(%llx, %x) failed\n", addr, size); } @@ -976,12 +1011,14 @@ uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, // } #ifdef CONFIG_HAX_EPT2 len2 = gpa_space_read_data(&vcpu->vm->gpa_space, gpa, (int)len, - (uint8 *)(dstp + offset)); + (uint8 *)(dstp + offset), fault_gfn); if (len2 <= 0) { - hax_panic_vcpu( + if (len2 != -EPERM) + hax_panic_vcpu( vcpu, "read guest virtual error, gpa:0x%llx, len:0x%llx\n", gpa, len); - return false; + *cnt_read = 0; + return len2; } else { len = (uint64)len2; } @@ -1006,7 +1043,8 @@ uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, offset += len; } - return flag != 0 ? size : true; + *cnt_read = size; + return true; } /* @@ -1021,9 +1059,9 @@ uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, * A flag value of 2 is implemented, but not used. It does not update the guest * page tables. It returns the number of bytes written. */ -uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, - uint32 dst_buflen, const void *src, uint32 size, - uint flag) +int vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, + uint32 dst_buflen, const void *src, uint32 size, + uint flag, uint32 *cnt_write, uint64 *fault_gfn) { // TODO: use guest CPL for access checks const char *srcp = src; @@ -1052,10 +1090,14 @@ uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, paddr_t gpa; uint64 len = size - offset; uint r = vcpu_translate(vcpu, addr + offset, TF_WRITE, &gpa, &len, - flag != 2); + flag != 2, fault_gfn); if (r != 0) { - if (flag != 0) - return offset; // Number of bytes successfully written + if (r == (TF_FAILED | TF_GPA_PROT)) + return -EPERM; + if (flag != 0) { + *cnt_write = offset; + return true; // Number of bytes successfully written + } if (r & TF_GP2HP) { hax_panic_vcpu(vcpu, "write_guest_virtual(%llx, %x) failed\n", addr, size); @@ -1068,12 +1110,14 @@ uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, } #ifdef CONFIG_HAX_EPT2 len2 = (uint64)gpa_space_write_data(&vcpu->vm->gpa_space, gpa, len, - (uint8 *)(srcp + offset)); + (uint8 *)(srcp + offset), fault_gfn); if (len2 <= 0) { - hax_panic_vcpu( + if (len2 != -EPERM) + hax_panic_vcpu( vcpu, "write guest virtual error, gpa:0x%llx, len:0x%llx\n", gpa, len); - return false; + *cnt_write = 0; + return len2; } else { len = len2; } @@ -1098,7 +1142,8 @@ uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, offset += len; } - return flag != 0 ? 
size : true; + *cnt_write = offset; + return true; } /* @@ -1112,7 +1157,7 @@ uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, * number otherwise. */ uint vcpu_translate(struct vcpu_t *vcpu, vaddr_t va, uint access, paddr_t *pa, - uint64 *len, bool update) + uint64 *len, bool update, uint64 *fault_gfn) { pagemode_t mode = vcpu_get_pagemode(vcpu); uint order = 0; @@ -1133,7 +1178,7 @@ uint vcpu_translate(struct vcpu_t *vcpu, vaddr_t va, uint access, paddr_t *pa, case PM_PAE: case PM_PML4: { r = pw_perform_page_walk(vcpu, va, access, pa, &order, update, - false); + false, fault_gfn); break; } default: { diff --git a/include/hax.h b/include/hax.h index 75d278d0..81c9b7c9 100644 --- a/include/hax.h +++ b/include/hax.h @@ -38,8 +38,8 @@ extern int hax_page_size; -#define HAX_CUR_VERSION 0x0004 -#define HAX_COMPAT_VERSION 0x0001 +#define HAX_CUR_VERSION 0x0005 +#define HAX_COMPAT_VERSION 0x0002 // EPT2 refers to the new memory virtualization engine, which implements lazy // allocation, and therefore greatly speeds up ALLOC_RAM and SET_RAM VM ioctls
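
To round out the series, here is a hedged sketch of how a user-space VMM on Windows might exercise the new interface. Only struct hax_gpa_prot_info, HAX_VM_IOCTL_GPA_PROT (function code 0x915), HAX_GPA_PROT_MASK/HAX_GPA_PROT_ALL, and the HAX_EXIT_GPAPROT/gpaprot tunnel fields come from this series; the HAX_DEVICE_TYPE value, the "\\.\hax_vm00" device path, and the surrounding flow are assumptions for illustration. The flags direction follows gpa_space_protect_range() as implemented here: flags == 0 marks the range protected, flags == HAX_GPA_PROT_ALL clears the protection (the header comment on HAX_GPA_PROT_ALL reads the other way, so callers should follow the code).

#include <windows.h>
#include <winioctl.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed driver constant, reproduced only for illustration; the ioctl number
 * itself matches windows/hax_entry.h in this patch. */
#define HAX_DEVICE_TYPE 0x4000
#define HAX_VM_IOCTL_GPA_PROT \
        CTL_CODE(HAX_DEVICE_TYPE, 0x915, METHOD_BUFFERED, FILE_ANY_ACCESS)

#define HAX_GPA_PROT_ALL 0x7           /* one bit each for r/w/x */

#pragma pack(push, 1)
struct hax_gpa_prot_info {             /* mirrors include/hax_interface.h */
    uint64_t pa_start;
    uint64_t size;
    uint64_t flags;                    /* 0 = protect, HAX_GPA_PROT_ALL = restore */
};
#pragma pack(pop)

static BOOL gpa_protect(HANDLE vm, uint64_t start, uint64_t size, uint64_t flags)
{
    struct hax_gpa_prot_info info = { start, size, flags };
    DWORD out = 0;
    return DeviceIoControl(vm, HAX_VM_IOCTL_GPA_PROT, &info, sizeof(info),
                           NULL, 0, &out, NULL);
}

int main(void)
{
    /* Hypothetical VM device path; a real VMM already holds this handle and
     * has checked HAX_CAP_GPA_PROTECTION in hax_capabilityinfo.winfo. */
    HANDLE vm = CreateFileA("\\\\.\\hax_vm00", GENERIC_READ | GENERIC_WRITE,
                            0, NULL, OPEN_EXISTING, 0, NULL);
    if (vm == INVALID_HANDLE_VALUE)
        return 1;

    /* Remove r/w/x access to 1MB of guest RAM at GPA 0x100000. The next guest
     * touch makes the vcpu run ioctl return with _exit_status ==
     * HAX_EXIT_GPAPROT and tunnel->gpaprot.gpa/access filled in; after
     * servicing the event the VMM calls gpa_protect(vm, ..., HAX_GPA_PROT_ALL)
     * to restore access and re-enters the guest. */
    if (!gpa_protect(vm, 0x100000, 0x100000, 0))
        fprintf(stderr, "HAX_VM_IOCTL_GPA_PROT failed: %lu\n", GetLastError());

    CloseHandle(vm);
    return 0;
}
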