-
Notifications
You must be signed in to change notification settings - Fork 884
[Draft] GPA Protection Implementation #35
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,7 @@ | |
#include "../include/hax.h" | ||
#include "include/paging.h" | ||
#include "../include/hax_host_mem.h" | ||
#include "ept2.h" | ||
|
||
int gpa_space_init(hax_gpa_space *gpa_space) | ||
{ | ||
|
@@ -59,6 +60,13 @@ int gpa_space_init(hax_gpa_space *gpa_space) | |
return ret; | ||
} | ||
|
||
/*
 * Returns the allocation size, in bytes, of a protection bitmap covering
 * |npages| guest page frames (one bit per page).
 */
static uint64 gpa_space_prot_bitmap_size(uint64 npages)
{
    /* One bit per page, rounded up to a whole byte */
    uint64 nbytes = (npages + 7) / 8;

    /* Extra 8 bytes of slack — presumably so the bit helpers, which cast
     * the byte array to uint64 *, can safely touch the trailing word.
     * TODO(review): confirm this is the intent of the padding. */
    return nbytes + 8;
}
|
||
void gpa_space_free(hax_gpa_space *gpa_space) | ||
{ | ||
hax_gpa_space_listener *listener, *tmp; | ||
|
@@ -75,6 +83,9 @@ void gpa_space_free(hax_gpa_space *gpa_space) | |
hax_gpa_space_listener, entry) { | ||
hax_list_del(&listener->entry); | ||
} | ||
if (gpa_space->prot_bitmap.bitmap) | ||
hax_vfree(gpa_space->prot_bitmap.bitmap, | ||
gpa_space_prot_bitmap_size(gpa_space->prot_bitmap.max_gpfn)); | ||
} | ||
|
||
void gpa_space_add_listener(hax_gpa_space *gpa_space, | ||
|
@@ -346,3 +357,118 @@ uint64 gpa_space_get_pfn(hax_gpa_space *gpa_space, uint64 gfn, uint8 *flags) | |
|
||
return pfn; | ||
} | ||
|
||
/*
 * Grows the GPA protection bitmap so it can track page frames up to
 * |max_gpfn|. The bitmap only ever grows; it is freed when the GPA space
 * is destroyed (gpa_space_free()).
 * Returns 0 on success (including when no growth is needed), or -ENOMEM
 * if the new bitmap cannot be allocated.
 */
int gpa_space_adjust_prot_bitmap(hax_gpa_space *gpa_space, uint64 max_gpfn)
{
    prot_bitmap *pb = &gpa_space->prot_bitmap;
    uint8 *bmold = pb->bitmap, *bmnew = NULL;

    /* Bitmap size only grows until it is destroyed */
    if (max_gpfn <= pb->max_gpfn)
        return 0;

    bmnew = hax_vmalloc(gpa_space_prot_bitmap_size(max_gpfn), HAX_MEM_NONPAGE);
    if (!bmnew) {
        hax_error("%s: Not enough memory for new protection bitmap\n",
                  __func__);
        return -ENOMEM;
    }
    /* Start with all pages unprotected: the portion beyond the old bitmap
     * must not contain stale bits in case hax_vmalloc() does not return
     * zeroed memory. */
    memset(bmnew, 0, gpa_space_prot_bitmap_size(max_gpfn));
    pb->bitmap = bmnew;
    if (bmold) {
        /* Preserve the protection state recorded so far */
        memcpy(bmnew, bmold, gpa_space_prot_bitmap_size(pb->max_gpfn));
        hax_vfree(bmold, gpa_space_prot_bitmap_size(pb->max_gpfn));
    }
    pb->max_gpfn = max_gpfn;
    return 0;
}
|
||
static void gpa_space_set_prot_bitmap(uint64 start, uint64 nbits, | ||
uint8 *bitmap, bool set) | ||
{ | ||
uint64 i = 0; | ||
uint64 start_index = start / 8; | ||
uint64 start_bit = start % 8; | ||
uint64 end_index = (start + nbits) / 8; | ||
uint64 end_bit = (start + nbits) % 8; | ||
|
||
if (start_index == end_index) { | ||
for (i = start; i < start + nbits; i++) | ||
if (set) | ||
hax_test_and_set_bit(i, (uint64 *)bitmap); | ||
else | ||
hax_test_and_clear_bit(i, (uint64 *)bitmap); | ||
return; | ||
} | ||
|
||
for (i = start; i < (start_index + 1) * 8; i++) | ||
if (set) | ||
hax_test_and_set_bit(i, (uint64 *)bitmap); | ||
else | ||
hax_test_and_clear_bit(i, (uint64 *)bitmap); | ||
|
||
for (i = end_index * 8; i < start + nbits; i++) | ||
if (set) | ||
hax_test_and_set_bit(i, (uint64 *)bitmap); | ||
else | ||
hax_test_and_clear_bit(i, (uint64 *)bitmap); | ||
|
||
for (i = start_index + 1; i < end_index; i++) | ||
if (set) | ||
bitmap[i] = 0xFF; | ||
else | ||
bitmap[i] = 0; | ||
} | ||
|
||
/*
 * Returns non-zero if guest page frame |gfn| is marked protected in the
 * GPA protection bitmap, or 0 otherwise — including when no bitmap has
 * been allocated yet or |gfn| is beyond the bitmap's range.
 */
int gpa_space_test_prot_bitmap(struct hax_gpa_space *gpa_space, uint64 gfn)
{
    struct prot_bitmap *pbm = &gpa_space->prot_bitmap;

    /* |pbm| is the address of an embedded member and can never be NULL;
     * the meaningful guard is whether the bitmap has been allocated. */
    if (!pbm->bitmap)
        return 0;

    if (gfn >= pbm->max_gpfn)
        return 0;

    return hax_test_bit(gfn, (uint64 *)pbm->bitmap);
}
|
||
/*
 * Checks whether any page in the 2MB chunk containing |gfn| is protected.
 * On a hit, stores the first protected gfn of that chunk in |*fault_gfn|
 * and returns 1; returns 0 if the whole chunk is unprotected.
 */
int gpa_space_chunk_protected(struct hax_gpa_space *gpa_space, uint64 gfn,
                              uint64 *fault_gfn)
{
    /* Round down to the first page frame of the enclosing chunk.
     * (Renamed from |__gfn|: identifiers beginning with a double
     * underscore are reserved by the C standard.) */
    uint64 chunk_start = gfn / HAX_CHUNK_NR_PAGES * HAX_CHUNK_NR_PAGES;

    for (gfn = chunk_start; gfn < chunk_start + HAX_CHUNK_NR_PAGES; gfn++) {
        if (gpa_space_test_prot_bitmap(gpa_space, gfn)) {
            *fault_gfn = gfn;
            return 1;
        }
    }

    return 0;
}
|
||
/*
 * Updates GPA protection for the range [start_gpa, start_gpa + len).
 * |flags| carries the permissions to allow: 0 means no access at all (the
 * range becomes protected, and its EPT entries are invalidated so the next
 * guest access faults), while HAX_GPA_PROT_ALL restores full access.
 * Partial r/w/e combinations are not supported yet.
 * Returns 0 on success, -EINVAL on bad arguments, or -ENOMEM if the
 * protection bitmap cannot be grown to cover the range.
 */
int gpa_space_protect_range(struct hax_gpa_space *gpa_space,
                            struct hax_ept_tree *ept_tree,
                            uint64 start_gpa, uint64 len, int8 flags)
{
    uint64 gfn;
    uint64 npages;  /* uint64, not uint: avoid truncating huge ranges */
    int ret;

    if (len == 0) {
        hax_error("%s: len = 0\n", __func__);
        return -EINVAL;
    }

    /* Specific r/w/e protection combinations are not supported yet */
    if (flags != 0 && (flags & HAX_GPA_PROT_MASK) != HAX_GPA_PROT_ALL)
        return -EINVAL;

    gfn = start_gpa >> PG_ORDER_4K;
    npages = (len + PAGE_SIZE_4K - 1) >> PG_ORDER_4K;

    /* Make sure the bitmap covers the whole range before touching its
     * bits; gpa_space_set_prot_bitmap() does no bounds checking. */
    ret = gpa_space_adjust_prot_bitmap(gpa_space, gfn + npages);
    if (ret != 0)
        return ret;

    /* flags == 0 (no permissions) means "protect": set the bits */
    gpa_space_set_prot_bitmap(gfn, npages, gpa_space->prot_bitmap.bitmap,
                              !flags);

    if (!flags)
        ept_tree_invalidate_entries(ept_tree, gfn, npages);

    return 0;
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,6 +59,7 @@ int hax_vm_set_ram2(struct vm_t *vm, struct hax_set_ram_info2 *info); | |
int hax_vm_free_all_ram(struct vm_t *vm); | ||
int in_pmem_range(struct hax_vcpu_mem *pmem, uint64_t va); | ||
int hax_vm_add_ramblock(struct vm_t *vm, uint64_t start_uva, uint64_t size); | ||
int hax_vm_gpa_prot(struct vm_t *vm, struct hax_gpa_prot_info *info); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about |
||
|
||
void * get_vm_host(struct vm_t *vm); | ||
int set_vm_host(struct vm_t *vm, void *vm_host); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,7 @@ | |
|
||
#define HAX_CHUNK_SHIFT 21 | ||
#define HAX_CHUNK_SIZE (1U << HAX_CHUNK_SHIFT) // 2MB | ||
#define HAX_CHUNK_NR_PAGES (HAX_CHUNK_SIZE/PAGE_SIZE_4K) | ||
|
||
typedef struct hax_chunk { | ||
hax_memdesc_user memdesc; | ||
|
@@ -80,12 +81,20 @@ typedef struct hax_memslot { | |
// Used only by memslot_set_mapping(), not by any hax_memslot | ||
#define HAX_MEMSLOT_INVALID 0x80 | ||
|
||
typedef struct prot_bitmap { | ||
// R/W/E Protection Bitmap | ||
uint8 *bitmap; | ||
// Last gpfn | ||
uint64 max_gpfn; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are correct. The max only means "max known so far". End is better. |
||
} prot_bitmap; | ||
|
||
typedef struct hax_gpa_space { | ||
// TODO: Add a lock to prevent concurrent accesses to |ramblock_list| and | ||
// |memslot_list| | ||
hax_list_head ramblock_list; | ||
hax_list_head memslot_list; | ||
hax_list_head listener_list; | ||
prot_bitmap prot_bitmap; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe rename the field to |
||
} hax_gpa_space; | ||
|
||
typedef struct hax_gpa_space_listener hax_gpa_space_listener; | ||
|
@@ -298,6 +307,21 @@ void gpa_space_unmap_page(hax_gpa_space *gpa_space, hax_kmap_user *kmap); | |
// MMIO. | ||
uint64 gpa_space_get_pfn(hax_gpa_space *gpa_space, uint64 gfn, uint8 *flags); | ||
|
||
int gpa_space_protect_range(struct hax_gpa_space *gpa_space, | ||
struct hax_ept_tree *ept_tree, | ||
uint64 start_gpa, uint64 len, int8 flags); | ||
|
||
// Adjust gpa protection bitmap size. Once a bigger gfn is met, allocate | ||
// a new bitmap and copy the old bitmap contents. | ||
// |gpa_space|: The GPA space of the guest. | ||
// |max_gpfn|: max gfn that the bitmap can hold. | ||
int gpa_space_adjust_prot_bitmap(struct hax_gpa_space *gpa_space, | ||
uint64 max_gpfn); | ||
|
||
int gpa_space_test_prot_bitmap(struct hax_gpa_space *gpa_space, uint64 gfn); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice! |
||
int gpa_space_chunk_protected(struct hax_gpa_space *gpa_space, uint64 gfn, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Return type: |
||
uint64 *fault_gfn); | ||
|
||
// Allocates a |hax_chunk| for the given UVA range, and pins the corresponding | ||
// host page frames in RAM. | ||
// |base_uva|: The start of the UVA range. Should be page-aligned. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,7 +109,8 @@ enum exit_status { | |
HAX_EXIT_HLT, | ||
HAX_EXIT_STATECHANGE, | ||
HAX_EXIT_PAUSED, | ||
HAX_EXIT_FAST_MMIO | ||
HAX_EXIT_FAST_MMIO, | ||
HAX_EXIT_GPAPROT | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good! |
||
}; | ||
|
||
enum run_flag { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -128,6 +128,11 @@ struct hax_tunnel { | |
struct { | ||
paddr_t gla; | ||
} mmio; | ||
struct { | ||
paddr_t gpa; | ||
uint8_t access; | ||
uint8_t pad[7]; | ||
} gpaprot; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about |
||
struct { | ||
paddr_t dummy; | ||
} state; | ||
|
@@ -169,6 +174,7 @@ struct hax_module_version { | |
#define HAX_CAP_64BIT_RAMBLOCK (1 << 3) | ||
#define HAX_CAP_64BIT_SETRAM (1 << 4) | ||
#define HAX_CAP_TUNNEL_PAGE (1 << 5) | ||
#define HAX_CAP_GPA_PROTECTION (1 << 6) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could actually return both the GPA and HVA to QEMU at every |
||
|
||
struct hax_capabilityinfo { | ||
/* | ||
|
@@ -236,6 +242,14 @@ struct hax_set_ram_info2 { | |
uint64_t reserved2; | ||
} PACKED; | ||
|
||
#define HAX_GPA_PROT_MASK 0x7 // one bit each for r/w/e | ||
#define HAX_GPA_PROT_ALL 0x7 // disable r/w/e all | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was confused at first whether "disable all" means to disallow or allow all accesses. So how about
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree. |
||
struct hax_gpa_prot_info { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. |
||
uint64_t pa_start; | ||
uint64_t size; | ||
uint64_t flags; | ||
} PACKED; | ||
|
||
/* This interface is support only after API version 2 */ | ||
struct hax_qemu_version { | ||
/* Current API version in QEMU*/ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -116,10 +116,10 @@ static inline void hax_mutex_free(hax_mutex lock) | |
} | ||
|
||
/* Return true if the bit is set already */ | ||
static int hax_test_and_set_bit(int bit, uint64_t *memory) | ||
static int hax_test_and_set_bit(uint64 bit, uint64_t *memory) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have concerns about this change. Is it safe to use a 64-bit integer as an array index ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed. |
||
{ | ||
long *base = (long *)memory; | ||
long nr_long; | ||
uint64 nr_long; | ||
long bitoffset_in_long; | ||
long bits_per_long = sizeof(long) * 8; | ||
|
||
|
@@ -139,10 +139,10 @@ static int hax_test_and_set_bit(int bit, uint64_t *memory) | |
* Return true if the bit is cleared already | ||
* Notice that InterlockedBitTestAndReset return original value in that bit | ||
*/ | ||
static int hax_test_and_clear_bit(int bit, uint64_t *memory) | ||
static int hax_test_and_clear_bit(uint64 bit, uint64_t *memory) | ||
{ | ||
long * base = (long *)memory; | ||
long nr_long; | ||
uint64 nr_long; | ||
long bitoffset_in_long; | ||
long bits_per_long = sizeof(long) * 8; | ||
|
||
|
@@ -159,9 +159,9 @@ static int hax_test_and_clear_bit(int bit, uint64_t *memory) | |
} | ||
|
||
/* Don't care for the big endian situation */ | ||
static bool hax_test_bit(int bit, uint64_t *memory) | ||
static bool hax_test_bit(uint64 bit, uint64_t *memory) | ||
{ | ||
int byte = bit / 8; | ||
uint64 byte = bit / 8; | ||
unsigned char *p; | ||
int offset = bit % 8; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So this is the intrusive change you were talking about :) It didn't look very intrusive to me until I peeked at the second patch. I need to spend more time tomorrow to understand the rationale better and think about possible alternatives. But maybe I can ask you this for now: if we don't have to worry about chunks (see my other long comment below), do we still need this extra parameter?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, yes.
I also think about something like:
__try {
hax_vcpu_run();
--------> call RaiseException when we failed to pass protection check
} __except (xxxx) {
//fill related htun and exit
}
But I have two concerns, and I do not know the answers:
1> It seems too Windows-specific — is the same approach possible on Mac? (I have not worked on Mac so far.) I believe it is OK not to implement this on Mac, from Google's point of view.
2> I am not sure what exception I should raise when the protection-bitmap check fails. My concern is that if I choose an existing exception code (page fault, for example) and later a real page fault happens due to a bug in the code, it could be misinterpreted as our RAM protection fault. And I do not know whether a user-defined exception code is acceptable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My question was probably not clear enough, so you must have mistaken it for something else - but thanks to that, I get to see this very interesting idea in your answer :)
I totally understand that Google's priority is to bring on-demand snapshot loading to Windows, so it's fine to leave out Mac for the QEMU (Android Emulator) side patch. But I still hope the new API we add this time works on both Windows and Mac, with the same behavior, as all the existing HAXM APIs do. So I agree with you that any Windows-specific API design should be avoided.
Back to my original question: I know this new `fault_gfn` parameter is a result of the call to `gpa_space_chunk_protected()`. My point is, if we don't worry about chunks and just stick to processing (filling) one page per exit to QEMU (via `HAX_EXIT_PAGEFAULT`), the page-fault GPA we return to QEMU will be the same as `gpa`, so there's no need to call `gpa_space_chunk_protected()` just because it might return a different GPA.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
By the way, I've thought about handling HAXM's own accesses to guest RAM, and your "intrusive" approach makes more sense to me. But I also came to realize how complicated the problem is - for example, since it's possible for HAXM to access a protected GPA in the middle of its MMIO or PIO handler (instruction emulation) code, we have to make sure the code before the GPA access does nothing "irreversible", because as soon as we realize the GPA is protected, we'll abort the handler and return to QEMU with the new exit reason (instead of
HAX_EXIT_FAST_MMIO
orHAX_EXIT_IO
), and must be able to emulate the same MMIO/PIO instruction again when QEMU re-executes it.