From c48de3e45e6b2308e24a54f83cd464051addd69e Mon Sep 17 00:00:00 2001 From: Haitao Shan Date: Wed, 24 Jan 2018 14:58:39 -0800 Subject: [PATCH 1/2] IOCTL_GPA_PROT implementation v1. --- core/ept2.c | 6 +- core/gpa_space.c | 126 ++++++++++++++++++++++++++++++ core/hax.c | 1 + core/include/ept2.h | 3 +- core/include/hax_core_interface.h | 1 + core/include/memory.h | 24 ++++++ core/include/vm.h | 3 +- core/memory.c | 9 +++ core/vcpu.c | 9 ++- include/hax_interface.h | 14 ++++ include/windows/hax_windows.h | 12 +-- windows/hax_entry.c | 15 ++++ windows/hax_entry.h | 4 + 13 files changed, 217 insertions(+), 10 deletions(-) diff --git a/core/ept2.c b/core/ept2.c index 8bf00a1f..5d60f150 100644 --- a/core/ept2.c +++ b/core/ept2.c @@ -70,7 +70,8 @@ void ept_handle_mapping_changed(hax_gpa_space_listener *listener, } int ept_handle_access_violation(hax_gpa_space *gpa_space, hax_ept_tree *tree, - exit_qualification_t qual, uint64 gpa) + exit_qualification_t qual, uint64 gpa, + uint64 *fault_gfn) { uint combined_perm; uint64 gfn; @@ -102,6 +103,9 @@ int ept_handle_access_violation(hax_gpa_space *gpa_space, hax_ept_tree *tree, return 0; } + if (gpa_space_chunk_protected(gpa_space, gfn, fault_gfn)) + return -EPERM; + // The faulting GPA maps to RAM/ROM is_rom = slot->flags & HAX_MEMSLOT_READONLY; offset_within_slot = gpa - (slot->base_gfn << PG_ORDER_4K); diff --git a/core/gpa_space.c b/core/gpa_space.c index 96e98b11..49abade1 100644 --- a/core/gpa_space.c +++ b/core/gpa_space.c @@ -33,6 +33,7 @@ #include "../include/hax.h" #include "include/paging.h" #include "../include/hax_host_mem.h" +#include "ept2.h" int gpa_space_init(hax_gpa_space *gpa_space) { @@ -59,6 +60,13 @@ int gpa_space_init(hax_gpa_space *gpa_space) return ret; } +static uint64 gpa_space_prot_bitmap_size(uint64 npages) +{ + uint64 bitmap_size = (npages + 7)/8; + bitmap_size += 8; + return bitmap_size; +} + void gpa_space_free(hax_gpa_space *gpa_space) { hax_gpa_space_listener *listener, *tmp; @@ -75,6 +83,9 @@ void gpa_space_free(hax_gpa_space *gpa_space) hax_gpa_space_listener, entry) { hax_list_del(&listener->entry); } + if (gpa_space->prot_bitmap.bitmap) + hax_vfree(gpa_space->prot_bitmap.bitmap, + gpa_space_prot_bitmap_size(gpa_space->prot_bitmap.max_gpfn)); } void gpa_space_add_listener(hax_gpa_space *gpa_space, @@ -346,3 +357,118 @@ uint64 gpa_space_get_pfn(hax_gpa_space *gpa_space, uint64 gfn, uint8 *flags) return pfn; } + +int gpa_space_adjust_prot_bitmap(hax_gpa_space *gpa_space, uint64 max_gpfn) +{ + prot_bitmap *pb = &gpa_space->prot_bitmap; + uint8 *bmold = pb->bitmap, *bmnew = NULL; + + /* Bitmap size only grows until it is destroyed */ + if (max_gpfn <= pb->max_gpfn) + return 0; + + bmnew = hax_vmalloc(gpa_space_prot_bitmap_size(max_gpfn), HAX_MEM_NONPAGE); + if (!bmnew) { + hax_error("%s: Not enought memory for new protection bitmap\n", + __func__); + return -ENOMEM; + } + pb->bitmap = bmnew; + if (bmold) { + memcpy(bmnew, bmold, gpa_space_prot_bitmap_size(pb->max_gpfn)); + hax_vfree(bmold, gpa_space_prot_bitmap_size(pb->max_gpfn)); + } + pb->max_gpfn = max_gpfn; + return 0; +} + +static void gpa_space_set_prot_bitmap(uint64 start, uint64 nbits, + uint8 *bitmap, bool set) +{ + uint64 i = 0; + uint64 start_index = start / 8; + uint64 start_bit = start % 8; + uint64 end_index = (start + nbits) / 8; + uint64 end_bit = (start + nbits) % 8; + + if (start_index == end_index) { + for (i = start; i < start + nbits; i++) + if (set) + hax_test_and_set_bit(i, (uint64 *)bitmap); + else + hax_test_and_clear_bit(i, (uint64 *)bitmap); + 
return; + } + + for (i = start; i < (start_index + 1) * 8; i++) + if (set) + hax_test_and_set_bit(i, (uint64 *)bitmap); + else + hax_test_and_clear_bit(i, (uint64 *)bitmap); + + for (i = end_index * 8; i < start + nbits; i++) + if (set) + hax_test_and_set_bit(i, (uint64 *)bitmap); + else + hax_test_and_clear_bit(i, (uint64 *)bitmap); + + for (i = start_index + 1; i < end_index; i++) + if (set) + bitmap[i] = 0xFF; + else + bitmap[i] = 0; +} + +int gpa_space_test_prot_bitmap(struct hax_gpa_space *gpa_space, uint64 gfn) +{ + struct prot_bitmap *pbm = &gpa_space->prot_bitmap; + + if (!pbm) + return 0; + + if (gfn >= pbm->max_gpfn) + return 0; + + return hax_test_bit(gfn, (uint64 *)pbm->bitmap); +} + +int gpa_space_chunk_protected(struct hax_gpa_space *gpa_space, uint64 gfn, + uint64 *fault_gfn) +{ + uint64 __gfn = gfn / HAX_CHUNK_NR_PAGES * HAX_CHUNK_NR_PAGES; + for (gfn = __gfn; gfn < __gfn + HAX_CHUNK_NR_PAGES; gfn++) + if (gpa_space_test_prot_bitmap(gpa_space, gfn)) { + *fault_gfn = gfn; + return 1; + } + + return 0; +} + +int gpa_space_protect_range(struct hax_gpa_space *gpa_space, + struct hax_ept_tree *ept_tree, + uint64 start_gpa, uint64 len, int8 flags) +{ + uint64 gfn; + uint npages; + hax_memslot *slot; + + if (len == 0) { + hax_error("%s: len = 0\n", __func__); + return -EINVAL; + } + + /* Did not support specific prot on r/w/e now */ + if (flags != 0 && (flags & HAX_GPA_PROT_MASK) != HAX_GPA_PROT_ALL) + return -EINVAL; + + gfn = start_gpa >> PG_ORDER_4K; + npages = (len + PAGE_SIZE_4K - 1) >> PG_ORDER_4K; + + gpa_space_set_prot_bitmap(gfn, npages, gpa_space->prot_bitmap.bitmap, !flags); + + if (!flags) + ept_tree_invalidate_entries(ept_tree, gfn, npages); + + return 0; +} diff --git a/core/hax.c b/core/hax.c index 244538d9..00bca3ce 100644 --- a/core/hax.c +++ b/core/hax.c @@ -347,6 +347,7 @@ int hax_get_capability(void *buf, int bufLeng, int *outLength) cap->winfo |= HAX_CAP_64BIT_SETRAM; #endif cap->winfo |= HAX_CAP_TUNNEL_PAGE; + cap->winfo |= HAX_CAP_GPA_PROTECTION; if (cpu_data->vmx_info._ept_cap) { cap->winfo |= HAX_CAP_EPT; } diff --git a/core/include/ept2.h b/core/include/ept2.h index 4d248ff3..42788e91 100644 --- a/core/include/ept2.h +++ b/core/include/ept2.h @@ -249,7 +249,8 @@ void ept_handle_mapping_changed(hax_gpa_space_listener *listener, // present, but the access violates the permissions it allows. // -ENOMEM: Memory allocation/mapping error. int ept_handle_access_violation(hax_gpa_space *gpa_space, hax_ept_tree *tree, - exit_qualification_t qual, uint64 gpa); + exit_qualification_t qual, uint64 gpa, + uint64 *fault_gfn); // Handles an EPT misconfiguration caught by hardware while it tries to // translate a GPA. 
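
Note on the bitmap logic introduced in gpa_space.c: the range-set and chunk-test paths are easier to follow with a small standalone model. The sketch below is illustrative only; the helper names (prot_bitmap_set_range, prot_bitmap_test, chunk_protected) are hypothetical, it uses plain non-atomic byte operations instead of hax_test_and_set_bit()/hax_test_and_clear_bit(), and CHUNK_NR_PAGES simply mirrors HAX_CHUNK_NR_PAGES (2MB / 4KB = 512).

#include <stdint.h>
#include <stdio.h>

#define CHUNK_NR_PAGES 512              /* 2MB chunk / 4KB page, as HAX_CHUNK_NR_PAGES */

/* Mark or clear one protection bit per GFN in [start, start + nbits). */
static void prot_bitmap_set_range(uint8_t *bm, uint64_t start, uint64_t nbits, int set)
{
    for (uint64_t i = start; i < start + nbits; i++) {
        if (set)
            bm[i / 8] |= (uint8_t)(1 << (i % 8));
        else
            bm[i / 8] &= (uint8_t)~(1 << (i % 8));
    }
}

static int prot_bitmap_test(const uint8_t *bm, uint64_t gfn)
{
    return (bm[gfn / 8] >> (gfn % 8)) & 1;
}

/* Chunk-level check mirroring gpa_space_chunk_protected(): an EPT violation is
 * reported as protected if any page in the faulting GFN's 2MB chunk is marked,
 * because EPT mappings are populated at chunk granularity. */
static int chunk_protected(const uint8_t *bm, uint64_t gfn, uint64_t *fault_gfn)
{
    uint64_t base = gfn / CHUNK_NR_PAGES * CHUNK_NR_PAGES;
    for (uint64_t i = base; i < base + CHUNK_NR_PAGES; i++) {
        if (prot_bitmap_test(bm, i)) {
            *fault_gfn = i;
            return 1;
        }
    }
    return 0;
}

int main(void)
{
    uint8_t bm[4096] = { 0 };           /* covers 32768 GFNs = 128MB of guest RAM */
    uint64_t fault_gfn = 0;
    int hit;

    prot_bitmap_set_range(bm, 0x1000, 16, 1);    /* protect 16 pages at GFN 0x1000 */
    printf("GFN 0x1005 protected: %d\n", prot_bitmap_test(bm, 0x1005));

    /* GFN 0x1100 itself is clear, but it shares the 2MB chunk [0x1000, 0x1200),
     * so the chunk-level check still reports a hit and returns GFN 0x1000. */
    hit = chunk_protected(bm, 0x1100, &fault_gfn);
    printf("chunk hit: %d, fault_gfn=0x%llx\n", hit, (unsigned long long)fault_gfn);
    return 0;
}
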
diff --git a/core/include/hax_core_interface.h b/core/include/hax_core_interface.h index 9c9ed82a..21a77a87 100644 --- a/core/include/hax_core_interface.h +++ b/core/include/hax_core_interface.h @@ -59,6 +59,7 @@ int hax_vm_set_ram2(struct vm_t *vm, struct hax_set_ram_info2 *info); int hax_vm_free_all_ram(struct vm_t *vm); int in_pmem_range(struct hax_vcpu_mem *pmem, uint64_t va); int hax_vm_add_ramblock(struct vm_t *vm, uint64_t start_uva, uint64_t size); +int hax_vm_gpa_prot(struct vm_t *vm, struct hax_gpa_prot_info *info); void * get_vm_host(struct vm_t *vm); int set_vm_host(struct vm_t *vm, void *vm_host); diff --git a/core/include/memory.h b/core/include/memory.h index 5e350f10..958205f3 100644 --- a/core/include/memory.h +++ b/core/include/memory.h @@ -36,6 +36,7 @@ #define HAX_CHUNK_SHIFT 21 #define HAX_CHUNK_SIZE (1U << HAX_CHUNK_SHIFT) // 2MB +#define HAX_CHUNK_NR_PAGES (HAX_CHUNK_SIZE/PAGE_SIZE_4K) typedef struct hax_chunk { hax_memdesc_user memdesc; @@ -80,12 +81,20 @@ typedef struct hax_memslot { // Used only by memslot_set_mapping(), not by any hax_memslot #define HAX_MEMSLOT_INVALID 0x80 +typedef struct prot_bitmap { + // R/W/E Protection Bitmap + uint8 *bitmap; + // Last gpfn + uint64 max_gpfn; +} prot_bitmap; + typedef struct hax_gpa_space { // TODO: Add a lock to prevent concurrent accesses to |ramblock_list| and // |memslot_list| hax_list_head ramblock_list; hax_list_head memslot_list; hax_list_head listener_list; + prot_bitmap prot_bitmap; } hax_gpa_space; typedef struct hax_gpa_space_listener hax_gpa_space_listener; @@ -298,6 +307,21 @@ void gpa_space_unmap_page(hax_gpa_space *gpa_space, hax_kmap_user *kmap); // MMIO. uint64 gpa_space_get_pfn(hax_gpa_space *gpa_space, uint64 gfn, uint8 *flags); +int gpa_space_protect_range(struct hax_gpa_space *gpa_space, + struct hax_ept_tree *ept_tree, + uint64 start_gpa, uint64 len, int8 flags); + +// Adjust gpa protection bitmap size. Once a bigger gfn is met, allocate +// a new bitmap and copy the old bitmap contents. +// |gpa_space|: The GPA space of the guest. +// |max_gpfn|: max gfn that the bitmap can hold. +int gpa_space_adjust_prot_bitmap(struct hax_gpa_space *gpa_space, + uint64 max_gpfn); + +int gpa_space_test_prot_bitmap(struct hax_gpa_space *gpa_space, uint64 gfn); +int gpa_space_chunk_protected(struct hax_gpa_space *gpa_space, uint64 gfn, + uint64 *fault_gfn); + // Allocates a |hax_chunk| for the given UVA range, and pins the corresponding // host page frames in RAM. // |base_uva|: The start of the UVA range. Should be page-aligned. 
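
A quick sizing note for the prot_bitmap added to hax_gpa_space above: gpa_space_prot_bitmap_size() rounds the per-GFN bit count up to whole bytes and pads by 8 bytes, presumably so the 64-bit word accesses in hax_test_and_set_bit()/hax_test_and_clear_bit() stay in bounds for the last partial byte. The sketch below reproduces the arithmetic for a few guest RAM sizes; the RAM figures and the helper name are illustrative, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Same formula as gpa_space_prot_bitmap_size(): one bit per 4KB GFN,
 * rounded up to bytes, plus 8 bytes of slack for 64-bit word access. */
static uint64_t prot_bitmap_size(uint64_t npages)
{
    return (npages + 7) / 8 + 8;
}

int main(void)
{
    const uint64_t ram_mb[] = { 128, 2048, 8192 };   /* example guest RAM sizes */

    for (int i = 0; i < 3; i++) {
        uint64_t npages = ram_mb[i] * 1024 * 1024 / 4096;
        printf("%5llu MB guest => %8llu GFNs => %llu-byte bitmap\n",
               (unsigned long long)ram_mb[i],
               (unsigned long long)npages,
               (unsigned long long)prot_bitmap_size(npages));
    }
    return 0;
}

Even an 8GB guest needs only a ~256KB bitmap, which is why gpa_space_adjust_prot_bitmap() can afford to reallocate and copy eagerly from handle_set_ram() whenever a larger max GFN is seen.
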
diff --git a/core/include/vm.h b/core/include/vm.h index 1aaad461..64d21bdd 100644 --- a/core/include/vm.h +++ b/core/include/vm.h @@ -109,7 +109,8 @@ enum exit_status { HAX_EXIT_HLT, HAX_EXIT_STATECHANGE, HAX_EXIT_PAUSED, - HAX_EXIT_FAST_MMIO + HAX_EXIT_FAST_MMIO, + HAX_EXIT_GPAPROT }; enum run_flag { diff --git a/core/memory.c b/core/memory.c index a5550d61..c724feef 100644 --- a/core/memory.c +++ b/core/memory.c @@ -261,6 +261,7 @@ static int handle_set_ram(struct vm_t *vm, uint64 start_gpa, uint64 size, gpa_space = &vm->gpa_space; start_gfn = start_gpa >> PG_ORDER_4K; npages = size >> PG_ORDER_4K; + gpa_space_adjust_prot_bitmap(gpa_space, start_gfn + npages); ret = memslot_set_mapping(gpa_space, start_gfn, npages, start_uva, flags); if (ret) { hax_error("%s: memslot_set_mapping() failed: ret=%d, start_gfn=0x%llx," @@ -376,6 +377,14 @@ int hax_vm_set_ram2(struct vm_t *vm, struct hax_set_ram_info2 *info) } #endif // CONFIG_HAX_EPT2 +int hax_vm_gpa_prot(struct vm_t *vm, struct hax_gpa_prot_info *info) +{ + uint8_t flags = info->flags; + + return gpa_space_protect_range(&vm->gpa_space, &vm->ept_tree, + info->pa_start, info->size, info->flags); +} + int hax_vcpu_setup_hax_tunnel(struct vcpu_t *cv, struct hax_tunnel_info *info) { int ret = -ENOMEM; diff --git a/core/vcpu.c b/core/vcpu.c index 03945591..e902a167 100644 --- a/core/vcpu.c +++ b/core/vcpu.c @@ -3958,6 +3958,7 @@ static int exit_ept_violation(struct vcpu_t *vcpu, struct hax_tunnel *htun) paddr_t gpa; struct decode dec; int ret = 0; + uint64 fault_gfn; htun->_exit_reason = vmx(vcpu, exit_reason).basic_reason; @@ -3972,7 +3973,13 @@ static int exit_ept_violation(struct vcpu_t *vcpu, struct hax_tunnel *htun) #ifdef CONFIG_HAX_EPT2 ret = ept_handle_access_violation(&vcpu->vm->gpa_space, &vcpu->vm->ept_tree, - *qual, gpa); + *qual, gpa, &fault_gfn); + if (ret == -EPERM) { + htun->gpaprot.access = (qual->raw >> 3) & 7; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->_exit_status = HAX_EXIT_GPAPROT; + return HAX_EXIT; + } if (ret == -EACCES) { /* * For some reason, during boot-up, Chrome OS guests make hundreds of diff --git a/include/hax_interface.h b/include/hax_interface.h index a4d5b942..4451f650 100644 --- a/include/hax_interface.h +++ b/include/hax_interface.h @@ -128,6 +128,11 @@ struct hax_tunnel { struct { paddr_t gla; } mmio; + struct { + paddr_t gpa; + uint8_t access; + uint8_t pad[7]; + } gpaprot; struct { paddr_t dummy; } state; @@ -169,6 +174,7 @@ struct hax_module_version { #define HAX_CAP_64BIT_RAMBLOCK (1 << 3) #define HAX_CAP_64BIT_SETRAM (1 << 4) #define HAX_CAP_TUNNEL_PAGE (1 << 5) +#define HAX_CAP_GPA_PROTECTION (1 << 6) struct hax_capabilityinfo { /* @@ -236,6 +242,14 @@ struct hax_set_ram_info2 { uint64_t reserved2; } PACKED; +#define HAX_GPA_PROT_MASK 0x7 // one bit each for r/w/e +#define HAX_GPA_PROT_ALL 0x7 // disable r/w/e all +struct hax_gpa_prot_info { + uint64_t pa_start; + uint64_t size; + uint64_t flags; +} PACKED; + /* This interface is support only after API version 2 */ struct hax_qemu_version { /* Current API version in QEMU*/ diff --git a/include/windows/hax_windows.h b/include/windows/hax_windows.h index 15e57bc0..d7f925e3 100644 --- a/include/windows/hax_windows.h +++ b/include/windows/hax_windows.h @@ -116,10 +116,10 @@ static inline void hax_mutex_free(hax_mutex lock) } /* Return true if the bit is set already */ -static int hax_test_and_set_bit(int bit, uint64_t *memory) +static int hax_test_and_set_bit(uint64 bit, uint64_t *memory) { long *base = (long *)memory; - long nr_long; + 
uint64 nr_long; long bitoffset_in_long; long bits_per_long = sizeof(long) * 8; @@ -139,10 +139,10 @@ static int hax_test_and_set_bit(int bit, uint64_t *memory) * Return true if the bit is cleared already * Notice that InterlockedBitTestAndReset return original value in that bit */ -static int hax_test_and_clear_bit(int bit, uint64_t *memory) +static int hax_test_and_clear_bit(uint64 bit, uint64_t *memory) { long * base = (long *)memory; - long nr_long; + uint64 nr_long; long bitoffset_in_long; long bits_per_long = sizeof(long) * 8; @@ -159,9 +159,9 @@ static int hax_test_and_clear_bit(int bit, uint64_t *memory) } /* Don't care for the big endian situation */ -static bool hax_test_bit(int bit, uint64_t *memory) +static bool hax_test_bit(uint64 bit, uint64_t *memory) { - int byte = bit / 8; + uint64 byte = bit / 8; unsigned char *p; int offset = bit % 8; diff --git a/windows/hax_entry.c b/windows/hax_entry.c index 572a39b1..5fa56bd4 100644 --- a/windows/hax_entry.c +++ b/windows/hax_entry.c @@ -581,6 +581,21 @@ NTSTATUS HaxVmControl(PDEVICE_OBJECT DeviceObject, struct hax_vm_windows *ext, hax_vm_set_qemuversion(cvm, info); break; } + case HAX_VM_IOCTL_GPA_PROT: { + struct hax_gpa_prot_info *info; + int res; + if (inBufLength < sizeof(struct hax_gpa_prot_info)) { + ret = STATUS_INVALID_PARAMETER; + goto done; + } + info = (struct hax_gpa_prot_info *)inBuf; + res = hax_vm_gpa_prot(cvm, info); + if (res) { + ret = res == -EINVAL ? STATUS_INVALID_PARAMETER + : STATUS_UNSUCCESSFUL; + } + break; + } default: ret = STATUS_INVALID_PARAMETER; break; diff --git a/windows/hax_entry.h b/windows/hax_entry.h index 5be9eadd..a9934ad1 100644 --- a/windows/hax_entry.h +++ b/windows/hax_entry.h @@ -161,4 +161,8 @@ extern PDRIVER_OBJECT HaxDriverObject; #define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION \ CTL_CODE(HAX_DEVICE_TYPE, 0x910, METHOD_BUFFERED, FILE_ANY_ACCESS) +/* API version 3.0 */ +#define HAX_VM_IOCTL_GPA_PROT \ + CTL_CODE(HAX_DEVICE_TYPE, 0x915, METHOD_BUFFERED, FILE_ANY_ACCESS) + #endif // HAX_WINDOWS_HAX_ENTRY_H_ From c835f4fbcbbfda790229a5bebbe8f5a0de8d43c4 Mon Sep 17 00:00:00 2001 From: Haitao Shan Date: Wed, 21 Mar 2018 15:31:53 -0700 Subject: [PATCH 2/2] Protection of HAXM's accessing guest GPA. --- core/gpa_space.c | 51 +++++++---- core/include/memory.h | 13 ++- core/include/page_walker.h | 2 +- core/include/vtlb.h | 18 ++-- core/page_walker.c | 47 +++++++--- core/ramblock.c | 1 + core/vcpu.c | 173 +++++++++++++++++++++++++++++-------- core/vtlb.c | 133 ++++++++++++++++++---------- include/hax.h | 4 +- 9 files changed, 318 insertions(+), 124 deletions(-) diff --git a/core/gpa_space.c b/core/gpa_space.c index 49abade1..b8c521d4 100644 --- a/core/gpa_space.c +++ b/core/gpa_space.c @@ -120,9 +120,9 @@ void gpa_space_remove_listener(hax_gpa_space *gpa_space, // hax_unmap_user_pages(). 
static int gpa_space_map_range(hax_gpa_space *gpa_space, uint64 start_gpa, int len, uint8 **buf, hax_kmap_user *kmap, - bool *writable) + bool *writable, uint64 *fault_gfn) { - uint64 gfn; + uint64 gfn, i; uint delta, size, npages; hax_memslot *slot; hax_ramblock *block; @@ -144,6 +144,14 @@ static int gpa_space_map_range(hax_gpa_space *gpa_space, uint64 start_gpa, delta = (uint) (start_gpa - (gfn << PG_ORDER_4K)); size = (uint) len + delta; npages = (size + PAGE_SIZE_4K - 1) >> PG_ORDER_4K; + + // Check gpa protection bitmap + for (i = gfn; i < gfn + npages;) + if (gpa_space_chunk_protected(gpa_space, i, fault_gfn)) + return -EPERM; + else + i = (i/HAX_CHUNK_NR_PAGES + 1)*HAX_CHUNK_NR_PAGES; + slot = memslot_find(gpa_space, gfn); if (!slot) { hax_error("%s: start_gpa=0x%llx is reserved for MMIO\n", __func__, @@ -194,7 +202,7 @@ static int gpa_space_map_range(hax_gpa_space *gpa_space, uint64 start_gpa, } int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, - uint8 *data) + uint8 *data, uint64 *fault_gfn) { uint8 *buf; hax_kmap_user kmap; @@ -205,10 +213,12 @@ int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, return -EINVAL; } - ret = gpa_space_map_range(gpa_space, start_gpa, len, &buf, &kmap, NULL); + ret = gpa_space_map_range(gpa_space, start_gpa, len, + &buf, &kmap, NULL, fault_gfn); if (ret < 0) { - hax_error("%s: gpa_space_map_range() failed: start_gpa=0x%llx," - " len=%d\n", __func__, start_gpa, len); + if (ret != -EPERM) + hax_error("%s: gpa_space_map_range() failed: start_gpa=0x%llx," + " len=%d\n", __func__, start_gpa, len); return ret; } @@ -232,7 +242,7 @@ int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, } int gpa_space_write_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, - uint8 *data) + uint8 *data, uint64 *fault_gfn) { uint8 *buf; hax_kmap_user kmap; @@ -245,10 +255,11 @@ int gpa_space_write_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, } ret = gpa_space_map_range(gpa_space, start_gpa, len, &buf, &kmap, - &writable); + &writable, fault_gfn); if (ret < 0) { - hax_error("%s: gpa_space_map_range() failed: start_gpa=0x%llx," - " len=%d\n", __func__, start_gpa, len); + if (ret != -EPERM) + hax_error("%s: gpa_space_map_range() failed: start_gpa=0x%llx," + " len=%d\n", __func__, start_gpa, len); return ret; } if (!writable) { @@ -276,24 +287,26 @@ int gpa_space_write_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, return nbytes; } -void * gpa_space_map_page(hax_gpa_space *gpa_space, uint64 gfn, - hax_kmap_user *kmap, bool *writable) +int gpa_space_map_page(hax_gpa_space *gpa_space, uint64 gfn, + hax_kmap_user *kmap, bool *writable, + void **kva, uint64 *fault_gfn) { uint8 *buf; int ret; - void *kva; assert(gpa_space != NULL); assert(kmap != NULL); ret = gpa_space_map_range(gpa_space, gfn << PG_ORDER_4K, PAGE_SIZE_4K, &buf, - kmap, writable); + kmap, writable, fault_gfn); if (ret < PAGE_SIZE_4K) { - hax_error("%s: gpa_space_map_range() returned %d\n", __func__, ret); - return NULL; + if (ret != -EPERM) + hax_error("%s: gpa_space_map_range() returned %d\n", __func__, ret); + *kva = NULL; + return ret; } - kva = (void *) buf; - assert(kva != NULL); - return kva; + *kva = (void *) buf; + assert(*kva != NULL); + return 0; } void gpa_space_unmap_page(hax_gpa_space *gpa_space, hax_kmap_user *kmap) diff --git a/core/include/memory.h b/core/include/memory.h index 958205f3..1ede31a1 100644 --- a/core/include/memory.h +++ b/core/include/memory.h @@ -254,13 +254,15 @@ void 
gpa_space_remove_listener(hax_gpa_space *gpa_space, // |len|: The number of bytes to copy. // |data|: The destination buffer to copy the bytes into, whose size must be at // least |len| bytes. +// |fault_gpn|: The faulting gpn as a result of gpa range protection. // Returns the number of bytes actually copied, or one of the following error // codes: // -EINVAL: Invalid input, e.g. |data| is NULL, or the GPA range specified by // |start_gpa| and |len| touches an MMIO region. // -ENOMEM: Unable to map the requested guest page frames into KVA space. +// -EPARM: Fault occurred due to violation of gpa range protection. int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, - uint8 *data); + uint8 *data, uint64 *fault_gfn); // Copies the given number of bytes from the given buffer to guest RAM. // |gpa_space|: The |hax_gpa_space| of the guest. @@ -270,6 +272,7 @@ int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, // |len|: The number of bytes to copy. // |data|: The source buffer to copy the bytes from, whose size must be at least // |len| bytes. +// |fault_gpn|: The faulting gpn as a result of gpa range protection. // Returns the number of bytes actually copied, or one of the following error // codes: // -EINVAL: Invalid input, e.g. |data| is NULL, or the GPA range specified by @@ -277,8 +280,9 @@ int gpa_space_read_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, // -ENOMEM: Unable to map the requested guest page frames into KVA space. // -EACCES: The GPA range specified by |start_gpa| and |len| touches a ROM // region. +// -EPARM: Fault occurred due to violation of gpa range protection. int gpa_space_write_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, - uint8 *data); + uint8 *data, uint64 *fault_gfn); // Maps the given guest page frame into KVA space, stores the KVA mapping in the // given buffer, and returns the KVA. The caller must destroy the KVA mapping @@ -291,8 +295,9 @@ int gpa_space_write_data(hax_gpa_space *gpa_space, uint64 start_gpa, int len, // page frame is writable (i.e. maps to RAM). Can be NULL if the // caller only wants to read from the page. // Returns NULL on error. -void * gpa_space_map_page(hax_gpa_space *gpa_space, uint64 gfn, - hax_kmap_user *kmap, bool *writable); +int gpa_space_map_page(hax_gpa_space *gpa_space, uint64 gfn, + hax_kmap_user *kmap, bool *writable, + void **kva, uint64 *fault_gfn); // Destroys the KVA mapping previously created by gpa_space_map_page(). 
void gpa_space_unmap_page(hax_gpa_space *gpa_space, hax_kmap_user *kmap); diff --git a/core/include/page_walker.h b/core/include/page_walker.h index 4dd48762..8903114d 100644 --- a/core/include/page_walker.h +++ b/core/include/page_walker.h @@ -65,6 +65,6 @@ typedef uint64 ADDRESS; uint32 pw_perform_page_walk(IN struct vcpu_t *vcpu, IN uint64 virt_addr, IN uint32 access, OUT uint64 *gpa_out, OUT uint *order, IN bool set_ad_bits, - IN bool is_fetch); + IN bool is_fetch, OUT uint64 *fault_gfn); #endif // HAX_CORE_PAGE_WALKER_H_ diff --git a/core/include/vtlb.h b/core/include/vtlb.h index 211f570d..da1d218b 100644 --- a/core/include/vtlb.h +++ b/core/include/vtlb.h @@ -43,7 +43,8 @@ enum { TF_WRITE = 0x00000002, // Fault due to write TF_USER = 0x00000004, // Fault due to user mode TF_RSVD = 0x00000008, // Fault due to reserved bit violation - TF_EXEC = 0x00000010 // Fault due to exec protection + TF_EXEC = 0x00000010, // Fault due to exec protection + TF_GPA_PROT= 0x00000020 // Fault due to gpa space protection }; #define EXECUTION_DISABLE_MASK 0x8000000000000000ULL @@ -100,16 +101,17 @@ void vcpu_invalidate_tlb_addr(struct vcpu_t *vcpu, vaddr_t va); uint vcpu_vtlb_alloc(struct vcpu_t *vcpu); void vcpu_vtlb_free(struct vcpu_t *vcpu); -bool handle_vtlb(struct vcpu_t *vcpu); +int handle_vtlb(struct vcpu_t *vcpu, uint64 *fault_gfn); uint vcpu_translate(struct vcpu_t *vcpu, vaddr_t va, uint access, paddr_t *pa, - uint64 *len, bool update); + uint64 *len, bool update, uint64 *fault_gfn); -uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, - uint32 dst_buflen, uint32 size, uint flag); -uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, +int vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, + uint32 dst_buflen, uint32 size, uint flag, + uint32 *cnt_read, uint64 *fault_gfn); +int vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, uint32 dst_buflen, const void *src, uint32 size, - uint flag); + uint flag, uint32 *cnt_write, uint64 *fault_gfn); #ifdef CONFIG_HAX_EPT2 /* @@ -127,7 +129,7 @@ uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, * -ENOMEM: Memory allocation/mapping error. 
*/ int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, - int len); + int len, uint64 *fault_gfn); #endif // CONFIG_HAX_EPT2 void hax_inject_page_fault(struct vcpu_t *vcpu, mword error_code); diff --git a/core/page_walker.c b/core/page_walker.c index 661abd63..91136776 100644 --- a/core/page_walker.c +++ b/core/page_walker.c @@ -561,8 +561,9 @@ static void pw_update_ad_bits( uint32 pw_perform_page_walk( IN struct vcpu_t *vcpu, IN uint64 virt_addr, IN uint32 access, OUT uint64 *gpa_out, OUT uint *order, IN bool set_ad_bits, - IN bool is_fetch) + IN bool is_fetch, OUT uint64 *fault_gfn) { + int ret; uint32 retval = TF_OK; uint64 efer_value = vcpu->state->_efer; bool is_nxe = ((efer_value & IA32_EFER_XD) != 0); @@ -617,9 +618,16 @@ uint32 pw_perform_page_walk( if (is_lme) { pml4t_gpa = first_table; #ifdef CONFIG_HAX_EPT2 - pml4t_hva = gpa_space_map_page(&vcpu->vm->gpa_space, - pml4t_gpa >> PG_ORDER_4K, - &pml4t_kmap, NULL); + ret = gpa_space_map_page(&vcpu->vm->gpa_space, + pml4t_gpa >> PG_ORDER_4K, + &pml4t_kmap, NULL, + &pml4t_hva, fault_gfn); + if (ret < 0) { + retval = TF_FAILED; + if (ret == -EPERM) + retval |= TF_GPA_PROT; + goto out; + } #else // !CONFIG_HAX_EPT2 #if (!defined(__MACH__) && !defined(_WIN64)) pml4t_hva = hax_map_gpfn(vcpu->vm, pml4t_gpa >> 12, is_kernel, cr3, @@ -653,9 +661,16 @@ uint32 pw_perform_page_walk( } #ifdef CONFIG_HAX_EPT2 - pdpt_hva = gpa_space_map_page(&vcpu->vm->gpa_space, + ret = gpa_space_map_page(&vcpu->vm->gpa_space, pdpt_gpa >> PG_ORDER_4K, - &pdpt_kmap, NULL); + &pdpt_kmap, NULL, + &pdpt_hva, fault_gfn); + if (ret < 0) { + retval = TF_FAILED; + if (ret == -EPERM) + retval |= TF_GPA_PROT; + goto out; + } #else // !CONFIG_HAX_EPT2 #if (!defined(__MACH__) && !defined(_WIN64)) pdpt_hva = hax_map_gpfn(vcpu->vm, pdpt_gpa >> 12, is_kernel, cr3, 1); @@ -729,8 +744,14 @@ uint32 pw_perform_page_walk( pd_gpa = is_pae ? 
pw_retrieve_phys_addr(&pdpte_val, is_pae) : first_table; #ifdef CONFIG_HAX_EPT2 - pd_hva = gpa_space_map_page(&vcpu->vm->gpa_space, pd_gpa >> PG_ORDER_4K, - &pd_kmap, NULL); + ret = gpa_space_map_page(&vcpu->vm->gpa_space, pd_gpa >> PG_ORDER_4K, + &pd_kmap, NULL, &pd_hva, fault_gfn); + if (ret < 0) { + retval = TF_FAILED; + if (ret == -EPERM) + retval |= TF_GPA_PROT; + goto out; + } #else // !CONFIG_HAX_EPT2 #if (!defined(__MACH__) && !defined(_WIN64)) pd_hva = hax_map_gpfn(vcpu->vm, pd_gpa >> 12, is_kernel, cr3, 2); @@ -807,8 +828,14 @@ uint32 pw_perform_page_walk( *order = PG_ORDER_4K; pt_gpa = pw_retrieve_phys_addr(&pde_val, is_pae); #ifdef CONFIG_HAX_EPT2 - pt_hva = gpa_space_map_page(&vcpu->vm->gpa_space, pt_gpa >> 12, &pt_kmap, - NULL); + ret = gpa_space_map_page(&vcpu->vm->gpa_space, pt_gpa >> 12, &pt_kmap, + NULL, &pt_hva, fault_gfn); + if (ret < 0) { + retval = TF_FAILED; + if (ret == -EPERM) + retval |= TF_GPA_PROT; + goto out; + } #else // !CONFIG_HAX_EPT2 #if (!defined(__MACH__) && !defined(_WIN64)) pt_hva = hax_map_gpfn(vcpu->vm, pt_gpa >> 12, is_kernel, cr3, 1); diff --git a/core/ramblock.c b/core/ramblock.c index 4fb1b4ba..d1294414 100644 --- a/core/ramblock.c +++ b/core/ramblock.c @@ -354,6 +354,7 @@ hax_chunk * ramblock_get_chunk(hax_ramblock *block, uint64 uva_offset, hax_error("%s: Failed to allocate chunk: ret=%d, index=%llu," " base_uva=0x%llx, size=0x%llx, was_clear=%d\n", __func__, ret, chunk_index, chunk_base_uva, chunk_size, was_clear); + DbgBreakPoint(); return NULL; } assert(chunk != NULL); diff --git a/core/vcpu.c b/core/vcpu.c index e902a167..9d4d73b3 100644 --- a/core/vcpu.c +++ b/core/vcpu.c @@ -1404,12 +1404,14 @@ static int write_low_bits(uint64 *pdst, uint64 src, uint8 size) return 0; } -static void handle_mmio_post(struct vcpu_t *vcpu, struct hax_fastmmio *hft) +static int handle_mmio_post(struct vcpu_t *vcpu, struct hax_fastmmio *hft, + uint64 *fault_gfn) { struct vcpu_state_t *state = vcpu->state; + int ret; if (hft->direction) - return; + return 0; if (vcpu->post_mmio.op == VCPU_POST_MMIO_WRITE_REG) { uint64 value; @@ -1438,29 +1440,45 @@ static void handle_mmio_post(struct vcpu_t *vcpu, struct hax_fastmmio *hft) hft->size); } else if (vcpu->post_mmio.op == VCPU_POST_MMIO_WRITE_MEM) { // Assume little-endian - if (!vcpu_write_guest_virtual(vcpu, vcpu->post_mmio.va, hft->size, - (uint8 *)&hft->value, hft->size, 0)) { - hax_panic_vcpu(vcpu, "Error writing %u bytes to guest RAM " + uint32 cnt_write; + if (!(ret = vcpu_write_guest_virtual(vcpu, vcpu->post_mmio.va, + hft->size, + (uint8 *)&hft->value, hft->size, + 0, &cnt_write, fault_gfn))) { + if (ret != -EPERM) + hax_panic_vcpu(vcpu, "Error writing %u bytes to guest RAM " "(va=0x%llx, value=0x%llx)\n", hft->size, vcpu->post_mmio.va, hft->value); + return ret; } } else { hax_warning("Unknown post-MMIO operation %d\n", vcpu->post_mmio.op); } + return 0; } -static void handle_io_post(struct vcpu_t *vcpu, struct hax_tunnel *htun) +static int handle_io_post(struct vcpu_t *vcpu, struct hax_tunnel *htun) { int size; struct vcpu_state_t *state = vcpu->state; if (htun->io._direction == HAX_IO_OUT) - return; + return 0; if (htun->io._flags == 1) { + int ret; + uint32 cnt_write; + uint64 fault_gfn; size = htun->io._count * htun->io._size; - if (!vcpu_write_guest_virtual(vcpu, htun->io._vaddr, IOS_MAX_BUFFER, - (void *)vcpu->io_buf, size, 0)) { + if (!(ret = vcpu_write_guest_virtual(vcpu, htun->io._vaddr, + IOS_MAX_BUFFER, (void *)vcpu->io_buf, + size, 0, &cnt_write, &fault_gfn))) { + if (ret == -EPERM) { + 
htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return ret; + } hax_panic_vcpu(vcpu, "Unexpected page fault, kill the VM!\n"); dump_vmcs(vcpu); } @@ -1483,12 +1501,14 @@ static void handle_io_post(struct vcpu_t *vcpu, struct hax_tunnel *htun) } } } + return 0; } int vcpu_execute(struct vcpu_t *vcpu) { struct hax_tunnel *htun = vcpu->tunnel; - int err = 0; + int err = 0, ret = 0; + uint64 fault_gfn; hax_mutex_lock(vcpu->tmutex); hax_debug("vcpu begin to run....\n"); @@ -1502,10 +1522,20 @@ int vcpu_execute(struct vcpu_t *vcpu) hax_debug("vcpu begin to run....in PE\n"); if (htun->_exit_status == HAX_EXIT_IO) { - handle_io_post(vcpu, htun); + ret = handle_io_post(vcpu, htun); + if (ret == -EPERM) + goto out; } if (htun->_exit_status == HAX_EXIT_FAST_MMIO) { - handle_mmio_post(vcpu, (struct hax_fastmmio *)vcpu->io_buf); + ret = handle_mmio_post(vcpu, (struct hax_fastmmio *)vcpu->io_buf, + &fault_gfn); + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + goto out; + } + } err = cpu_vmx_execute(vcpu, htun); vcpu_is_panic(vcpu); @@ -1690,9 +1720,9 @@ void vcpu_vmwrite_all(struct vcpu_t *vcpu, int force_tlb_flush) // a) The guest is running in EPT mode (see IASDM Vol. 3C 26.3.2.4), and // b) Preemption is enabled for the current CPU. // Returns 0 on success, < 0 on error. -static int vcpu_prepare_pae_pdpt(struct vcpu_t *vcpu) +static int vcpu_prepare_pae_pdpt(struct vcpu_t *vcpu, struct hax_tunnel *htun) { - uint64 cr3 = vcpu->state->_cr3; + uint64 cr3 = vcpu->state->_cr3, fault_gfn; int pdpt_size = (int)sizeof(vcpu->pae_pdptes); #ifdef CONFIG_HAX_EPT2 // CR3 is the GPA of the page-directory-pointer table. According to IASDM @@ -1706,7 +1736,13 @@ static int vcpu_prepare_pae_pdpt(struct vcpu_t *vcpu) // simply disabling IRQs). Therefore, it is not safe to call this function // with preemption disabled. ret = gpa_space_read_data(&vcpu->vm->gpa_space, gpa, pdpt_size, - (uint8 *)vcpu->pae_pdptes); + (uint8 *)vcpu->pae_pdptes, &fault_gfn); + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return ret; + } // The PAE PDPT cannot span two page frames if (ret != pdpt_size) { hax_error("%s: Failed to read PAE PDPT: cr3=0x%llx, ret=%d\n", __func__, @@ -2013,7 +2049,8 @@ static bool is_mmio_address(struct vcpu_t *vcpu, paddr_t gpa) } // Returns 0 on success, < 0 on error, > 0 if HAX_EXIT_MMIO is necessary. -static int vcpu_simple_decode(struct vcpu_t *vcpu, struct decode *dc) +static int vcpu_simple_decode(struct vcpu_t *vcpu, struct decode *dc, + uint64 *fault_gfn) { uint64 cs_base = vcpu->state->_cs.base; uint64 rip = vcpu->state->_rip; @@ -2037,6 +2074,7 @@ static int vcpu_simple_decode(struct vcpu_t *vcpu, struct decode *dc) int use_16bit_operands; uint8 operand_size; bool has_esc = false; // Whether opcode begins with 0f (escape opcode byte) + int ret = 0; if (!qemu_support_fastmmio(vcpu)) { hax_warning("vcpu_simple_decode: QEMU does not support fast MMIO!\n"); @@ -2050,7 +2088,10 @@ static int vcpu_simple_decode(struct vcpu_t *vcpu, struct decode *dc) // limit and privilege checks va = is_64bit_mode ? 
rip : cs_base + rip; #ifdef CONFIG_HAX_EPT2 - if (mmio_fetch_instruction(vcpu, va, instr, INSTR_MAX_LEN)) { + ret = mmio_fetch_instruction(vcpu, va, instr, INSTR_MAX_LEN, fault_gfn); + if (ret) { + if (ret == -EPERM) + return ret; hax_panic_vcpu(vcpu, "%s: mmio_fetch_instruction() failed: vcpu_id=%u," " gva=0x%llx (CS:IP=0x%llx:0x%llx), mmio_gpa=0x%llx\n", __func__, vcpu->vcpu_id, va, cs_base, rip, dc->gpa); @@ -2330,8 +2371,16 @@ static int vcpu_simple_decode(struct vcpu_t *vcpu, struct decode *dc) } src_pa = dst_pa = 0xffffffffffffffffULL; // TODO: Can vcpu_translate() fail? - vcpu_translate(vcpu, src_va, 0, &src_pa, NULL, true); - vcpu_translate(vcpu, dst_va, 0, &dst_pa, NULL, true); + ret = vcpu_translate(vcpu, src_va, 0, &src_pa, NULL, true, + fault_gfn); + if (ret == TF_FAILED | TF_GPA_PROT) { + return -EPERM; + } + ret = vcpu_translate(vcpu, dst_va, 0, &dst_pa, NULL, true, + fault_gfn); + if (ret == TF_FAILED | TF_GPA_PROT) { + return -EPERM; + } is_src_mmio = src_pa == dc->gpa || is_mmio_address(vcpu, src_pa); is_dst_mmio = dst_pa == dc->gpa || is_mmio_address(vcpu, dst_pa); if (is_src_mmio && is_dst_mmio) { @@ -2480,8 +2529,13 @@ static int hax_setup_fastmmio(struct vcpu_t *vcpu, struct hax_tunnel *htun, break; } case OPCODE_MOVS_MEM_TO_IOMEM: { + uint32 cnt_read; + uint64 fault_gfn; // Source operand (saved in dec->va) is a non-I/O GVA - if (!vcpu_read_guest_virtual(vcpu, dec->va, buf, 8, dec->size, 0)) { + if (!vcpu_read_guest_virtual(vcpu, dec->va, buf, 8, dec->size, 0, + &cnt_read, &fault_gfn)) { + // hax_simple_decode should have detect protection fault + // and we do not do it twice here. hax_panic_vcpu(vcpu, "Error reading %u bytes from guest RAM" " (va=0x%llx, DS:RSI=0x%llx:0x%llx)\n", dec->size, dec->va, vcpu->state->_ds.base, @@ -2618,7 +2672,7 @@ static int exit_exc_nmi(struct vcpu_t *vcpu, struct hax_tunnel *htun) { struct vcpu_state_t *state = vcpu->state; interruption_info_t exit_intr_info; - uint64 cr0; + uint64 cr0, fault_gfn; exit_intr_info.raw = vmx(vcpu, exit_intr_info).raw; htun->_exit_reason = vmx(vcpu, exit_reason).basic_reason; @@ -2631,22 +2685,41 @@ static int exit_exc_nmi(struct vcpu_t *vcpu, struct hax_tunnel *htun) } case EXC_PAGEFAULT: { if (vtlb_active(vcpu)) { - if (handle_vtlb(vcpu)) + int ret; + if ((ret = handle_vtlb(vcpu, &fault_gfn)) > 0) return HAX_RESUME; - + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return HAX_EXIT; + } paddr_t pa; struct decode dec; - int ret; vaddr_t cr2 = vmx(vcpu, exit_qualification).address; + uint64 fault_gfn; - ret = vcpu_simple_decode(vcpu, &dec); + ret = vcpu_simple_decode(vcpu, &dec, &fault_gfn); if (ret < 0) { + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 0x1; + return HAX_EXIT; + } // vcpu_simple_decode() has called hax_panic_vcpu() return HAX_RESUME; } else if (ret > 0) { handle_mem_fault(vcpu, htun); } else { - vcpu_translate(vcpu, cr2, 0, &pa, (uint64_t *)NULL, 0); + ret = vcpu_translate(vcpu, cr2, 0, &pa, (uint64_t *)NULL, + 0, &fault_gfn); + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 0x1; + return HAX_EXIT; + } dec.gpa = pa & 0xffffffff; if (hax_setup_fastmmio(vcpu, htun, &dec)) { // hax_setup_fastmmio() has called hax_panic_vcpu() @@ -3168,13 +3241,16 @@ static int exit_cr_access(struct vcpu_t *vcpu, struct hax_tunnel *htun) // Vol. 
3A 4.1.2, Figure 4-1) and needs to load its PDPTE // registers, or already in PAE mode and needs to reload those // registers - int ret = vcpu_prepare_pae_pdpt(vcpu); - if (ret) { - hax_panic_vcpu(vcpu, "vCPU #%u failed to (re)load PDPT for" + int ret = vcpu_prepare_pae_pdpt(vcpu, htun); + switch (ret) { + case HAX_EXIT: + return ret; + default: + hax_panic_vcpu(vcpu, "vCPU #%u failed to (re)load PDPT for" " EPT+PAE mode: ret=%d\n", vcpu->vcpu_id, ret); - dump_vmcs(vcpu); - return HAX_RESUME; + dump_vmcs(vcpu); + return HAX_RESUME; } } @@ -3331,6 +3407,9 @@ static int handle_string_io(struct vcpu_t *vcpu, exit_qualification_t *qual, struct vcpu_state_t *state = vcpu->state; uint real_size, count, required_size; vaddr_t start, rindex; + int ret; + uint32 cnt_read, cnt_write; + uint64 fault_gfn; htun->io._flags = 1; @@ -3368,14 +3447,30 @@ static int handle_string_io(struct vcpu_t *vcpu, exit_qualification_t *qual, } if (qual->io.direction == HAX_IO_OUT) { - if (!vcpu_read_guest_virtual(vcpu, start, vcpu->io_buf, IOS_MAX_BUFFER, - real_size, 0)) + if (!(ret = vcpu_read_guest_virtual(vcpu, start, vcpu->io_buf, + IOS_MAX_BUFFER, real_size, 0, + &cnt_read, &fault_gfn))) { + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return HAX_EXIT; + } return HAX_RESUME; + } } else { // HACK: Just ensure the buffer is mapped in the kernel. - if (!vcpu_write_guest_virtual(vcpu, start, IOS_MAX_BUFFER, vcpu->io_buf, - real_size, 0)) + if (!(ret = vcpu_write_guest_virtual(vcpu, start, IOS_MAX_BUFFER, + vcpu->io_buf, real_size, 0, + &cnt_write, &fault_gfn))) { + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return HAX_EXIT; + } return HAX_RESUME; + } } if (required_size <= IOS_MAX_BUFFER) { @@ -4005,8 +4100,14 @@ static int exit_ept_violation(struct vcpu_t *vcpu, struct hax_tunnel *htun) mmio_handler: #endif - ret = vcpu_simple_decode(vcpu, &dec); + ret = vcpu_simple_decode(vcpu, &dec, &fault_gfn); if (ret < 0) { + if (ret == -EPERM) { + htun->_exit_status = HAX_EXIT_GPAPROT; + htun->gpaprot.gpa = fault_gfn << PG_ORDER_4K; + htun->gpaprot.access = 1; + return HAX_EXIT; + } // vcpu_simple_decode() has called hax_panic_vcpu() return HAX_RESUME; } else if (ret > 0) { diff --git a/core/vtlb.c b/core/vtlb.c index f2b859ef..bf5f1549 100644 --- a/core/vtlb.c +++ b/core/vtlb.c @@ -72,7 +72,7 @@ static pagemode_t vcpu_get_pagemode(struct vcpu_t *vcpu); static pte64_t * vtlb_get_pde(hax_mmu_t *mmu, vaddr_t va, bool is_shadow); static uint32 vcpu_mmu_walk(struct vcpu_t *vcpu, vaddr_t va, uint32 access, paddr_t *pa, uint *order, uint64 *flags, - bool update, bool prefetch); + bool update, bool prefetch, uint64 *fault_gfn); static void vtlb_update_pde(pte64_t *pde, pte64_t *shadow_pde, struct hax_page *page) @@ -413,7 +413,8 @@ void vtlb_invalidate(hax_mmu_t *mmu) } static uint vtlb_handle_page_fault(struct vcpu_t *vcpu, pagemode_t guest_mode, - paddr_t pdir, vaddr_t va, uint32 access) + paddr_t pdir, vaddr_t va, uint32 access, + uint64 *fault_gfn) { uint r; paddr_t gpa; @@ -474,7 +475,7 @@ static uint vtlb_handle_page_fault(struct vcpu_t *vcpu, pagemode_t guest_mode, } case PM_2LVL: { r = vcpu_mmu_walk(vcpu, va, access, &gpa, &tlb.guest_order, - &tlb.flags, true, /*true*/false); + &tlb.flags, true, /*true*/false, fault_gfn); break; } default: { @@ -538,8 +539,9 @@ uint64 vtlb_get_cr3(struct vcpu_t *vcpu) */ static uint32 
vcpu_mmu_walk(struct vcpu_t *vcpu, vaddr_t va, uint32 access, paddr_t *pa, uint *order, uint64 *flags, - bool update, bool prefetch) + bool update, bool prefetch, uint64 *fault_gfn) { + int ret; uint lvl, idx; void *pte_va; #ifdef CONFIG_HAX_EPT2 @@ -585,9 +587,14 @@ static uint32 vcpu_mmu_walk(struct vcpu_t *vcpu, vaddr_t va, uint32 access, // Fetch the page table entry. idx = pte32_get_idx(lvl, va); #ifdef CONFIG_HAX_EPT2 - pte_va = gpa_space_map_page(&vcpu->vm->gpa_space, - gpt_base >> PG_ORDER_4K, &pte_kmap, - &writable); + ret = gpa_space_map_page(&vcpu->vm->gpa_space, + gpt_base >> PG_ORDER_4K, &pte_kmap, + &writable, &pte_va, fault_gfn); + if (ret < 0) { + if (ret == -EPERM) + return TF_FAILED | TF_GPA_PROT; + return TF_FAILED; + } #else // !CONFIG_HAX_EPT2 #if (!defined(__MACH__) && !defined(_WIN64)) pte_va = hax_map_gpfn(vcpu->vm, gpt_base >> 12, is_kernel, g_cr3, lvl); @@ -778,14 +785,15 @@ static uint32 vcpu_mmu_walk(struct vcpu_t *vcpu, vaddr_t va, uint32 access, return TF_OK; } -bool handle_vtlb(struct vcpu_t *vcpu) +int handle_vtlb(struct vcpu_t *vcpu, uint64 *fault_gfn) { uint32 access = vmx(vcpu, exit_exception_error_code); pagemode_t mode = vcpu_get_pagemode(vcpu); paddr_t pdir = vcpu->state->_cr3 & (mode == PM_PAE ? ~0x1fULL : ~0xfffULL); vaddr_t cr2 = vmx(vcpu, exit_qualification).address; - uint32 ret = vtlb_handle_page_fault(vcpu, mode, pdir, cr2, access); + uint32 ret = vtlb_handle_page_fault(vcpu, mode, pdir, cr2, access, + fault_gfn); hax_debug("handle vtlb fault @%llx\n", cr2); if (ret == 0) { @@ -798,6 +806,9 @@ bool handle_vtlb(struct vcpu_t *vcpu) return 0; } + if (ret == (TF_FAILED | TF_GPA_PROT)) + return -EPERM; + // Otherwise, inject PF into guest access = ret & (vcpu->state->_efer & IA32_EFER_XD ? 0x1f : 0x0f); vcpu->state->_cr2 = cr2; @@ -845,35 +856,45 @@ static inline void * mmio_map_guest_virtual_page_fast(struct vcpu_t *vcpu, return vcpu->mmio_fetch.kva; } -static void * mmio_map_guest_virtual_page_slow(struct vcpu_t *vcpu, uint64 gva, - hax_kmap_user *kmap) +static int mmio_map_guest_virtual_page_slow(struct vcpu_t *vcpu, uint64 gva, + hax_kmap_user *kmap, + void **addr, uint64 *fault_gfn) { uint64 gva_aligned = gva & pgmask(PG_ORDER_4K); uint64 gpa; uint ret; void *kva; - ret = vcpu_translate(vcpu, gva_aligned, 0, &gpa, NULL, true); + ret = vcpu_translate(vcpu, gva_aligned, 0, &gpa, NULL, true, fault_gfn); if (ret) { - hax_error("%s: vcpu_translate() returned 0x%x: vcpu_id=%u," + if (ret != -EPERM) + hax_error("%s: vcpu_translate() returned 0x%x: vcpu_id=%u," " gva=0x%llx\n", __func__, ret, vcpu->vcpu_id, gva); // TODO: Inject a guest page fault? 
- return NULL; + *addr = NULL; + return ret; } hax_debug("%s: gva=0x%llx => gpa=0x%llx, vcpu_id=0x%u\n", __func__, gva_aligned, gpa, vcpu->vcpu_id); - kva = gpa_space_map_page(&vcpu->vm->gpa_space, gpa >> PG_ORDER_4K, kmap, - NULL); + ret = gpa_space_map_page(&vcpu->vm->gpa_space, gpa >> PG_ORDER_4K, kmap, + NULL, &kva, fault_gfn); + if (ret == -EPERM) { + *addr = NULL; + return ret; + } if (!kva) { hax_error("%s: gpa_space_map_page() failed: vcpu_id=%u, gva=0x%llx," " gpa=0x%llx\n", __func__, vcpu->vcpu_id, gva, gpa); - return NULL; + *addr = NULL; + return ret; } - return kva; + *addr = kva; + return 0; } -int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, int len) +int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, + int len, uint64 *fault_gfn) { uint64 end_gva; uint8 *src_buf; @@ -885,12 +906,16 @@ int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, int len) assert(len > 0 && len <= 15); end_gva = gva + (uint)len - 1; if ((gva >> PG_ORDER_4K) != (end_gva >> PG_ORDER_4K)) { - uint32 ret; + int ret; + uint32 cnt_read; hax_info("%s: GVA range spans two pages: gva=0x%llx, len=%d\n", __func__, gva, len); - ret = vcpu_read_guest_virtual(vcpu, gva, buf, (uint)len, (uint)len, 0); + ret = vcpu_read_guest_virtual(vcpu, gva, buf, (uint)len, (uint)len, 0, + &cnt_read, fault_gfn); if (!ret) { + if (ret == -EPERM) + return ret; hax_error("%s: vcpu_read_guest_virtual() failed: vcpu_id=%u," " gva=0x%llx, len=%d\n", __func__, vcpu->vcpu_id, gva, len); @@ -901,8 +926,11 @@ int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, int len) src_buf = mmio_map_guest_virtual_page_fast(vcpu, gva, len); if (!src_buf) { - src_buf = mmio_map_guest_virtual_page_slow(vcpu, gva, - &vcpu->mmio_fetch.kmap); + int ret = mmio_map_guest_virtual_page_slow(vcpu, gva, + &vcpu->mmio_fetch.kmap, + &src_buf, fault_gfn); + if (ret == -EPERM) + return ret; if (!src_buf) { return -ENOMEM; } @@ -929,12 +957,14 @@ int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64 gva, uint8 *buf, int len) * If flag is 2, the memory read is for internal use. It does not update the * guest page tables. It returns the number of bytes read. 
*/ -uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, - uint32 dst_buflen, uint32 size, uint flag) +int vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, + uint32 dst_buflen, uint32 size, uint flag, + uint32 *cnt_read, uint64 *fault_gfn) { // TBD: use guest CPL for access checks char *dstp = dst; uint32 offset = 0; + int ret = 0; #ifdef CONFIG_HAX_EPT2 int len2; #else // !CONFIG_HAX_EPT2 @@ -957,10 +987,15 @@ uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, while (offset < size) { paddr_t gpa; uint64 len = size - offset; - uint r = vcpu_translate(vcpu, addr + offset, 0, &gpa, &len, flag != 2); + uint r = vcpu_translate(vcpu, addr + offset, 0, &gpa, &len, flag != 2, + fault_gfn); if (r != 0) { - if (flag != 0) - return offset; // Number of bytes successfully read + if (r == (TF_FAILED | TF_GPA_PROT)) + return -EPERM; + if (flag != 0) { + *cnt_read = offset; + return true; // Number of bytes successfully read + } if (r & TF_GP2HP) { hax_error("read_guest_virtual(%llx, %x) failed\n", addr, size); } @@ -976,12 +1011,14 @@ uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, // } #ifdef CONFIG_HAX_EPT2 len2 = gpa_space_read_data(&vcpu->vm->gpa_space, gpa, (int)len, - (uint8 *)(dstp + offset)); + (uint8 *)(dstp + offset), fault_gfn); if (len2 <= 0) { - hax_panic_vcpu( + if (len2 != -EPERM) + hax_panic_vcpu( vcpu, "read guest virtual error, gpa:0x%llx, len:0x%llx\n", gpa, len); - return false; + *cnt_read = 0; + return len2; } else { len = (uint64)len2; } @@ -1006,7 +1043,8 @@ uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, offset += len; } - return flag != 0 ? size : true; + *cnt_read = size; + return true; } /* @@ -1021,9 +1059,9 @@ uint32 vcpu_read_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, void *dst, * A flag value of 2 is implemented, but not used. It does not update the guest * page tables. It returns the number of bytes written. */ -uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, - uint32 dst_buflen, const void *src, uint32 size, - uint flag) +int vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, + uint32 dst_buflen, const void *src, uint32 size, + uint flag, uint32 *cnt_write, uint64 *fault_gfn) { // TODO: use guest CPL for access checks const char *srcp = src; @@ -1052,10 +1090,14 @@ uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, paddr_t gpa; uint64 len = size - offset; uint r = vcpu_translate(vcpu, addr + offset, TF_WRITE, &gpa, &len, - flag != 2); + flag != 2, fault_gfn); if (r != 0) { - if (flag != 0) - return offset; // Number of bytes successfully written + if (r == (TF_FAILED | TF_GPA_PROT)) + return -EPERM; + if (flag != 0) { + *cnt_write = offset; + return true; // Number of bytes successfully written + } if (r & TF_GP2HP) { hax_panic_vcpu(vcpu, "write_guest_virtual(%llx, %x) failed\n", addr, size); @@ -1068,12 +1110,14 @@ uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, } #ifdef CONFIG_HAX_EPT2 len2 = (uint64)gpa_space_write_data(&vcpu->vm->gpa_space, gpa, len, - (uint8 *)(srcp + offset)); + (uint8 *)(srcp + offset), fault_gfn); if (len2 <= 0) { - hax_panic_vcpu( + if (len2 != -EPERM) + hax_panic_vcpu( vcpu, "write guest virtual error, gpa:0x%llx, len:0x%llx\n", gpa, len); - return false; + *cnt_write = 0; + return len2; } else { len = len2; } @@ -1098,7 +1142,8 @@ uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, offset += len; } - return flag != 0 ? 
size : true; + *cnt_write = offset; + return true; } /* @@ -1112,7 +1157,7 @@ uint32 vcpu_write_guest_virtual(struct vcpu_t *vcpu, vaddr_t addr, * number otherwise. */ uint vcpu_translate(struct vcpu_t *vcpu, vaddr_t va, uint access, paddr_t *pa, - uint64 *len, bool update) + uint64 *len, bool update, uint64 *fault_gfn) { pagemode_t mode = vcpu_get_pagemode(vcpu); uint order = 0; @@ -1133,7 +1178,7 @@ uint vcpu_translate(struct vcpu_t *vcpu, vaddr_t va, uint access, paddr_t *pa, case PM_PAE: case PM_PML4: { r = pw_perform_page_walk(vcpu, va, access, pa, &order, update, - false); + false, fault_gfn); break; } default: { diff --git a/include/hax.h b/include/hax.h index 75d278d0..81c9b7c9 100644 --- a/include/hax.h +++ b/include/hax.h @@ -38,8 +38,8 @@ extern int hax_page_size; -#define HAX_CUR_VERSION 0x0004 -#define HAX_COMPAT_VERSION 0x0001 +#define HAX_CUR_VERSION 0x0005 +#define HAX_COMPAT_VERSION 0x0002 // EPT2 refers to the new memory virtualization engine, which implements lazy // allocation, and therefore greatly speeds up ALLOC_RAM and SET_RAM VM ioctls
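
To round out the series, here is a hedged sketch of how a user-space VMM on Windows might exercise the new interface. Only struct hax_gpa_prot_info, HAX_VM_IOCTL_GPA_PROT (function code 0x915), HAX_GPA_PROT_MASK/HAX_GPA_PROT_ALL, and the HAX_EXIT_GPAPROT/gpaprot tunnel fields come from this series; the HAX_DEVICE_TYPE value, the "\\.\hax_vm00" device path, and the surrounding flow are assumptions for illustration. The flags direction follows gpa_space_protect_range() as implemented here: flags == 0 marks the range protected, flags == HAX_GPA_PROT_ALL clears the protection (the header comment on HAX_GPA_PROT_ALL reads the other way, so callers should follow the code).

#include <windows.h>
#include <winioctl.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed driver constant, reproduced only for illustration; the ioctl number
 * itself matches windows/hax_entry.h in this patch. */
#define HAX_DEVICE_TYPE 0x4000
#define HAX_VM_IOCTL_GPA_PROT \
        CTL_CODE(HAX_DEVICE_TYPE, 0x915, METHOD_BUFFERED, FILE_ANY_ACCESS)

#define HAX_GPA_PROT_ALL 0x7           /* one bit each for r/w/x */

#pragma pack(push, 1)
struct hax_gpa_prot_info {             /* mirrors include/hax_interface.h */
    uint64_t pa_start;
    uint64_t size;
    uint64_t flags;                    /* 0 = protect, HAX_GPA_PROT_ALL = restore */
};
#pragma pack(pop)

static BOOL gpa_protect(HANDLE vm, uint64_t start, uint64_t size, uint64_t flags)
{
    struct hax_gpa_prot_info info = { start, size, flags };
    DWORD out = 0;
    return DeviceIoControl(vm, HAX_VM_IOCTL_GPA_PROT, &info, sizeof(info),
                           NULL, 0, &out, NULL);
}

int main(void)
{
    /* Hypothetical VM device path; a real VMM already holds this handle and
     * has checked HAX_CAP_GPA_PROTECTION in hax_capabilityinfo.winfo. */
    HANDLE vm = CreateFileA("\\\\.\\hax_vm00", GENERIC_READ | GENERIC_WRITE,
                            0, NULL, OPEN_EXISTING, 0, NULL);
    if (vm == INVALID_HANDLE_VALUE)
        return 1;

    /* Remove r/w/x access to 1MB of guest RAM at GPA 0x100000. The next guest
     * touch makes the vcpu run ioctl return with _exit_status ==
     * HAX_EXIT_GPAPROT and tunnel->gpaprot.gpa/access filled in; after
     * servicing the event the VMM calls gpa_protect(vm, ..., HAX_GPA_PROT_ALL)
     * to restore access and re-enters the guest. */
    if (!gpa_protect(vm, 0x100000, 0x100000, 0))
        fprintf(stderr, "HAX_VM_IOCTL_GPA_PROT failed: %lu\n", GetLastError());

    CloseHandle(vm);
    return 0;
}
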