Skip to content

Commit

Permalink
Merge branch 'cache' into 'master'
Browse files Browse the repository at this point in the history
Improve the instruction cache

See merge request mempool/mempool!76
  • Loading branch information
SamuelRiedel committed Jun 24, 2021
2 parents bb8cfb0 + 73e6c1e commit ca90bbd
Show file tree
Hide file tree
Showing 11 changed files with 293 additions and 88 deletions.
1 change: 1 addition & 0 deletions Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ sources:
- hardware/src/address_scrambler.sv
- hardware/src/axi2mem.sv
- hardware/src/bootrom.sv
- hardware/src/latch_scm.sv
# Level 1
- hardware/src/mempool_tile.sv
# Level 2
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,18 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## Unreleased


## 0.2.0 - 2021-03-29

### Added
- Assertion checking that Snitch's instruction interface is stable during stalls

### Changed
- Update `axi` dependency to 0.27.1
- Change I$ policy to avoid evicting the cache-line currently in use
- Make the L0 cache's data latch-based and double its size
- Make the L1 cache's tag latch-based
- Serialize the L1 lookup

### Fixed
- Add a workaround for a Modelsim 2019 bug in the `axi_demux`
Expand Down
3 changes: 3 additions & 0 deletions hardware/deps/snitch/src/snitch_icache/snitch_icache.sv
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ module snitch_icache #(
parameter int FILL_AW = -1,
/// Fill interface data width. Power of two; >= 8.
parameter int FILL_DW = -1,
/// Replace the L1 tag banks with latch-based SCM.
parameter bit L1_TAG_SCM = 0,
/// This reduces area impact at the cost of
/// increased hassle of having latches in
/// the design.
Expand Down Expand Up @@ -88,6 +90,7 @@ module snitch_icache #(
FETCH_DW: FETCH_DW,
FILL_AW: FILL_AW,
FILL_DW: FILL_DW,
L1_TAG_SCM: L1_TAG_SCM,
EARLY_LATCH: EARLY_LATCH,

FETCH_ALIGN: $clog2(FETCH_DW/8),
Expand Down
17 changes: 16 additions & 1 deletion hardware/deps/snitch/src/snitch_icache/snitch_icache_l0.sv
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ module snitch_icache_l0 import snitch_icache_pkg::*; #(
assign hit_prefetch_any = |hit_prefetch;
assign miss = ~hit_any & in_valid_i & ~pending_refill_q;

logic clk_inv;
tc_clk_inverter i_clk_inv (
.clk_i (clk_i),
.clk_o (clk_inv)
);

for (genvar i = 0; i < CFG.L0_LINE_COUNT; i++) begin : gen_array
// Tag Array
always_ff @(posedge clk_i or negedge rst_ni) begin
Expand All @@ -139,12 +145,21 @@ module snitch_icache_l0 import snitch_icache_pkg::*; #(
end
end
if (CFG.EARLY_LATCH) begin : gen_latch
logic clk_vld;
tc_clk_gating i_clk_gate (
.clk_i (clk_inv ),
.en_i (validate_strb[i]),
.test_en_i (1'b0 ),
.clk_o (clk_vld )
);
// Data Array
/* verilator lint_off NOLATCH */
always_latch begin
if (clk_i && validate_strb[i]) begin
if (clk_vld) begin
data[i] <= out_rsp_data_i;
end
end
/* verilator lint_on NOLATCH */
end else begin : gen_ff
`FFLNR(data[i], out_rsp_data_i, validate_strb[i], clk_i)
end
Expand Down
166 changes: 109 additions & 57 deletions hardware/deps/snitch/src/snitch_icache/snitch_icache_lookup.sv
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
// permission from ETH Zurich.
//
// Fabian Schuiki <[email protected]>
// Samuel Riedel <[email protected]>

`include "common_cells/registers.svh"

/// An actual cache lookup.
module snitch_icache_lookup #(
Expand Down Expand Up @@ -41,20 +44,39 @@ module snitch_icache_lookup #(
input logic write_valid_i,
output logic write_ready_o
);
logic valid_and_hit;
assign valid_and_hit = out_valid_o & out_hit_o;

`ifndef SYNTHESIS
initial assert(CFG != '0);
`endif

localparam int unsigned DataAddrWdith = CFG.SET_ALIGN + CFG.COUNT_ALIGN;

typedef struct packed {
logic [CFG.FETCH_AW-1:0] addr;
logic [CFG.COUNT_ALIGN-1:0] cset;
logic [CFG.LINE_WIDTH-1:0] data;
logic [CFG.ID_WIDTH_REQ-1:0] id;
logic write;
} req_t;

// Multiplex read and write access to the RAMs onto one port, prioritizing
// write accesses.
logic [CFG.COUNT_ALIGN-1:0] ram_addr ;
logic [CFG.SET_COUNT-1:0] ram_enable ;
logic [CFG.LINE_WIDTH-1:0] ram_wdata, ram_rdata [CFG.SET_COUNT] ;
logic [CFG.LINE_WIDTH-1:0] ram_wdata, ram_rdata;
logic [CFG.TAG_WIDTH+1:0] ram_wtag, ram_rtag [CFG.SET_COUNT] ;
logic ram_write ;
logic ram_write_q;
logic [CFG.COUNT_ALIGN:0] init_count_q;
logic [CFG.COUNT_ALIGN-1:0] data_addr;
logic [DataAddrWdith-1:0] data_bank_addr;
req_t data_req_d, data_req_q;
logic req_valid, req_ready;

logic out_hit, out_error;
logic [CFG.SET_ALIGN-1:0] out_set;

always_comb begin : p_portmux
write_ready_o = 0;
Expand All @@ -65,6 +87,7 @@ module snitch_icache_lookup #(
ram_wtag = {1'b1, write_error_i, write_tag_i};
ram_enable = '0;
ram_write = 1'b0;
req_valid = 1'b0;

if (init_count_q != $unsigned(CFG.LINE_COUNT)) begin
ram_addr = init_count_q;
Expand All @@ -76,10 +99,25 @@ module snitch_icache_lookup #(
ram_addr = write_addr_i;
ram_enable = $unsigned(1 << write_set_i);
ram_write = 1'b1;
write_ready_o = 1'b1;
end else if (out_ready_i) begin
ram_enable = in_valid_i ? '1 : '0;
in_ready_o = 1'b1;
write_ready_o = 1'b1; // From Fall-through register
// Store request to data bank
req_valid = 1'b1;
data_req_d.addr = write_addr_i;
data_req_d.cset = write_set_i;
data_req_d.data = write_data_i;
data_req_d.id = data_req_q.id; // Don't care
data_req_d.write = 1'b1;
end else if (in_valid_i) begin
// Read the tag banks
ram_enable = '1;
in_ready_o = out_ready_i;
// Store request to data bank
req_valid = 1'b1;
data_req_d.addr = in_addr_i;
data_req_d.cset = data_req_q.cset; // Don't care
data_req_d.data = data_req_q.data; // Don't care
data_req_d.id = in_id_i;
data_req_d.write = 1'b0;
end
end

Expand All @@ -104,87 +142,101 @@ module snitch_icache_lookup #(

// The address register keeps track of additional metadata alongside the
// looked up tag and data.
logic valid_q;
logic valid_q, valid_d;
logic [CFG.FETCH_AW-1:0] addr_q;
logic [CFG.ID_WIDTH_REQ-1:0] id_q;

always_ff @(posedge clk_i, negedge rst_ni) begin
if (!rst_ni)
valid_q <= 1'b0;
else if ((in_valid_i && in_ready_o) || out_ready_i)
valid_q <= in_valid_i && in_ready_o;
end

always_ff @(posedge clk_i, negedge rst_ni) begin
if (!rst_ni) begin
addr_q <= '0;
id_q <= '0;
end else if (in_valid_i && in_ready_o) begin
addr_q <= in_addr_i;
id_q <= in_id_i;
end else if (valid_d && out_ready_i) begin
addr_q <= data_req_q.addr;
id_q <= data_req_q.id;
end
end

`FFLARN(out_hit_o, out_hit, valid_d & out_ready_i, 1'b0, clk_i, rst_ni)
`FFLARN(out_error_o, out_error, valid_d & out_ready_i, 1'b0, clk_i, rst_ni)
`FFLARN(out_set_o, out_set, valid_d & out_ready_i, '0, clk_i, rst_ni)

// Store data while reading the tag
`FFLARN(data_req_q, data_req_d, req_valid & out_ready_i, '0, clk_i, rst_ni)
`FF(valid_d, req_valid, 1'b0)

`FF(valid_q, valid_d & ~data_req_q.write, 1'b0)

// Instantiate the RAM sets.
for (genvar i = 0; i < CFG.SET_COUNT; i++) begin : g_sets
tc_sram #(
.DataWidth ( CFG.TAG_WIDTH+2 ),
.NumWords ( CFG.LINE_COUNT ),
.NumPorts ( 1 )
) i_tag (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( ram_enable[i] ),
.we_i ( ram_write ),
.addr_i ( ram_addr ),
.wdata_i ( ram_wtag ),
.be_i ( '1 ),
.rdata_o ( ram_rtag[i] )
);

tc_sram #(
.DataWidth ( CFG.LINE_WIDTH ),
.NumWords ( CFG.LINE_COUNT ),
.NumPorts ( 1 )
) i_data (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( ram_enable[i] ),
.we_i ( ram_write ),
.addr_i ( ram_addr ),
.wdata_i ( ram_wdata ),
.be_i ( '1 ),
.rdata_o ( ram_rdata[i] )
);
if (CFG.L1_TAG_SCM) begin : gen_scm
latch_scm #(
.ADDR_WIDTH ($clog2(CFG.LINE_COUNT)),
.DATA_WIDTH (CFG.TAG_WIDTH+2 )
) i_tag (
.clk (clk_i ),
.ReadEnable (ram_enable[i] && !ram_write),
.ReadAddr (ram_addr ),
.ReadData (ram_rtag[i] ),
.WriteEnable(ram_enable[i] && ram_write ),
.WriteAddr (ram_addr ),
.WriteData (ram_wtag )
);
end else begin : gen_sram
tc_sram #(
.DataWidth ( CFG.TAG_WIDTH+2 ),
.NumWords ( CFG.LINE_COUNT ),
.NumPorts ( 1 )
) i_tag (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( ram_enable[i] ),
.we_i ( ram_write ),
.addr_i ( ram_addr ),
.wdata_i ( ram_wtag ),
.be_i ( '1 ),
.rdata_o ( ram_rtag[i] )
);
end
end

// Single data bank for all sets
assign data_addr = {data_req_q.write ? data_req_q.addr : data_req_q.addr >> CFG.LINE_ALIGN};
assign data_bank_addr = {data_req_q.write ? data_req_q.cset : out_set, data_addr};
tc_sram #(
.DataWidth ( CFG.LINE_WIDTH ),
.NumWords ( CFG.LINE_COUNT * CFG.SET_COUNT ),
.NumPorts ( 1 )
) i_data (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( valid_d ),
.we_i ( data_req_q.write ),
.addr_i ( data_bank_addr ),
.wdata_i ( data_req_q.data ),
.be_i ( '1 ),
.rdata_o ( ram_rdata )
);

// Determine which RAM line hit, and multiplex that data to the output.
logic [CFG.TAG_WIDTH-1:0] required_tag;
logic [CFG.SET_COUNT-1:0] line_hit;

always_comb begin
automatic logic [CFG.SET_COUNT-1:0] errors;
required_tag = addr_q >> (CFG.LINE_ALIGN + CFG.COUNT_ALIGN);
required_tag = data_req_q.addr >> (CFG.LINE_ALIGN + CFG.COUNT_ALIGN);
for (int i = 0; i < CFG.SET_COUNT; i++) begin
line_hit[i] = ram_rtag[i][CFG.TAG_WIDTH+1] && ram_rtag[i][CFG.TAG_WIDTH-1:0] == required_tag;
errors[i] = ram_rtag[i][CFG.TAG_WIDTH] && line_hit[i];
end
out_hit_o = |line_hit & ~ram_write_q; // Don't let refills trigger "valid" lookups
out_error_o = |errors;
out_hit = |line_hit & ~ram_write_q; // Don't let refills trigger "valid" lookups
out_error = |errors;
end

always_comb begin
for (int i = 0; i < CFG.LINE_WIDTH; i++) begin
automatic logic [CFG.SET_COUNT-1:0] masked;
for (int j = 0; j < CFG.SET_COUNT; j++)
masked[j] = ram_rdata[j][i] & line_hit[j];
out_data_o[i] = |masked;
end
end
assign out_data_o = out_hit_o ? ram_rdata : '0;

lzc #(.WIDTH(CFG.SET_COUNT)) i_lzc (
.in_i ( line_hit ),
.cnt_o ( out_set_o ),
.cnt_o ( out_set ),
.empty_o ( )
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ package snitch_icache_pkg;
int FETCH_DW;
int FILL_AW;
int FILL_DW;
bit L1_TAG_SCM;
bit EARLY_LATCH;

// Derived values.
Expand Down
26 changes: 0 additions & 26 deletions hardware/scripts/wave.do
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,6 @@ for {set group 0} {$group < [examine -radix dec /mempool_pkg::NumGroups]} {incr
add wave -group group_[$group] -group Interconnect_Local /mempool_tb/dut/i_mempool/gen_groups[$group]/i_group/i_local_interco/*
}
add wave -group Wake_up_reg /mempool_tb/dut/i_ctrl_registers/clk_i
add wave -group Wake_up_reg /mempool_tb/dut/i_ctrl_registers/wake_up
add wave -group Wake_up_reg /mempool_tb/dut/i_ctrl_registers/wake_up_o
add wave -group Wake_up_reg /mempool_tb/dut/i_ctrl_registers/wr_active_d
add wave -group Wake_up_reg /mempool_tb/dut/i_ctrl_registers/wr_active_q
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[0]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[0]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[0]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[1]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[0]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[2]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[0]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[3]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[1]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[0]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[1]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[1]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[1]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[2]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[1]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[3]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[2]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[0]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[2]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[1]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[2]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[2]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[2]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[3]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[3]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[0]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[3]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[1]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[3]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[2]/riscv_core/i_snitch/wake_up_sync_i
add wave -group Core_wake_up_sync_i /mempool_tb/dut/i_mempool/gen_groups[3]/i_group/gen_tiles[0]/i_tile/i_tile/gen_cores[3]/riscv_core/i_snitch/wake_up_sync_i
add wave -Group Control_Registers /mempool_tb/dut/i_ctrl_registers/*
# TreeUpdate [SetDefaultTree]
Expand Down
24 changes: 24 additions & 0 deletions hardware/scripts/wave_cache.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Create the instruction-cache wave group for core $3 from group $1 tile $2
# (core_id=NUM_CORES_PER_GROUP*$1+NUM_CORES_PER_TILE*$2+$3)
#
# Arguments (positional do-file parameters):
#   $1  group index
#   $2  tile index inside the group
#   $3  cache index inside the tile (selects gen_caches[$3])
#
# Everything, including the dividers, is added to the group
# cache[$1][$2][$3] so that the dividers separate the entries they describe.

# Parameters of the cache instance
add wave -noupdate -group cache[$1][$2][$3] -divider Parameters
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/NR_FETCH_PORTS
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/L0_LINE_COUNT
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/LINE_WIDTH
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/LINE_COUNT
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/SET_COUNT
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/FETCH_DW
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/FILL_AW
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/FILL_DW
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/EARLY_LATCH
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/L0_EARLY_TAG_WIDTH
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/ISO_CROSSING

# All objects directly inside the cache instance
add wave -noupdate -group cache[$1][$2][$3] -divider Signals
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/*

# One sub-group per fetch port, showing that port's i_snitch_icache_l0 instance
for {set i 0} {$i < [examine -radix dec /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/NR_FETCH_PORTS]} {incr i} {
  add wave -noupdate -group cache[$1][$2][$3] -group refill[$i] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/gen_prefetcher[$i]/i_snitch_icache_l0/*
}

# Sub-groups for the i_lookup, i_handler and i_refill instances
add wave -noupdate -group cache[$1][$2][$3] -group lookup /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/i_lookup/*
add wave -noupdate -group cache[$1][$2][$3] -group handler /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/i_handler/*
add wave -noupdate -group cache[$1][$2][$3] -group refill /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/i_refill/*
Loading

0 comments on commit ca90bbd

Please sign in to comment.