-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improve the instruction cache. See merge request mempool/mempool!76
- Loading branch information
Showing
11 changed files
with
293 additions
and
88 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,9 @@ | |
// permission from ETH Zurich. | ||
// | ||
// Fabian Schuiki <[email protected]> | ||
// Samuel Riedel <[email protected]> | ||
|
||
`include "common_cells/registers.svh" | ||
|
||
/// An actual cache lookup. | ||
module snitch_icache_lookup #( | ||
|
@@ -41,20 +44,39 @@ module snitch_icache_lookup #( | |
input logic write_valid_i, | ||
output logic write_ready_o | ||
); | ||
logic valid_and_hit; | ||
assign valid_and_hit = out_valid_o & out_hit_o; | ||
|
||
`ifndef SYNTHESIS | ||
initial assert(CFG != '0); | ||
`endif | ||
|
||
localparam int unsigned DataAddrWdith = CFG.SET_ALIGN + CFG.COUNT_ALIGN; | ||
|
||
typedef struct packed { | ||
logic [CFG.FETCH_AW-1:0] addr; | ||
logic [CFG.COUNT_ALIGN-1:0] cset; | ||
logic [CFG.LINE_WIDTH-1:0] data; | ||
logic [CFG.ID_WIDTH_REQ-1:0] id; | ||
logic write; | ||
} req_t; | ||
|
||
// Multiplex read and write access to the RAMs onto one port, prioritizing | ||
// write accesses. | ||
logic [CFG.COUNT_ALIGN-1:0] ram_addr ; | ||
logic [CFG.SET_COUNT-1:0] ram_enable ; | ||
logic [CFG.LINE_WIDTH-1:0] ram_wdata, ram_rdata [CFG.SET_COUNT] ; | ||
logic [CFG.LINE_WIDTH-1:0] ram_wdata, ram_rdata; | ||
logic [CFG.TAG_WIDTH+1:0] ram_wtag, ram_rtag [CFG.SET_COUNT] ; | ||
logic ram_write ; | ||
logic ram_write_q; | ||
logic [CFG.COUNT_ALIGN:0] init_count_q; | ||
logic [CFG.COUNT_ALIGN-1:0] data_addr; | ||
logic [DataAddrWdith-1:0] data_bank_addr; | ||
req_t data_req_d, data_req_q; | ||
logic req_valid, req_ready; | ||
|
||
logic out_hit, out_error; | ||
logic [CFG.SET_ALIGN-1:0] out_set; | ||
|
||
always_comb begin : p_portmux | ||
write_ready_o = 0; | ||
|
@@ -65,6 +87,7 @@ module snitch_icache_lookup #( | |
ram_wtag = {1'b1, write_error_i, write_tag_i}; | ||
ram_enable = '0; | ||
ram_write = 1'b0; | ||
req_valid = 1'b0; | ||
|
||
if (init_count_q != $unsigned(CFG.LINE_COUNT)) begin | ||
ram_addr = init_count_q; | ||
|
@@ -76,10 +99,25 @@ module snitch_icache_lookup #( | |
ram_addr = write_addr_i; | ||
ram_enable = $unsigned(1 << write_set_i); | ||
ram_write = 1'b1; | ||
write_ready_o = 1'b1; | ||
end else if (out_ready_i) begin | ||
ram_enable = in_valid_i ? '1 : '0; | ||
in_ready_o = 1'b1; | ||
write_ready_o = 1'b1; // From Fall-through register | ||
// Store request to data bank | ||
req_valid = 1'b1; | ||
data_req_d.addr = write_addr_i; | ||
data_req_d.cset = write_set_i; | ||
data_req_d.data = write_data_i; | ||
data_req_d.id = data_req_q.id; // Don't care | ||
data_req_d.write = 1'b1; | ||
end else if (in_valid_i) begin | ||
// Read the tag banks | ||
ram_enable = '1; | ||
in_ready_o = out_ready_i; | ||
// Store request to data bank | ||
req_valid = 1'b1; | ||
data_req_d.addr = in_addr_i; | ||
data_req_d.cset = data_req_q.cset; // Don't care | ||
data_req_d.data = data_req_q.data; // Don't care | ||
data_req_d.id = in_id_i; | ||
data_req_d.write = 1'b0; | ||
end | ||
end | ||
|
||
|
@@ -104,87 +142,101 @@ module snitch_icache_lookup #( | |
|
||
// The address register keeps track of additional metadata alongside the | ||
// looked up tag and data. | ||
logic valid_q; | ||
logic valid_q, valid_d; | ||
logic [CFG.FETCH_AW-1:0] addr_q; | ||
logic [CFG.ID_WIDTH_REQ-1:0] id_q; | ||
|
||
always_ff @(posedge clk_i, negedge rst_ni) begin | ||
if (!rst_ni) | ||
valid_q <= 1'b0; | ||
else if ((in_valid_i && in_ready_o) || out_ready_i) | ||
valid_q <= in_valid_i && in_ready_o; | ||
end | ||
|
||
always_ff @(posedge clk_i, negedge rst_ni) begin | ||
if (!rst_ni) begin | ||
addr_q <= '0; | ||
id_q <= '0; | ||
end else if (in_valid_i && in_ready_o) begin | ||
addr_q <= in_addr_i; | ||
id_q <= in_id_i; | ||
end else if (valid_d && out_ready_i) begin | ||
addr_q <= data_req_q.addr; | ||
id_q <= data_req_q.id; | ||
end | ||
end | ||
|
||
`FFLARN(out_hit_o, out_hit, valid_d & out_ready_i, 1'b0, clk_i, rst_ni) | ||
`FFLARN(out_error_o, out_error, valid_d & out_ready_i, 1'b0, clk_i, rst_ni) | ||
`FFLARN(out_set_o, out_set, valid_d & out_ready_i, '0, clk_i, rst_ni) | ||
|
||
// Store data while reading the tag | ||
`FFLARN(data_req_q, data_req_d, req_valid & out_ready_i, '0, clk_i, rst_ni) | ||
`FF(valid_d, req_valid, 1'b0) | ||
|
||
`FF(valid_q, valid_d & ~data_req_q.write, 1'b0) | ||
|
||
// Instantiate the RAM sets. | ||
for (genvar i = 0; i < CFG.SET_COUNT; i++) begin : g_sets | ||
tc_sram #( | ||
.DataWidth ( CFG.TAG_WIDTH+2 ), | ||
.NumWords ( CFG.LINE_COUNT ), | ||
.NumPorts ( 1 ) | ||
) i_tag ( | ||
.clk_i ( clk_i ), | ||
.rst_ni ( rst_ni ), | ||
.req_i ( ram_enable[i] ), | ||
.we_i ( ram_write ), | ||
.addr_i ( ram_addr ), | ||
.wdata_i ( ram_wtag ), | ||
.be_i ( '1 ), | ||
.rdata_o ( ram_rtag[i] ) | ||
); | ||
|
||
tc_sram #( | ||
.DataWidth ( CFG.LINE_WIDTH ), | ||
.NumWords ( CFG.LINE_COUNT ), | ||
.NumPorts ( 1 ) | ||
) i_data ( | ||
.clk_i ( clk_i ), | ||
.rst_ni ( rst_ni ), | ||
.req_i ( ram_enable[i] ), | ||
.we_i ( ram_write ), | ||
.addr_i ( ram_addr ), | ||
.wdata_i ( ram_wdata ), | ||
.be_i ( '1 ), | ||
.rdata_o ( ram_rdata[i] ) | ||
); | ||
if (CFG.L1_TAG_SCM) begin : gen_scm | ||
latch_scm #( | ||
.ADDR_WIDTH ($clog2(CFG.LINE_COUNT)), | ||
.DATA_WIDTH (CFG.TAG_WIDTH+2 ) | ||
) i_tag ( | ||
.clk (clk_i ), | ||
.ReadEnable (ram_enable[i] && !ram_write), | ||
.ReadAddr (ram_addr ), | ||
.ReadData (ram_rtag[i] ), | ||
.WriteEnable(ram_enable[i] && ram_write ), | ||
.WriteAddr (ram_addr ), | ||
.WriteData (ram_wtag ) | ||
); | ||
end else begin : gen_sram | ||
tc_sram #( | ||
.DataWidth ( CFG.TAG_WIDTH+2 ), | ||
.NumWords ( CFG.LINE_COUNT ), | ||
.NumPorts ( 1 ) | ||
) i_tag ( | ||
.clk_i ( clk_i ), | ||
.rst_ni ( rst_ni ), | ||
.req_i ( ram_enable[i] ), | ||
.we_i ( ram_write ), | ||
.addr_i ( ram_addr ), | ||
.wdata_i ( ram_wtag ), | ||
.be_i ( '1 ), | ||
.rdata_o ( ram_rtag[i] ) | ||
); | ||
end | ||
end | ||
|
||
// Single data bank for all sets | ||
assign data_addr = {data_req_q.write ? data_req_q.addr : data_req_q.addr >> CFG.LINE_ALIGN}; | ||
assign data_bank_addr = {data_req_q.write ? data_req_q.cset : out_set, data_addr}; | ||
tc_sram #( | ||
.DataWidth ( CFG.LINE_WIDTH ), | ||
.NumWords ( CFG.LINE_COUNT * CFG.SET_COUNT ), | ||
.NumPorts ( 1 ) | ||
) i_data ( | ||
.clk_i ( clk_i ), | ||
.rst_ni ( rst_ni ), | ||
.req_i ( valid_d ), | ||
.we_i ( data_req_q.write ), | ||
.addr_i ( data_bank_addr ), | ||
.wdata_i ( data_req_q.data ), | ||
.be_i ( '1 ), | ||
.rdata_o ( ram_rdata ) | ||
); | ||
|
||
// Determine which RAM line hit, and multiplex that data to the output. | ||
logic [CFG.TAG_WIDTH-1:0] required_tag; | ||
logic [CFG.SET_COUNT-1:0] line_hit; | ||
|
||
always_comb begin | ||
automatic logic [CFG.SET_COUNT-1:0] errors; | ||
required_tag = addr_q >> (CFG.LINE_ALIGN + CFG.COUNT_ALIGN); | ||
required_tag = data_req_q.addr >> (CFG.LINE_ALIGN + CFG.COUNT_ALIGN); | ||
for (int i = 0; i < CFG.SET_COUNT; i++) begin | ||
line_hit[i] = ram_rtag[i][CFG.TAG_WIDTH+1] && ram_rtag[i][CFG.TAG_WIDTH-1:0] == required_tag; | ||
errors[i] = ram_rtag[i][CFG.TAG_WIDTH] && line_hit[i]; | ||
end | ||
out_hit_o = |line_hit & ~ram_write_q; // Don't let refills trigger "valid" lookups | ||
out_error_o = |errors; | ||
out_hit = |line_hit & ~ram_write_q; // Don't let refills trigger "valid" lookups | ||
out_error = |errors; | ||
end | ||
|
||
always_comb begin | ||
for (int i = 0; i < CFG.LINE_WIDTH; i++) begin | ||
automatic logic [CFG.SET_COUNT-1:0] masked; | ||
for (int j = 0; j < CFG.SET_COUNT; j++) | ||
masked[j] = ram_rdata[j][i] & line_hit[j]; | ||
out_data_o[i] = |masked; | ||
end | ||
end | ||
assign out_data_o = out_hit_o ? ram_rdata : '0; | ||
|
||
lzc #(.WIDTH(CFG.SET_COUNT)) i_lzc ( | ||
.in_i ( line_hit ), | ||
.cnt_o ( out_set_o ), | ||
.cnt_o ( out_set ), | ||
.empty_o ( ) | ||
); | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Create the wave group for the instruction cache of core $3 from group $1 tile $2
# (core_id=NUM_CORES_PER_GROUP*$1+NUM_CORES_PER_TILE*$2+$3)
#
# Arguments (positional script parameters):
#   $1  index into gen_groups
#   $2  index into gen_tiles
#   $3  index into gen_caches
#
# Everything is added to the wave group cache[$1][$2][$3]: first the listed
# parameters of i_snitch_icache, then all of its signals, then nested groups
# for the per-fetch-port instances and for i_lookup, i_handler and i_refill.

# Both dividers belong to the cache group, like every other entry of this script.
add wave -noupdate -group cache[$1][$2][$3] -divider Parameters
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/NR_FETCH_PORTS
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/L0_LINE_COUNT
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/LINE_WIDTH
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/LINE_COUNT
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/SET_COUNT
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/FETCH_DW
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/FILL_AW
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/FILL_DW
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/EARLY_LATCH
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/L0_EARLY_TAG_WIDTH
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/ISO_CROSSING
add wave -noupdate -group cache[$1][$2][$3] -divider Signals
add wave -noupdate -group cache[$1][$2][$3] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/*

# One nested group per fetch port; the port count is read from the design's
# NR_FETCH_PORTS parameter at script run time.
# NOTE(review): the nested group is labelled refill[$i] although it shows the
# gen_prefetcher[$i]/i_snitch_icache_l0 instance -- confirm the label is intended.
for {set i 0} {$i < [examine -radix dec /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/NR_FETCH_PORTS]} {incr i} {
  add wave -noupdate -group cache[$1][$2][$3] -group refill[$i] /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/gen_prefetcher[$i]/i_snitch_icache_l0/*
}

# Sub-modules of the cache, each in its own nested group.
add wave -noupdate -group cache[$1][$2][$3] -group lookup /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/i_lookup/*
add wave -noupdate -group cache[$1][$2][$3] -group handler /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/i_handler/*
add wave -noupdate -group cache[$1][$2][$3] -group refill /mempool_tb/dut/i_mempool/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/i_tile/gen_caches[$3]/i_snitch_icache/i_refill/*
Oops, something went wrong.