Updated to cache design
diff --git a/verilog/rtl/user_proj_example.v b/verilog/rtl/user_proj_example.v
index 26081e9..4a14c5e 100644
--- a/verilog/rtl/user_proj_example.v
+++ b/verilog/rtl/user_proj_example.v
@@ -108,58 +108,901 @@
         .BITS(BITS)
     ) counter(
         .clk(clk),
-        .reset(rst),
-        .ready(wbs_ack_o),
-        .valid(valid),
-        .rdata(rdata),
-        .wdata(wbs_dat_i),
-        .wstrb(wstrb),
-        .la_write(la_write),
-        .la_input(la_data_in[63:32]),
-        .count(count)
+        .reset(la_data_in[0]),
+        .trace_ready(la_data_in[1]),
+        .mem_addr(la_data_in[33:2]),
+        .L1_hit_count(la_data_out[9:0]),
+        .L2_hit_count4(la_data_out[19:10]),
+        .L2_hit_count8(la_data_out[29:20]),
+        .L2_hit_count16(la_data_out[39:30]),
+        .L2_ss1_count4(la_data_out[49:40]),
+        .L2_ss1_count8(la_data_out[59:50]),
+        .L2_ss1_count16(la_data_out[69:60]),
+        .L2_ss2_count4(la_data_out[79:70]),
+        .L2_ss2_count8(la_data_out[89:80]),
+        .L2_ss2_count16(la_data_out[99:90])
     );
 
 endmodule
 
-module counter #(
-    parameter BITS = 32
-)(
-    input clk,
-    input reset,
-    input valid,
-    input [3:0] wstrb,
-    input [BITS-1:0] wdata,
-    input [BITS-1:0] la_write,
-    input [BITS-1:0] la_input,
-    output ready,
-    output [BITS-1:0] rdata,
-    output [BITS-1:0] count
-);
-    reg ready;
-    reg [BITS-1:0] count;
-    reg [BITS-1:0] rdata;
+module main(clk,reset,trace_ready,mem_addr,L1_hit_count,L2_hit_count4,L2_hit_count8,L2_hit_count16,L2_ss1_count4,L2_ss1_count8,L2_ss1_count16,L2_ss2_count4,L2_ss2_count8,L2_ss2_count16);
 
-    always @(posedge clk) begin
-        if (reset) begin
-            count <= 0;
-            ready <= 0;
-        end else begin
-            ready <= 1'b0;
-            if (~|la_write) begin
-                count <= count + 1;
-            end
-            if (valid && !ready) begin
-                ready <= 1'b1;
-                rdata <= count;
-                if (wstrb[0]) count[7:0]   <= wdata[7:0];
-                if (wstrb[1]) count[15:8]  <= wdata[15:8];
-                if (wstrb[2]) count[23:16] <= wdata[23:16];
-                if (wstrb[3]) count[31:24] <= wdata[31:24];
-            end else if (|la_write) begin
-                count <= la_write & la_input;
+    parameter L1_way = 4;
+    parameter L1_block_size_byte = 16;
+    parameter L1_cache_size_byte = 1*1024;
+    
+    parameter L2_way = 16;
+    parameter L2_block_size_byte = 16;
+    parameter L2_set_size = 64;
+    
+    parameter L1_block_offset_index = $rtoi($ln(L1_block_size_byte)/$ln(2));
+    parameter L1_set = L1_cache_size_byte/(L1_block_size_byte*L1_way);
+    parameter L1_set_index = $rtoi($ln(L1_set)/$ln(2));
+    parameter L1_way_width = $rtoi($ln(L1_way)/$ln(2));
+    
+    parameter L2_block_offset_index = $rtoi($ln(L2_block_size_byte)/$ln(2));
+    parameter L2_set_index = $rtoi($ln(L2_set_size)/$ln(2));
+    parameter L2_way_width = $rtoi($ln(L2_way)/$ln(2));
+
+    input clk,trace_ready,reset;
+    input [31:0] mem_addr;
+    
+    output [19:0] L1_hit_count,L2_hit_count4,L2_hit_count8,L2_hit_count16,L2_ss1_count4,L2_ss1_count8,L2_ss1_count16,L2_ss2_count4,L2_ss2_count8,L2_ss2_count16;
+    wire updated;    
+    
+    // variables to divide address in tag, index and offset for L1 cache
+    wire [31-L1_set_index-L1_block_offset_index:0] L1_tag;
+    wire [L1_set_index-1:0] L1_index;
+    wire [L1_block_offset_index-1:0] L1_block_offset;
+    
+    // variables to divide address in tag, index and offset for L2 cache
+    wire [31-L2_set_index-L2_block_offset_index:0] L2_tag;
+    wire [L2_set_index-1:0] L2_index;
+    wire [L2_block_offset_index-1:0] L2_block_offset;
+    
+    //L1 cache 
+    wire L1_done,L1_found_in_cache,L1_updated;
+    
+    //L2 cache
+    wire L2_done,L2_found_in_cache,L2_updated ;    
+    wire [L2_way_width:0] L2_hit_way;
+    
+    //Subset cache1
+    wire ss1_found_in_cache,ss1_updated;
+    wire [L2_way_width:0] ss1_hit_way;
+    
+    //Subset cache2
+    wire ss2_found_in_cache,ss2_updated;
+    wire [L2_way_width:0] ss2_hit_way;
+    wire [L2_set_index-2:0] ss1_index;
+    
+    //prefetcher
+    wire prefetch_hit,prefetch_done;
+    
+
+    assign L1_block_offset = mem_addr[L1_block_offset_index-1:0];
+    assign L1_index = mem_addr[L1_set_index+L1_block_offset_index-1:L1_block_offset_index];
+    assign L1_tag = mem_addr[31:L1_set_index+L1_block_offset_index];
+    
+    assign L2_block_offset = mem_addr[L2_block_offset_index-1:0];
+    assign L2_index = mem_addr[L2_set_index+L2_block_offset_index-1:L2_block_offset_index];
+    assign L2_tag = mem_addr[31:L2_set_index+L2_block_offset_index]; 
+    
+    assign ss1_index = L2_index[L2_set_index-2:0];
+    assign updated = !prefetch_hit&&ss1_updated ? 1'b1 : L1_updated&&(prefetch_hit||L1_found_in_cache) ? 1'b1 :  1'b0;      
+    
+    L1_cache #(.way(L1_way),.block_size_byte(L1_block_size_byte),.cache_size_byte(L1_cache_size_byte)) i1  (clk,reset,L1_tag,L1_index,L1_block_offset,trace_ready,prefetch_hit,L2_found_in_cache,L1_hit_count,L1_found_in_cache,L1_updated,L1_done,prefetch_done);
+    L2_cache #(.way(L2_way),.block_size_byte(L2_block_size_byte),.set_size(L2_set_size)) i2 (clk,reset,L2_tag,L2_index,L2_block_offset,(prefetch_done && !prefetch_hit),L2_hit_count4,L2_hit_count8,L2_hit_count16,L2_found_in_cache,L2_hit_way,L2_done,L2_updated);
+    L2_cache_subset #(.way(L2_way),.block_size_byte(L2_block_size_byte),.set_size(L2_set_size/2)) i3 (clk,reset,L2_index,L2_updated,L2_found_in_cache,L2_hit_way,ss1_found_in_cache,L2_ss1_count4,L2_ss1_count8,L2_ss1_count16,ss1_hit_way,ss1_updated);
+    L2_cache_subset #(.way(L2_way),.block_size_byte(L2_block_size_byte),.set_size(L2_set_size/4)) i4 (clk,reset,ss1_index,ss1_updated,ss1_found_in_cache,ss1_hit_way,ss2_found_in_cache,L2_ss2_count4,L2_ss2_count8,L2_ss2_count16,ss2_hit_way,ss2_updated);
+    prefetcher #(.way(L1_way),.block_size_byte(L1_block_size_byte),.cache_size_byte(L1_cache_size_byte)) i5(clk,mem_addr,(L1_done && !L1_found_in_cache),prefetch_hit);
+    
+endmodule
+
+module L1_cache(clk,reset,tag,index,block_offset,find_start,prefetch_hit,L2_cache_hit,cache_hit_count,found_in_cache,updated,done_L1,done_prefetch);
+
+    parameter way = 4;
+    parameter block_size_byte = 16;
+    parameter cache_size_byte = 32*1024;
+    
+    parameter block_offset_index = $rtoi($ln(block_size_byte)/$ln(2));
+    parameter set = cache_size_byte/(block_size_byte*way); 
+    parameter set_index = $rtoi($ln(set)/$ln(2));
+    parameter way_width = $rtoi($ln(way)/$ln(2));
+    parameter cache_line_width = 32-set_index-block_offset_index+1;
+    
+    input clk,find_start,L2_cache_hit,prefetch_hit,reset;
+    input [31-set_index-block_offset_index:0] tag;
+    input [set_index-1:0] index;
+    input [block_offset_index-1:0] block_offset;
+    output reg found_in_cache;
+    output reg [19:0] cache_hit_count;
+    output reg updated,done_L1,done_prefetch;
+    
+    reg [1:0]find_state;
+    reg [1:0]flag;
+    reg bi_flag;
+    reg [way_width:0] way_index;
+    reg [way_width-1:0] hit_way;
+    reg [cache_line_width-1:0] cache [0:set-1][0:way-1];
+    reg [cache_line_width-1:0] temp_content;
+    reg [cache_line_width-1:0] temp_content1 [0:way-1];
+    
+    integer i,j;
+    
+    initial
+    begin
+        found_in_cache = 0;
+        cache_hit_count = 0;        
+        find_state = 0;
+        flag = 0;
+        bi_flag = 0;
+        updated = 0;
+        done_prefetch = 0;
+        for (i=0;i<set;i=i+1)
+            for (j=0;j<way;j=j+1)
+                cache[i][j] = 0;
+    end
+    
+    always @ (posedge clk)
+    begin
+        if (reset)
+        begin
+            found_in_cache = 0;
+            cache_hit_count = 0;
+            //cache_miss_count = 0;
+            find_state = 0;
+            flag = 0;
+            bi_flag = 0;
+            updated = 0;
+            done_prefetch = 0;
+            for (i=0;i<set;i=i+1)
+                for (j=0;j<way;j=j+1)
+                    cache[i][j] = 0;     
+        end
+        else
+        begin
+            case (find_state)
+                2'b00: begin          
+                        found_in_cache = 1'b0;
+                        if(find_start)
+                        begin                        
+                            find_state = 2'b01;                        
+                            done_L1 = 1'b0;
+                        end        
+                      end
+                      
+                2'b01: begin
+                  
+                        if (done_L1 && !found_in_cache)
+                        begin
+                            find_state = 2'b10;
+                            done_L1 = 1'b0;
+                        end    
+                        else if (done_L1 && found_in_cache)
+                        begin
+                            find_state = 2'b11;
+                            done_L1 = 1'b0;
+                        end    
+                        else
+                        begin
+                            for (way_index=0;way_index<way;way_index=way_index+1'b1)
+                            begin
+                                if(cache[index][way_index][cache_line_width-1]) // valid bit
+                                begin
+                                    if(cache[index][way_index][cache_line_width-2:0]==tag) // tag comparison
+                                    begin
+                                        found_in_cache = 1'b1;                                    
+                                        hit_way = way_index;                                    
+                                        temp_content = cache[index][way_index];
+                                        cache_hit_count = cache_hit_count + 1'b1;
+                                        done_L1 = 1'b1;                                    
+                                    end
+                               end                                                                                                                                                                                                                                                                                                                                                                                                                                
+                            end                                               
+                            if (way_index==way&&!found_in_cache)
+                            begin
+                                    //cache_miss_count = cache_miss_count + 1'b1; 
+                                    found_in_cache = 1'b0;                                
+                                    done_L1 = 1'b1;
+                            end                                                  
+                        end 
+                       end
+    
+                2'b10: begin
+                        if(done_prefetch)
+                        begin
+                            find_state= 2'b11;
+                            done_prefetch = 1'b0;
+                            flag = 1'b0;
+                        end   
+                        else
+                        begin
+                            if(!flag)
+                                flag = 1'b1;
+                            else
+                            begin
+                                if(prefetch_hit&flag)                        
+                                begin
+                                    flag = 1'b0;
+                                    cache_hit_count = cache_hit_count + 1'b1;                                     
+                                end
+                                //else                                                                
+                                    //cache_miss_count = cache_miss_count + 1'b1;                              
+                                done_prefetch = 1'b1;
+                            end            
+                        end
+                       end                                                        
+                       
+                2'b11: begin                
+                        if(updated)
+                        begin
+                            find_state = 2'b00;
+                            updated = 1'b0;
+                        end
+                        
+                        else
+                        begin
+                            if(found_in_cache) // hit
+                            begin  
+                                if(hit_way != 0)
+                                    begin
+                                        cache[index][hit_way] = cache[index][hit_way-1];
+                                        hit_way = hit_way - 1;
+                                    end
+                                    else
+                                    begin            
+                                        cache[index][0] = temp_content;
+                                        updated = 1'b1;
+                                    end                                    
+                            end 
+                            
+                            else // miss
+                            begin
+                                for(way_index=way-2;way_index>0;way_index=way_index-1)                            
+                                    cache[index][way_index+1] = cache[index][way_index];                                    
+        
+                                cache[index][1] = cache[index][0];
+                                cache[index][0] = {1'b1,tag};  
+                                updated = 1'b1;                                                  
+                            end                                                                                                                                                                                                              
+                        end 
+                                                  
+                       end                     
+            endcase
+         end       
+    end   
+    
+endmodule
+
+module L2_cache(clk,reset,tag,index,block_offset,find_start,cache_hit_count4,cache_hit_count8,cache_hit_count16,found_in_cache,hit_way,done,updated);
+
+    parameter way = 16;
+    parameter block_size_byte = 16;
+    parameter set_size = 64;
+    
+    parameter block_offset_index = $rtoi($ln(block_size_byte)/$ln(2)); 
+    parameter set_index = $rtoi($ln(set_size)/$ln(2));
+    parameter way_width = $rtoi($ln(way)/$ln(2));
+    parameter cache_line_width = 32-set_index-block_offset_index+1; 
+    
+    input clk,find_start,reset;
+    input [31-set_index-block_offset_index:0] tag;
+    input [set_index-1:0] index;
+    input [block_offset_index-1:0] block_offset;
+    output reg found_in_cache;
+    // this needs to get parameterized based on number of max associativity        
+    output reg [19:0] cache_hit_count4;
+    output reg [19:0] cache_hit_count8;
+    output reg [19:0] cache_hit_count16;
+    output reg [way_width:0] hit_way;   
+    output reg updated,done;        
+    
+    reg [1:0]find_state;
+    reg [way_width:0] way_index;    
+    reg [cache_line_width-1:0] cache [0:set_size-1][0:way-1];
+    reg [cache_line_width-1:0] temp_content;
+    //reg [19:0] hit_count [way-1:0];
+    
+    integer i,j;
+    
+    initial
+    begin
+        found_in_cache = 0;        
+        cache_hit_count4 = 0;        
+        cache_hit_count8 = 0;
+        cache_hit_count16 = 0;                
+        find_state = 0;
+        updated = 0;        
+        for (i=0;i<set_size;i=i+1)
+            for (j=0;j<way;j=j+1)
+                cache[i][j] = 0;
+    end
+    
+    always @ (posedge clk)
+    begin
+        if (reset)
+        begin
+            found_in_cache = 0;        
+            cache_hit_count4 = 0;        
+            cache_hit_count8 = 0;
+            cache_hit_count16 = 0;                   
+            find_state = 0;
+            updated = 0;            
+            for (i=0;i<set_size;i=i+1)
+                for (j=0;j<way;j=j+1)
+                    cache[i][j] = 0;    
+        end
+        else
+        begin
+            case (find_state)
+                2'b00: begin          
+                        found_in_cache = 1'b0;                             
+                        if(find_start)
+                        begin
+                            find_state = 2'b01;                        
+                            done = 1'b0;
+                        end        
+                      end
+                      // Find cache state (to check if particular memory address data is present in cache or not)
+                      
+                2'b01: begin
+                  
+                        if (done)
+                        begin
+                            find_state = 2'b10;                                                   
+                        end        
+                        else
+                        begin
+                            for (way_index=0;way_index<way;way_index=way_index+1'b1)
+                            begin
+                                if(cache[index][way_index][cache_line_width-1]) // valid bit
+                                begin
+                                    if(cache[index][way_index][cache_line_width-2:0]==tag) // tag comparison
+                                    begin
+                                        found_in_cache = 1'b1;                                    
+                                        hit_way = way_index;                                    
+                                        temp_content = cache[index][way_index];                                    
+                                    end
+                               end                                                                                          
+                            end                        
+                            if (found_in_cache)
+                            begin
+                                if (hit_way>=0 && hit_way<4)
+                                begin
+                                    cache_hit_count4 = cache_hit_count4 + 1;
+                                    cache_hit_count8 = cache_hit_count8 + 1;
+                                    cache_hit_count16 = cache_hit_count16 + 1;
+                                end
+                                else if (hit_way>=4 && hit_way<8)
+                                begin
+                                    cache_hit_count8 = cache_hit_count8 + 1;
+                                    cache_hit_count16 = cache_hit_count16 + 1;
+                                end
+                                else if (hit_way>=8 && hit_way<16)
+                                begin                                    
+                                    cache_hit_count16 = cache_hit_count16 + 1;
+                                end
+                                done = 1'b1;
+                                way_index = hit_way;                                                                                      
+                            end  
+                            else                                                                
+                            begin
+                                hit_way = way;
+                                done = 1'b1;
+                            end                                                  
+                        end 
+                       end
+                      
+                       // Updation of cache according LRU shift register policy
+                2'b10: begin                
+                        if(updated)
+                        begin
+                            find_state = 2'b00;
+                            updated = 1'b0;
+                        end
+                        else
+                        begin                            
+                            if(found_in_cache) // hit
+                            begin 
+                                   if(way_index != 0)
+                                    begin
+                                        cache[index][way_index] = cache[index][way_index-1];
+                                        way_index = way_index - 1;
+                                    end
+                                    else
+                                    begin            
+                                        cache[index][0] = temp_content;
+                                        updated = 1'b1;
+                                    end                                                                       
+                            end    
+                            else // miss
+                            begin                                                                                          
+                                for(way_index=way-2;way_index>0;way_index=way_index-1)                            
+                                  cache[index][way_index+1] = cache[index][way_index];                                    
+    
+                                cache[index][1] = cache[index][0];
+                                cache[index][0] = {1'b1,tag};
+                                updated = 1'b1;             
+                            end                                                                                    
+                        end                           
+                       end 
+                        
+            endcase
+         end       
+    end  
+endmodule
+
+module L2_cache_subset(clk,reset,msb_index,find_start,L2_found_in_cache,hit_way,found_in_cache,cache_hit_count4,cache_hit_count8,cache_hit_count16,hit_source,updated);
+
+    parameter way = 16;
+    parameter block_size_byte = 16;
+    parameter set_size = 512;   
+    parameter set_index = $rtoi($ln(set_size)/$ln(2));
+    parameter way_width = $rtoi($ln(way)/$ln(2));
+    
+    
+    input clk,find_start,reset,L2_found_in_cache;
+    input [way_width:0] hit_way;
+    input [set_index:0] msb_index;
+    output reg found_in_cache;
+    // this needs to get parameterized based on number of max associativity
+    output reg [19:0] cache_hit_count4;
+    output reg [19:0] cache_hit_count8;
+    output reg [19:0] cache_hit_count16;
+    output reg updated;
+    output reg [way_width:0] hit_source;
+    
+    reg msb_indexbit,msb_update,mask_update;
+    reg [set_index-1:0] index;        
+    reg [way_width:0] way_index; 
+      
+    reg mask0 [0:set_size-1][0:way-1];
+    reg mask1 [0:set_size-1][0:way-1];
+    reg [1:0] source [0:set_size-1][0:way-1];
+    reg [1:0] temp_data;
+    reg temp_source;
+    reg [way_width:0] count;
+    //reg [19:0] hit_count [way-1:0];
+    
+    reg [1:0]find_state;
+    reg done;    
+    
+    integer i,j;
+    
+    
+    initial
+    begin
+        found_in_cache = 0;
+        cache_hit_count4 = 0;        
+        cache_hit_count8 = 0;
+        cache_hit_count16 = 0;
+        //cache_miss_count = 0;        
+        find_state = 0;
+        temp_data = 0;   
+        msb_update = 0;  
+        mask_update = 0;   
+        for (i=0;i<set_size;i=i+1)
+        begin
+            for (j=0;j<way;j=j+1)
+            begin
+                source[i][j] = 0;
+                mask0[i][j] = 0;
+                mask1[i][j] = 0;
             end
         end
     end
+    
+    always @ (posedge clk)
+    begin
+        if (reset)
+        begin
+            found_in_cache = 0;
+            cache_hit_count4 = 0;        
+            cache_hit_count8 = 0;
+            cache_hit_count16 = 0;                    
+            find_state = 0;
+            temp_data = 0;            
+            for (i=0;i<set_size;i=i+1)
+            begin
+                for (j=0;j<way;j=j+1)
+                begin
+                    source[i][j] = 0;
+                    mask0[i][j] = 0;
+                    mask1[i][j] = 0;
+                end
+            end
+        end
+        else
+        begin
+            case (find_state)
+                2'b00: begin          
+                                                     
+                        if(find_start)
+                        begin
+                            found_in_cache = 1'b0;
+                            find_state = 2'b01;                        
+                            done = 1'b0;                        
+                        end                            
+                       end
+                       
+                2'b01: begin
+                       
+                       if (done)
+                       begin
+                           find_state = 2'b10;
+                           count = 0;
+                       end     
+                       else
+                       begin
+                          // to check if there is hit in cache 
+                          msb_indexbit = msb_index[set_index];
+                          index[set_index-1:0] = msb_index[set_index-1:0];                          
+                          if (L2_found_in_cache)
+                          begin
+                              if (msb_indexbit)
+                              begin                          
+                                 if(mask1[index][hit_way])
+                                 begin                            
+                                    temp_source = 1'b1;                         
+                                    found_in_cache = 1'b1;
+                                 end                               
+                              end
+                              else if (!msb_indexbit)
+                              begin                                                    
+                                 if(mask0[index][hit_way]) // mask0[0][16]
+                                 begin                           
+                                    temp_source = 1'b0;
+                                    found_in_cache = 1'b1;
+                                 end   
+                              end
+                              if (found_in_cache)                      
+                              begin                         
+                                 way_index = 0;
+                                 if(temp_source)
+                                 begin
+                                    for(way_index=0;way_index<way;way_index=way_index+1)
+                                    begin
+                                        if(source[index][way_index]==2'b11&&count<hit_way+1)
+                                            count = count + 1;
+                                        if(count==hit_way+1)
+                                        begin                                    
+                                            temp_data =  source[index][way_index]; 
+                                            hit_source = way_index;
+                                            count = count + 1;
+                                        end                                    
+                                    end               
+                                 end
+                                 else if (!temp_source)
+                                 begin
+                                    for(way_index=0;way_index<way;way_index=way_index+1)
+                                    begin
+                                        if(source[index][way_index]==2'b10&&count<hit_way+1)
+                                            count = count + 1;
+                                        if(count==hit_way+1)
+                                        begin                                    
+                                            temp_data =  source[index][way_index]; 
+                                            hit_source = way_index;
+                                            count = count + 1;
+                                        end                                    
+                                    end                                                   
+                                 end                                                         
+                                 
+                                    if (hit_source>=0 && hit_source<4)
+                                    begin
+                                        cache_hit_count4 = cache_hit_count4 + 1;
+                                        cache_hit_count8 = cache_hit_count8 + 1;
+                                        cache_hit_count16 = cache_hit_count16 + 1;
+                                    end
+                                    else if (hit_source>=4 && hit_source<8)
+                                    begin
+                                        cache_hit_count8 = cache_hit_count8 + 1;
+                                        cache_hit_count16 = cache_hit_count16 + 1;
+                                    end
+                                    else if (hit_source>=8 && hit_source<16)
+                                    begin                                    
+                                        cache_hit_count16 = cache_hit_count16 + 1;
+                                    end
+                                 way_index = hit_source;
+                                 done = 1'b1;                              
+                            end
+                            else
+                            begin                                                          
+                             hit_source = way;
+                             done = 1'b1;      
+                            end
 
+                          end
+                          
+                          else
+                          begin                            
+                             hit_source = way;
+                             done = 1'b1;      
+                          end
+                       end     
+                       end
+               2'b10: begin
+                      
+                      if(updated)
+                      begin
+                        find_state = 2'b00;
+                        updated = 1'b0;
+                        count = 0;
+                      end
+                      else
+                      begin
+                        if(found_in_cache)
+                        begin
+                            
+                           if(msb_indexbit && !msb_update && !mask_update)
+                           begin
+                              if(way_index != 0)
+                              begin
+                                mask1[index][way_index] = mask1[index][way_index-1];
+                                way_index = way_index - 1;
+                              end
+                              else
+                              begin            
+                                mask1[index][0] = 1'b1;
+                                msb_update = 1'b1;
+                              end                                                                                                                               
+                                                               
+                           end
+                           else if (!msb_indexbit && !msb_update && !mask_update)
+                           begin
+                              if(way_index != 0)
+                              begin
+                                mask0[index][way_index] = mask0[index][way_index-1];
+                                way_index = way_index - 1;
+                              end
+                              else
+                              begin            
+                                mask0[index][0] = 1'b1;
+                                msb_update = 1'b1;                                
+                              end                                                                                                                               
+                                                               
+                           end
+                           if (msb_update && !mask_update)
+                           begin
+                            for(way_index=0;way_index<way;way_index=way_index+1)
+                            begin
+                                if(mask0[index][way_index])
+                                    count = count + 1;
+                                if(mask1[index][way_index])
+                                    count = count + 1;    
+                            end                                                       
+                            
+                            // if count>way then we have to change one of mask from 1 to 0 based on LRU
+                            if(count>way)
+                            begin
+                                if(source[index][way-1] == 2'b11)
+                                begin
+                                    way_index = way-1;
+                                    while(way_index>0 && !mask1[index][way_index])
+                                        way_index = way_index - 1;
+                                    mask1[index][way_index] = 1'b0;                                   
+                                end
+                                else if (source[index][way-1] == 2'b10)
+                                begin
+                                    way_index = way-1;
+                                    while(way_index>0 && !mask0[index][way_index])
+                                        way_index = way_index - 1;
+                                    mask0[index][way_index] = 1'b0;                                
+                                end    
+                            end
+                            mask_update = 1'b1;
+                            way_index = hit_source;
+                            msb_update = 1'b0;
+                           end
+                           if(mask_update)
+                           begin
+                               if(way_index != 0)
+                               begin
+                                    source[index][way_index] = source[index][way_index-1];
+                                    way_index = way_index - 1;
+                               end
+                               else
+                               begin
+                                    mask_update = 1'b0;                                           
+                                    source[index][0] = temp_data;
+                                    updated = 1'b1;
+                               end                                                                                                                               
+                           end                                                      
+                        end                   
+                        else // cache miss
+                        begin
+                                                                       
+                            // update mask register
+                            if(msb_indexbit)
+                            begin
+                                for(way_index=way-2;way_index>0;way_index=way_index-1)                                         
+                                    mask1[index][way_index+1] = mask1[index][way_index];
+                                mask1[index][1] = mask1[index][0];
+                                mask1[index][0] = 1'b1;                                  
+                            end
+                            else
+                            begin
+                                for(way_index=way-2;way_index>0;way_index=way_index-1)                                         
+                                    mask0[index][way_index+1] = mask0[index][way_index];
+                                mask0[index][1] = mask0[index][0];
+                                mask0[index][0] = 1'b1;           
+                            end
+                            
+                            // after updating  the mask registers check count of 1's in both mask 0 and 1                        
+                            for(way_index=0;way_index<way;way_index=way_index+1)
+                            begin
+                                if(mask0[index][way_index])
+                                    count = count + 1;
+                                if(mask1[index][way_index])
+                                    count = count + 1;    
+                            end                                                       
+                            
+                            // if count>way then we have to change one of mask from 1 to 0 based on LRU
+                            if(count>way)
+                            begin
+                                if(source[index][way-1] == 2'b11)
+                                begin
+                                    way_index = way-1;
+                                    while(way_index>0 && !mask1[index][way_index])
+                                        way_index = way_index - 1;
+                                    mask1[index][way_index] = 1'b0;                                   
+                                end
+                                else if (source[index][way-1] == 2'b10)
+                                begin
+                                    way_index = way-1;
+                                    while(way_index>0 && !mask0[index][way_index])
+                                        way_index = way_index - 1;
+                                    mask0[index][way_index] = 1'b0;                                
+                                end    
+                            end
+                            
+                             // update source register
+                            for(way_index=way-2;way_index>0;way_index=way_index-1)                                         
+                                source[index][way_index+1] = source[index][way_index];                                                                                               
+                            source[index][1] = source[index][0];
+                            source[index][0] = {1'b1,msb_indexbit};                               
+                            updated = 1'b1;
+                                    
+                        end
+                      end
+               
+                      end
+                                  
+            endcase
+         end                 
+    end    
 endmodule
-`default_nettype wire
+
+
+module prefetcher (clk,address,cache_miss,prefetch_hit);
+
+    parameter way = 4;
+    parameter block_size_byte = 16;
+    parameter cache_size_byte = 1024;
+    
+    parameter block_offset_index = $rtoi($ln(block_size_byte)/$ln(2));  //2
+    parameter set = cache_size_byte/(block_size_byte*way); 
+    parameter set_index = $rtoi($ln(set)/$ln(2));
+     
+    parameter prefetch_width = 32-block_offset_index  + 1; // without data (tag+index+valid) 
+    
+    input clk,cache_miss;
+    input [31:0]address;
+    output reg prefetch_hit;
+    
+    
+    wire [31-set_index-block_offset_index:0] tag;      // 23 bits..22:0
+    wire [set_index-1:0] index;                        //7 bits...6:0
+    wire [block_offset_index-1:0] block_offset;         //2 bits...1:0
+    
+     
+    wire [set_index-1:0]temp_index;
+    wire [5:0]prefetch_fill_index;
+    wire valid_buffer_check, valid_fill_check;
+    
+    reg [prefetch_width-1:0]prefetch_buffer[0:7];
+    wire [2:0] hit_index;
+    reg [2:0] way_index;    
+    reg [1:0] find_state;
+    reg done,updated;
+    reg [prefetch_width-1:0] temp_data;
+    integer k;
+     
+     assign block_offset = address[block_offset_index-1:0];
+     assign index = address[set_index+block_offset_index-1:block_offset_index];
+     assign tag =address[31:set_index+block_offset_index];
+     assign temp_index = index + 1;
+         
+     assign valid_buffer_check = (prefetch_buffer[0][prefetch_width-1] && (prefetch_buffer[0][prefetch_width-2:0]==address[31:block_offset_index])) || (prefetch_buffer[1][prefetch_width-1] && (prefetch_buffer[1][prefetch_width-2:0]==address[31:block_offset_index])) || (prefetch_buffer[2][prefetch_width-1] && (prefetch_buffer[2][prefetch_width-2:0]==address[31:block_offset_index])) || (prefetch_buffer[3][prefetch_width-1] && (prefetch_buffer[3][prefetch_width-2:0]==address[31:block_offset_index])) || (prefetch_buffer[4][prefetch_width-1] && (prefetch_buffer[4][prefetch_width-2:0]==address[31:block_offset_index])) || (prefetch_buffer[5][prefetch_width-1] && (prefetch_buffer[5][prefetch_width-2:0]==address[31:block_offset_index])) || (prefetch_buffer[6][prefetch_width-1] && (prefetch_buffer[6][prefetch_width-2:0]==address[31:block_offset_index])) || (prefetch_buffer[7][prefetch_width-1] && (prefetch_buffer[7][prefetch_width-2:0]==address[31:block_offset_index])); 
+     
+     assign hit_index = (prefetch_buffer[0][prefetch_width-1] && (prefetch_buffer[0][prefetch_width-2:0]==address[31:block_offset_index])) ? 3'b000 :     
+                        (prefetch_buffer[1][prefetch_width-1] && (prefetch_buffer[1][prefetch_width-2:0]==address[31:block_offset_index])) ? 3'b001 :
+                        (prefetch_buffer[2][prefetch_width-1] && (prefetch_buffer[2][prefetch_width-2:0]==address[31:block_offset_index])) ? 3'b010 :
+                        (prefetch_buffer[3][prefetch_width-1] && (prefetch_buffer[3][prefetch_width-2:0]==address[31:block_offset_index])) ? 3'b011 : 
+                        (prefetch_buffer[4][prefetch_width-1] && (prefetch_buffer[4][prefetch_width-2:0]==address[31:block_offset_index])) ? 3'b100 :
+                        (prefetch_buffer[5][prefetch_width-1] && (prefetch_buffer[5][prefetch_width-2:0]==address[31:block_offset_index])) ? 3'b101 :
+                        (prefetch_buffer[6][prefetch_width-1] && (prefetch_buffer[6][prefetch_width-2:0]==address[31:block_offset_index])) ? 3'b110 : 3'b111;
+                        
+     
+     assign valid_fill_check = (prefetch_buffer[0][prefetch_width-1] && (prefetch_buffer[0][prefetch_width-2:0]=={tag,temp_index})) || (prefetch_buffer[1][prefetch_width-1] && (prefetch_buffer[1][prefetch_width-2:0]=={tag,temp_index})) || (prefetch_buffer[2][prefetch_width-1] && (prefetch_buffer[2][prefetch_width-2:0]=={tag,temp_index})) || (prefetch_buffer[3][prefetch_width-1] && (prefetch_buffer[3][prefetch_width-2:0]=={tag,temp_index})) || (prefetch_buffer[4][prefetch_width-1] && (prefetch_buffer[4][prefetch_width-2:0]=={tag,temp_index})) || (prefetch_buffer[5][prefetch_width-1] && (prefetch_buffer[5][prefetch_width-2:0]=={tag,temp_index})) || (prefetch_buffer[6][prefetch_width-1] && (prefetch_buffer[6][prefetch_width-2:0]=={tag,temp_index})) || (prefetch_buffer[7][prefetch_width-1] && (prefetch_buffer[7][prefetch_width-2:0]=={tag,temp_index}));
+     initial
+     begin
+        for (k=0;k<8;k=k+1)
+            prefetch_buffer[k] = 0;
+        done = 0;
+        updated = 0;
+        prefetch_hit = 0;
+        find_state = 0;        
+    end
+    
+    always @ (posedge clk)
+    begin
+        case(find_state)
+            2'b00: begin
+                    done = 1'b0;
+	                prefetch_hit = 1'b0;
+                    if(cache_miss)
+                    begin
+                        find_state = 2'b01;
+                    end    
+                  end
+                  
+            2'b01: begin
+                    if(done)
+                        find_state = 2'b10;
+                    else
+                    begin                                           
+                        if(valid_buffer_check)
+                        begin
+                            prefetch_hit = 1'b1;
+                            done = 1'b1;
+                        end
+                        else 
+                        begin
+                            done =1'b1;
+                            prefetch_hit=1'b0;
+                        end
+                     end                                                        
+                   end
+            
+            2'b10: begin
+                    if(updated)
+                    begin
+                        find_state = 2'b00;
+                        updated = 1'b0;
+                    end
+                    else
+                    begin
+                        if(!prefetch_hit)
+                        begin
+                            if(valid_fill_check)
+                                updated = 1'b1;
+                            else
+                            begin    
+                                prefetch_buffer[7] = prefetch_buffer[6];
+                                prefetch_buffer[6] = prefetch_buffer[5];
+                                prefetch_buffer[5] = prefetch_buffer[4];
+                                prefetch_buffer[4] = prefetch_buffer[3];
+                                prefetch_buffer[3] = prefetch_buffer[2];
+                                prefetch_buffer[2] = prefetch_buffer[1];
+                                prefetch_buffer[1] = prefetch_buffer[0];
+                                prefetch_buffer[0] = {1'b1,tag,temp_index};
+                                updated = 1'b1;
+                             end                                                                     
+                        end
+                        
+                        if(prefetch_hit)
+                        begin
+                            temp_data = prefetch_buffer[hit_index];
+                            for (way_index=0;way_index<hit_index;way_index=way_index+1)
+                                prefetch_buffer[hit_index-way_index]= prefetch_buffer[hit_index-way_index-1];
+                            prefetch_buffer[0] = temp_data;
+                                                        
+                            updated = 1'b1;             
+                        end
+                                                
+                    end
+                   end       
+        endcase
+    end
+endmodule