/*****************************************************************************
 *                                McPAT/CACTI
 *                      SOFTWARE LICENSE AGREEMENT
 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
 *                          All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ***************************************************************************/



#include <cassert>

#include "mat.h"

Mat::Mat(const DynamicParameter & dyn_p)
    : dp(dyn_p),
      power_subarray_out_drv(),
      delay_fa_tag(0), delay_cam(0),
      delay_before_decoder(0), delay_bitline(0),
      delay_wl_reset(0), delay_bl_restore(0),
      delay_searchline(0), delay_matchchline(0),
      delay_cam_sl_restore(0), delay_cam_ml_reset(0),
      delay_fa_ram_wl(0), delay_hit_miss_reset(0),
      delay_hit_miss(0),
      subarray(dp, dp.fully_assoc),
      power_bitline(), per_bitline_read_energy(0),
      deg_bl_muxing(dp.deg_bl_muxing),
      num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
      delay_writeback(0),
      cell(subarray.cell), cam_cell(subarray.cam_cell),
      is_dram(dyn_p.is_dram),
      pure_cam(dyn_p.pure_cam),
      num_mats(dp.num_mats),
      power_sa(), delay_sa(0),
      leak_power_sense_amps_closed_page_state(0),
      leak_power_sense_amps_open_page_state(0),
      delay_subarray_out_drv(0),
      delay_comparator(0), power_comparator(),
      num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
      num_subarrays_per_mat(dp.num_subarrays / dp.num_mats),
      num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) {
    assert(num_subarrays_per_mat <= 4);
    assert(num_subarrays_per_row <= 2);
    is_fa = (dp.fully_assoc) ? true : false;
    camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.

    if (is_fa || pure_cam) {
        num_subarrays_per_row = num_subarrays_per_mat > 2 ?
            num_subarrays_per_mat / 2 : num_subarrays_per_mat;
    }

    if (dp.use_inp_params == 1) {
        RWP  = dp.num_rw_ports;
        ERP  = dp.num_rd_ports;
        EWP  = dp.num_wr_ports;
        SCHP = dp.num_search_ports;
    } else {
        RWP = g_ip->num_rw_ports;
        ERP = g_ip->num_rd_ports;
        EWP = g_ip->num_wr_ports;
        SCHP = g_ip->num_search_ports;

    }

    double number_sa_subarray;

    if (!is_fa && !pure_cam) {
        number_sa_subarray = subarray.num_cols / deg_bl_muxing;
    } else if (is_fa && !pure_cam) {
        number_sa_subarray =  (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
    }

    else {
        number_sa_subarray =  (subarray.num_cols_fa_cam) / deg_bl_muxing;
    }

    int    num_dec_signals           = subarray.num_rows;
    double C_ld_bit_mux_dec_out      = 0;
    double C_ld_sa_mux_lev_1_dec_out = 0;
    double C_ld_sa_mux_lev_2_dec_out = 0;
    double R_wire_wl_drv_out;

    if (!is_fa && !pure_cam) {
        R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
    } else if (is_fa && !pure_cam) {
        R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
    } else {
        R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
    }

    double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
    double R_wire_sa_mux_dec_out  = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;

    if (deg_bl_muxing > 1) {
        C_ld_bit_mux_dec_out =
            (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) *
            gate_C(g_tp.w_nmos_b_mux, 0, is_dram) +  // 2 transistor per cell
            num_subarrays_per_row * subarray.num_cols *
            g_tp.wire_inside_mat.C_per_um * cell.get_w();
    }

    if (dp.Ndsam_lev_1 > 1) {
        C_ld_sa_mux_lev_1_dec_out =
            (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) *
            gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
            num_subarrays_per_row * subarray.num_cols *
            g_tp.wire_inside_mat.C_per_um * cell.get_w();
    }
    if (dp.Ndsam_lev_2 > 1) {
        C_ld_sa_mux_lev_2_dec_out =
            (num_subarrays_per_mat * number_sa_subarray /
             (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) *
            gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
            num_subarrays_per_row * subarray.num_cols *
            g_tp.wire_inside_mat.C_per_um * cell.get_w();
    }

    if (num_subarrays_per_row >= 2) {
        // wire heads for both right and left side of a mat, so half the resistance
        R_wire_bit_mux_dec_out /= 2.0;
        R_wire_sa_mux_dec_out  /= 2.0;
    }


    row_dec = new Decoder(
        num_dec_signals,
        false,
        subarray.C_wl,
        R_wire_wl_drv_out,
        false/*is_fa*/,
        is_dram,
        true,
        camFlag ? cam_cell : cell);
//  if (is_fa && (!dp.is_tag))
//  {
//    row_dec->exist = true;
//  }
    bit_mux_dec = new Decoder(
        deg_bl_muxing,// This number is 1 for FA or CAM
        false,
        C_ld_bit_mux_dec_out,
        R_wire_bit_mux_dec_out,
        false/*is_fa*/,
        is_dram,
        false,
        camFlag ? cam_cell : cell);
    sa_mux_lev_1_dec = new Decoder(
        dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
        dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
        C_ld_sa_mux_lev_1_dec_out,
        R_wire_sa_mux_dec_out,
        false/*is_fa*/,
        is_dram,
        false,
        camFlag ? cam_cell : cell);
    sa_mux_lev_2_dec = new Decoder(
        dp.Ndsam_lev_2, // This number is 1 for FA or CAM
        false,
        C_ld_sa_mux_lev_2_dec_out,
        R_wire_sa_mux_dec_out,
        false/*is_fa*/,
        is_dram,
        false,
        camFlag ? cam_cell : cell);

    double C_wire_predec_blk_out;
    double R_wire_predec_blk_out;

    if (!is_fa && !pure_cam) {

        C_wire_predec_blk_out  = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
        R_wire_predec_blk_out  = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;

    } else { //for pre-decode block's load is same for both FA and CAM
        C_wire_predec_blk_out  = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
        R_wire_predec_blk_out  = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
    }


    if (is_fa || pure_cam)
        num_dec_signals += _log2(num_subarrays_per_mat);

    PredecBlk * r_predec_blk1 = new PredecBlk(
        num_dec_signals,
        row_dec,
        C_wire_predec_blk_out,
        R_wire_predec_blk_out,
        num_subarrays_per_mat,
        is_dram,
        true);
    PredecBlk * r_predec_blk2 = new PredecBlk(
        num_dec_signals,
        row_dec,
        C_wire_predec_blk_out,
        R_wire_predec_blk_out,
        num_subarrays_per_mat,
        is_dram,
        false);
    PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
    PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
    PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
    PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
    PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
    PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
    dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
    dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);

    PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
    PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
    PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
    PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
    PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
    PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
    PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
    PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
    way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
    dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);

    r_predec            = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
    b_mux_predec        = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
    sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
    sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);

    subarray_out_wire   = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng

    double driver_c_gate_load;
    double driver_c_wire_load;
    double driver_r_wire_load;

    if (is_fa || pure_cam)

    {   //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
        driver_c_gate_load = (subarray.num_cols_fa_cam ) *
            gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
                   is_dram, false, false);
        driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
            g_tp.wire_outside_mat.C_per_um;
        driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
            g_tp.wire_outside_mat.R_per_um;
        cam_bl_precharge_eq_drv = new Driver(
            driver_c_gate_load,
            driver_c_wire_load,
            driver_r_wire_load,
            is_dram);

        if (!pure_cam) {
            //This is only used for fully asso not pure CAM
            driver_c_gate_load = (subarray.num_cols_fa_ram ) *
                gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
                       is_dram, false, false);
            driver_c_wire_load = subarray.num_cols_fa_ram * cell.w *
                g_tp.wire_outside_mat.C_per_um;
            driver_r_wire_load = subarray.num_cols_fa_ram * cell.w *
                g_tp.wire_outside_mat.R_per_um;
            bl_precharge_eq_drv = new Driver(
                driver_c_gate_load,
                driver_c_wire_load,
                driver_r_wire_load,
                is_dram);
        }
    }

    else {
        driver_c_gate_load =  subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
        driver_c_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
        driver_r_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
        bl_precharge_eq_drv = new Driver(
            driver_c_gate_load,
            driver_c_wire_load,
            driver_r_wire_load,
            is_dram);
    }
    double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
    double w_row_decoder    = area_row_decoder / subarray.area.get_h();

    double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
        compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();

    double h_subarray_out_drv = subarray_out_wire->area.get_area() *
                                (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();


    h_subarray_out_drv *= (RWP + ERP + SCHP);

    double h_comparators                = 0.0;
    double w_row_predecode_output_wires = 0.0;
    double h_bit_mux_dec_out_wires      = 0.0;
    double h_senseamp_mux_dec_out_wires = 0.0;

    if ((!is_fa) && (dp.is_tag)) {
        //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
        h_comparators  = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
        h_comparators *= (RWP + ERP);
    }


    int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
    int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
    w_row_predecode_output_wires   = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
                                     g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);


    double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
                             (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
                              h_subarray_out_drv + h_comparators);

    double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);

    if (deg_bl_muxing > 1) {
        h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
    }
    if (dp.Ndsam_lev_1 > 1) {
        h_senseamp_mux_dec_out_wires =  dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
    }
    if (dp.Ndsam_lev_2 > 1) {
        h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
    }

    double h_addr_datain_wires;
    if (!g_ip->ver_htree_wires_over_array) {
        h_addr_datain_wires = (dp.number_addr_bits_mat +
                               dp.number_way_select_signals_mat +
                               (dp.num_di_b_mat + dp.num_do_b_mat) /
                               num_subarrays_per_row) *
            g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);

        if (is_fa || pure_cam) {
            h_addr_datain_wires =
                (dp.number_addr_bits_mat +
                 dp.number_way_select_signals_mat +  //TODO: revisit
                 (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) *
                g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
                (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row *
                g_tp.wire_inside_mat.pitch * SCHP;
        }
        //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
        //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
        h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
                           h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
                          h_addr_datain_wires +
                          h_bit_mux_dec_out_wires +
                          h_senseamp_mux_dec_out_wires;

    }

    // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
    double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
                                        b_mux_predec_blk_drv1->area.get_area() +
                                        sa_mux_lev_1_predec_blk_drv1->area.get_area() +
                                        sa_mux_lev_2_predec_blk_drv1->area.get_area() +
                                        way_sel_drv1->area.get_area() +
                                        r_predec_blk_drv2->area.get_area() +
                                        b_mux_predec_blk_drv2->area.get_area() +
                                        sa_mux_lev_1_predec_blk_drv2->area.get_area() +
                                        sa_mux_lev_2_predec_blk_drv2->area.get_area() +
                                        r_predec_blk1->area.get_area() +
                                        b_mux_predec_blk1->area.get_area() +
                                        sa_mux_lev_1_predec_blk1->area.get_area() +
                                        sa_mux_lev_2_predec_blk1->area.get_area() +
                                        r_predec_blk2->area.get_area() +
                                        b_mux_predec_blk2->area.get_area() +
                                        sa_mux_lev_1_predec_blk2->area.get_area() +
                                        sa_mux_lev_2_predec_blk2->area.get_area() +
                                        bit_mux_dec->area.get_area() +
                                        sa_mux_lev_1_dec->area.get_area() +
                                        sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);

    double area_efficiency_mat;

//  if (!is_fa)
//  {
    assert(num_subarrays_per_mat / num_subarrays_per_row > 0);
    area.h = (num_subarrays_per_mat / num_subarrays_per_row) *
        subarray.area.h + h_non_cell_area;
    area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
    area.w = (area.h * area.w + area_mat_center_circuitry) / area.h;
    area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat *
        100.0 / area.get_area();

//    cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
//    cout<<"h_comparators"<<h_comparators<<endl;
//    cout<<"h_subarray_out_drv"<<h_subarray_out_drv<<endl;
//    cout<<"h_addr_datain_wires"<<h_addr_datain_wires<<endl;
//    cout<<"h_bit_mux_dec_out_wires"<<h_bit_mux_dec_out_wires<<endl;
//    cout<<"h_senseamp_mux_dec_out_wires"<<h_senseamp_mux_dec_out_wires<<endl;
//    cout<<"h_non_cell_area"<<h_non_cell_area<<endl;
//    cout<<"area.h =" << (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h<<endl;
//    cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
//    cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;

    assert(area.h > 0);
    assert(area.w > 0);
//  }
//  else
//  {
//    area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area;
//    area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
//    area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
//    area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
//  }
}



Mat::~Mat() {
    delete row_dec;
    delete bit_mux_dec;
    delete sa_mux_lev_1_dec;
    delete sa_mux_lev_2_dec;

    delete r_predec->blk1;
    delete r_predec->blk2;
    delete b_mux_predec->blk1;
    delete b_mux_predec->blk2;
    delete sa_mux_lev_1_predec->blk1;
    delete sa_mux_lev_1_predec->blk2;
    delete sa_mux_lev_2_predec->blk1;
    delete sa_mux_lev_2_predec->blk2;
    delete dummy_way_sel_predec_blk1;
    delete dummy_way_sel_predec_blk2;

    delete r_predec->drv1;
    delete r_predec->drv2;
    delete b_mux_predec->drv1;
    delete b_mux_predec->drv2;
    delete sa_mux_lev_1_predec->drv1;
    delete sa_mux_lev_1_predec->drv2;
    delete sa_mux_lev_2_predec->drv1;
    delete sa_mux_lev_2_predec->drv2;
    delete way_sel_drv1;
    delete dummy_way_sel_predec_blk_drv2;

    delete r_predec;
    delete b_mux_predec;
    delete sa_mux_lev_1_predec;
    delete sa_mux_lev_2_predec;

    delete subarray_out_wire;
    if (!pure_cam)
        delete bl_precharge_eq_drv;

    if (is_fa || pure_cam) {
        delete sl_precharge_eq_drv ;
        delete sl_data_drv ;
        delete cam_bl_precharge_eq_drv;
        delete ml_precharge_drv;
        delete ml_to_ram_wl_drv;
    }
}



double Mat::compute_delays(double inrisetime) {
    int k;
    double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl;
    double outrisetime_search, outrisetime, row_dec_outrisetime;
    // delay calculation for tags of fully associative cache
    if (is_fa || pure_cam) {
        //Compute search access time
        outrisetime_search = compute_cam_delay(inrisetime);
        if (is_fa) {
            bl_precharge_eq_drv->compute_delay(0);
            k = ml_to_ram_wl_drv->number_gates - 1;
            rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
            C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 *
                                   cell.h, is_dram, false, true) +
                drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h,
                         is_dram, false, true);
            C_ld = ml_to_ram_wl_drv->c_gate_load +
                ml_to_ram_wl_drv->c_wire_load;
            tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
            delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);

            R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
            r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
            R_bl = subarray.num_rows * r_b_metal;
            C_bl = subarray.C_bl;
            delay_bl_restore = bl_precharge_eq_drv->delay +
                log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
                    (g_tp.sram.Vbitpre - dp.V_b_sense)) *
                (R_bl_precharge * C_bl + R_bl * C_bl / 2);


            outrisetime_search = compute_bitline_delay(outrisetime_search);
            outrisetime_search = compute_sa_delay(outrisetime_search);
        }
        outrisetime_search = compute_subarray_out_drv(outrisetime_search);
        subarray_out_wire->set_in_rise_time(outrisetime_search);
        outrisetime_search = subarray_out_wire->signal_rise_time();
        delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;


        //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
        outrisetime = r_predec->compute_delays(inrisetime);
        row_dec_outrisetime = row_dec->compute_delays(outrisetime);

        outrisetime = b_mux_predec->compute_delays(inrisetime);
        bit_mux_dec->compute_delays(outrisetime);

        outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
        sa_mux_lev_1_dec->compute_delays(outrisetime);

        outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
        sa_mux_lev_2_dec->compute_delays(outrisetime);

        if (pure_cam) {
            outrisetime = compute_bitline_delay(row_dec_outrisetime);
            outrisetime = compute_sa_delay(outrisetime);
        }
        return outrisetime_search;
    } else {
        bl_precharge_eq_drv->compute_delay(0);
        if (row_dec->exist == true) {
            int k = row_dec->num_gates - 1;
            double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
            // TODO: this 4*cell.h number must be revisited
            double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 *
                                          cell.h, is_dram, false, true) +
                drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram,
                         false, true);
            double C_ld = row_dec->C_ld_dec_out;
            double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
            delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
        }
        double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
        double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
        double R_bl = subarray.num_rows * r_b_metal;
        double C_bl = subarray.C_bl;

        if (is_dram) {
            delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
        } else {
            delay_bl_restore = bl_precharge_eq_drv->delay +
                log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
                    (g_tp.sram.Vbitpre - dp.V_b_sense)) *
                (R_bl_precharge * C_bl + R_bl * C_bl / 2);
        }
    }



    outrisetime = r_predec->compute_delays(inrisetime);
    row_dec_outrisetime = row_dec->compute_delays(outrisetime);

    outrisetime = b_mux_predec->compute_delays(inrisetime);
    bit_mux_dec->compute_delays(outrisetime);

    outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
    sa_mux_lev_1_dec->compute_delays(outrisetime);

    outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
    sa_mux_lev_2_dec->compute_delays(outrisetime);

    outrisetime = compute_bitline_delay(row_dec_outrisetime);
    outrisetime = compute_sa_delay(outrisetime);
    outrisetime = compute_subarray_out_drv(outrisetime);
    subarray_out_wire->set_in_rise_time(outrisetime);
    outrisetime = subarray_out_wire->signal_rise_time();

    delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;

    if (dp.is_tag == true && dp.fully_assoc == false) {
        compute_comparator_delay(0);
    }

    if (row_dec->exist == false) {
        delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
    }
    return outrisetime;
}



double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() {

    double height =
        compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge,
                                       camFlag ? cam_cell.w :
                                       cell.w / (2 * (RWP + ERP + SCHP))) +
        // precharge circuitry
        compute_tr_width_after_folding(g_tp.w_pmos_bl_eq,
                                       camFlag ? cam_cell.w :
                                       cell.w / (RWP + ERP + SCHP));

    if (deg_bl_muxing > 1) {
        // col mux tr height
        height +=
            compute_tr_width_after_folding(g_tp.w_nmos_b_mux,
                                           cell.w / (2 * (RWP + ERP)));
        // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);  // bit mux dec out wires height
    }

    height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP));  // sense_amp_height

    if (dp.Ndsam_lev_1 > 1) {
        height += compute_tr_width_after_folding(
                      g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP));  // sense_amp_mux_height
        //height_senseamp_mux_decode_output_wires =  Ndsam * wire_inside_mat_pitch * (RWP + ERP);
    }

    if (dp.Ndsam_lev_2 > 1) {
        height += compute_tr_width_after_folding(
                      g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP));  // sense_amp_mux_height
        //height_senseamp_mux_decode_output_wires =  Ndsam * wire_inside_mat_pitch * (RWP + ERP);

        // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
        height += 2 * compute_tr_width_after_folding(
                      pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
        height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
    }

    // TODO: this should be uncommented...
    /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
      {
    //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
    double width_write_driver_write_mux  = width_write_driver_or_write_mux();
    double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
    cell.w *
    // deg_bl_muxing *
    dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
    height += height_write_driver_write_mux;
    }*/

    return height;
}



double Mat::compute_cam_delay(double inrisetime) {

    double out_time_ramp, this_delay;
    double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;


    double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
    Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
    Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
    Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;

    double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal,  dynSearchEng;
    int Htagbits;

    double driver_c_gate_load;
    double driver_c_wire_load;
    double driver_r_wire_load;
    //double searchline_precharge_time;

    double leak_power_cc_inverters_sram_cell         = 0;
    double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
    double leak_power_RD_port_sram_cell              = 0;
    double leak_power_SCHP_port_sram_cell            = 0;
    double leak_comparator_cam_cell                  =0;

    double gate_leak_comparator_cam_cell          = 0;
    double gate_leak_power_cc_inverters_sram_cell = 0;
    double gate_leak_power_RD_port_sram_cell      = 0;
    double gate_leak_power_SCHP_port_sram_cell    = 0;

    c_matchline_metal   = cam_cell.get_w() * g_tp.wire_local.C_per_um;
    c_searchline_metal  = cam_cell.get_h() * g_tp.wire_local.C_per_um;
    r_matchline_metal   = cam_cell.get_w() * g_tp.wire_local.R_per_um;
    r_searchline_metal  = cam_cell.get_h() * g_tp.wire_local.R_per_um;

    dynSearchEng = 0.0;
    delay_matchchline = 0.0;
    double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
    bool linear_scaling = false;

    if (linear_scaling) {
        Wdecdrivep    =  450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
        Wdecdriven    =  300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
        Wfadriven     = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
        Wfadrivep     =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
        Wfadrive2n    =  250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
        Wfadrive2p    =  500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
        Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
        Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
        Wfadecdrive2n =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
        Wfadecdrive2p =   50 * g_ip->F_sz_um;//this was  40 micron for the 0.8 micron process
        Wfadecdriven  = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
        Wfadecdrivep  =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
        Wfaprechn     =  7.5 * g_ip->F_sz_um;//this was   6 micron for the 0.8 micron process
        Wfainvn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
        Wfainvp       =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
        Wfanandn      =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
        Wfanandp      = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
        Wdecnandn     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
        Wdecnandp     = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process

        Wfaprechp     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
        Wdummyn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
        Wdummyinvn    =   75 * g_ip->F_sz_um;//this was  60 micron for the 0.8 micron process
        Wdummyinvp    =  100 * g_ip->F_sz_um;//this was  80 micron for the 0.8 micron process
        Waddrnandn    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
        Waddrnandp    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
        Wfanorn       = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
        Wfanorp       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
        W_hit_miss_n    = Wdummyn;
        W_hit_miss_p    = g_tp.min_w_nmos_*p_to_n_sizing_r;
        //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
    } else {
        Wdecdrivep    =  450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
        Wdecdriven    =  300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
        Wfadriven     = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
        Wfadrivep     =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
        Wfadrive2n    =  250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
        Wfadrive2p    =  500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
        Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
        Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
        Wfadecdrive2n =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
        Wfadecdrive2p =   50 * g_ip->F_sz_um;//this was  40 micron for the 0.8 micron process
        Wfadecdriven  = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
        Wfadecdrivep  =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
        Wfaprechn     =  7.5 * g_ip->F_sz_um;//this was   6 micron for the 0.8 micron process
        Wfainvn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
        Wfainvp       =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
        Wfanandn      =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
        Wfanandp      = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
        Wdecnandn     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
        Wdecnandp     = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process

        Wfaprechp     = g_tp.w_pmos_bl_precharge;//this was  10 micron for the 0.8 micron process
        Wdummyn       = g_tp.cam.cell_nmos_w;
        Wdummyinvn    =   75 * g_ip->F_sz_um;//this was  60 micron for the 0.8 micron process
        Wdummyinvp    =  100 * g_ip->F_sz_um;//this was  80 micron for the 0.8 micron process
        Waddrnandn    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
        Waddrnandp    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
        Wfanorn       = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
        Wfanorp       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
        W_hit_miss_n    = Wdummyn;
        W_hit_miss_p    = g_tp.min_w_nmos_*p_to_n_sizing_r;
    }

    Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));

    /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
       search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
       From the driver(am and an) to the comparators in all the rows including the dummy row,
       Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */

    //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
    //Searchline precharge routes horizontally
    driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
    driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
    driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;

    sl_precharge_eq_drv = new Driver(
        driver_c_gate_load,
        driver_c_wire_load,
        driver_r_wire_load,
        is_dram);

    //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
    //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
    driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
    driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
    driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
    sl_data_drv = new Driver(
        driver_c_gate_load,
        driver_c_wire_load,
        driver_r_wire_load,
        is_dram);

    sl_precharge_eq_drv->compute_delay(0);
    double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
    double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
    double R_bl = (subarray.num_rows + 1) * r_b_metal;
    double C_bl = subarray.C_bl_cam;
    delay_cam_sl_restore = sl_precharge_eq_drv->delay
        + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2);

    out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)

    //matchline ops delay
    delay_matchchline += sl_data_drv->delay;

    /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
    //matchline delay, matchline power, matchline_reset for cycle time computation,

    ////matchline precharge circuitry routes vertically
    //There are two matchline precharge driver chains per subarray.
    driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
    driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
    driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;

    ml_precharge_drv = new Driver(
        driver_c_gate_load,
        driver_c_wire_load,
        driver_r_wire_load,
        is_dram);

    ml_precharge_drv->compute_delay(0);


    rd =  tr_R_on(Wdummyn, NCH, 2, is_dram);
    c_intrinsic = Htagbits *
        (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def,
                      is_dram)//TODO: the cell_h_def should be revisit
         + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) /
         Htagbits);//since each halve only has one precharge tx per matchline

    Cwire = c_matchline_metal * Htagbits;
    Rwire = r_matchline_metal * Htagbits;
    c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);

    double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
    //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
    double R_ml = Rwire;
    double C_ml = Cwire + c_intrinsic;
    //TODO: latest CAM has sense amps on matchlines too
    delay_cam_ml_reset = ml_precharge_drv->delay
        + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2);

    //matchline ops delay
    tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
    this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
    delay_matchchline += this_delay;
    out_time_ramp = this_delay / VTHFA3;

    dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) *
                     (subarray.num_rows + 1)) //TODO: need to be precise
        * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *
        2;//each subarry has two halves

    /* third stage, from the NAND2 gates to the drivers in the dummy row */
    rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
    c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
                  drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2;
    c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
    tf = rd * (c_intrinsic + c_gate_load);
    this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
    out_time_ramp = this_delay / (1 - VTHFA4);
    delay_matchchline += this_delay;

    //only the dummy row has the extra inverter between NAND and NOR gates
    dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) *
        g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//  * Ntbl;

    /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
    rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
    c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
    Cwire = c_matchline_metal * Htagbits +  c_searchline_metal *
        (subarray.num_rows + 1) / 2;
    Rwire = r_matchline_metal * Htagbits +  r_searchline_metal *
        (subarray.num_rows + 1) / 2;
    c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
    tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
    this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
    out_time_ramp = this_delay / VTHFA5;
    delay_matchchline += this_delay;

    dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) *
        g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;

    /*final statge from the NOR gate to drive the wordline of the data portion */

    //searchline data driver There are two matchline precharge driver chains per subarray.
    driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
    driver_c_wire_load = subarray.C_wl_ram;
    driver_r_wire_load = subarray.R_wl_ram;

    ml_to_ram_wl_drv = new Driver(
        driver_c_gate_load,
        driver_c_wire_load,
        driver_r_wire_load,
        is_dram);



    rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
    c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
        drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
    c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
    tf = rd * (c_intrinsic + c_gate_load);
    this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
    out_time_ramp = this_delay / (1 - 0.5);
    delay_matchchline += this_delay;

    out_time_ramp   = ml_to_ram_wl_drv->compute_delay(out_time_ramp);

    //c_gate_load energy is computed in ml_to_ram_wl_drv
    dynSearchEng  += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;


    /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
    /*Precharge the hitting logic */
    c_intrinsic = 2 *
        drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
    Cwire = c_searchline_metal * subarray.num_rows;
    Rwire = r_searchline_metal * subarray.num_rows;
    c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
        subarray.num_rows;

    rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
    //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
    double R_hit_miss = Rwire;
    double C_hit_miss = Cwire + c_intrinsic;
    delay_hit_miss_reset = log(g_tp.cam.Vbitpre) *
        (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
    dynSearchEng  += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;

    /*hitting logic evaluation */
    c_intrinsic = 2 *
        drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
    Cwire = c_searchline_metal * subarray.num_rows;
    Rwire = r_searchline_metal * subarray.num_rows;
    c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
        subarray.num_rows;

    rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
    tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);

    delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);

    if (is_fa)
        delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);

    dynSearchEng  += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;

    /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/

    power_matchline.searchOp.dynamic = dynSearchEng;

    //leakage in one subarray
    double Iport     = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0,  1, nmos, false, true);//TODO: how much is the idle time? just by *2?
    double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0,  2, nmos, false, true);
    double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
                                     1, inv, false, true) * 2;
    //approx XOR with Inv
    double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv,
                                                false, true) * 2;

    leak_power_cc_inverters_sram_cell         = Icell * g_tp.cam_cell.Vdd;
    leak_comparator_cam_cell                  = Icell_comparator * g_tp.cam_cell.Vdd;
    leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
    leak_power_RD_port_sram_cell              = Iport_erp * g_tp.cam_cell.Vdd;
    leak_power_SCHP_port_sram_cell            = 0;//search port and r/w port are sperate, therefore no access txs in search ports

    power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
                                        leak_comparator_cam_cell +
                                        leak_power_acc_tr_RW_or_WR_port_sram_cell +
                                        leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
                                        leak_power_RD_port_sram_cell * ERP +
                                        leak_power_SCHP_port_sram_cell * SCHP;
//  power_matchline.searchOp.leakage += leak_comparator_cam_cell;
    power_matchline.searchOp.leakage *= (subarray.num_rows + 1) *
        subarray.num_cols_fa_cam;//TODO:dumy line precise
    power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
        cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
    power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
        cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
    power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
        cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
    //In idle states, the hit/miss txs are closed (on) therefore no Isub
    power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
    // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;

    //in idle state, Ig_on only possibly exist in access transistors of read only ports
    double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
    double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
                                     1, inv, false, true) * 2;
    double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv,
                                                false, true) * 2;

    gate_leak_comparator_cam_cell = Ig_cell_comparator * g_tp.cam_cell.Vdd;
    gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.cam_cell.Vdd;
    gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
    gate_leak_power_SCHP_port_sram_cell = 0;

    //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;

    power_matchline.searchOp.gate_leakage +=
        gate_leak_power_cc_inverters_sram_cell;
    power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
    power_matchline.searchOp.gate_leakage +=
        gate_leak_power_SCHP_port_sram_cell * SCHP +
        gate_leak_power_RD_port_sram_cell * ERP;
    power_matchline.searchOp.gate_leakage *= (subarray.num_rows + 1) *
        subarray.num_cols_fa_cam;//TODO:dumy line precise
    power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
        cmos_Ig_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
    power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
        cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
    power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
        cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
    power_matchline.searchOp.gate_leakage += subarray.num_rows *
        cmos_Ig_leakage(W_hit_miss_n, 0, 1, nmos) * g_tp.cam_cell.Vdd +
        + cmos_Ig_leakage(0, W_hit_miss_p, 1, pmos) * g_tp.cam_cell.Vdd;


    return out_time_ramp;
}


double Mat::width_write_driver_or_write_mux() {
    // calculate resistance of SRAM cell pull-up PMOS transistor
    // cam and sram have same cell trasistor properties
    double R_sram_cell_pull_up_tr  = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
    double R_access_tr             = tr_R_on(g_tp.sram.cell_a_w,    NCH, 1, is_dram, true);
    double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
    double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);

    return width_write_driver_nmos;
}



double Mat::compute_comparators_height(
    int tagbits,
    int number_ways_in_mat,
    double subarray_mem_cell_area_width) {
    double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
    double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
    return cumulative_area / subarray_mem_cell_area_width;
}



double Mat::compute_bitline_delay(double inrisetime) {
    double V_b_pre, v_th_mem_cell, V_wl;
    double tstep;
    double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
    double R_cell_pull_down = 0.0, R_cell_acc = 0.0, r_dev = 0.0;
    int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;

    double R_b_metal = camFlag ? cam_cell.h : cell.h * g_tp.wire_local.R_per_um;
    double R_bl      = subarray.num_rows * R_b_metal;
    double C_bl      = subarray.C_bl;

    // TODO: no leakage for DRAMs?
    double leak_power_cc_inverters_sram_cell = 0;
    double gate_leak_power_cc_inverters_sram_cell = 0;
    double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
    double leak_power_RD_port_sram_cell = 0;
    double gate_leak_power_RD_port_sram_cell = 0;

    if (is_dram == true) {
        V_b_pre = g_tp.dram.Vbitpre;
        v_th_mem_cell = g_tp.dram_acc.Vth;
        V_wl = g_tp.vpp;
        //The access transistor is not folded. So we just need to specify a
        // threshold value for the folding width that is equal to or greater
        // than Wmemcella.
        R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
        r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
    } else { //SRAM
        V_b_pre = g_tp.sram.Vbitpre;
        v_th_mem_cell = g_tp.sram_cell.Vth;
        V_wl = g_tp.sram_cell.Vdd;
        R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
        R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);

        //Leakage current of an SRAM cell
        //TODO: how much is the idle time? just by *2?
        double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0,  1, nmos,
                                         false, true);
        double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0,  2, nmos,
                                             false, true);
        double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w,
                                         g_tp.sram.cell_pmos_w, 1, inv, false,
                                         true) * 2;//two invs per cell

        leak_power_cc_inverters_sram_cell         = Icell * g_tp.sram_cell.Vdd;
        leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
        leak_power_RD_port_sram_cell              = Iport_erp * g_tp.sram_cell.Vdd;


        //in idle state, Ig_on only possibly exist in access transistors of read only ports
        double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
                                             false, true);
        double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w,
                                         g_tp.sram.cell_pmos_w, 1, inv, false,
                                         true);

        gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd;
        gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
    }


    double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0,
                                      camFlag ? cam_cell.w : cell.w /
                                      (2 * (RWP + ERP + SCHP)), is_dram);
    double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
    double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0,
                                            camFlag ? cam_cell.w :
                                            cell.w * deg_bl_muxing /
                                            (RWP + ERP + SCHP), is_dram);
    double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
    double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0,
                                      is_dram) +
        drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w :
                 cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
        drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w :
                 cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
    double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
                                            camFlag ? cam_cell.w :
                                            cell.w * deg_bl_muxing /
                                            (RWP + ERP + SCHP), is_dram);

    if (is_dram) {
        double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) *
                                          g_tp.dram_cell_C /
                                          (g_tp.dram_cell_C + C_bl));
        tstep = 2.3 * fraction * r_dev *
            (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso +
                                 C_sense_amp_latch + C_drain_sense_amp_mux)) /
            (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso +
                                 C_sense_amp_latch + C_drain_sense_amp_mux));
        delay_writeback = tstep;
        dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch +
                        C_drain_sense_amp_mux) *
            (g_tp.dram_cell_Vdd / 2) *
            g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
        dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) *
            (g_tp.dram_cell_Vdd / 2) *
            g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ *
            num_act_mats_hor_dir * 100;
        per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso +
                                   C_sense_amp_latch + C_drain_sense_amp_mux) *
            (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
    } else {
        double tau;

        if (deg_bl_muxing > 1) {
            tau = (R_cell_pull_down + R_cell_acc) *
                (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
                 C_sense_amp_latch + C_drain_sense_amp_mux) +
                R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 *
                        C_drain_sense_amp_iso + C_sense_amp_latch +
                        C_drain_sense_amp_mux) +
                R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
                             C_sense_amp_latch + C_drain_sense_amp_mux) +
                R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch +
                                   C_drain_sense_amp_mux);
            dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense *
                g_tp.sram_cell.Vdd;
            dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch +
                            C_drain_sense_amp_mux) *
                2 * dp.V_b_sense * g_tp.sram_cell.Vdd *
                (1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
                 deg_bl_muxing);
            dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ /
                                deg_bl_muxing) / deg_senseamp_muxing) *
                num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) *
                g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
            //Write Ops are differential for SRAM
        } else {
            tau = (R_cell_pull_down + R_cell_acc) *
                  (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
                  R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
            dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
                           2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
            dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
                                 deg_bl_muxing) / deg_senseamp_muxing) *
                               num_act_mats_hor_dir * C_bl) *
                g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;

        }
        tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
        power_bitline.readOp.leakage =
            leak_power_cc_inverters_sram_cell +
            leak_power_acc_tr_RW_or_WR_port_sram_cell +
            leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
            leak_power_RD_port_sram_cell * ERP;
        power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
                                            gate_leak_power_RD_port_sram_cell * ERP;

    }

//  cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
//  cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
//  cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
//  cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;


    /* take input rise time into account */
    double m = V_wl / inrisetime;
    if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) {
        delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell) / m);
    } else {
        delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
    }

    bool is_fa = (dp.fully_assoc) ? true : false;

    if (dp.is_tag == false || is_fa == false) {
        power_bitline.readOp.dynamic  = dynRdEnergy;
        power_bitline.writeOp.dynamic = dynWriteEnergy;
    }

    double outrisetime = 0;
    return outrisetime;
}



double Mat::compute_sa_delay(double inrisetime) {
    //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray

    //Bitline circuitry leakage.
    double Iiso     = simplified_pmos_leakage(g_tp.w_iso, is_dram);
    double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
    double IsenseN  = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
    double IsenseP  = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);

    double lkgIdlePh  = IsenseEn;//+ 2*IoBufP;
    //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
    double lkgReadPh  = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
    //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
    //    lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
    double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
    leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
    leak_power_sense_amps_open_page_state   = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;

    // sense amplifier has to drive logic in "data out driver" and sense precharge load.
    // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
    //constant as well as the magnitude of input differential voltage.
    double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
        drain_C_(g_tp.w_sense_n, NCH, 1, 0,
                 camFlag ? cam_cell.w : cell.w * deg_bl_muxing /
                 (RWP + ERP + SCHP), is_dram) +
        drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ?
                 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
                 is_dram) +
        drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag ?
                 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
                 is_dram) +
        drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
                 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
                 is_dram);
    double tau = C_ld / g_tp.gm_sense_amp_latch;
    delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
    power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
                            num_subarrays_per_mat * num_act_mats_hor_dir*/;
    power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;

    double outrisetime = 0;
    return outrisetime;
}



double Mat::compute_subarray_out_drv(double inrisetime) {
    double C_ld, rd, tf, this_delay;
    double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);

    // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
    rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
    C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
                                     camFlag ? cam_cell.w : cell.w *
                                     deg_bl_muxing / (RWP + ERP + SCHP),
                                     is_dram) +
        gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
    tf = rd * C_ld;
    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
    delay_subarray_out_drv += this_delay;
    inrisetime = this_delay / (1.0 - 0.5);
    power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
    power_subarray_out_drv.readOp.leakage += 0;  // for now, let leakage of the pass transistor be 0
    power_subarray_out_drv.readOp.gate_leakage +=
        cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
    // delay of signal through inverter-buffer to second level of sense-amp mux.
    // internal delay of buffer
    rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
    C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
           drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
           gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
    tf = rd * C_ld;
    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
    delay_subarray_out_drv += this_delay;
    inrisetime = this_delay / (1.0 - 0.5);
    power_subarray_out_drv.readOp.dynamic      += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
    power_subarray_out_drv.readOp.leakage +=
        cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
                          inv, is_dram) * g_tp.peri_global.Vdd;
    power_subarray_out_drv.readOp.gate_leakage +=
        cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
                        inv) * g_tp.peri_global.Vdd;

    // inverter driving drain of pass transistor of second level of sense-amp mux.
    rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
    C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
        drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def,
                 is_dram) +
        drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
                 cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 /
                 (RWP + ERP + SCHP), is_dram);
    tf = rd * C_ld;
    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
    delay_subarray_out_drv += this_delay;
    inrisetime = this_delay / (1.0 - 0.5);
    power_subarray_out_drv.readOp.dynamic      += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
    power_subarray_out_drv.readOp.leakage +=
        cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
                          inv) * g_tp.peri_global.Vdd;
    power_subarray_out_drv.readOp.gate_leakage +=
        cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
                        inv) * g_tp.peri_global.Vdd;


    // delay of signal through pass-transistor to input of subarray output driver.
    rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
    C_ld = dp.Ndsam_lev_2 *
        drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w :
                 cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP),
                 is_dram) +
           //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
        gate_C(subarray_out_wire->repeater_size *
               (subarray_out_wire->wire_length /
                subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ *
               (1 + p_to_n_sz_r), 0.0, is_dram);
    tf = rd * C_ld;
    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
    delay_subarray_out_drv += this_delay;
    inrisetime = this_delay / (1.0 - 0.5);
    power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
    power_subarray_out_drv.readOp.leakage += 0;  // for now, let leakage of the pass transistor be 0
    power_subarray_out_drv.readOp.gate_leakage +=
        cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;


    return inrisetime;
}



double Mat::compute_comparator_delay(double inrisetime) {
    int A = g_ip->tag_assoc;

    int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
    // a multiple of 4.

    /* First Inverter */
    double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) +
                 drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
                 drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
    double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
    double tf  = Req * Ceq;
    double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL);
    double nextinputtime = st1del / VTHCOMPINV;
    power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;

    //For each degree of associativity
    //there are 4 such quarter comparators
    double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1,
                                          g_tp.w_comp_inv_p1, 1, inv,
                                          is_dram) * 4 * A;
    double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1,
                                            g_tp.w_comp_inv_p1, 1, inv,
                                            is_dram) * 4 * A;
    /* Second Inverter */
    Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) +
          drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
          drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
    Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
    tf = Req * Ceq;
    double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE);
    nextinputtime = st2del / (1.0 - VTHCOMPINV);
    power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
    lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
                                    inv, is_dram) * 4 * A;
    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
                                      inv, is_dram) * 4 * A;

    /* Third Inverter */
    Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) +
          drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
          drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
    Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
    tf = Req * Ceq;
    double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL);
    nextinputtime = st3del / (VTHEVALINV);
    power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
    lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1,
                                    inv, is_dram) * 4 * A;
    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3,
                                      1, inv, is_dram) * 4 * A;

    /* Final Inverter (virtual ground driver) discharging compare part */
    double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram);
    double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */
    double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
                                       g_tp.cell_h_def, is_dram) +
                              drain_C_(g_tp.w_comp_n, NCH, 2, 1,
                                       g_tp.cell_h_def, is_dram)) +
        drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
        drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram);
    double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
                                       g_tp.cell_h_def, is_dram) +
                              drain_C_(g_tp.w_comp_n, NCH, 2, 1,
                                       g_tp.cell_h_def, is_dram)) +
        drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
        gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram);
    power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
    power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *  (A - 1);
    lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
                                    inv, is_dram) * 4 * A;
    lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
                                    is_dram) * 4 * A; // stack factor of 0.2

    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
                                      inv, is_dram) * 4 * A;
    //for gate leakage this equals to a inverter
    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
                                      is_dram) * 4 * A;

    /* time to go to threshold of mux driver */
    double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND);
    /* take into account non-zero input rise time */
    double m = g_tp.peri_global.Vdd / nextinputtime;
    double Tcomparatorni;

    if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) {
        double a = m;
        double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) -
                        g_tp.peri_global.Vth);
        double c = -2 * (tstep) * (g_tp.peri_global.Vdd -
                                   g_tp.peri_global.Vth) + 1 / m *
            ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) *
            ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth);
        Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a);
    } else {
        Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd +
                                   g_tp.peri_global.Vth) / (2 * m) -
            (g_tp.peri_global.Vdd * VTHEVALINV) / m;
    }
    delay_comparator = Tcomparatorni + st1del + st2del + st3del;
    power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
    power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;

    return Tcomparatorni / (1.0 - VTHMUXNAND);;
}



void Mat::compute_power_energy() {
    //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
    //when search all subarrays and all mats are fully active
    //when plain read/write only one subarray in a single mat is active.

    // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
    power.readOp.dynamic += r_predec->power.readOp.dynamic +
                            b_mux_predec->power.readOp.dynamic +
                            sa_mux_lev_1_predec->power.readOp.dynamic +
                            sa_mux_lev_2_predec->power.readOp.dynamic;

    // add energy consumed in decoders
    power_row_decoders.readOp.dynamic        = row_dec->power.readOp.dynamic;
    if (!(is_fa || pure_cam))
        power_row_decoders.readOp.dynamic        *= num_subarrays_per_mat;

    // add energy consumed in bitline prechagers, SAs, and bitlines
    if (!(is_fa || pure_cam)) {
        // add energy consumed in bitline prechagers
        power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
        power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;

        //Add sense amps energy
        num_sa_subarray = subarray.num_cols / deg_bl_muxing;
        power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ;

        // add energy consumed in bitlines
        //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
        power_bitline.readOp.dynamic *= num_subarrays_per_mat *
            subarray.num_cols;
        power_bitline.writeOp.dynamic *= num_subarrays_per_mat *
            subarray.num_cols;
        //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
        //Add subarray output energy
        power_subarray_out_drv.readOp.dynamic =
            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;

        power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
                                power_sa.readOp.dynamic +
                                power_bitline.readOp.dynamic +
                                power_subarray_out_drv.readOp.dynamic;

        power.readOp.dynamic += power_row_decoders.readOp.dynamic +
                                bit_mux_dec->power.readOp.dynamic +
                                sa_mux_lev_1_dec->power.readOp.dynamic +
                                sa_mux_lev_2_dec->power.readOp.dynamic +
                                power_comparator.readOp.dynamic;
    }

    else if (is_fa) {
        //for plain read/write only one subarray in a mat is active
        // add energy consumed in bitline prechagers
        power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
                + cam_bl_precharge_eq_drv->power.readOp.dynamic;
        power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;

        //Add sense amps energy
        num_sa_subarray = (subarray.num_cols_fa_cam +
                           subarray.num_cols_fa_ram) / deg_bl_muxing;
        num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing;
        power_sa.searchOp.dynamic = power_sa.readOp.dynamic *
            num_sa_subarray_search;
        power_sa.readOp.dynamic *= num_sa_subarray;


        // add energy consumed in bitlines
        power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
        power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam +
                                         subarray.num_cols_fa_ram);
        power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam +
                                          subarray.num_cols_fa_ram);
        power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;

        //Add subarray output energy
        power_subarray_out_drv.searchOp.dynamic =
            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
        power_subarray_out_drv.readOp.dynamic =
            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;


        power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
                                power_sa.readOp.dynamic +
                                power_bitline.readOp.dynamic +
                                power_subarray_out_drv.readOp.dynamic;

        power.readOp.dynamic += power_row_decoders.readOp.dynamic +
                                bit_mux_dec->power.readOp.dynamic +
                                sa_mux_lev_1_dec->power.readOp.dynamic +
                                sa_mux_lev_2_dec->power.readOp.dynamic +
                                power_comparator.readOp.dynamic;

        //add energy consumed inside cam
        power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
        power_searchline_precharge = sl_precharge_eq_drv->power;
        power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
        power_searchline = sl_data_drv->power;
        power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
            subarray.num_cols_fa_cam * num_subarrays_per_mat;;
        power_matchline_precharge  = ml_precharge_drv->power;
        power_matchline_precharge.searchOp.dynamic =
            power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
        power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
        power_ml_to_ram_wl_drv.searchOp.dynamic =
            ml_to_ram_wl_drv->power.readOp.dynamic;

        power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
        power_cam_all_active.searchOp.dynamic +=
            power_searchline_precharge.searchOp.dynamic;
        power_cam_all_active.searchOp.dynamic +=
            power_searchline.searchOp.dynamic;
        power_cam_all_active.searchOp.dynamic +=
            power_matchline_precharge.searchOp.dynamic;

        power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
        //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;

    } else {
        // add energy consumed in bitline prechagers
        power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
        //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
        //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
        //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;

        //Add sense amps energy
        num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing;
        power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
        power_sa.searchOp.dynamic = 0;

        power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
        power_bitline.searchOp.dynamic = 0;
        power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;

        power_subarray_out_drv.searchOp.dynamic =
            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
        power_subarray_out_drv.readOp.dynamic =
            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;

        power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
                                power_sa.readOp.dynamic +
                                power_bitline.readOp.dynamic +
                                power_subarray_out_drv.readOp.dynamic;

        power.readOp.dynamic += power_row_decoders.readOp.dynamic +
                                bit_mux_dec->power.readOp.dynamic +
                                sa_mux_lev_1_dec->power.readOp.dynamic +
                                sa_mux_lev_2_dec->power.readOp.dynamic +
                                power_comparator.readOp.dynamic;


        ////add energy consumed inside cam
        power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
        power_searchline_precharge = sl_precharge_eq_drv->power;
        power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
        power_searchline = sl_data_drv->power;
        power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
            subarray.num_cols_fa_cam * num_subarrays_per_mat;;
        power_matchline_precharge  = ml_precharge_drv->power;
        power_matchline_precharge.searchOp.dynamic =
            power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
        power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
        power_ml_to_ram_wl_drv.searchOp.dynamic =
            ml_to_ram_wl_drv->power.readOp.dynamic;

        power_cam_all_active.searchOp.dynamic =
            power_matchline.searchOp.dynamic;
        power_cam_all_active.searchOp.dynamic +=
            power_searchline_precharge.searchOp.dynamic;
        power_cam_all_active.searchOp.dynamic +=
            power_searchline.searchOp.dynamic;
        power_cam_all_active.searchOp.dynamic +=
            power_matchline_precharge.searchOp.dynamic;

        power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
        //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;

    }



    // calculate leakage power
    if (!(is_fa || pure_cam)) {
        int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);

        power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
        power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
        power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
            (RWP + ERP);

        //num_sa_subarray             = subarray.num_cols / deg_bl_muxing;
        power_subarray_out_drv.readOp.leakage =
            (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);

        power.readOp.leakage += power_bitline.readOp.leakage +
                                power_bl_precharge_eq_drv.readOp.leakage +
                                power_sa.readOp.leakage +
                                power_subarray_out_drv.readOp.leakage;
        //cout<<"leakage"<<power.readOp.leakage<<endl;

        power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
        power.readOp.leakage += power_comparator.readOp.leakage;

        //cout<<"leakage1"<<power.readOp.leakage<<endl;

        // leakage power
        power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
        power_bit_mux_decoders.readOp.leakage      = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
        power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
        power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;

        power.readOp.leakage += r_predec->power.readOp.leakage +
                                b_mux_predec->power.readOp.leakage +
                                sa_mux_lev_1_predec->power.readOp.leakage +
                                sa_mux_lev_2_predec->power.readOp.leakage +
                                power_row_decoders.readOp.leakage +
                                power_bit_mux_decoders.readOp.leakage +
                                power_sa_mux_lev_1_decoders.readOp.leakage +
                                power_sa_mux_lev_2_decoders.readOp.leakage;
        //cout<<"leakage2"<<power.readOp.leakage<<endl;

        //++++Below is gate leakage
        power_bitline.readOp.gate_leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
        power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
        power_sa.readOp.gate_leakage *= num_sa_subarray *
            num_subarrays_per_mat * (RWP + ERP);

        //num_sa_subarray             = subarray.num_cols / deg_bl_muxing;
        power_subarray_out_drv.readOp.gate_leakage =
            (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);

        power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
                                     power_bl_precharge_eq_drv.readOp.gate_leakage +
                                     power_sa.readOp.gate_leakage +
                                     power_subarray_out_drv.readOp.gate_leakage;
        //cout<<"leakage"<<power.readOp.leakage<<endl;

        power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
        power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;

        //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;

        // gate_leakage power
        power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
        power_bit_mux_decoders.readOp.gate_leakage      = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
        power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
        power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;

        power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
                                     b_mux_predec->power.readOp.gate_leakage +
                                     sa_mux_lev_1_predec->power.readOp.gate_leakage +
                                     sa_mux_lev_2_predec->power.readOp.gate_leakage +
                                     power_row_decoders.readOp.gate_leakage +
                                     power_bit_mux_decoders.readOp.gate_leakage +
                                     power_sa_mux_lev_1_decoders.readOp.gate_leakage +
                                     power_sa_mux_lev_2_decoders.readOp.gate_leakage;
    } else if (is_fa) {
        int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);

        power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
        power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
        power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
        power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
            (RWP + ERP + SCHP);

        //cout<<"leakage3"<<power.readOp.leakage<<endl;


        power_subarray_out_drv.readOp.leakage =
            (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);

        power.readOp.leakage += power_bitline.readOp.leakage +
                                power_bl_precharge_eq_drv.readOp.leakage +
                                power_bl_precharge_eq_drv.searchOp.leakage +
                                power_sa.readOp.leakage +
                                power_subarray_out_drv.readOp.leakage;

        //cout<<"leakage4"<<power.readOp.leakage<<endl;

        // leakage power
        power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
        power.readOp.leakage += r_predec->power.readOp.leakage +
                                power_row_decoders.readOp.leakage;

        //cout<<"leakage5"<<power.readOp.leakage<<endl;

        //inside cam
        power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
        power_cam_all_active.searchOp.leakage +=
            sl_precharge_eq_drv->power.readOp.leakage;
        power_cam_all_active.searchOp.leakage +=
            sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
        power_cam_all_active.searchOp.leakage +=
            ml_precharge_drv->power.readOp.dynamic;
        power_cam_all_active.searchOp.leakage *=
            num_subarrays_per_mat;

        power.readOp.leakage += power_cam_all_active.searchOp.leakage;

//	  cout<<"leakage6"<<power.readOp.leakage<<endl;

        //+++Below is gate leakage
        power_bitline.readOp.gate_leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
        power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
        power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
        power_sa.readOp.gate_leakage *= num_sa_subarray *
            num_subarrays_per_mat * (RWP + ERP + SCHP);

        //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;


        power_subarray_out_drv.readOp.gate_leakage =
            (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);

        power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
                                     power_bl_precharge_eq_drv.readOp.gate_leakage +
                                     power_bl_precharge_eq_drv.searchOp.gate_leakage +
                                     power_sa.readOp.gate_leakage +
                                     power_subarray_out_drv.readOp.gate_leakage;

        //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;

        // gate_leakage power
        power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
        power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
                                     power_row_decoders.readOp.gate_leakage;

        //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;

        //inside cam
        power_cam_all_active.searchOp.gate_leakage =
            power_matchline.searchOp.gate_leakage;
        power_cam_all_active.searchOp.gate_leakage +=
            sl_precharge_eq_drv->power.readOp.gate_leakage;
        power_cam_all_active.searchOp.gate_leakage +=
            sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
        power_cam_all_active.searchOp.gate_leakage +=
            ml_precharge_drv->power.readOp.dynamic;
        power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;

        power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;

    } else {
        int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);

        //power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
        //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
        power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
        power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
            (RWP + ERP + SCHP);


        power_subarray_out_drv.readOp.leakage =
            (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);

        power.readOp.leakage += //power_bitline.readOp.leakage +
            //power_bl_precharge_eq_drv.readOp.leakage +
            power_bl_precharge_eq_drv.searchOp.leakage +
            power_sa.readOp.leakage +
            power_subarray_out_drv.readOp.leakage;

        // leakage power
        power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage *
            subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP);
        power.readOp.leakage += r_predec->power.readOp.leakage +
                                power_row_decoders.readOp.leakage;

        //inside cam
        power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
        power_cam_all_active.searchOp.leakage +=
            sl_precharge_eq_drv->power.readOp.leakage;
        power_cam_all_active.searchOp.leakage +=
            sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
        power_cam_all_active.searchOp.leakage +=
            ml_precharge_drv->power.readOp.dynamic;
        power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;

        power.readOp.leakage += power_cam_all_active.searchOp.leakage;

        //+++Below is gate leakage
        power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
        power_sa.readOp.gate_leakage *= num_sa_subarray *
            num_subarrays_per_mat * (RWP + ERP + SCHP);


        power_subarray_out_drv.readOp.gate_leakage =
            (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);

        power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
            //power_bl_precharge_eq_drv.readOp.gate_leakage +
            power_bl_precharge_eq_drv.searchOp.gate_leakage +
            power_sa.readOp.gate_leakage +
            power_subarray_out_drv.readOp.gate_leakage;

        // gate_leakage power
        power_row_decoders.readOp.gate_leakage =
            row_dec->power.readOp.gate_leakage * subarray.num_rows *
            num_subarrays_per_mat * (RWP + ERP + EWP);
        power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
                                     power_row_decoders.readOp.gate_leakage;

        //inside cam
        power_cam_all_active.searchOp.gate_leakage =
            power_matchline.searchOp.gate_leakage;
        power_cam_all_active.searchOp.gate_leakage +=
            sl_precharge_eq_drv->power.readOp.gate_leakage;
        power_cam_all_active.searchOp.gate_leakage +=
            sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
        power_cam_all_active.searchOp.gate_leakage +=
            ml_precharge_drv->power.readOp.dynamic;
        power_cam_all_active.searchOp.gate_leakage *=
            num_subarrays_per_mat;

        power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
    }
}