uboot/drivers/ram/octeon/octeon3_lmc.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2020 Marvell International Ltd.
   4 */
   5
   6#include <command.h>
   7#include <dm.h>
   8#include <hang.h>
   9#include <i2c.h>
  10#include <ram.h>
  11#include <time.h>
  12
  13#include <linux/bitops.h>
  14#include <linux/io.h>
  15
  16#include <mach/octeon_ddr.h>
  17
  18/* Random number generator stuff */
  19
  20#define CVMX_OCT_DID_RNG        8ULL
  21
  22static u64 cvmx_rng_get_random64(void)
  23{
  24        return csr_rd(cvmx_build_io_address(CVMX_OCT_DID_RNG, 0));
  25}
  26
  27static void cvmx_rng_enable(void)
  28{
  29        u64 val;
  30
  31        val = csr_rd(CVMX_RNM_CTL_STATUS);
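             /*
              * Bits 0 and 1 of RNM_CTL_STATUS are assumed to be the entropy
              * source enable and the RNG engine enable.
              */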
  32        val |= BIT(0) | BIT(1);
  33        csr_wr(CVMX_RNM_CTL_STATUS, val);
  34}
  35
  36#define RLEVEL_PRINTALL_DEFAULT         1
  37#define WLEVEL_PRINTALL_DEFAULT         1
  38
  39/*
  40 * Define how many HW WL samples to take for majority voting.
  41 * MUST BE odd!!
  42 * Assume there should only be 2 possible values that will show up,
  43 * so treat ties as a problem!!!
  44 * NOTE: Do not change this without checking the code!!!
  45 */
  46#define WLEVEL_LOOPS_DEFAULT            5
  47
  48#define ENABLE_COMPUTED_VREF_ADJUSTMENT 1
  49#define SW_WLEVEL_HW_DEFAULT            1
  50#define DEFAULT_BEST_RANK_SCORE         9999999
  51#define MAX_RANK_SCORE_LIMIT            99
  52
   53/*
   54 * Define how many HW RL samples per rank to take; multiple samples
   55 * allow looking for the best sample score.
   56 */
  57#define RLEVEL_SAMPLES_DEFAULT          3
  58
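     /*
      * Sequence tracing is compiled out by default; ddr_seq_print() below
      * can be redefined to debug() to log each LMC init sequence issued.
      */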
  59#define ddr_seq_print(format, ...) do {} while (0)
  60
  61struct wlevel_bitcnt {
  62        int bitcnt[4];
  63};
  64
  65static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
  66                                     int ecc_ena, int *settings, char *title);
  67
  68static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
  69                                        int dac_value, int byte);
  70
  71/* "mode" arg */
  72#define DBTRAIN_TEST 0
  73#define DBTRAIN_DBI  1
  74#define DBTRAIN_LFSR 2
  75
  76static int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
  77                                int mode, u64 *xor_data);
  78
  79#define LMC_DDR3_RESET_ASSERT   0
  80#define LMC_DDR3_RESET_DEASSERT 1
  81
  82static void cn7xxx_lmc_ddr3_reset(struct ddr_priv *priv, int if_num, int reset)
  83{
  84        union cvmx_lmcx_reset_ctl reset_ctl;
  85
  86        /*
  87         * 4. Deassert DDRn_RESET_L pin by writing
  88         *    LMC(0..3)_RESET_CTL[DDR3RST] = 1
  89         *    without modifying any other LMC(0..3)_RESET_CTL fields.
  90         * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
  91         * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
  92         *    delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE*
  93         *    assertion.
  94         */
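             /*
              * The same write is used for both directions: the "reset"
              * argument goes straight into DDR3RST, so LMC_DDR3_RESET_ASSERT
              * (0) asserts and LMC_DDR3_RESET_DEASSERT (1) deasserts
              * DDRn_RESET_L.
              */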
  95        debug("LMC%d %s DDR_RESET_L\n", if_num,
  96              (reset ==
  97               LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting");
  98
  99        reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
 100        reset_ctl.cn78xx.ddr3rst = reset;
 101        lmc_wr(priv, CVMX_LMCX_RESET_CTL(if_num), reset_ctl.u64);
 102
 103        lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
 104
 105        udelay(500);
 106}
 107
 108static void perform_lmc_reset(struct ddr_priv *priv, int node, int if_num)
 109{
 110        /*
 111         * 5.9.6 LMC RESET Initialization
 112         *
 113         * The purpose of this step is to assert/deassert the RESET# pin at the
 114         * DDR3/DDR4 parts.
 115         *
 116         * This LMC RESET step is done for all enabled LMCs.
 117         *
 118         * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts
 119         * are in self refresh and are currently preserving their
 120         * contents. (Software can determine this via
 121         * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of
 122         * this section assumes that the DRAM contents need not be preserved.
 123         *
 124         * The remainder of this section assumes that the CN78XX DDRn_RESET_L
 125         * pin is attached to the RESET# pin of the attached DDR3/DDR4 parts,
 126         * as will be appropriate in many systems.
 127         *
 128         * (In other systems, such as ones that can preserve DDR3/DDR4 part
 129         * contents while CN78XX is powered down, it will not be appropriate to
 130         * directly attach the CN78XX DDRn_RESET_L pin to DRESET# of the
 131         * DDR3/DDR4 parts, and this section may not apply.)
 132         *
 133         * The remainder of this section describes the sequence for LMCn.
 134         *
 135         * Perform the following six substeps for LMC reset initialization:
 136         *
 137         * 1. If not done already, assert DDRn_RESET_L pin by writing
 138         * LMC(0..3)_RESET_ CTL[DDR3RST] = 0 without modifying any other
 139         * LMC(0..3)_RESET_CTL fields.
 140         */
 141
 142        if (!ddr_memory_preserved(priv)) {
 143                /*
 144                 * 2. Read LMC(0..3)_RESET_CTL and wait for the result.
 145                 */
 146
 147                lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
 148
 149                /*
 150                 * 3. Wait until RESET# assertion-time requirement from JEDEC
 151                 * DDR3/DDR4 specification is satisfied (200 us during a
 152                 * power-on ramp, 100ns when power is already stable).
 153                 */
 154
 155                udelay(200);
 156
 157                /*
 158                 * 4. Deassert DDRn_RESET_L pin by writing
 159                 *    LMC(0..3)_RESET_CTL[DDR3RST] = 1
 160                 *    without modifying any other LMC(0..3)_RESET_CTL fields.
 161                 * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
 162                 * 6. Wait a minimum of 500us. This guarantees the necessary
 163                 *    T = 500us delay between DDRn_RESET_L deassertion and
 164                 *    DDRn_DIMM*_CKE* assertion.
 165                 */
 166                cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);
 167
 168                /* Toggle Reset Again */
 169                /* That is, assert, then de-assert, one more time */
 170                cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_ASSERT);
 171                cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);
 172        }
 173}
 174
 175void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num,
 176                   int sequence)
 177{
 178        /*
 179         * 3. Without changing any other fields in LMC(0)_CONFIG, write
 180         *    LMC(0)_CONFIG[RANKMASK] then write both
 181         *    LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write
 182         *    operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate
 183         *    the ranks that will participate in the sequence.
 184         *
 185         *    The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or
  186         *    self-refresh exit, depending on whether the DRAM parts are in
 187         *    self-refresh and whether their contents should be preserved. While
 188         *    LMC performs these sequences, it will not perform any other DDR3
 189         *    transactions. When the sequence is complete, hardware sets the
 190         *    LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been
 191         *    initialized.
 192         *
 193         *    If power-up/init is selected immediately following a DRESET
 194         *    assertion, LMC executes the sequence described in the "Reset and
 195         *    Initialization Procedure" section of the JEDEC DDR3
 196         *    specification. This includes activating CKE, writing all four DDR3
 197         *    mode registers on all selected ranks, and issuing the required
 198         *    ZQCL
 199         *    command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks
 200         *    with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1,
 201         *    LMC writes the JEDEC standard SSTE32882 control words selected by
 202         *    LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and
 203         *    the first DDR3 mode register write operation.
 204         *    LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the
 205         *    corresponding DIMM is not present.
 206         *
 207         *    If self-refresh exit is selected, LMC executes the required SRX
 208         *    command followed by a refresh and ZQ calibration. Section 4.5
 209         *    describes behavior of a REF + ZQCS.  LMC does not write the DDR3
 210         *    mode registers as part of this sequence, and the mode register
 211         *    parameters must match at self-refresh entry and exit times.
 212         *
 213         * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE]
 214         *    to be set.
 215         *
 216         * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have
 217         *    been initialized.
 218         */
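             /*
              * SEQ_SEL values used at the call sites in this file:
              * 0x3 = self-refresh exit, 0x7 = RCW (register control word)
              * init, 0x8 = mode register write, 0x9 = MPR register access.
              */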
 219
 220        union cvmx_lmcx_seq_ctl seq_ctl;
 221        union cvmx_lmcx_config lmc_config;
 222        int timeout;
 223
 224        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
 225        lmc_config.s.rankmask = rank_mask;
 226        lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
 227
 228        seq_ctl.u64 = 0;
 229
 230        seq_ctl.s.init_start = 1;
 231        seq_ctl.s.seq_sel = sequence;
 232
 233        ddr_seq_print
 234            ("Performing LMC sequence: rank_mask=0x%02x, sequence=0x%x, %s\n",
 235             rank_mask, sequence, sequence_str[sequence]);
 236
 237        if (seq_ctl.s.seq_sel == 3)
 238                debug("LMC%d: Exiting Self-refresh Rank_mask:%x\n", if_num,
 239                      rank_mask);
 240
 241        lmc_wr(priv, CVMX_LMCX_SEQ_CTL(if_num), seq_ctl.u64);
 242        lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
 243
 244        timeout = 100;
 245        do {
 246                udelay(100);    /* Wait a while */
 247                seq_ctl.u64 = lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
 248                if (--timeout == 0) {
 249                        printf("Sequence %d timed out\n", sequence);
 250                        break;
 251                }
 252        } while (seq_ctl.s.seq_complete != 1);
 253
 254        ddr_seq_print("           LMC sequence=%x: Completed.\n", sequence);
 255}
 256
 257#define bdk_numa_get_address(n, p)      ((p) | ((u64)n) << CVMX_NODE_MEM_SHIFT)
 258#define AREA_BASE_OFFSET                BIT_ULL(26)
 259
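     /*
      * Pattern/XOR memory test: two regions 64 MB apart (p2offset) are
      * filled with an address-dependent pattern, then repeatedly XORed
      * with random data and compared.  The return value is a bitmask with
      * one bit set per byte lane that saw a miscompare; if xor_data is
      * non-NULL, the union of all failing data bits is returned in
      * xor_data[0].
      */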
 260static int test_dram_byte64(struct ddr_priv *priv, int lmc, u64 p,
 261                            u64 bitmask, u64 *xor_data)
 262{
 263        u64 p1, p2, d1, d2;
 264        u64 v, v1;
 265        u64 p2offset = (1ULL << 26);    // offset to area 2
 266        u64 datamask;
 267        u64 xor;
 268        u64 i, j, k;
 269        u64 ii;
 270        int errors = 0;
 271        //u64 index;
 272        u64 pattern1 = cvmx_rng_get_random64();
 273        u64 pattern2 = 0;
 274        u64 bad_bits[2] = { 0, 0 };
 275        int kbitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
 276        union cvmx_l2c_ctl l2c_ctl;
 277        int burst;
 278        int saved_dissblkdty;
 279        int node = 0;
 280
 281        // Force full cacheline write-backs to boost traffic
 282        l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL);
 283        saved_dissblkdty = l2c_ctl.cn78xx.dissblkdty;
 284        l2c_ctl.cn78xx.dissblkdty = 1;
 285        l2c_wr(priv, CVMX_L2C_CTL_REL, l2c_ctl.u64);
 286
 287        if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
 288                kbitno = 18;
 289
 290        // Byte lanes may be clear in the mask to indicate no testing on that
  291        // lane.
 292        datamask = bitmask;
 293
 294        /*
 295         * Add offset to both test regions to not clobber boot stuff
 296         * when running from L2 for NAND boot.
 297         */
 298        p += AREA_BASE_OFFSET;  // make sure base is out of the way of boot
 299
 300        // final address must include LMC and node
 301        p |= (lmc << 7);        /* Map address into proper interface */
 302        p = bdk_numa_get_address(node, p);      /* Map to node */
 303        p |= 1ull << 63;
 304
 305#define II_INC BIT_ULL(22)
 306#define II_MAX BIT_ULL(22)
 307#define K_INC  BIT_ULL(14)
 308#define K_MAX  BIT_ULL(kbitno)
 309#define J_INC  BIT_ULL(9)
 310#define J_MAX  BIT_ULL(12)
 311#define I_INC  BIT_ULL(3)
 312#define I_MAX  BIT_ULL(7)
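             /*
              * Loop strides: 'i' walks the sixteen 64-bit words of one
              * 128-byte cache line, 'j' steps 512 bytes apart within a 4 KB
              * span, 'k' steps 16 KB apart up to 256 KB or 1 MB (kbitno),
              * and the outer 'ii' loop currently makes a single pass since
              * II_INC == II_MAX.
              */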
 313
 314        debug("N%d.LMC%d: %s: phys_addr=0x%llx/0x%llx (0x%llx)\n",
 315              node, lmc, __func__, p, p + p2offset, 1ULL << kbitno);
 316
 317        // loops are ordered so that only a single 64-bit slot is written to
 318        // each cacheline at one time, then the cachelines are forced out;
 319        // this should maximize read/write traffic
 320
 321        // FIXME? extend the range of memory tested!!
 322        for (ii = 0; ii < II_MAX; ii += II_INC) {
 323                for (i = 0; i < I_MAX; i += I_INC) {
 324                        for (k = 0; k < K_MAX; k += K_INC) {
 325                                for (j = 0; j < J_MAX; j += J_INC) {
 326                                        p1 = p + ii + k + j;
 327                                        p2 = p1 + p2offset;
 328
 329                                        v = pattern1 * (p1 + i);
 330                                        // write the same thing to both areas
 331                                        v1 = v;
 332
 333                                        cvmx_write64_uint64(p1 + i, v);
 334                                        cvmx_write64_uint64(p2 + i, v1);
 335
 336                                        CVMX_CACHE_WBIL2(p1, 0);
 337                                        CVMX_CACHE_WBIL2(p2, 0);
 338                                }
 339                        }
 340                }
 341        }
 342
 343        CVMX_DCACHE_INVALIDATE;
 344
 345        debug("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n", node, lmc);
 346
 347        /* Make a series of passes over the memory areas. */
 348
 349        for (burst = 0; burst < 1 /* was: dram_tune_use_bursts */ ; burst++) {
 350                u64 this_pattern = cvmx_rng_get_random64();
 351
 352                pattern2 ^= this_pattern;
 353
 354                /*
 355                 * XOR the data with a random value, applying the change to both
 356                 * memory areas.
 357                 */
 358
 359                // FIXME? extend the range of memory tested!!
 360                for (ii = 0; ii < II_MAX; ii += II_INC) {
 361                        // FIXME: rearranged, did not make much difference?
 362                        for (i = 0; i < I_MAX; i += I_INC) {
 363                                for (k = 0; k < K_MAX; k += K_INC) {
 364                                        for (j = 0; j < J_MAX; j += J_INC) {
 365                                                p1 = p + ii + k + j;
 366                                                p2 = p1 + p2offset;
 367
 368                                                v = cvmx_read64_uint64(p1 +
 369                                                                      i) ^
 370                                                    this_pattern;
 371                                                v1 = cvmx_read64_uint64(p2 +
 372                                                                       i) ^
 373                                                    this_pattern;
 374
 375                                                cvmx_write64_uint64(p1 + i, v);
 376                                                cvmx_write64_uint64(p2 + i, v1);
 377
 378                                                CVMX_CACHE_WBIL2(p1, 0);
 379                                                CVMX_CACHE_WBIL2(p2, 0);
 380                                        }
 381                                }
 382                        }
 383                }
 384
 385                CVMX_DCACHE_INVALIDATE;
 386
 387                debug("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n",
 388                      node, lmc);
 389
 390                /*
 391                 * Look for differences in the areas. If there is a mismatch,
 392                 * reset both memory locations with the same pattern. Failing
 393                 * to do so means that on all subsequent passes the pair of
 394                 * locations remain out of sync giving spurious errors.
 395                 */
 396
 397                // FIXME: Change the loop order so that an entire cache line
 398                //        is compared at one time. This is so that a read
 399                //        error that occurs *anywhere* on the cacheline will
 400                //        be caught, rather than comparing only 1 cacheline
 401                //        slot at a time, where an error on a different
 402                //        slot will be missed that time around
 403                // Does the above make sense?
 404
 405                // FIXME? extend the range of memory tested!!
 406                for (ii = 0; ii < II_MAX; ii += II_INC) {
 407                        for (k = 0; k < K_MAX; k += K_INC) {
 408                                for (j = 0; j < J_MAX; j += J_INC) {
 409                                        p1 = p + ii + k + j;
 410                                        p2 = p1 + p2offset;
 411
 412                                        // process entire cachelines in the
  413                                        // innermost loop
 414                                        for (i = 0; i < I_MAX; i += I_INC) {
 415                                                int bybit = 1;
 416                                                // start in byte lane 0
 417                                                u64 bymsk = 0xffULL;
 418
 419                                                // FIXME: this should predict
 420                                                // what we find...???
 421                                                v = ((p1 + i) * pattern1) ^
 422                                                        pattern2;
 423                                                d1 = cvmx_read64_uint64(p1 + i);
 424                                                d2 = cvmx_read64_uint64(p2 + i);
 425
 426                                                // union of error bits only in
 427                                                // active byte lanes
 428                                                xor = ((d1 ^ v) | (d2 ^ v)) &
 429                                                        datamask;
 430
 431                                                if (!xor)
 432                                                        continue;
 433
 434                                                // accumulate bad bits
 435                                                bad_bits[0] |= xor;
 436
 437                                                while (xor != 0) {
 438                                                        debug("ERROR(%03d): [0x%016llX] [0x%016llX]  expected 0x%016llX d1 %016llX d2 %016llX\n",
 439                                                              burst, p1, p2, v,
 440                                                              d1, d2);
 441                                                        // error(s) in this lane
 442                                                        if (xor & bymsk) {
 443                                                                // set the byte
 444                                                                // error bit
 445                                                                errors |= bybit;
 446                                                                // clear byte
 447                                                                // lane in
 448                                                                // error bits
 449                                                                xor &= ~bymsk;
 450                                                                // clear the
 451                                                                // byte lane in
 452                                                                // the mask
 453                                                                datamask &= ~bymsk;
 454#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
 455                                                                // nothing
 456                                                                // left to do
 457                                                                if (datamask == 0) {
 458                                                                        return errors;
 459                                                                }
 460#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
 461                                                        }
 462                                                        // move mask into
 463                                                        // next byte lane
 464                                                        bymsk <<= 8;
 465                                                        // move bit into next
 466                                                        // byte position
 467                                                        bybit <<= 1;
 468                                                }
 469                                        }
 470                                        CVMX_CACHE_WBIL2(p1, 0);
 471                                        CVMX_CACHE_WBIL2(p2, 0);
 472                                }
 473                        }
 474                }
 475
 476                debug("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n",
 477                      node, lmc);
 478        }
 479
 480        if (xor_data) {         // send the bad bits back...
 481                xor_data[0] = bad_bits[0];
 482                xor_data[1] = bad_bits[1];      // let it be zeroed
 483        }
 484
 485        // Restore original setting that could enable partial cacheline writes
 486        l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL);
 487        l2c_ctl.cn78xx.dissblkdty = saved_dissblkdty;
 488        l2c_wr(priv, CVMX_L2C_CTL_REL, l2c_ctl.u64);
 489
 490        return errors;
 491}
 492
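     /*
      * Issue a single DDR4 mode register write.  "rank" selects the chip
      * select, "mr_wr_sel" the mode register number and "mr_wr_addr" the
      * value to write (-1 means use the hardware default value).  A
      * non-zero "mr_wr_bg1" sets the BG1 bank-group bit, which the callers
      * use together with INV_A0_17() to address the inverted B side of an
      * RDIMM.
      */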
 493static void ddr4_mrw(struct ddr_priv *priv, int if_num, int rank,
 494                     int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1)
 495{
 496        union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
 497
 498        lmc_mr_mpr_ctl.u64 = 0;
 499        lmc_mr_mpr_ctl.cn78xx.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr;
 500        lmc_mr_mpr_ctl.cn78xx.mr_wr_sel = mr_wr_sel;
 501        lmc_mr_mpr_ctl.cn78xx.mr_wr_rank = rank;
 502        lmc_mr_mpr_ctl.cn78xx.mr_wr_use_default_value =
 503                (mr_wr_addr == -1) ? 1 : 0;
 504        lmc_mr_mpr_ctl.cn78xx.mr_wr_bg1 = mr_wr_bg1;
 505        lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
 506
 507        /* Mode Register Write */
 508        oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
 509}
 510
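     /*
      * 0x22bf8 has bits 3-9, 11, 13 and 17 set, i.e. the address bits
      * (A3-A9, A11, A13, A17) that a DDR4 registering clock driver inverts
      * on its B-side outputs; XORing with it yields the value that must be
      * sent so the B-side devices receive the intended mode register data.
      */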
 511#define INV_A0_17(x)    ((x) ^ 0x22bf8)
 512
 513static void set_mpr_mode(struct ddr_priv *priv, int rank_mask,
 514                         int if_num, int dimm_count, int mpr, int bg1)
 515{
 516        int rankx;
 517
 518        debug("All Ranks: Set mpr mode = %x %c-side\n",
 519              mpr, (bg1 == 0) ? 'A' : 'B');
 520
 521        for (rankx = 0; rankx < dimm_count * 4; rankx++) {
 522                if (!(rank_mask & (1 << rankx)))
 523                        continue;
 524                if (bg1 == 0) {
 525                        /* MR3 A-side */
 526                        ddr4_mrw(priv, if_num, rankx, mpr << 2, 3, bg1);
 527                } else {
 528                        /* MR3 B-side */
 529                        ddr4_mrw(priv, if_num, rankx, INV_A0_17(mpr << 2), ~3,
 530                                 bg1);
 531                }
 532        }
 533}
 534
 535static void do_ddr4_mpr_read(struct ddr_priv *priv, int if_num,
 536                             int rank, int page, int location)
 537{
 538        union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
 539
 540        lmc_mr_mpr_ctl.u64 = lmc_rd(priv, CVMX_LMCX_MR_MPR_CTL(if_num));
 541        lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = 0;
 542        lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */
 543        lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
 544        lmc_mr_mpr_ctl.cn70xx.mpr_loc = location;
 545        lmc_mr_mpr_ctl.cn70xx.mpr_wr = 0;       /* Read=0, Write=1 */
 546        lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
 547
 548        /* MPR register access sequence */
 549        oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);
 550
 551        debug("LMC_MR_MPR_CTL                  : 0x%016llx\n",
 552              lmc_mr_mpr_ctl.u64);
 553        debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
 554              lmc_mr_mpr_ctl.cn70xx.mr_wr_addr);
 555        debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
 556              lmc_mr_mpr_ctl.cn70xx.mr_wr_sel);
 557        debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc   : 0x%02x\n",
 558              lmc_mr_mpr_ctl.cn70xx.mpr_loc);
 559        debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr    : 0x%02x\n",
 560              lmc_mr_mpr_ctl.cn70xx.mpr_wr);
 561}
 562
 563static int set_rdimm_mode(struct ddr_priv *priv, int if_num, int enable)
 564{
 565        union cvmx_lmcx_control lmc_control;
 566        int save_rdimm_mode;
 567
 568        lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
 569        save_rdimm_mode = lmc_control.s.rdimm_ena;
 570        lmc_control.s.rdimm_ena = enable;
 571        debug("Setting RDIMM_ENA = %x\n", enable);
 572        lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64);
 573
 574        return save_rdimm_mode;
 575}
 576
 577static void ddr4_mpr_read(struct ddr_priv *priv, int if_num, int rank,
 578                          int page, int location, u64 *mpr_data)
 579{
 580        do_ddr4_mpr_read(priv, if_num, rank, page, location);
 581
 582        mpr_data[0] = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
 583}
 584
 585/* Display MPR values for Page */
 586static void display_mpr_page(struct ddr_priv *priv, int rank_mask,
 587                             int if_num, int page)
 588{
 589        int rankx, location;
 590        u64 mpr_data[3];
 591
 592        for (rankx = 0; rankx < 4; rankx++) {
 593                if (!(rank_mask & (1 << rankx)))
 594                        continue;
 595
 596                debug("N0.LMC%d.R%d: MPR Page %d loc [0:3]: ",
 597                      if_num, rankx, page);
 598                for (location = 0; location < 4; location++) {
 599                        ddr4_mpr_read(priv, if_num, rankx, page, location,
 600                                      mpr_data);
 601                        debug("0x%02llx ", mpr_data[0] & 0xFF);
 602                }
 603                debug("\n");
 604
 605        }                       /* for (rankx = 0; rankx < 4; rankx++) */
 606}
 607
 608static void ddr4_mpr_write(struct ddr_priv *priv, int if_num, int rank,
 609                           int page, int location, u8 mpr_data)
 610{
 611        union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
 612
 613        lmc_mr_mpr_ctl.u64 = 0;
 614        lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mpr_data;
 615        lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */
 616        lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
 617        lmc_mr_mpr_ctl.cn70xx.mpr_loc = location;
 618        lmc_mr_mpr_ctl.cn70xx.mpr_wr = 1;       /* Read=0, Write=1 */
 619        lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
 620
 621        /* MPR register access sequence */
 622        oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);
 623
 624        debug("LMC_MR_MPR_CTL                  : 0x%016llx\n",
 625              lmc_mr_mpr_ctl.u64);
 626        debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
 627              lmc_mr_mpr_ctl.cn70xx.mr_wr_addr);
 628        debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
 629              lmc_mr_mpr_ctl.cn70xx.mr_wr_sel);
 630        debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc   : 0x%02x\n",
 631              lmc_mr_mpr_ctl.cn70xx.mpr_loc);
 632        debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr    : 0x%02x\n",
 633              lmc_mr_mpr_ctl.cn70xx.mpr_wr);
 634}
 635
 636static void set_vref(struct ddr_priv *priv, int if_num, int rank,
 637                     int range, int value)
 638{
 639        union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
 640        union cvmx_lmcx_modereg_params3 lmc_modereg_params3;
 641        int mr_wr_addr = 0;
 642
 643        lmc_mr_mpr_ctl.u64 = 0;
 644        lmc_modereg_params3.u64 = lmc_rd(priv,
 645                                         CVMX_LMCX_MODEREG_PARAMS3(if_num));
 646
 647        /* A12:A10 tCCD_L */
 648        mr_wr_addr |= lmc_modereg_params3.s.tccd_l << 10;
 649        mr_wr_addr |= 1 << 7;   /* A7 1 = Enable(Training Mode) */
 650        mr_wr_addr |= range << 6;       /* A6 vrefDQ Training Range */
 651        mr_wr_addr |= value << 0;       /* A5:A0 vrefDQ Training Value */
 652
 653        lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
 654        lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = 6;    /* Write MR6 */
 655        lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
 656        lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
 657
 658        /* 0x8 = Mode Register Write */
 659        oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
 660
 661        /*
 662         * It is vendor specific whether vref_value is captured with A7=1.
 663         * A subsequent MRS might be necessary.
 664         */
 665        oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
 666
 667        mr_wr_addr &= ~(1 << 7);        /* A7 0 = Disable(Training Mode) */
 668        lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
 669        lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
 670}
 671
 672static void set_dram_output_inversion(struct ddr_priv *priv, int if_num,
 673                                      int dimm_count, int rank_mask,
 674                                      int inversion)
 675{
 676        union cvmx_lmcx_ddr4_dimm_ctl lmc_ddr4_dimm_ctl;
 677        union cvmx_lmcx_dimmx_params lmc_dimmx_params;
 678        union cvmx_lmcx_dimm_ctl lmc_dimm_ctl;
 679        int dimm_no;
 680
  681        /* Don't touch extended register control words */
 682        lmc_ddr4_dimm_ctl.u64 = 0;
 683        lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), lmc_ddr4_dimm_ctl.u64);
 684
 685        debug("All DIMMs: Register Control Word          RC0 : %x\n",
 686              (inversion & 1));
 687
 688        for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) {
 689                lmc_dimmx_params.u64 =
 690                    lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num));
 691                lmc_dimmx_params.s.rc0 =
 692                    (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1);
 693
 694                lmc_wr(priv,
 695                       CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num),
 696                       lmc_dimmx_params.u64);
 697        }
 698
 699        /* LMC0_DIMM_CTL */
 700        lmc_dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
 701        lmc_dimm_ctl.s.dimm0_wmask = 0x1;
 702        lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000;
 703
 704        debug("LMC DIMM_CTL                                  : 0x%016llx\n",
 705              lmc_dimm_ctl.u64);
 706        lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), lmc_dimm_ctl.u64);
 707
 708        oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);    /* Init RCW */
 709}
 710
 711static void write_mpr_page0_pattern(struct ddr_priv *priv, int rank_mask,
 712                                    int if_num, int dimm_count, int pattern,
 713                                    int location_mask)
 714{
 715        int rankx;
 716        int location;
 717
 718        for (rankx = 0; rankx < dimm_count * 4; rankx++) {
 719                if (!(rank_mask & (1 << rankx)))
 720                        continue;
 721                for (location = 0; location < 4; ++location) {
 722                        if (!(location_mask & (1 << location)))
 723                                continue;
 724
 725                        ddr4_mpr_write(priv, if_num, rankx,
 726                                       /* page */ 0, /* location */ location,
 727                                       pattern);
 728                }
 729        }
 730}
 731
 732static void change_rdimm_mpr_pattern(struct ddr_priv *priv, int rank_mask,
 733                                     int if_num, int dimm_count)
 734{
 735        int save_ref_zqcs_int;
 736        union cvmx_lmcx_config lmc_config;
 737
 738        /*
 739         * Okay, here is the latest sequence.  This should work for all
 740         * chips and passes (78,88,73,etc).  This sequence should be run
 741         * immediately after DRAM INIT.  The basic idea is to write the
 742         * same pattern into each of the 4 MPR locations in the DRAM, so
 743         * that the same value is returned when doing MPR reads regardless
 744         * of the inversion state.  My advice is to put this into a
 745         * function, change_rdimm_mpr_pattern or something like that, so
 746         * that it can be called multiple times, as I think David wants a
 747         * clock-like pattern for OFFSET training, but does not want a
 748         * clock pattern for Bit-Deskew.  You should then be able to call
 749         * this at any point in the init sequence (after DRAM init) to
 750         * change the pattern to a new value.
 751         * Mike
 752         *
 753         * A correction: PHY doesn't need any pattern during offset
 754         * training, but needs clock like pattern for internal vref and
 755         * bit-dskew training.  So for that reason, these steps below have
 756         * to be conducted before those trainings to pre-condition
 757         * the pattern.  David
 758         *
 759         * Note: Step 3, 4, 8 and 9 have to be done through RDIMM
 760         * sequence. If you issue MRW sequence to do RCW write (in o78 pass
 761         * 1 at least), LMC will still do two commands because
 762         * CONTROL[RDIMM_ENA] is still set high. We don't want it to have
 763         * any unintentional mode register write so it's best to do what
 764         * Mike is doing here.
 765         * Andrew
 766         */
 767
 768        /* 1) Disable refresh (REF_ZQCS_INT = 0) */
 769
 770        debug("1) Disable refresh (REF_ZQCS_INT = 0)\n");
 771
 772        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
 773        save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
 774        lmc_config.cn78xx.ref_zqcs_int = 0;
 775        lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
 776
 777        /*
 778         * 2) Put all devices in MPR mode (Run MRW sequence (sequence=8)
 779         * with MODEREG_PARAMS0[MPRLOC]=0,
 780         * MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and
 781         * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
 782         */
 783
 784        debug("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n");
 785
 786        /* A-side */
 787        set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 0);
 788        /* B-side */
 789        set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 1);
 790
 791        /*
 792         * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set
 793         * the value you would like directly into
 794         * MR_MPR_CTL[MR_WR_ADDR]
 795         */
 796
 797        /*
 798         * 3) Disable RCD Parity (if previously enabled) - parity does not
 799         * work if inversion disabled
 800         */
 801
 802        debug("3) Disable RCD Parity\n");
 803
 804        /*
 805         * 4) Disable Inversion in the RCD.
 806         * a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it
 807         * may be easier to use the MRW sequence (seq_sel=8).  Just set
 808         * MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data,
 809         * MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg
 810         */
 811
 812        debug("4) Disable Inversion in the RCD.\n");
 813
 814        set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 1);
 815
 816        /*
 817         * 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out
 818         * non-inverted.
 819         */
 820
 821        debug("5) Disable CONTROL[RDIMM_ENA]\n");
 822
 823        set_rdimm_mode(priv, if_num, 0);
 824
 825        /*
 826         * 6) Write all 4 MPR registers with the desired pattern (have to
 827         * do this for all enabled ranks)
 828         * a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3,
 829         * MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern
 830         */
 831
 832        debug("6) Write all 4 MPR page 0 Training Patterns\n");
 833
 834        write_mpr_page0_pattern(priv, rank_mask, if_num, dimm_count, 0x55, 0x8);
 835
 836        /* 7) Re-enable RDIMM_ENA */
 837
 838        debug("7) Re-enable RDIMM_ENA\n");
 839
 840        set_rdimm_mode(priv, if_num, 1);
 841
 842        /* 8) Re-enable RDIMM inversion */
 843
 844        debug("8) Re-enable RDIMM inversion\n");
 845
 846        set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 0);
 847
 848        /* 9) Re-enable RDIMM parity (if desired) */
 849
 850        debug("9) Re-enable RDIMM parity (if desired)\n");
 851
 852        /*
 853         * 10)Take B-side devices out of MPR mode (Run MRW sequence
 854         * (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0,
 855         * MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and
 856         * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
 857         */
 858
 859        debug("10)Take B-side devices out of MPR mode\n");
 860
 861        set_mpr_mode(priv, rank_mask, if_num, dimm_count,
 862                     /* mpr */ 0, /* bg1 */ 1);
 863
 864        /*
 865         * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and
 866         * set the value you would like directly into MR_MPR_CTL[MR_WR_ADDR]
 867         */
 868
 869        /* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */
 870
 871        debug("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n");
 872
 873        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
 874        lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
 875        lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
 876}
 877
 878static int validate_hwl_seq(int *wl, int *seq)
 879{
 880        // sequence index, step through the sequence array
 881        int seqx;
 882        int bitnum;
 883
 884        seqx = 0;
 885
 886        while (seq[seqx + 1] >= 0) {    // stop on next seq entry == -1
 887                // but now, check current versus next
 888                bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx + 1]];
 889                // magic validity number (see matrix above)
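                     // 0xBDE7 clears exactly bits 3, 4, 9 and 14, so the
                     // only rejected (current, next) pairs are those where
                     // the next lane's 2-bit coarse setting is one step
                     // below the current one (modulo 4).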
 890                if (!((1 << bitnum) & 0xBDE7))
 891                        return 1;
 892                seqx++;
 893        }
 894
 895        return 0;
 896}
 897
 898static int validate_hw_wl_settings(int if_num,
 899                                   union cvmx_lmcx_wlevel_rankx
 900                                   *lmc_wlevel_rank, int is_rdimm, int ecc_ena)
 901{
 902        int wl[9], byte, errors;
 903
 904        // arrange the sequences so
 905        // index 0 has byte 0, etc, ECC in middle
 906        int useq[] = { 0, 1, 2, 3, 8, 4, 5, 6, 7, -1 };
 907        // index 0 is ECC, then go down
 908        int rseq1[] = { 8, 3, 2, 1, 0, -1 };
 909        // index 0 has byte 4, then go up
 910        int rseq2[] = { 4, 5, 6, 7, -1 };
 911        // index 0 has byte 0, etc, no ECC
 912        int useqno[] = { 0, 1, 2, 3, 4, 5, 6, 7, -1 };
 913        // index 0 is byte 3, then go down, no ECC
 914        int rseq1no[] = { 3, 2, 1, 0, -1 };
 915
 916        // in the CSR, bytes 0-7 are always data, byte 8 is ECC
 917        for (byte = 0; byte < (8 + ecc_ena); byte++) {
 918                // preprocess :-)
 919                wl[byte] = (get_wl_rank(lmc_wlevel_rank, byte) >>
 920                            1) & 3;
 921        }
 922
 923        errors = 0;
 924        if (is_rdimm) {         // RDIMM order
 925                errors = validate_hwl_seq(wl, (ecc_ena) ? rseq1 : rseq1no);
 926                errors += validate_hwl_seq(wl, rseq2);
 927        } else {                // UDIMM order
 928                errors = validate_hwl_seq(wl, (ecc_ena) ? useq : useqno);
 929        }
 930
 931        return errors;
 932}
 933
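     /*
      * Helpers for a 3-bit per-rank field split across a CSR: the two low
      * bits for rank "x" sit at bit (x * 12 + 5) and the third bit in an
      * extension field at bit (51 + x).  This is assumed to match the
      * split RTT_WR layout of LMC(0)_MODEREG_PARAMS1.
      */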
 934static unsigned int extr_wr(u64 u, int x)
 935{
 936        return (unsigned int)(((u >> (x * 12 + 5)) & 0x3ULL) |
 937                              ((u >> (51 + x - 2)) & 0x4ULL));
 938}
 939
 940static void insrt_wr(u64 *up, int x, int v)
 941{
 942        u64 u = *up;
 943
 944        u &= ~(((0x3ULL) << (x * 12 + 5)) | ((0x1ULL) << (51 + x)));
 945        *up = (u | ((v & 0x3ULL) << (x * 12 + 5)) |
 946               ((v & 0x4ULL) << (51 + x - 2)));
 947}
 948
 949/* Read out Deskew Settings for DDR */
 950
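     /*
      * Each entry holds the raw 10-bit per-bit deskew debug readout:
      * bits [9:3] are the deskew tap setting and bits [2:0] are status
      * flags (bit 0 = locked, bits 2:1 = saturated), as decoded by the
      * display/validate routines below.
      */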
 951struct deskew_bytes {
 952        u16 bits[8];
 953};
 954
 955struct deskew_data {
 956        struct deskew_bytes bytes[9];
 957};
 958
 959struct dac_data {
 960        int bytes[9];
 961};
 962
 963// T88 pass 1, skip 4=DAC
 964static const u8 dsk_bit_seq_p1[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
 965// T88 Pass 2, skip 4=DAC and 5=DBI
 966static const u8 dsk_bit_seq_p2[8] = { 0, 1, 2, 3, 6, 7, 8, 9 };
 967
 968static void get_deskew_settings(struct ddr_priv *priv, int if_num,
 969                                struct deskew_data *dskdat)
 970{
 971        union cvmx_lmcx_phy_ctl phy_ctl;
 972        union cvmx_lmcx_config lmc_config;
 973        int bit_index;
 974        int byte_lane, byte_limit;
 975        // NOTE: these are for pass 2.x
 976        int is_o78p2 = !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X);
 977        const u8 *bit_seq = (is_o78p2) ? dsk_bit_seq_p2 : dsk_bit_seq_p1;
 978
 979        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
 980        byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
 981
 982        memset(dskdat, 0, sizeof(*dskdat));
 983
 984        phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
 985        phy_ctl.s.dsk_dbg_clk_scaler = 3;
 986
 987        for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
 988                phy_ctl.s.dsk_dbg_byte_sel = byte_lane; // set byte lane
 989
 990                for (bit_index = 0; bit_index < 8; ++bit_index) {
 991                        // set bit number and start read sequence
 992                        phy_ctl.s.dsk_dbg_bit_sel = bit_seq[bit_index];
 993                        phy_ctl.s.dsk_dbg_rd_start = 1;
 994                        lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
 995
 996                        // poll for read sequence to complete
 997                        do {
 998                                phy_ctl.u64 =
 999                                        lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1000                        } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
1001
1002                        // record the data
1003                        dskdat->bytes[byte_lane].bits[bit_index] =
1004                                phy_ctl.s.dsk_dbg_rd_data & 0x3ff;
1005                }
1006        }
1007}
1008
1009static void display_deskew_settings(struct ddr_priv *priv, int if_num,
1010                                    struct deskew_data *dskdat,
1011                                    int print_enable)
1012{
1013        int byte_lane;
1014        int bit_num;
1015        u16 flags, deskew;
1016        union cvmx_lmcx_config lmc_config;
1017        int byte_limit;
1018        const char *fc = " ?-=+*#&";
1019
1020        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1021        byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1022
1023        if (print_enable) {
1024                debug("N0.LMC%d: Deskew Data:              Bit =>      :",
1025                      if_num);
1026                for (bit_num = 7; bit_num >= 0; --bit_num)
1027                        debug(" %3d  ", bit_num);
1028                debug("\n");
1029        }
1030
1031        for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1032                if (print_enable)
1033                        debug("N0.LMC%d: Bit Deskew Byte %d %s               :",
1034                              if_num, byte_lane,
1035                              (print_enable >= 3) ? "FINAL" : "     ");
1036
1037                for (bit_num = 7; bit_num >= 0; --bit_num) {
1038                        flags = dskdat->bytes[byte_lane].bits[bit_num] & 7;
1039                        deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3;
1040
1041                        if (print_enable)
1042                                debug(" %3d %c", deskew, fc[flags ^ 1]);
1043
1044                }               /* for (bit_num = 7; bit_num >= 0; --bit_num) */
1045
1046                if (print_enable)
1047                        debug("\n");
1048        }
1049}
1050
1051static void override_deskew_settings(struct ddr_priv *priv, int if_num,
1052                                     struct deskew_data *dskdat)
1053{
1054        union cvmx_lmcx_phy_ctl phy_ctl;
1055        union cvmx_lmcx_config lmc_config;
1056
1057        int bit, byte_lane, byte_limit;
1058        u64 csr_data;
1059
1060        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1061        byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1062
1063        phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1064
1065        phy_ctl.s.phy_reset = 0;
1066        phy_ctl.s.dsk_dbg_num_bits_sel = 1;
1067        phy_ctl.s.dsk_dbg_offset = 0;
1068        phy_ctl.s.dsk_dbg_clk_scaler = 3;
1069
1070        phy_ctl.s.dsk_dbg_wr_mode = 1;
1071        phy_ctl.s.dsk_dbg_load_dis = 0;
1072        phy_ctl.s.dsk_dbg_overwrt_ena = 0;
1073
1074        phy_ctl.s.phy_dsk_reset = 0;
1075
1076        lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1077        lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1078
1079        for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1080                csr_data = 0;
1081                // FIXME: can we ignore DBI?
1082                for (bit = 0; bit < 8; ++bit) {
1083                        // fetch input and adjust
1084                        u64 bits = (dskdat->bytes[byte_lane].bits[bit] >> 3) &
1085                                0x7F;
1086
1087                        /*
1088                         * lmc_general_purpose0.data[6:0]    // DQ0
1089                         * lmc_general_purpose0.data[13:7]   // DQ1
1090                         * lmc_general_purpose0.data[20:14]  // DQ2
1091                         * lmc_general_purpose0.data[27:21]  // DQ3
1092                         * lmc_general_purpose0.data[34:28]  // DQ4
1093                         * lmc_general_purpose0.data[41:35]  // DQ5
1094                         * lmc_general_purpose0.data[48:42]  // DQ6
1095                         * lmc_general_purpose0.data[55:49]  // DQ7
1096                         * lmc_general_purpose0.data[62:56]  // DBI
1097                         */
1098                        csr_data |= (bits << (7 * bit));
1099
1100                } /* for (bit = 0; bit < 8; ++bit) */
1101
1102                // update GP0 with the bit data for this byte lane
1103                lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num), csr_data);
1104                lmc_rd(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num));
1105
1106                // start the deskew load sequence
1107                phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
1108                phy_ctl.s.dsk_dbg_rd_start = 1;
1109                lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1110
1111                // poll for read sequence to complete
1112                do {
1113                        udelay(100);
1114                        phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1115                } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
1116        }
1117
1118        // tell phy to use the new settings
1119        phy_ctl.s.dsk_dbg_overwrt_ena = 1;
1120        phy_ctl.s.dsk_dbg_rd_start = 0;
1121        lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1122
1123        phy_ctl.s.dsk_dbg_wr_mode = 0;
1124        lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1125}
1126
1127static void process_by_rank_dac(struct ddr_priv *priv, int if_num,
1128                                int rank_mask, struct dac_data *dacdat)
1129{
1130        union cvmx_lmcx_config lmc_config;
1131        int rankx, byte_lane;
1132        int byte_limit;
1133        int rank_count;
1134        struct dac_data dacsum;
1135        int lane_probs;
1136
1137        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1138        byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1139
1140        memset((void *)&dacsum, 0, sizeof(dacsum));
1141        rank_count = 0;
1142        lane_probs = 0;
1143
1144        for (rankx = 0; rankx < 4; rankx++) {
1145                if (!(rank_mask & (1 << rankx)))
1146                        continue;
1147                rank_count++;
1148
1149                display_dac_dbi_settings(if_num, /*dac */ 1,
1150                                         lmc_config.s.ecc_ena,
1151                                         &dacdat[rankx].bytes[0],
1152                                         "By-Ranks VREF");
1153                // sum
1154                for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1155                        if (rank_count == 2) {
1156                                int ranks_diff =
1157                                    abs((dacsum.bytes[byte_lane] -
1158                                         dacdat[rankx].bytes[byte_lane]));
1159
1160                                // FIXME: is 19 a good number?
1161                                if (ranks_diff > 19)
1162                                        lane_probs |= (1 << byte_lane);
1163                        }
1164                        dacsum.bytes[byte_lane] +=
1165                            dacdat[rankx].bytes[byte_lane];
1166                }
1167        }
1168
1169        // average
1170        for (byte_lane = 0; byte_lane < byte_limit; byte_lane++)
1171                dacsum.bytes[byte_lane] /= rank_count;  // FIXME: nint?
1172
1173        display_dac_dbi_settings(if_num, /*dac */ 1, lmc_config.s.ecc_ena,
1174                                 &dacsum.bytes[0], "All-Rank VREF");
1175
1176        if (lane_probs) {
1177                debug("N0.LMC%d: All-Rank VREF DAC Problem Bytelane(s): 0x%03x\n",
1178                      if_num, lane_probs);
1179        }
1180
1181        // finally, write the averaged DAC values
1182        for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1183                load_dac_override(priv, if_num, dacsum.bytes[byte_lane],
1184                                  byte_lane);
1185        }
1186}
1187
1188static void process_by_rank_dsk(struct ddr_priv *priv, int if_num,
1189                                int rank_mask, struct deskew_data *dskdat)
1190{
1191        union cvmx_lmcx_config lmc_config;
1192        int rankx, lane, bit;
1193        int byte_limit;
1194        struct deskew_data dsksum, dskcnt;
1195        u16 deskew;
1196
1197        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1198        byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1199
1200        memset((void *)&dsksum, 0, sizeof(dsksum));
1201        memset((void *)&dskcnt, 0, sizeof(dskcnt));
1202
1203        for (rankx = 0; rankx < 4; rankx++) {
1204                if (!(rank_mask & (1 << rankx)))
1205                        continue;
1206
1207                // sum ranks
1208                for (lane = 0; lane < byte_limit; lane++) {
1209                        for (bit = 0; bit < 8; ++bit) {
1210                                deskew = dskdat[rankx].bytes[lane].bits[bit];
1211                                // if flags indicate sat hi or lo, skip it
1212                                if (deskew & 6)
1213                                        continue;
1214
1215                                // clear flags
1216                                dsksum.bytes[lane].bits[bit] +=
1217                                        deskew & ~7;
1218                                // count entries
1219                                dskcnt.bytes[lane].bits[bit] += 1;
1220                        }
1221                }
1222        }
1223
1224        // average ranks
1225        for (lane = 0; lane < byte_limit; lane++) {
1226                for (bit = 0; bit < 8; ++bit) {
1227                        int div = dskcnt.bytes[lane].bits[bit];
1228
1229                        if (div > 0) {
1230                                dsksum.bytes[lane].bits[bit] /= div;
1231                                // clear flags
1232                                dsksum.bytes[lane].bits[bit] &= ~7;
1233                                // set LOCK
1234                                dsksum.bytes[lane].bits[bit] |= 1;
1235                        } else {
1236                                // FIXME? use reset value?
1237                                dsksum.bytes[lane].bits[bit] =
1238                                        (64 << 3) | 1;
1239                        }
1240                }
1241        }
1242
1243        // TME for FINAL version
1244        display_deskew_settings(priv, if_num, &dsksum, /*VBL_TME */ 3);
1245
1246        // finally, write the averaged DESKEW values
1247        override_deskew_settings(priv, if_num, &dsksum);
1248}
1249
1250struct deskew_counts {
1251        int saturated;          // number saturated
1252        int unlocked;           // number unlocked
1253        int nibrng_errs;        // nibble range errors
1254        int nibunl_errs;        // nibble unlocked errors
1255        int bitval_errs;        // bit value errors
1256};
1257
1258#define MIN_BITVAL  17
1259#define MAX_BITVAL 110
1260
1261static void validate_deskew_training(struct ddr_priv *priv, int rank_mask,
1262                                     int if_num, struct deskew_counts *counts,
1263                                     int print_flags)
1264{
1265        int byte_lane, bit_index, nib_num;
1266        int nibrng_errs, nibunl_errs, bitval_errs;
1267        union cvmx_lmcx_config lmc_config;
1268        s16 nib_min[2], nib_max[2], nib_unl[2];
1269        int byte_limit;
1270        int print_enable = print_flags & 1;
1271        struct deskew_data dskdat;
1272        s16 flags, deskew;
1273        const char *fc = " ?-=+*#&";
1274        int bit_last;
1275
1276        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1277        byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
1278
1279        memset(counts, 0, sizeof(struct deskew_counts));
1280
1281        get_deskew_settings(priv, if_num, &dskdat);
1282
1283        if (print_enable) {
1284                debug("N0.LMC%d: Deskew Settings:          Bit =>      :",
1285                      if_num);
1286                for (bit_index = 7; bit_index >= 0; --bit_index)
1287                        debug(" %3d  ", bit_index);
1288                debug("\n");
1289        }
1290
1291        for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1292                if (print_enable)
1293                        debug("N0.LMC%d: Bit Deskew Byte %d %s               :",
1294                              if_num, byte_lane,
1295                              (print_flags & 2) ? "FINAL" : "     ");
1296
1297                nib_min[0] = 127;
1298                nib_min[1] = 127;
1299                nib_max[0] = 0;
1300                nib_max[1] = 0;
1301                nib_unl[0] = 0;
1302                nib_unl[1] = 0;
1303
1304                if (lmc_config.s.mode32b == 1 && byte_lane == 4) {
1305                        bit_last = 3;
1306                        if (print_enable)
1307                                debug("                        ");
1308                } else {
1309                        bit_last = 7;
1310                }
1311
1312                for (bit_index = bit_last; bit_index >= 0; --bit_index) {
1313                        nib_num = (bit_index > 3) ? 1 : 0;
1314
1315                        flags = dskdat.bytes[byte_lane].bits[bit_index] & 7;
1316                        deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3;
1317
1318                        counts->saturated += !!(flags & 6);
1319
1320                        // Do range calc even when locked; it could happen
1321                        // that a bit is still unlocked after final retry,
1322                        // and we want to have an external retry if a RANGE
1323                        // error is present at exit...
1324                        nib_min[nib_num] = min(nib_min[nib_num], deskew);
1325                        nib_max[nib_num] = max(nib_max[nib_num], deskew);
1326
1327                        if (!(flags & 1)) {     // only when not locked
1328                                counts->unlocked += 1;
1329                                nib_unl[nib_num] += 1;
1330                        }
1331
1332                        if (print_enable)
1333                                debug(" %3d %c", deskew, fc[flags ^ 1]);
1334                }
1335
1336                /*
1337                 * Now look for nibble errors
1338                 *
1339                 * For bit 55, it looks like a bit-deskew problem. When the
1340                 * upper nibble of byte 6 needs to go to saturation, bit 7
1341                 * of byte 6 locks prematurely at 64. For DIMMs with raw
1342                 * card A or B, the deskew training can be reset when this
1343                 * case is encountered. The reset criteria look at one
1344                 * nibble at a time for raw card A and B; if the bit-deskew
1345                 * settings within a nibble differ by more than 33, a reset
1346                 * of the bit-deskew training is issued.
1347                 *
1348                 * LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64
1349                 */
1350                // upper nibble range, then lower nibble range
1351                nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0;
1352                nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0;
1353
1354                // check for nibble all unlocked
1355                nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0;
1356
1357                // check for bit value errors, ie < 17 or > 110
1358                // FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL
1359                bitval_errs = ((nib_max[1] > MAX_BITVAL) ||
1360                               (nib_max[0] > MAX_BITVAL)) ? 1 : 0;
1361                bitval_errs |= ((nib_min[1] < MIN_BITVAL) ||
1362                                (nib_min[0] < MIN_BITVAL)) ? 1 : 0;
1363
1364                if ((nibrng_errs != 0 || nibunl_errs != 0 ||
1365                     bitval_errs != 0) && print_enable) {
1366                        debug(" %c%c%c",
1367                              (nibrng_errs) ? 'R' : ' ',
1368                              (nibunl_errs) ? 'U' : ' ',
1369                              (bitval_errs) ? 'V' : ' ');
1370                }
1371
1372                if (print_enable)
1373                        debug("\n");
1374
1375                counts->nibrng_errs |= (nibrng_errs << byte_lane);
1376                counts->nibunl_errs |= (nibunl_errs << byte_lane);
1377                counts->bitval_errs |= (bitval_errs << byte_lane);
1378        }
1379}
1380
1381static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
1382                                        int dac_value, int byte)
1383{
1384        union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
1385        // single bytelanes incr by 1; A is for ALL
1386        int bytex = (byte == 0x0A) ? byte : byte + 1;
1387
1388        ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
1389
1390        SET_DDR_DLL_CTL3(byte_sel, bytex);
1391        SET_DDR_DLL_CTL3(offset, dac_value >> 1);
1392
1393        ddr_dll_ctl3.cn73xx.bit_select = 0x9;   /* No-op */
1394        lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1395
1396        ddr_dll_ctl3.cn73xx.bit_select = 0xC;   /* vref bypass setting load */
1397        lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1398
1399        ddr_dll_ctl3.cn73xx.bit_select = 0xD;   /* vref bypass on. */
1400        lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1401
1402        ddr_dll_ctl3.cn73xx.bit_select = 0x9;   /* No-op */
1403        lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1404
1405        lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));       // flush writes
1406
1407        return (unsigned short)GET_DDR_DLL_CTL3(offset);
1408}
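
/*
 * Minimal usage sketch (illustrative, hypothetical values): to force the
 * same DAC/VREF-bypass value onto every byte lane, a caller can pass 0x0A
 * as the byte argument, since load_dac_override() treats 0x0A as "ALL"
 * and otherwise shifts a single lane up by one for the byte_sel field:
 *
 *     // hypothetical DAC value of 0x40 applied to all lanes of this LMC
 *     load_dac_override(priv, if_num, 0x40, 0x0A);
 *     // ... or only byte lane 3
 *     load_dac_override(priv, if_num, 0x40, 3);
 */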
1409
1410// arg dac_or_dbi is 1 for DAC, 0 for DBI
1411// returns 9 entries (bytelanes 0 through 8) in settings[]
1412// returns 0 if OK, -1 if a problem
1413static int read_dac_dbi_settings(struct ddr_priv *priv, int if_num,
1414                                 int dac_or_dbi, int *settings)
1415{
1416        union cvmx_lmcx_phy_ctl phy_ctl;
1417        int byte_lane, bit_num;
1418        int deskew;
1419        int dac_value;
1420        int new_deskew_layout = 0;
1421
1422        new_deskew_layout = octeon_is_cpuid(OCTEON_CN73XX) ||
1423                octeon_is_cpuid(OCTEON_CNF75XX);
1424        new_deskew_layout |= (octeon_is_cpuid(OCTEON_CN78XX) &&
1425                              !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X));
1426
1427        phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1428        phy_ctl.s.dsk_dbg_clk_scaler = 3;
1429        lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1430
1431        bit_num = (dac_or_dbi) ? 4 : 5;
1432        // DBI not available
1433        if (bit_num == 5 && !new_deskew_layout)
1434                return -1;
1435
1436        // FIXME: always assume ECC is available
1437        for (byte_lane = 8; byte_lane >= 0; --byte_lane) {
1438                //set byte lane and bit to read
1439                phy_ctl.s.dsk_dbg_bit_sel = bit_num;
1440                phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
1441                lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1442
1443                //start read sequence
1444                phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1445                phy_ctl.s.dsk_dbg_rd_start = 1;
1446                lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1447
1448                //poll for read sequence to complete
1449                do {
1450                        phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1451                } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
1452
1453                // keep the flag bits where they are for DBI
1454                deskew = phy_ctl.s.dsk_dbg_rd_data; /* >> 3 */
1455                dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff;
1456
1457                settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew;
1458        }
1459
1460        return 0;
1461}
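
/*
 * Usage sketch (illustrative): reading the DAC settings for all nine
 * byte lanes and printing them with the display helper below. The array
 * must hold 9 entries because ECC is always assumed present here.
 *
 *     int dac_settings[9];
 *
 *     if (read_dac_dbi_settings(priv, if_num, 1, dac_settings) == 0)
 *             display_dac_dbi_settings(if_num, 1, ecc_ena, dac_settings,
 *                                      "SAMPLE");
 */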
1462
1463// print out the DBI settings array
1464// arg dac_or_dbi is 1 for DAC, 0 for DBI
1465static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
1466                                     int ecc_ena, int *settings, char *title)
1467{
1468        int byte;
1469        int flags;
1470        int deskew;
1471        const char *fc = " ?-=+*#&";
1472
1473        debug("N0.LMC%d: %s %s Settings %d:0 :",
1474              lmc, title, (dac_or_dbi) ? "DAC" : "DBI", 7 + ecc_ena);
1475        // FIXME: what about 32-bit mode?
1476        for (byte = (7 + ecc_ena); byte >= 0; --byte) {
1477                if (dac_or_dbi) {       // DAC
1478                        flags = 1;      // say its locked to get blank
1479                        deskew = settings[byte] & 0xff;
1480                } else {        // DBI
1481                        flags = settings[byte] & 7;
1482                        deskew = (settings[byte] >> 3) & 0x7f;
1483                }
1484                debug(" %3d %c", deskew, fc[flags ^ 1]);
1485        }
1486        debug("\n");
1487}
1488
1489// Find a HWL majority
1490static int find_wl_majority(struct wlevel_bitcnt *bc, int *mx, int *mc,
1491                            int *xc, int *cc)
1492{
1493        int ix, ic;
1494
1495        *mx = -1;
1496        *mc = 0;
1497        *xc = 0;
1498        *cc = 0;
1499
1500        for (ix = 0; ix < 4; ix++) {
1501                ic = bc->bitcnt[ix];
1502
1503                // make a bitmask of the ones with a count
1504                if (ic > 0) {
1505                        *mc |= (1 << ix);
1506                        *cc += 1;       // count how many had non-zero counts
1507                }
1508
1509                // find the majority
1510                if (ic > *xc) { // new max?
1511                        *xc = ic;       // yes
1512                        *mx = ix;       // set its index
1513                }
1514        }
1515
1516        return (*mx << 1);
1517}
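
/*
 * Worked example (illustrative): with bitcnt[] = { 0, 3, 2, 0 } the loop
 * above ends with *mc = 0x6 (indices 1 and 2 saw votes), *cc = 2,
 * *xc = 3 and *mx = 1, so the function returns (*mx << 1) = 2.
 */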
1518
1519// Evaluate the DAC settings array
1520static int evaluate_dac_settings(int if_64b, int ecc_ena, int *settings)
1521{
1522        int byte, lane, dac, comp;
1523        int last = (if_64b) ? 7 : 3;
1524
1525        // FIXME: change the check...???
1526        // this looks only for sets of DAC values whose max/min differ by a lot
1527        // let any EVEN go so long as it is within range...
1528        for (byte = (last + ecc_ena); byte >= 0; --byte) {
1529                dac = settings[byte] & 0xff;
1530
1531                for (lane = (last + ecc_ena); lane >= 0; --lane) {
1532                        comp = settings[lane] & 0xff;
1533                        if (abs((dac - comp)) > 25)
1534                                return 1;
1535                }
1536        }
1537
1538        return 0;
1539}
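
/*
 * Worked example (illustrative, hypothetical values): for a 64-bit
 * interface with ECC and settings[] = { 90, 92, 88, 95, 91, 90, 89,
 * 123, 92 }, the pair (123, 88) differs by 35 > 25, so the function
 * flags the set by returning 1; a set whose values all lie within 25
 * of each other returns 0.
 */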
1540
1541static void perform_offset_training(struct ddr_priv *priv, int rank_mask,
1542                                    int if_num)
1543{
1544        union cvmx_lmcx_phy_ctl lmc_phy_ctl;
1545        u64 orig_phy_ctl;
1546        const char *s;
1547
1548        /*
1549         * 4.8.6 LMC Offset Training
1550         *
1551         * LMC requires input-receiver offset training.
1552         *
1553         * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1
1554         */
1555        lmc_phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1556        orig_phy_ctl = lmc_phy_ctl.u64;
1557        lmc_phy_ctl.s.dac_on = 1;
1558
1559        // allow full CSR override
1560        s = lookup_env_ull(priv, "ddr_phy_ctl");
1561        if (s)
1562                lmc_phy_ctl.u64 = strtoull(s, NULL, 0);
1563
1564        // do not print or write if CSR does not change...
1565        if (lmc_phy_ctl.u64 != orig_phy_ctl) {
1566                debug("PHY_CTL                                       : 0x%016llx\n",
1567                      lmc_phy_ctl.u64);
1568                lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), lmc_phy_ctl.u64);
1569        }
1570
1571        /*
1572         * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and
1573         *    LMC(0)_SEQ_CTL[INIT_START] = 1.
1574         *
1575         * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
1576         */
1577        /* Start Offset training sequence */
1578        oct3_ddr3_seq(priv, rank_mask, if_num, 0x0B);
1579}
1580
1581static void perform_internal_vref_training(struct ddr_priv *priv,
1582                                           int rank_mask, int if_num)
1583{
1584        union cvmx_lmcx_ext_config ext_config;
1585        union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
1586
1587        // First, make sure all byte-lanes are out of VREF bypass mode
1588        ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
1589
1590        ddr_dll_ctl3.cn78xx.byte_sel = 0x0A;    /* all byte-lanes */
1591        ddr_dll_ctl3.cn78xx.bit_select = 0x09;  /* No-op */
1592        lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1593
1594        ddr_dll_ctl3.cn78xx.bit_select = 0x0E;  /* vref bypass off. */
1595        lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1596
1597        ddr_dll_ctl3.cn78xx.bit_select = 0x09;  /* No-op */
1598        lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1599
1600        /*
1601         * 4.8.7 LMC Internal vref Training
1602         *
1603         * LMC requires input-reference-voltage training.
1604         *
1605         * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0.
1606         */
1607        ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
1608        ext_config.s.vrefint_seq_deskew = 0;
1609
1610        ddr_seq_print("Performing LMC sequence: vrefint_seq_deskew = %d\n",
1611                      ext_config.s.vrefint_seq_deskew);
1612
1613        lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);
1614
1615        /*
1616         * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and
1617         *    LMC(0)_SEQ_CTL[INIT_START] = 1.
1618         *
1619         * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
1620         */
1621        /* Start LMC Internal vref Training */
1622        oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
1623}
1624
1625#define dbg_avg(format, ...)    // debug(format, ##__VA_ARGS__)
1626
1627static int process_samples_average(s16 *bytes, int num_samples,
1628                                   int lmc, int lane_no)
1629{
1630        int i, sadj, sum = 0, ret, asum, trunc;
1631        s16 smin = 32767, smax = -32768;
1632        int nmin, nmax;
1633        //int rng;
1634
1635        dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no);
1636
1637        for (i = 0; i < num_samples; i++) {
1638                sum += bytes[i];
1639                if (bytes[i] < smin)
1640                        smin = bytes[i];
1641                if (bytes[i] > smax)
1642                        smax = bytes[i];
1643                dbg_avg(" %3d", bytes[i]);
1644        }
1645
1646        nmin = 0;
1647        nmax = 0;
1648        for (i = 0; i < num_samples; i++) {
1649                if (bytes[i] == smin)
1650                        nmin += 1;
1651                if (bytes[i] == smax)
1652                        nmax += 1;
1653        }
1654        dbg_avg(" (min=%3d/%d, max=%3d/%d, range=%2d, samples=%2d)",
1655                smin, nmin, smax, nmax, smax - smin, num_samples);
1656
1657        asum = sum - smin - smax;
1658
1659        sadj = divide_nint(asum * 10, (num_samples - 2));
1660
1661        trunc = asum / (num_samples - 2);
1662
1663        dbg_avg(" [%3d.%d, %3d]", sadj / 10, sadj % 10, trunc);
1664
1665        sadj = divide_nint(sadj, 10);
1666        if (trunc & 1)
1667                ret = trunc;
1668        else if (sadj & 1)
1669                ret = sadj;
1670        else
1671                ret = trunc + 1;
1672
1673        dbg_avg(" -> %3d\n", ret);
1674
1675        return ret;
1676}
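
/*
 * Worked example (illustrative, assuming divide_nint() rounds to the
 * nearest integer): five samples { 64, 65, 65, 66, 70 } give sum = 330;
 * the single min (64) and max (70) are dropped, so asum = 196 over
 * num_samples - 2 = 3 entries.  trunc = 196 / 3 = 65 and the rounded
 * average is also 65; since trunc is odd it is returned directly.
 * Note that the odd/even selection above always yields an odd result.
 */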
1677
1678#define DEFAULT_SAT_RETRY_LIMIT    11   // 1 + 10 retries
1679
1680#define default_lock_retry_limit   20   // 20 retries
1681#define deskew_validation_delay    10000        // 10 millisecs
1682
1683static int perform_deskew_training(struct ddr_priv *priv, int rank_mask,
1684                                   int if_num, int spd_rawcard_aorb)
1685{
1686        int unsaturated, locked;
1687        int sat_retries, sat_retries_limit;
1688        int lock_retries, lock_retries_total, lock_retries_limit;
1689        int print_first;
1690        int print_them_all;
1691        struct deskew_counts dsk_counts;
1692        union cvmx_lmcx_phy_ctl phy_ctl;
1693        char *s;
1694        int has_no_sat = octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
1695                octeon_is_cpuid(OCTEON_CNF75XX);
1696        int disable_bitval_retries = 1; // default to disabled
1697
1698        debug("N0.LMC%d: Performing Deskew Training.\n", if_num);
1699
1700        sat_retries = 0;
1701        sat_retries_limit = (has_no_sat) ? 5 : DEFAULT_SAT_RETRY_LIMIT;
1702
1703        lock_retries_total = 0;
1704        unsaturated = 0;
1705        print_first = 1;        // print the first one
1706        // set to true for printing all normal deskew attempts
1707        print_them_all = 0;
1708
1709        // provide override for bitval_errs causing internal VREF retries
1710        s = env_get("ddr_disable_bitval_retries");
1711        if (s)
1712                disable_bitval_retries = !!simple_strtoul(s, NULL, 0);
1713
1714        lock_retries_limit = default_lock_retry_limit;
1715        if ((octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) ||
1716            (octeon_is_cpuid(OCTEON_CN73XX)) ||
1717            (octeon_is_cpuid(OCTEON_CNF75XX)))
1718                lock_retries_limit *= 2;        // give new chips twice as many
1719
1720        do {                    /* while (sat_retries < sat_retry_limit) */
1721                /*
1722                 * 4.8.8 LMC Deskew Training
1723                 *
1724                 * LMC requires input-read-data deskew training.
1725                 *
1726                 * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1.
1727                 */
1728
1729                union cvmx_lmcx_ext_config ext_config;
1730
1731                ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
1732                ext_config.s.vrefint_seq_deskew = 1;
1733
1734                ddr_seq_print
1735                    ("Performing LMC sequence: vrefint_seq_deskew = %d\n",
1736                     ext_config.s.vrefint_seq_deskew);
1737
1738                lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);
1739
1740                /*
1741                 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and
1742                 *    LMC(0)_SEQ_CTL[INIT_START] = 1.
1743                 *
1744                 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
1745                 */
1746
1747                phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1748                phy_ctl.s.phy_dsk_reset = 1;    /* RESET Deskew sequence */
1749                lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1750
1751                /* LMC Deskew Training */
1752                oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
1753
1754                lock_retries = 0;
1755
1756perform_deskew_training:
1757
1758                phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1759                phy_ctl.s.phy_dsk_reset = 0;    /* Normal Deskew sequence */
1760                lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1761
1762                /* LMC Deskew Training */
1763                oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
1764
1765                // Moved this from validate_deskew_training
1766                /* Allow deskew results to stabilize before evaluating them. */
1767                udelay(deskew_validation_delay);
1768
1769                // Now go look at lock and saturation status...
1770                validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
1771                                         print_first);
1772                // after printing the first and not doing them all, no more
1773                if (print_first && !print_them_all)
1774                        print_first = 0;
1775
1776                unsaturated = (dsk_counts.saturated == 0);
1777                locked = (dsk_counts.unlocked == 0);
1778
1779                // only do locking retries if unsaturated or rawcard A or B,
1780                // otherwise full SAT retry
1781                if (unsaturated || (spd_rawcard_aorb && !has_no_sat)) {
1782                        if (!locked) {  // and not locked
1783                                lock_retries++;
1784                                lock_retries_total++;
1785                                if (lock_retries <= lock_retries_limit) {
1786                                        goto perform_deskew_training;
1787                                } else {
1788                                        debug("N0.LMC%d: LOCK RETRIES failed after %d retries\n",
1789                                              if_num, lock_retries_limit);
1790                                }
1791                        } else {
1792                                // only print if we did try
1793                                if (lock_retries_total > 0)
1794                                        debug("N0.LMC%d: LOCK RETRIES successful after %d retries\n",
1795                                              if_num, lock_retries);
1796                        }
1797                }               /* if (unsaturated || spd_rawcard_aorb) */
1798
1799                ++sat_retries;
1800
1801                /*
1802                 * At this point, check for a DDR4 RDIMM that will not
1803                 * benefit from SAT retries; if so, exit
1804                 */
1805                if (spd_rawcard_aorb && !has_no_sat) {
1806                        debug("N0.LMC%d: Deskew Training Loop: Exiting for RAWCARD == A or B.\n",
1807                              if_num);
1808                        break;  // no sat or lock retries
1809                }
1810
1811        } while (!unsaturated && (sat_retries < sat_retries_limit));
1812
1813        debug("N0.LMC%d: Deskew Training %s. %d sat-retries, %d lock-retries\n",
1814              if_num, (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ?
1815              "Timed Out" : "Completed", sat_retries - 1, lock_retries_total);
1816
1817        // FIXME? add saturation to reasons for fault return - give it a
1818        // chance via Internal VREF
1819        // FIXME? add OPTIONAL bit value to reasons for fault return -
1820        // give it a chance via Internal VREF
1821        if (dsk_counts.nibrng_errs != 0 || dsk_counts.nibunl_errs != 0 ||
1822            (dsk_counts.bitval_errs != 0 && !disable_bitval_retries) ||
1823            !unsaturated) {
1824                debug("N0.LMC%d: Nibble or Saturation Error(s) found, returning FAULT\n",
1825                      if_num);
1826                // FIXME: do we want this output always for errors?
1827                validate_deskew_training(priv, rank_mask, if_num,
1828                                         &dsk_counts, 1);
1829                return -1;      // we did retry locally, they did not help
1830        }
1831
1832        // NOTE: we (currently) always print one last training validation
1833        // before starting Read Leveling...
1834
1835        return 0;
1836}
1837
1838#define SCALING_FACTOR (1000)
1839
1840// NOTE: this gets called for 1-rank and 2-rank DIMMs in single-slot config
1841static int compute_vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl,
1842                                    int rank_count, int dram_connection)
1843{
1844        u64 reff_s;
1845        u64 rser_s = (dram_connection) ? 0 : 15;
1846        u64 vdd = 1200;
1847        u64 vref;
1848        // 99 == HiZ
1849        u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
1850                        1 * 1024 * 1024 : rtt_wr);
1851        u64 rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) &&
1852                                               (rtt_wr != 0))) ?
1853                          1 * 1024 * 1024 : rtt_park);
1854        u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
1855        int vref_value;
1856        u64 rangepc = 6000;     // range1 base
1857        u64 vrefpc;
1858        int vref_range = 0;
1859
1860        reff_s = divide_nint((rtt_wr_s * rtt_park_s), (rtt_wr_s + rtt_park_s));
1861
1862        vref = (((rser_s + dqx_ctl_s) * SCALING_FACTOR) /
1863                (rser_s + dqx_ctl_s + reff_s)) + SCALING_FACTOR;
1864
1865        vref = (vref * vdd) / 2 / SCALING_FACTOR;
1866
1867        vrefpc = (vref * 100 * 100) / vdd;
1868
1869        if (vrefpc < rangepc) { // < range1 base, use range2
1870                vref_range = 1 << 6;    // set bit A6 for range2
1871                rangepc = 4500; // range2 base is 45%
1872        }
1873
1874        vref_value = divide_nint(vrefpc - rangepc, 65);
1875        if (vref_value < 0)
1876                vref_value = vref_range;        // set to base of range
1877        else
1878                vref_value |= vref_range;
1879
1880        debug("rtt_wr: %d, rtt_park: %d, dqx_ctl: %d, rank_count: %d\n",
1881              rtt_wr, rtt_park, dqx_ctl, rank_count);
1882        debug("rtt_wr_s: %lld, rtt_park_s: %lld, dqx_ctl_s: %lld, vref_value: 0x%x, range: %d\n",
1883              rtt_wr_s, rtt_park_s, dqx_ctl_s, vref_value ^ vref_range,
1884              vref_range ? 2 : 1);
1885
1886        return vref_value;
1887}
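
/*
 * Worked example (illustrative, hypothetical values; assumes
 * divide_nint() rounds to nearest while the plain divisions truncate):
 * rtt_wr = 240, rtt_park = 120, dqx_ctl = 34, rank_count = 2 and a
 * direct DRAM connection (rser_s = 0) give reff_s = 80,
 * vref = 34000 / 114 + 1000 = 1298, then (1298 * 1200) / 2 / 1000 = 778,
 * and vrefpc = 7780000 / 1200 = 6483.  That is above the range1 base of
 * 6000, so vref_value = divide_nint(483, 65) = 7 with range1 (A6 clear).
 */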
1888
1889// NOTE: this gets called for 1-rank and 2-rank DIMMs in two-slot configs
1890static int compute_vref_2slot_2rank(int rtt_wr, int rtt_park_00,
1891                                    int rtt_park_01,
1892                                    int dqx_ctl, int rtt_nom,
1893                                    int dram_connection)
1894{
1895        u64 rser = (dram_connection) ? 0 : 15;
1896        u64 vdd = 1200;
1897        u64 vl, vlp, vcm;
1898        u64 rd0, rd1, rpullup;
1899        // 99 == HiZ
1900        u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
1901                        1 * 1024 * 1024 : rtt_wr);
1902        u64 rtt_park_00_s = (rtt_park_00 == 0 ? 1 * 1024 * 1024 : rtt_park_00);
1903        u64 rtt_park_01_s = (rtt_park_01 == 0 ? 1 * 1024 * 1024 : rtt_park_01);
1904        u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
1905        u64 rtt_nom_s = (rtt_nom == 0 ? 1 * 1024 * 1024 : rtt_nom);
1906        int vref_value;
1907        u64 rangepc = 6000;     // range1 base
1908        u64 vrefpc;
1909        int vref_range = 0;
1910
1911        // rd0 = (RTT_NOM (parallel) RTT_WR) + RSER =
1912        // ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + RSER
1913        rd0 = divide_nint((rtt_nom_s * rtt_wr_s),
1914                          (rtt_nom_s + rtt_wr_s)) + rser;
1915
1916        // rd1 = (RTT_PARK_00 (parallel) RTT_PARK_01) + RSER =
1917        // ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + RSER
1918        rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s),
1919                          (rtt_park_00_s + rtt_park_01_s)) + rser;
1920
1921        // rpullup = rd0 (parallel) rd1 = (rd0 * rd1) / (rd0 + rd1)
1922        rpullup = divide_nint((rd0 * rd1), (rd0 + rd1));
1923
1924        // vl = (DQX_CTL / (DQX_CTL + rpullup)) * 1.2
1925        vl = divide_nint((dqx_ctl_s * vdd), (dqx_ctl_s + rpullup));
1926
1927        // vlp = ((RSER / rd0) * (1.2 - vl)) + vl
1928        vlp = divide_nint((rser * (vdd - vl)), rd0) + vl;
1929
1930        // vcm = (vlp + 1.2) / 2
1931        vcm = divide_nint((vlp + vdd), 2);
1932
1933        // vrefpc = (vcm / 1.2) * 100
1934        vrefpc = divide_nint((vcm * 100 * 100), vdd);
1935
1936        if (vrefpc < rangepc) { // < range1 base, use range2
1937                vref_range = 1 << 6;    // set bit A6 for range2
1938                rangepc = 4500; // range2 base is 45%
1939        }
1940
1941        vref_value = divide_nint(vrefpc - rangepc, 65);
1942        if (vref_value < 0)
1943                vref_value = vref_range;        // set to base of range
1944        else
1945                vref_value |= vref_range;
1946
1947        debug("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, vref_value:%d (0x%x)\n",
1948              rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, vref_value,
1949              vref_value);
1950
1951        return vref_value;
1952}
1953
1954// NOTE: only call this for DIMMs with 1 or 2 ranks, not 4.
1955static int compute_vref_val(struct ddr_priv *priv, int if_num, int rankx,
1956                            int dimm_count, int rank_count,
1957                            struct impedence_values *imp_values,
1958                            int is_stacked_die, int dram_connection)
1959{
1960        int computed_final_vref_value = 0;
1961        int enable_adjust = ENABLE_COMPUTED_VREF_ADJUSTMENT;
1962        const char *s;
1963        int rtt_wr, dqx_ctl, rtt_nom, index;
1964        union cvmx_lmcx_modereg_params1 lmc_modereg_params1;
1965        union cvmx_lmcx_modereg_params2 lmc_modereg_params2;
1966        union cvmx_lmcx_comp_ctl2 comp_ctl2;
1967        int rtt_park;
1968        int rtt_park_00;
1969        int rtt_park_01;
1970
1971        debug("N0.LMC%d.R%d: %s(...dram_connection = %d)\n",
1972              if_num, rankx, __func__, dram_connection);
1973
1974        // allow some overrides...
1975        s = env_get("ddr_adjust_computed_vref");
1976        if (s) {
1977                enable_adjust = !!simple_strtoul(s, NULL, 0);
1978                if (!enable_adjust) {
1979                        debug("N0.LMC%d.R%d: DISABLE adjustment of computed VREF\n",
1980                              if_num, rankx);
1981                }
1982        }
1983
1984        s = env_get("ddr_set_computed_vref");
1985        if (s) {
1986                int new_vref = simple_strtoul(s, NULL, 0);
1987
1988                debug("N0.LMC%d.R%d: OVERRIDE computed VREF to 0x%x (%d)\n",
1989                      if_num, rankx, new_vref, new_vref);
1990                return new_vref;
1991        }
1992
1993        /*
1994         * Calculate an alternative to the measured vref value
1995         * but only for configurations we know how to...
1996         */
1997        // We have code for 2-rank DIMMs in both 1-slot or 2-slot configs,
1998        // and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot
1999        // configs, and can use the 2-rank 2-slot code for 1-rank DIMMs
2000        // in 2-slot configs.
2001
2002        lmc_modereg_params1.u64 =
2003            lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
2004        lmc_modereg_params2.u64 =
2005            lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num));
2006        comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
2007        dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl];
2008
2009        // WR always comes from the current rank
2010        index = (lmc_modereg_params1.u64 >> (rankx * 12 + 5)) & 0x03;
2011        if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
2012                index |= lmc_modereg_params1.u64 >> (51 + rankx - 2) & 0x04;
2013        rtt_wr = imp_values->rtt_wr_ohms[index];
2014
2015        // separate calculations for 1 vs 2 DIMMs per LMC
2016        if (dimm_count == 1) {
2017                // PARK comes from this rank if 1-rank, otherwise other rank
2018                index =
2019                    (lmc_modereg_params2.u64 >>
2020                     ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07;
2021                rtt_park = imp_values->rtt_nom_ohms[index];
2022                computed_final_vref_value =
2023                    compute_vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl,
2024                                             rank_count, dram_connection);
2025        } else {
2026                // get both PARK values from the other DIMM
2027                index =
2028                    (lmc_modereg_params2.u64 >> ((rankx ^ 0x02) * 10 + 0)) &
2029                    0x07;
2030                rtt_park_00 = imp_values->rtt_nom_ohms[index];
2031                index =
2032                    (lmc_modereg_params2.u64 >> ((rankx ^ 0x03) * 10 + 0)) &
2033                    0x07;
2034                rtt_park_01 = imp_values->rtt_nom_ohms[index];
2035                // NOM comes from this rank if 1-rank, otherwise other rank
2036                index =
2037                    (lmc_modereg_params1.u64 >>
2038                     ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07;
2039                rtt_nom = imp_values->rtt_nom_ohms[index];
2040                computed_final_vref_value =
2041                    compute_vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01,
2042                                             dqx_ctl, rtt_nom, dram_connection);
2043        }
2044
2045        if (enable_adjust) {
2046                union cvmx_lmcx_config lmc_config;
2047                union cvmx_lmcx_control lmc_control;
2048
2049                lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
2050                lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
2051
2052                /*
2053                 *  New computed vref = existing computed vref - X
2054                 *
2055                 * The value of X depends on the configuration.
2056                 * Both #122 and #139 are 2Rx4 RDIMMs, while #124 is a
2057                 * stacked-die 2Rx4, so the results fall into two cases:
2058                 *
2059                 * 1. Stacked die: 2Rx4
2060                 * 1-slot: offset = 7, i.e. new computed vref = existing
2061                 * computed vref - 7
2062                 * 2-slot: offset = 6
2063                 *
2064                 * 2. Regular: 2Rx4
2065                 * 1-slot: offset = 3
2066                 * 2-slot: offset = 2
2067                 */
2068                // we know we never get called unless DDR4, so test just
2069                // the other conditions
2070                if (lmc_control.s.rdimm_ena == 1 &&
2071                    rank_count == 2 && lmc_config.s.mode_x4dev) {
2072                        // it must first be RDIMM and 2-rank and x4
2073                        int adj;
2074
2075                        // now do according to stacked die or not...
2076                        if (is_stacked_die)
2077                                adj = (dimm_count == 1) ? -7 : -6;
2078                        else
2079                                adj = (dimm_count == 1) ? -3 : -2;
2080
2081                        // we must have adjusted it, so print it out if
2082                        // verbosity is right
2083                        debug("N0.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n",
2084                              if_num, rankx, computed_final_vref_value,
2085                              computed_final_vref_value,
2086                              computed_final_vref_value + adj,
2087                              computed_final_vref_value + adj);
2088                        computed_final_vref_value += adj;
2089                }
2090        }
2091
2092        return computed_final_vref_value;
2093}
2094
2095static void unpack_rlevel_settings(int if_bytemask, int ecc_ena,
2096                                   struct rlevel_byte_data *rlevel_byte,
2097                                   union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank)
2098{
2099        if ((if_bytemask & 0xff) == 0xff) {
2100                if (ecc_ena) {
2101                        rlevel_byte[8].delay = lmc_rlevel_rank.s.byte7;
2102                        rlevel_byte[7].delay = lmc_rlevel_rank.s.byte6;
2103                        rlevel_byte[6].delay = lmc_rlevel_rank.s.byte5;
2104                        rlevel_byte[5].delay = lmc_rlevel_rank.s.byte4;
2105                        /* ECC */
2106                        rlevel_byte[4].delay = lmc_rlevel_rank.s.byte8;
2107                } else {
2108                        rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7;
2109                        rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6;
2110                        rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5;
2111                        rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4;
2112                }
2113        } else {
2114                rlevel_byte[8].delay = lmc_rlevel_rank.s.byte8; /* unused */
2115                rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7; /* unused */
2116                rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6; /* unused */
2117                rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5; /* unused */
2118                rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4; /* ECC */
2119        }
2120
2121        rlevel_byte[3].delay = lmc_rlevel_rank.s.byte3;
2122        rlevel_byte[2].delay = lmc_rlevel_rank.s.byte2;
2123        rlevel_byte[1].delay = lmc_rlevel_rank.s.byte1;
2124        rlevel_byte[0].delay = lmc_rlevel_rank.s.byte0;
2125}
2126
2127static void pack_rlevel_settings(int if_bytemask, int ecc_ena,
2128                                 struct rlevel_byte_data *rlevel_byte,
2129                                 union cvmx_lmcx_rlevel_rankx
2130                                 *final_rlevel_rank)
2131{
2132        union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank = *final_rlevel_rank;
2133
2134        if ((if_bytemask & 0xff) == 0xff) {
2135                if (ecc_ena) {
2136                        lmc_rlevel_rank.s.byte7 = rlevel_byte[8].delay;
2137                        lmc_rlevel_rank.s.byte6 = rlevel_byte[7].delay;
2138                        lmc_rlevel_rank.s.byte5 = rlevel_byte[6].delay;
2139                        lmc_rlevel_rank.s.byte4 = rlevel_byte[5].delay;
2140                        /* ECC */
2141                        lmc_rlevel_rank.s.byte8 = rlevel_byte[4].delay;
2142                } else {
2143                        lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
2144                        lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
2145                        lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
2146                        lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
2147                }
2148        } else {
2149                lmc_rlevel_rank.s.byte8 = rlevel_byte[8].delay;
2150                lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
2151                lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
2152                lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
2153                lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
2154        }
2155
2156        lmc_rlevel_rank.s.byte3 = rlevel_byte[3].delay;
2157        lmc_rlevel_rank.s.byte2 = rlevel_byte[2].delay;
2158        lmc_rlevel_rank.s.byte1 = rlevel_byte[1].delay;
2159        lmc_rlevel_rank.s.byte0 = rlevel_byte[0].delay;
2160
2161        *final_rlevel_rank = lmc_rlevel_rank;
2162}
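
/*
 * Note on the packed vs. unpacked layouts handled above (derived from
 * the two routines themselves): with a full 64-bit bytemask and ECC
 * enabled, rlevel_byte[0..3] track hardware byte0..byte3 directly,
 * rlevel_byte[4] holds the ECC lane (hardware byte8), and
 * rlevel_byte[5..8] hold hardware byte4..byte7.  Without ECC, or with
 * a partial bytemask, the indices map 1:1.
 */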
2163
2164/////////////////// These are the RLEVEL settings display routines
2165
2166// flags
2167#define WITH_NOTHING 0
2168#define WITH_SCORE   1
2169#define WITH_AVERAGE 2
2170#define WITH_FINAL   4
2171#define WITH_COMPUTE 8
2172
2173static void do_display_rl(int if_num,
2174                          union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2175                          int rank, int flags, int score)
2176{
2177        char score_buf[16];
2178        char *msg_buf;
2179        char hex_buf[20];
2180
2181        if (flags & WITH_SCORE) {
2182                snprintf(score_buf, sizeof(score_buf), "(%d)", score);
2183        } else {
2184                score_buf[0] = ' ';
2185                score_buf[1] = 0;
2186        }
2187
2188        if (flags & WITH_AVERAGE) {
2189                msg_buf = "  DELAY AVERAGES  ";
2190        } else if (flags & WITH_FINAL) {
2191                msg_buf = "  FINAL SETTINGS  ";
2192        } else if (flags & WITH_COMPUTE) {
2193                msg_buf = "  COMPUTED DELAYS ";
2194        } else {
2195                snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
2196                         (unsigned long long)lmc_rlevel_rank.u64);
2197                msg_buf = hex_buf;
2198        }
2199
2200        debug("N0.LMC%d.R%d: Rlevel Rank %#4x, %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n",
2201              if_num, rank, lmc_rlevel_rank.s.status, msg_buf,
2202              lmc_rlevel_rank.s.byte8, lmc_rlevel_rank.s.byte7,
2203              lmc_rlevel_rank.s.byte6, lmc_rlevel_rank.s.byte5,
2204              lmc_rlevel_rank.s.byte4, lmc_rlevel_rank.s.byte3,
2205              lmc_rlevel_rank.s.byte2, lmc_rlevel_rank.s.byte1,
2206              lmc_rlevel_rank.s.byte0, score_buf);
2207}
2208
2209static void display_rl(int if_num,
2210                       union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, int rank)
2211{
2212        do_display_rl(if_num, lmc_rlevel_rank, rank, 0, 0);
2213}
2214
2215static void display_rl_with_score(int if_num,
2216                                  union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2217                                  int rank, int score)
2218{
2219        do_display_rl(if_num, lmc_rlevel_rank, rank, 1, score);
2220}
2221
2222static void display_rl_with_final(int if_num,
2223                                  union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2224                                  int rank)
2225{
2226        do_display_rl(if_num, lmc_rlevel_rank, rank, 4, 0);
2227}
2228
2229static void display_rl_with_computed(int if_num,
2230                                     union cvmx_lmcx_rlevel_rankx
2231                                     lmc_rlevel_rank, int rank, int score)
2232{
2233        do_display_rl(if_num, lmc_rlevel_rank, rank, 9, score);
2234}
2235
2236// flag values
2237#define WITH_RODT_BLANK      0
2238#define WITH_RODT_SKIPPING   1
2239#define WITH_RODT_BESTROW    2
2240#define WITH_RODT_BESTSCORE  3
2241// control
2242#define SKIP_SKIPPING 1
2243
2244static const char *with_rodt_canned_msgs[4] = {
2245        "          ", "SKIPPING  ", "BEST ROW  ", "BEST SCORE"
2246};
2247
2248static void display_rl_with_rodt(int if_num,
2249                                 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2250                                 int rank, int score,
2251                                 int nom_ohms, int rodt_ohms, int flag)
2252{
2253        const char *msg_buf;
2254        char set_buf[20];
2255
2256#if SKIP_SKIPPING
2257        if (flag == WITH_RODT_SKIPPING)
2258                return;
2259#endif
2260
2261        msg_buf = with_rodt_canned_msgs[flag];
2262        if (nom_ohms < 0) {
2263                snprintf(set_buf, sizeof(set_buf), "    RODT %3d    ",
2264                         rodt_ohms);
2265        } else {
2266                snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms,
2267                         rodt_ohms);
2268        }
2269
2270        debug("N0.LMC%d.R%d: Rlevel %s   %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n",
2271              if_num, rank, set_buf, msg_buf, lmc_rlevel_rank.s.byte8,
2272              lmc_rlevel_rank.s.byte7, lmc_rlevel_rank.s.byte6,
2273              lmc_rlevel_rank.s.byte5, lmc_rlevel_rank.s.byte4,
2274              lmc_rlevel_rank.s.byte3, lmc_rlevel_rank.s.byte2,
2275              lmc_rlevel_rank.s.byte1, lmc_rlevel_rank.s.byte0, score);
2276}
2277
2278static void do_display_wl(int if_num,
2279                          union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
2280                          int rank, int flags)
2281{
2282        char *msg_buf;
2283        char hex_buf[20];
2284
2285        if (flags & WITH_FINAL) {
2286                msg_buf = "  FINAL SETTINGS  ";
2287        } else {
2288                snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
2289                         (unsigned long long)lmc_wlevel_rank.u64);
2290                msg_buf = hex_buf;
2291        }
2292
2293        debug("N0.LMC%d.R%d: Wlevel Rank %#4x, %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2294              if_num, rank, lmc_wlevel_rank.s.status, msg_buf,
2295              lmc_wlevel_rank.s.byte8, lmc_wlevel_rank.s.byte7,
2296              lmc_wlevel_rank.s.byte6, lmc_wlevel_rank.s.byte5,
2297              lmc_wlevel_rank.s.byte4, lmc_wlevel_rank.s.byte3,
2298              lmc_wlevel_rank.s.byte2, lmc_wlevel_rank.s.byte1,
2299              lmc_wlevel_rank.s.byte0);
2300}
2301
2302static void display_wl(int if_num,
2303                       union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, int rank)
2304{
2305        do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_NOTHING);
2306}
2307
2308static void display_wl_with_final(int if_num,
2309                                  union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
2310                                  int rank)
2311{
2312        do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_FINAL);
2313}
2314
2315// pretty-print bitmask adjuster
2316static u64 ppbm(u64 bm)
2317{
2318        if (bm != 0ul) {
2319                while ((bm & 0x0fful) == 0ul)
2320                        bm >>= 4;
2321        }
2322
2323        return bm;
2324}
2325
2326// xlate PACKED index to UNPACKED index to use with rlevel_byte
2327#define XPU(i, e) (((i) < 4) ? (i) : (((i) < 8) ? (i) + (e) : 4))
2328// xlate UNPACKED index to PACKED index to use with rlevel_bitmask
2329#define XUP(i, e) (((i) < 4) ? (i) : (e) ? (((i) > 4) ? (i) - 1 : 8) : (i))
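
/*
 * Worked example (illustrative) of the two translations with ECC
 * enabled (e = 1): XPU(4, 1) = 5, XPU(7, 1) = 8 and XPU(8, 1) = 4,
 * while the inverse gives XUP(5, 1) = 4, XUP(8, 1) = 7 and
 * XUP(4, 1) = 8; the ECC lane swaps between packed slot 8 and
 * unpacked slot 4, matching the pack/unpack routines above.
 */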
2330
2331// flag values
2332#define WITH_WL_BITMASKS      0
2333#define WITH_RL_BITMASKS      1
2334#define WITH_RL_MASK_SCORES   2
2335#define WITH_RL_SEQ_SCORES    3
2336
2337static void do_display_bm(int if_num, int rank, void *bm,
2338                          int flags, int ecc)
2339{
2340        if (flags == WITH_WL_BITMASKS) {
2341                // wlevel_bitmask array in PACKED index order, so just
2342                // print them
2343                int *bitmasks = (int *)bm;
2344
2345                debug("N0.LMC%d.R%d: Wlevel Debug Bitmasks                 : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n",
2346                      if_num, rank, bitmasks[8], bitmasks[7], bitmasks[6],
2347                      bitmasks[5], bitmasks[4], bitmasks[3], bitmasks[2],
2348                      bitmasks[1], bitmasks[0]
2349                        );
2350        } else if (flags == WITH_RL_BITMASKS) {
2351                // rlevel_bitmask array in PACKED index order, so just
2352                // print them
2353                struct rlevel_bitmask *rlevel_bitmask =
2354                        (struct rlevel_bitmask *)bm;
2355
2356                debug("N0.LMC%d.R%d: Rlevel Debug Bitmasks        8:0      : %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx\n",
2357                      if_num, rank, ppbm(rlevel_bitmask[8].bm),
2358                      ppbm(rlevel_bitmask[7].bm), ppbm(rlevel_bitmask[6].bm),
2359                      ppbm(rlevel_bitmask[5].bm), ppbm(rlevel_bitmask[4].bm),
2360                      ppbm(rlevel_bitmask[3].bm), ppbm(rlevel_bitmask[2].bm),
2361                      ppbm(rlevel_bitmask[1].bm), ppbm(rlevel_bitmask[0].bm)
2362                        );
2363        } else if (flags == WITH_RL_MASK_SCORES) {
2364                // rlevel_bitmask array in PACKED index order, so just
2365                // print them
2366                struct rlevel_bitmask *rlevel_bitmask =
2367                        (struct rlevel_bitmask *)bm;
2368
2369                debug("N0.LMC%d.R%d: Rlevel Debug Bitmask Scores  8:0      : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2370                      if_num, rank, rlevel_bitmask[8].errs,
2371                      rlevel_bitmask[7].errs, rlevel_bitmask[6].errs,
2372                      rlevel_bitmask[5].errs, rlevel_bitmask[4].errs,
2373                      rlevel_bitmask[3].errs, rlevel_bitmask[2].errs,
2374                      rlevel_bitmask[1].errs, rlevel_bitmask[0].errs);
2375        } else if (flags == WITH_RL_SEQ_SCORES) {
2376                // rlevel_byte array in UNPACKED index order, so xlate
2377                // and print them
2378                struct rlevel_byte_data *rlevel_byte =
2379                        (struct rlevel_byte_data *)bm;
2380
2381                debug("N0.LMC%d.R%d: Rlevel Debug Non-seq Scores  8:0      : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2382                      if_num, rank, rlevel_byte[XPU(8, ecc)].sqerrs,
2383                      rlevel_byte[XPU(7, ecc)].sqerrs,
2384                      rlevel_byte[XPU(6, ecc)].sqerrs,
2385                      rlevel_byte[XPU(5, ecc)].sqerrs,
2386                      rlevel_byte[XPU(4, ecc)].sqerrs,
2387                      rlevel_byte[XPU(3, ecc)].sqerrs,
2388                      rlevel_byte[XPU(2, ecc)].sqerrs,
2389                      rlevel_byte[XPU(1, ecc)].sqerrs,
2390                      rlevel_byte[XPU(0, ecc)].sqerrs);
2391        }
2392}
2393
2394static void display_wl_bm(int if_num, int rank, int *bitmasks)
2395{
2396        do_display_bm(if_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0);
2397}
2398
2399static void display_rl_bm(int if_num, int rank,
2400                          struct rlevel_bitmask *bitmasks, int ecc_ena)
2401{
2402        do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_BITMASKS,
2403                      ecc_ena);
2404}
2405
2406static void display_rl_bm_scores(int if_num, int rank,
2407                                 struct rlevel_bitmask *bitmasks, int ecc_ena)
2408{
2409        do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES,
2410                      ecc_ena);
2411}
2412
2413static void display_rl_seq_scores(int if_num, int rank,
2414                                  struct rlevel_byte_data *bytes, int ecc_ena)
2415{
2416        do_display_bm(if_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena);
2417}
2418
2419#define RODT_OHMS_COUNT        8
2420#define RTT_NOM_OHMS_COUNT     8
2421#define RTT_NOM_TABLE_COUNT    8
2422#define RTT_WR_OHMS_COUNT      8
2423#define DIC_OHMS_COUNT         3
2424#define DRIVE_STRENGTH_COUNT  15
2425
2426static unsigned char ddr4_rodt_ohms[RODT_OHMS_COUNT] = {
2427        0, 40, 60, 80, 120, 240, 34, 48 };
2428static unsigned char ddr4_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
2429        0, 60, 120, 40, 240, 48, 80, 34 };
2430static unsigned char ddr4_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
2431        0, 4, 2, 6, 1, 5, 3, 7 };
2432// setting HiZ ohms to 99 for computed vref
2433static unsigned char ddr4_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = {
2434        0, 120, 240, 99, 80 };
2435static unsigned char ddr4_dic_ohms[DIC_OHMS_COUNT] = { 34, 48 };
2436static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = {
2437        0, 0, 26, 30, 34, 40, 48, 68, 0, 0, 0, 0, 0, 0, 0 };
2438static short ddr4_dqx_strength[DRIVE_STRENGTH_COUNT] = {
2439        0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
2440struct impedence_values ddr4_impedence_val = {
2441        .rodt_ohms = ddr4_rodt_ohms,
2442        .rtt_nom_ohms = ddr4_rtt_nom_ohms,
2443        .rtt_nom_table = ddr4_rtt_nom_table,
2444        .rtt_wr_ohms = ddr4_rtt_wr_ohms,
2445        .dic_ohms = ddr4_dic_ohms,
2446        .drive_strength = ddr4_drive_strength,
2447        .dqx_strength = ddr4_dqx_strength,
2448};
2449
2450static unsigned char ddr3_rodt_ohms[RODT_OHMS_COUNT] = {
2451        0, 20, 30, 40, 60, 120, 0, 0 };
2452static unsigned char ddr3_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
2453        0, 60, 120, 40, 20, 30, 0, 0 };
2454static unsigned char ddr3_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
2455        0, 2, 1, 3, 5, 4, 0, 0 };
2456static unsigned char ddr3_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { 0, 60, 120 };
2457static unsigned char ddr3_dic_ohms[DIC_OHMS_COUNT] = { 40, 34 };
2458static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = {
2459        0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
2460static struct impedence_values ddr3_impedence_val = {
2461        .rodt_ohms = ddr3_rodt_ohms,
2462        .rtt_nom_ohms = ddr3_rtt_nom_ohms,
2463        .rtt_nom_table = ddr3_rtt_nom_table,
2464        .rtt_wr_ohms = ddr3_rtt_wr_ohms,
2465        .dic_ohms = ddr3_dic_ohms,
2466        .drive_strength = ddr3_drive_strength,
2467        .dqx_strength = ddr3_drive_strength,
2468};
2469
2470static u64 hertz_to_psecs(u64 hertz)
2471{
2472        /* Clock in psecs */
2473        return divide_nint((u64)1000 * 1000 * 1000 * 1000, hertz);
2474}
2475
2476#define DIVIDEND_SCALE 1000     /* Scale to avoid rounding error. */
2477
2478static u64 psecs_to_mts(u64 psecs)
2479{
2480        return divide_nint(divide_nint((u64)(2 * 1000000 * DIVIDEND_SCALE),
2481                                       psecs), DIVIDEND_SCALE);
2482}
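
/*
 * Worked example (illustrative, assuming divide_nint() rounds to
 * nearest): a 1200 MHz DDR clock gives hertz_to_psecs(1200000000) = 833,
 * and psecs_to_mts(833) = 2401, which is why compute_rc3x() below has
 * to special-case 833 ps back to an even 2400 MT/s.
 */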
2483
2484#define WITHIN(v, b, m) (((v) >= ((b) - (m))) && ((v) <= ((b) + (m))))
2485
2486static unsigned long pretty_psecs_to_mts(u64 psecs)
2487{
2488        u64 ret = 0;            // default to error
2489
2490        if (WITHIN(psecs, 2500, 1))
2491                ret = 800;
2492        else if (WITHIN(psecs, 1875, 1))
2493                ret = 1066;
2494        else if (WITHIN(psecs, 1500, 1))
2495                ret = 1333;
2496        else if (WITHIN(psecs, 1250, 1))
2497                ret = 1600;
2498        else if (WITHIN(psecs, 1071, 1))
2499                ret = 1866;
2500        else if (WITHIN(psecs, 937, 1))
2501                ret = 2133;
2502        else if (WITHIN(psecs, 833, 1))
2503                ret = 2400;
2504        else if (WITHIN(psecs, 750, 1))
2505                ret = 2666;
2506        return ret;
2507}
2508
2509static u64 mts_to_hertz(u64 mts)
2510{
2511        return ((mts * 1000 * 1000) / 2);
2512}
2513
2514static int compute_rc3x(int64_t tclk_psecs)
2515{
2516        long speed;
2517        long tclk_psecs_min, tclk_psecs_max;
2518        long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max;
2519        int rc3x;
2520
2521#define ENCODING_BASE 1240
2522
2523        data_rate_mhz = psecs_to_mts(tclk_psecs);
2524
2525        /*
2526         * 2400 MT/s is a special case. Using integer arithmetic it rounds
2527         * from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
2528         * proper setting from the table.
2529         */
2530        if (tclk_psecs == 833)
2531                data_rate_mhz = 2400;
2532
2533        for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
2534                int error = 0;
2535
2536                /* Clock in psecs */
2537                tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00));
2538                /* Clock in psecs */
2539                tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18));
2540
2541                data_rate_mhz_min = psecs_to_mts(tclk_psecs_min);
2542                data_rate_mhz_max = psecs_to_mts(tclk_psecs_max);
2543
2544                /* Force alignment to a multiple of 20 to avoid rounding errors. */
2545                data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20;
2546                data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20;
2547
2548                error += (speed + 00 != data_rate_mhz_min);
2549                error += (speed + 20 != data_rate_mhz_max);
2550
2551                rc3x = (speed - ENCODING_BASE) / 20;
2552
2553                if (data_rate_mhz <= (speed + 20))
2554                        break;
2555        }
2556
2557        return rc3x;
2558}
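
/*
 * Worked example (illustrative): at 833 ps (forced to 2400 MT/s above)
 * the search loop stops at speed = 2380, since 2400 <= 2380 + 20, and
 * returns rc3x = (2380 - 1240) / 20 = 57.
 */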
2559
2560/*
2561 * Static global variables are needed so that functions (loops) can be
2562 * restructured out of the main huge function. It's not elegant, but it is
2563 * the only way to break original functions like init_octeon3_ddr3_interface()
2564 * into separate, smaller logical functions with fewer indentation levels.
2565 */
2566static int if_num __section(".data");
2567static u32 if_mask __section(".data");
2568static int ddr_hertz __section(".data");
2569
2570static struct ddr_conf *ddr_conf __section(".data");
2571static const struct dimm_odt_config *odt_1rank_config __section(".data");
2572static const struct dimm_odt_config *odt_2rank_config __section(".data");
2573static const struct dimm_odt_config *odt_4rank_config __section(".data");
2574static struct dimm_config *dimm_config_table __section(".data");
2575static const struct dimm_odt_config *odt_config __section(".data");
2576static const struct ddr3_custom_config *c_cfg __section(".data");
2577
2578static int odt_idx __section(".data");
2579
2580static ulong tclk_psecs __section(".data");
2581static ulong eclk_psecs __section(".data");
2582
2583static int row_bits __section(".data");
2584static int col_bits __section(".data");
2585static int num_banks __section(".data");
2586static int num_ranks __section(".data");
2587static int dram_width __section(".data");
2588static int dimm_count __section(".data");
2589/* Accumulate and report all the errors before giving up */
2590static int fatal_error __section(".data");
2591/* Flag that indicates safe DDR settings should be used */
2592static int safe_ddr_flag __section(".data");
2593/* Octeon II Default: 64bit interface width */
2594static int if_64b __section(".data");
2595static int if_bytemask __section(".data");
2596static u32 mem_size_mbytes __section(".data");
2597static unsigned int didx __section(".data");
2598static int bank_bits __section(".data");
2599static int bunk_enable __section(".data");
2600static int rank_mask __section(".data");
2601static int column_bits_start __section(".data");
2602static int row_lsb __section(".data");
2603static int pbank_lsb __section(".data");
2604static int use_ecc __section(".data");
2605static int mtb_psec __section(".data");
2606static short ftb_dividend __section(".data");
2607static short ftb_divisor __section(".data");
2608static int taamin __section(".data");
2609static int tckmin __section(".data");
2610static int cl __section(".data");
2611static int min_cas_latency __section(".data");
2612static int max_cas_latency __section(".data");
2613static int override_cas_latency __section(".data");
2614static int ddr_rtt_nom_auto __section(".data");
2615static int ddr_rodt_ctl_auto __section(".data");
2616
2617static int spd_addr __section(".data");
2618static int spd_org __section(".data");
2619static int spd_banks __section(".data");
2620static int spd_rdimm __section(".data");
2621static int spd_dimm_type __section(".data");
2622static int spd_ecc __section(".data");
2623static u32 spd_cas_latency __section(".data");
2624static int spd_mtb_dividend __section(".data");
2625static int spd_mtb_divisor __section(".data");
2626static int spd_tck_min __section(".data");
2627static int spd_taa_min __section(".data");
2628static int spd_twr __section(".data");
2629static int spd_trcd __section(".data");
2630static int spd_trrd __section(".data");
2631static int spd_trp __section(".data");
2632static int spd_tras __section(".data");
2633static int spd_trc __section(".data");
2634static int spd_trfc __section(".data");
2635static int spd_twtr __section(".data");
2636static int spd_trtp __section(".data");
2637static int spd_tfaw __section(".data");
2638static int spd_addr_mirror __section(".data");
2639static int spd_package __section(".data");
2640static int spd_rawcard __section(".data");
2641static int spd_rawcard_aorb __section(".data");
2642static int spd_rdimm_registers __section(".data");
2643static int spd_thermal_sensor __section(".data");
2644
2645static int is_stacked_die __section(".data");
2646static int is_3ds_dimm __section(".data");
2647// 3DS: logical ranks per package rank
2648static int lranks_per_prank __section(".data");
2649// 3DS: number of logical-rank bits
2650static int lranks_bits __section(".data");
2651// in Mbits; only used for 3DS
2652static int die_capacity __section(".data");
2653
2654static enum ddr_type ddr_type __section(".data");
2655
2656static int twr __section(".data");
2657static int trcd __section(".data");
2658static int trrd __section(".data");
2659static int trp __section(".data");
2660static int tras __section(".data");
2661static int trc __section(".data");
2662static int trfc __section(".data");
2663static int twtr __section(".data");
2664static int trtp __section(".data");
2665static int tfaw __section(".data");
2666
2667static int ddr4_tckavgmin __section(".data");
2668static int ddr4_tckavgmax __section(".data");
2669static int ddr4_trdcmin __section(".data");
2670static int ddr4_trpmin __section(".data");
2671static int ddr4_trasmin __section(".data");
2672static int ddr4_trcmin __section(".data");
2673static int ddr4_trfc1min __section(".data");
2674static int ddr4_trfc2min __section(".data");
2675static int ddr4_trfc4min __section(".data");
2676static int ddr4_tfawmin __section(".data");
2677static int ddr4_trrd_smin __section(".data");
2678static int ddr4_trrd_lmin __section(".data");
2679static int ddr4_tccd_lmin __section(".data");
2680
2681static int wl_mask_err __section(".data");
2682static int wl_loops __section(".data");
2683static int default_rtt_nom[4] __section(".data");
2684static int dyn_rtt_nom_mask __section(".data");
2685static struct impedence_values *imp_val __section(".data");
2686static char default_rodt_ctl __section(".data");
2687// default to disabled (i.e., try LMC restart, not chip reset)
2688static int ddr_disable_chip_reset __section(".data");
2689static const char *dimm_type_name __section(".data");
2690static int match_wl_rtt_nom __section(".data");
2691
2692struct hwl_alt_by_rank {
2693        u16 hwl_alt_mask;       // mask of bytelanes with an alternate delay
2694        u16 hwl_alt_delay[9];   // alternate delay per bytelane, valid if mask bit set
2695};
2696
2697static struct hwl_alt_by_rank hwl_alts[4] __section(".data");
2698
2699#define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 3  // was: 5
2700static int internal_retries __section(".data");
2701
2702static int deskew_training_errors __section(".data");
2703static struct deskew_counts deskew_training_results __section(".data");
2704static int disable_deskew_training __section(".data");
2705static int restart_if_dsk_incomplete __section(".data");
2706static int dac_eval_retries __section(".data");
2707static int dac_settings[9] __section(".data");
2708static int num_samples __section(".data");
2709static int sample __section(".data");
2710static int lane __section(".data");
2711static int last_lane __section(".data");
2712static int total_dac_eval_retries __section(".data");
2713static int dac_eval_exhausted __section(".data");
2714
2715#define DEFAULT_DAC_SAMPLES 7   // originally was 5
2716#define DAC_RETRIES_LIMIT   2
2717
2718struct bytelane_sample {
2719        s16 bytes[DEFAULT_DAC_SAMPLES];
2720};
2721
2722static struct bytelane_sample lanes[9] __section(".data");
2723
2724static char disable_sequential_delay_check __section(".data");
2725static int wl_print __section(".data");
2726
2727static int enable_by_rank_init __section(".data");
2728static int saved_rank_mask __section(".data");
2729static int by_rank __section(".data");
2730static struct deskew_data rank_dsk[4] __section(".data");
2731static struct dac_data rank_dac[4] __section(".data");
2732
2733// TODO: perhaps remove node completely at some point?
2734static int node __section(".data");
2735static int base_cl __section(".data");
2736
2737/* Parameters from DDR3 Specifications */
2738#define DDR3_TREFI         7800000      /* 7.8 us */
2739#define DDR3_ZQCS          80000ull     /* 80 ns */
2740#define DDR3_ZQCS_INTERNAL 1280000000ull        /* 128ms/100 */
2741#define DDR3_TCKE          5000 /* 5 ns */
2742#define DDR3_TMRD          4    /* 4 nCK */
2743#define DDR3_TDLLK         512  /* 512 nCK */
2744#define DDR3_TMPRR         1    /* 1 nCK */
2745#define DDR3_TWLMRD        40   /* 40 nCK */
2746#define DDR3_TWLDQSEN      25   /* 25 nCK */
2747
2748/* Parameters from DDR4 Specifications */
2749#define DDR4_TMRD          8    /* 8 nCK */
2750#define DDR4_TDLLK         768  /* 768 nCK */
2751
2752static void lmc_config(struct ddr_priv *priv)
2753{
2754        union cvmx_lmcx_config cfg;
2755        char *s;
2756
2757        cfg.u64 = 0;
2758
2759        cfg.cn78xx.ecc_ena = use_ecc;
2760        cfg.cn78xx.row_lsb = encode_row_lsb_ddr3(row_lsb);
2761        cfg.cn78xx.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb);
2762
2763        cfg.cn78xx.idlepower = 0;       /* Disabled */
2764
2765        s = lookup_env(priv, "ddr_idlepower");
2766        if (s)
2767                cfg.cn78xx.idlepower = simple_strtoul(s, NULL, 0);
2768
2769        cfg.cn78xx.forcewrite = 0;      /* Disabled */
2770        /* Include memory reference address in the ECC */
2771        cfg.cn78xx.ecc_adr = 1;
2772
2773        s = lookup_env(priv, "ddr_ecc_adr");
2774        if (s)
2775                cfg.cn78xx.ecc_adr = simple_strtoul(s, NULL, 0);
2776
2777        cfg.cn78xx.reset = 0;
2778
2779        /*
2780         * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to
2781         * RND-DN(tREFI/clkPeriod/512). Program LMC0_CONFIG[36:25],
2782         * ref_zqcs_int(18:7) to
2783         * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this
2784         * value should always be greater than 32, to account for
2785         * resistor calibration delays.
2786         */
2787
2788        cfg.cn78xx.ref_zqcs_int = ((DDR3_TREFI / tclk_psecs / 512) & 0x7f);
2789        cfg.cn78xx.ref_zqcs_int |=
2790                ((max(33ull, (DDR3_ZQCS_INTERNAL / (tclk_psecs / 100) /
2791                              (512 * 128))) & 0xfff) << 7);
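        /*
         * Worked example (illustrative, assuming tclk_psecs = 1250 ps,
         * i.e. DDR4-1600): the tREFI term is 7800000 / 1250 / 512 = 12,
         * and the ZQCS-interval term evaluates to 1627 with the integer
         * division above, comfortably above the required minimum of 33.
         */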
2792
2793        cfg.cn78xx.early_dqx = 1;       /* Default to enabled */
2794
2795        s = lookup_env(priv, "ddr_early_dqx");
2796        if (!s)
2797                s = lookup_env(priv, "ddr%d_early_dqx", if_num);
2798
2799        if (s)
2800                cfg.cn78xx.early_dqx = simple_strtoul(s, NULL, 0);
2801
2802        cfg.cn78xx.sref_with_dll = 0;
2803
2804        cfg.cn78xx.rank_ena = bunk_enable;
2805        cfg.cn78xx.rankmask = rank_mask;        /* Set later */
2806        cfg.cn78xx.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) &
2807                rank_mask;
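        /*
         * Example (illustrative): with SPD address mirroring present
         * (spd_addr_mirror = 1) and rank_mask = 0xf, mirrmask becomes
         * 0b1010, i.e. mirroring is applied to the second rank of each DIMM.
         */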
2808        /* Set once and don't change it. */
2809        cfg.cn78xx.init_status = rank_mask;
2810        cfg.cn78xx.early_unload_d0_r0 = 0;
2811        cfg.cn78xx.early_unload_d0_r1 = 0;
2812        cfg.cn78xx.early_unload_d1_r0 = 0;
2813        cfg.cn78xx.early_unload_d1_r1 = 0;
2814        cfg.cn78xx.scrz = 0;
2815        if (octeon_is_cpuid(OCTEON_CN70XX))
2816                cfg.cn78xx.mode32b = 1; /* Read-only. Always 1. */
2817        cfg.cn78xx.mode_x4dev = (dram_width == 4) ? 1 : 0;
2818        cfg.cn78xx.bg2_enable = ((ddr_type == DDR4_DRAM) &&
2819                                 (dram_width == 16)) ? 0 : 1;
2820
2821        s = lookup_env_ull(priv, "ddr_config");
2822        if (s)
2823                cfg.u64 = simple_strtoull(s, NULL, 0);
2824        debug("LMC_CONFIG                                    : 0x%016llx\n",
2825              cfg.u64);
2826        lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
2827}
2828
2829static void lmc_control(struct ddr_priv *priv)
2830{
2831        union cvmx_lmcx_control ctrl;
2832        char *s;
2833
2834        ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
2835        ctrl.s.rdimm_ena = spd_rdimm;
2836        ctrl.s.bwcnt = 0;       /* Clear counter later */
2837        if (spd_rdimm)
2838                ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_rdimm);
2839        else
2840                ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_udimm);
2841        ctrl.s.pocas = 0;
2842        ctrl.s.fprch2 = (safe_ddr_flag ? 2 : c_cfg->fprch2);
2843        ctrl.s.throttle_rd = safe_ddr_flag ? 1 : 0;
2844        ctrl.s.throttle_wr = safe_ddr_flag ? 1 : 0;
2845        ctrl.s.inorder_rd = safe_ddr_flag ? 1 : 0;
2846        ctrl.s.inorder_wr = safe_ddr_flag ? 1 : 0;
2847        ctrl.s.elev_prio_dis = safe_ddr_flag ? 1 : 0;
2848        /* discards writes to addresses that don't exist in the DRAM */
2849        ctrl.s.nxm_write_en = 0;
2850        ctrl.s.max_write_batch = 8;
2851        ctrl.s.xor_bank = 1;
2852        ctrl.s.auto_dclkdis = 1;
2853        ctrl.s.int_zqcs_dis = 0;
2854        ctrl.s.ext_zqcs_dis = 0;
2855        ctrl.s.bprch = 1;
2856        ctrl.s.wodt_bprch = 1;
2857        ctrl.s.rodt_bprch = 1;
2858
2859        s = lookup_env(priv, "ddr_xor_bank");
2860        if (s)
2861                ctrl.s.xor_bank = simple_strtoul(s, NULL, 0);
2862
2863        s = lookup_env(priv, "ddr_2t");
2864        if (s)
2865                ctrl.s.ddr2t = simple_strtoul(s, NULL, 0);
2866
2867        s = lookup_env(priv, "ddr_fprch2");
2868        if (s)
2869                ctrl.s.fprch2 = simple_strtoul(s, NULL, 0);
2870
2871        s = lookup_env(priv, "ddr_bprch");
2872        if (s)
2873                ctrl.s.bprch = simple_strtoul(s, NULL, 0);
2874
2875        s = lookup_env(priv, "ddr_wodt_bprch");
2876        if (s)
2877                ctrl.s.wodt_bprch = simple_strtoul(s, NULL, 0);
2878
2879        s = lookup_env(priv, "ddr_rodt_bprch");
2880        if (s)
2881                ctrl.s.rodt_bprch = simple_strtoul(s, NULL, 0);
2882
2883        s = lookup_env(priv, "ddr_int_zqcs_dis");
2884        if (s)
2885                ctrl.s.int_zqcs_dis = simple_strtoul(s, NULL, 0);
2886
2887        s = lookup_env(priv, "ddr_ext_zqcs_dis");
2888        if (s)
2889                ctrl.s.ext_zqcs_dis = simple_strtoul(s, NULL, 0);
2890
2891        s = lookup_env_ull(priv, "ddr_control");
2892        if (s)
2893                ctrl.u64 = simple_strtoull(s, NULL, 0);
2894
2895        debug("LMC_CONTROL                                   : 0x%016llx\n",
2896              ctrl.u64);
2897        lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
2898}
2899
2900static void lmc_timing_params0(struct ddr_priv *priv)
2901{
2902        union cvmx_lmcx_timing_params0 tp0;
2903        unsigned int trp_value;
2904        char *s;
2905
2906        tp0.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS0(if_num));
2907
2908        trp_value = divide_roundup(trp, tclk_psecs) - 1;
2909        debug("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value,
2910              trp_value +
2911              (unsigned int)(divide_roundup(max(4ull * tclk_psecs, 7500ull),
2912                                            tclk_psecs)) - 4);
2913        s = lookup_env_ull(priv, "ddr_use_old_trp");
2914        if (s) {
2915                if (!!simple_strtoull(s, NULL, 0)) {
2916                        trp_value +=
2917                            divide_roundup(max(4ull * tclk_psecs, 7500ull),
2918                                           tclk_psecs) - 4;
2919                        debug("TIMING_PARAMS0[trp]: USING OLD 0x%x\n",
2920                              trp_value);
2921                }
2922        }
2923
2924        tp0.cn78xx.txpr =
2925            divide_roundup(max(5ull * tclk_psecs, trfc + 10000ull),
2926                           16 * tclk_psecs);
2927        tp0.cn78xx.trp = trp_value & 0x1f;
2928        tp0.cn78xx.tcksre =
2929            divide_roundup(max(5ull * tclk_psecs, 10000ull), tclk_psecs) - 1;
2930
2931        if (ddr_type == DDR4_DRAM) {
2932                int tzqinit = 4;        // Default to 4, for all DDR4 speed bins
2933
2934                s = lookup_env(priv, "ddr_tzqinit");
2935                if (s)
2936                        tzqinit = simple_strtoul(s, NULL, 0);
2937
2938                tp0.cn78xx.tzqinit = tzqinit;
2939                /* Always 8. */
2940                tp0.cn78xx.tzqcs = divide_roundup(128 * tclk_psecs,
2941                                                  (16 * tclk_psecs));
2942                tp0.cn78xx.tcke =
2943                    divide_roundup(max(3 * tclk_psecs, (ulong)DDR3_TCKE),
2944                                   tclk_psecs) - 1;
2945                tp0.cn78xx.tmrd =
2946                    divide_roundup((DDR4_TMRD * tclk_psecs), tclk_psecs) - 1;
2947                tp0.cn78xx.tmod = 25;   /* 25 is the max allowed */
2948                tp0.cn78xx.tdllk = divide_roundup(DDR4_TDLLK, 256);
2949        } else {
2950                tp0.cn78xx.tzqinit =
2951                    divide_roundup(max(512ull * tclk_psecs, 640000ull),
2952                                   (256 * tclk_psecs));
2953                tp0.cn78xx.tzqcs =
2954                    divide_roundup(max(64ull * tclk_psecs, DDR3_ZQCS),
2955                                   (16 * tclk_psecs));
2956                tp0.cn78xx.tcke = divide_roundup(DDR3_TCKE, tclk_psecs) - 1;
2957                tp0.cn78xx.tmrd =
2958                    divide_roundup((DDR3_TMRD * tclk_psecs), tclk_psecs) - 1;
2959                tp0.cn78xx.tmod =
2960                    divide_roundup(max(12ull * tclk_psecs, 15000ull),
2961                                   tclk_psecs) - 1;
2962                tp0.cn78xx.tdllk = divide_roundup(DDR3_TDLLK, 256);
2963        }
2964
2965        s = lookup_env_ull(priv, "ddr_timing_params0");
2966        if (s)
2967                tp0.u64 = simple_strtoull(s, NULL, 0);
2968        debug("TIMING_PARAMS0                                : 0x%016llx\n",
2969              tp0.u64);
2970        lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS0(if_num), tp0.u64);
2971}
2972
2973static void lmc_timing_params1(struct ddr_priv *priv)
2974{
2975        union cvmx_lmcx_timing_params1 tp1;
2976        unsigned int txp, temp_trcd, trfc_dlr;
2977        char *s;
2978
2979        tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
2980
2981        /* .cn70xx. */
2982        tp1.s.tmprr = divide_roundup(DDR3_TMPRR * tclk_psecs, tclk_psecs) - 1;
2983
2984        tp1.cn78xx.tras = divide_roundup(tras, tclk_psecs) - 1;
2985
2986        temp_trcd = divide_roundup(trcd, tclk_psecs);
2987        if (temp_trcd > 15) {
2988                debug("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n",
2989                      temp_trcd);
2990        }
2991        if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trcd > 15) {
2992                /*
2993                 * Let .trcd=0 serve as a flag that the field has
2994                 * overflowed. Must use Additive Latency mode as a
2995                 * workaround.
2996                 */
2997                temp_trcd = 0;
2998        }
2999        tp1.cn78xx.trcd = (temp_trcd >> 0) & 0xf;
3000        tp1.cn78xx.trcd_ext = (temp_trcd >> 4) & 0x1;
3001
3002        tp1.cn78xx.twtr = divide_roundup(twtr, tclk_psecs) - 1;
3003        tp1.cn78xx.trfc = divide_roundup(trfc, 8 * tclk_psecs);
3004
3005        if (ddr_type == DDR4_DRAM) {
3006                /* Workaround bug 24006. Use Trrd_l. */
3007                tp1.cn78xx.trrd =
3008                    divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
3009        } else {
3010                tp1.cn78xx.trrd = divide_roundup(trrd, tclk_psecs) - 2;
3011        }
3012
3013        /*
3014         * tXP = max( 3nCK, 7.5 ns)     DDR3-800   tCLK = 2500 psec
3015         * tXP = max( 3nCK, 7.5 ns)     DDR3-1066  tCLK = 1875 psec
3016         * tXP = max( 3nCK, 6.0 ns)     DDR3-1333  tCLK = 1500 psec
3017         * tXP = max( 3nCK, 6.0 ns)     DDR3-1600  tCLK = 1250 psec
3018         * tXP = max( 3nCK, 6.0 ns)     DDR3-1866  tCLK = 1071 psec
3019         * tXP = max( 3nCK, 6.0 ns)     DDR3-2133  tCLK =  937 psec
3020         */
3021        txp = (tclk_psecs < 1875) ? 6000 : 7500;
3022        txp = divide_roundup(max((unsigned int)(3 * tclk_psecs), txp),
3023                             tclk_psecs) - 1;
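        /*
         * Example (illustrative): at tclk_psecs = 1250 ps the limit is
         * max(3 * 1250 ps, 6000 ps) = 6000 ps, so the result above is
         * divide_roundup(6000, 1250) - 1 = 4, i.e. a 5 nCK tXP encoded as 4.
         */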
3024        if (txp > 7) {
3025                debug("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n",
3026                      txp);
3027        }
3028        if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && txp > 7)
3029                txp = 7;        // max it out
3030        tp1.cn78xx.txp = (txp >> 0) & 7;
3031        tp1.cn78xx.txp_ext = (txp >> 3) & 1;
3032
3033        tp1.cn78xx.twlmrd = divide_roundup(DDR3_TWLMRD * tclk_psecs,
3034                                           4 * tclk_psecs);
3035        tp1.cn78xx.twldqsen = divide_roundup(DDR3_TWLDQSEN * tclk_psecs,
3036                                             4 * tclk_psecs);
3037        tp1.cn78xx.tfaw = divide_roundup(tfaw, 4 * tclk_psecs);
3038        tp1.cn78xx.txpdll = divide_roundup(max(10ull * tclk_psecs, 24000ull),
3039                                           tclk_psecs) - 1;
3040
3041        if (ddr_type == DDR4_DRAM && is_3ds_dimm) {
3042                /*
3043                 * 4 Gb: tRFC_DLR = 90 ns
3044                 * 8 Gb: tRFC_DLR = 120 ns
3045                 * 16 Gb: tRFC_DLR = 190 ns FIXME?
3046                 */
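                        /*
                         * Example (illustrative): an 8 Gbit die at
                         * tclk_psecs = 1250 ps gives
                         * roundup(120000 / (8 * 1250)) = 12 below.
                         */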
3047                if (die_capacity == 0x1000)     // 4 Gbit
3048                        trfc_dlr = 90;
3049                else if (die_capacity == 0x2000)        // 8 Gbit
3050                        trfc_dlr = 120;
3051                else if (die_capacity == 0x4000)        // 16 Gbit
3052                        trfc_dlr = 190;
3053                else
3054                        trfc_dlr = 0;
3055
3056                if (trfc_dlr == 0) {
3057                        debug("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n",
3058                              node, if_num, die_capacity);
3059                } else {
3060                        tp1.cn78xx.trfc_dlr =
3061                            divide_roundup(trfc_dlr * 1000UL, 8 * tclk_psecs);
3062                        debug("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n",
3063                              node, if_num, tp1.cn78xx.trfc_dlr);
3064                }
3065        }
3066
3067        s = lookup_env_ull(priv, "ddr_timing_params1");
3068        if (s)
3069                tp1.u64 = simple_strtoull(s, NULL, 0);
3070
3071        debug("TIMING_PARAMS1                                : 0x%016llx\n",
3072              tp1.u64);
3073        lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
3074}
3075
3076static void lmc_timing_params2(struct ddr_priv *priv)
3077{
3078        if (ddr_type == DDR4_DRAM) {
3079                union cvmx_lmcx_timing_params1 tp1;
3080                union cvmx_lmcx_timing_params2 tp2;
3081                int temp_trrd_l;
3082
3083                tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
3084                tp2.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS2(if_num));
3085                debug("TIMING_PARAMS2                                : 0x%016llx\n",
3086                      tp2.u64);
3087
3088                temp_trrd_l = divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
3089                if (temp_trrd_l > 7)
3090                        debug("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n",
3091                              temp_trrd_l);
3092                if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trrd_l > 7)
3093                        temp_trrd_l = 7;        // max it out
3094                tp2.cn78xx.trrd_l = (temp_trrd_l >> 0) & 7;
3095                tp2.cn78xx.trrd_l_ext = (temp_trrd_l >> 3) & 1;
3096
3097                // tWTR_L = max(4 nCK, 7.5 ns); correct for DDR4-1600 through DDR4-2400
3098                tp2.s.twtr_l = divide_nint(max(4ull * tclk_psecs, 7500ull),
3099                                           tclk_psecs) - 1;
3100                tp2.s.t_rw_op_max = 7;
3101                tp2.s.trtp = divide_roundup(max(4ull * tclk_psecs, 7500ull),
3102                                            tclk_psecs) - 1;
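                /*
                 * Example (illustrative): at tclk_psecs = 1250 ps both
                 * fields resolve to max(4 nCK, 7.5 ns) = 6 clocks and are
                 * stored above as 5 (n - 1 encoding).
                 */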
3103
3104                debug("TIMING_PARAMS2                                : 0x%016llx\n",
3105                      tp2.u64);
3106                lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS2(if_num), tp2.u64);
3107
3108                /*
3109                 * Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met
3110                 * for Write-to-Read operations to the same Bank Group
3111                 */
3112                if (tp1.cn78xx.twtr < (tp2.s.twtr_l - 4)) {
3113                        tp1.cn78xx.twtr = tp2.s.twtr_l - 4;
3114                        debug("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n",
3115                              tp1.cn78xx.twtr, tp2.s.twtr_l);
3116                        debug("TIMING_PARAMS1                                : 0x%016llx\n",
3117                              tp1.u64);
3118                        lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
3119                }
3120        }
3121}
3122
3123static void lmc_modereg_params0(struct ddr_priv *priv)
3124{
3125        union cvmx_lmcx_modereg_params0 mp0;
3126        int param;
3127        char *s;
3128
3129        mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
3130
3131        if (ddr_type == DDR4_DRAM) {
3132                mp0.s.cwl = 0;  /* 1600 (1250ps) */
3133                if (tclk_psecs < 1250)
3134                        mp0.s.cwl = 1;  /* 1866 (1072ps) */
3135                if (tclk_psecs < 1072)
3136                        mp0.s.cwl = 2;  /* 2133 (938ps) */
3137                if (tclk_psecs < 938)
3138                        mp0.s.cwl = 3;  /* 2400 (833ps) */
3139                if (tclk_psecs < 833)
3140                        mp0.s.cwl = 4;  /* 2666 (750ps) */
3141                if (tclk_psecs < 750)
3142                        mp0.s.cwl = 5;  /* 3200 (625ps) */
3143        } else {
3144                /*
3145                 ** CSR   CWL         CAS write Latency
3146                 ** ===   ===   =================================
3147                 **  0      5   (           tCK(avg) >=   2.5 ns)
3148                 **  1      6   (2.5 ns   > tCK(avg) >= 1.875 ns)
3149                 **  2      7   (1.875 ns > tCK(avg) >= 1.5   ns)
3150                 **  3      8   (1.5 ns   > tCK(avg) >= 1.25  ns)
3151                 **  4      9   (1.25 ns  > tCK(avg) >= 1.07  ns)
3152                 **  5     10   (1.07 ns  > tCK(avg) >= 0.935 ns)
3153                 **  6     11   (0.935 ns > tCK(avg) >= 0.833 ns)
3154                 **  7     12   (0.833 ns > tCK(avg) >= 0.75  ns)
3155                 */
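                /*
                 * Example (illustrative): DDR3-1600 has tCK(avg) = 1250 ps,
                 * which falls in the "1.5 ns > tCK(avg) >= 1.25 ns" row of
                 * the table above, so the chain below selects CSR value 3,
                 * i.e. CWL = 8.
                 */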
3156
3157                mp0.s.cwl = 0;
3158                if (tclk_psecs < 2500)
3159                        mp0.s.cwl = 1;
3160                if (tclk_psecs < 1875)
3161                        mp0.s.cwl = 2;
3162                if (tclk_psecs < 1500)
3163                        mp0.s.cwl = 3;
3164                if (tclk_psecs < 1250)
3165                        mp0.s.cwl = 4;
3166                if (tclk_psecs < 1070)
3167                        mp0.s.cwl = 5;
3168                if (tclk_psecs < 935)
3169                        mp0.s.cwl = 6;
3170                if (tclk_psecs < 833)
3171                        mp0.s.cwl = 7;
3172        }
3173
3174        s = lookup_env(priv, "ddr_cwl");
3175        if (s)
3176                mp0.s.cwl = simple_strtoul(s, NULL, 0) - 5;
3177
3178        if (ddr_type == DDR4_DRAM) {
3179                debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
3180                      mp0.s.cwl + 9
3181                      + ((mp0.s.cwl > 2) ? (mp0.s.cwl - 3) * 2 : 0), mp0.s.cwl);
3182        } else {
3183                debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
3184                      mp0.s.cwl + 5, mp0.s.cwl);
3185        }
3186
3187        mp0.s.mprloc = 0;
3188        mp0.s.mpr = 0;
3189        mp0.s.dll = (ddr_type == DDR4_DRAM);    /* 0 for DDR3 and 1 for DDR4 */
3190        mp0.s.al = 0;
3191        mp0.s.wlev = 0;         /* Read Only */
3192        if (octeon_is_cpuid(OCTEON_CN70XX) || ddr_type == DDR4_DRAM)
3193                mp0.s.tdqs = 0;
3194        else
3195                mp0.s.tdqs = 1;
3196        mp0.s.qoff = 0;
3197
3198        s = lookup_env(priv, "ddr_cl");
3199        if (s) {
3200                cl = simple_strtoul(s, NULL, 0);
3201                debug("CAS Latency                                   : %6d\n",
3202                      cl);
3203        }
3204
3205        if (ddr_type == DDR4_DRAM) {
3206                mp0.s.cl = 0x0;
3207                if (cl > 9)
3208                        mp0.s.cl = 0x1;
3209                if (cl > 10)
3210                        mp0.s.cl = 0x2;
3211                if (cl > 11)
3212                        mp0.s.cl = 0x3;
3213                if (cl > 12)
3214                        mp0.s.cl = 0x4;
3215                if (cl > 13)
3216                        mp0.s.cl = 0x5;
3217                if (cl > 14)
3218                        mp0.s.cl = 0x6;
3219                if (cl > 15)
3220                        mp0.s.cl = 0x7;
3221                if (cl > 16)
3222                        mp0.s.cl = 0x8;
3223                if (cl > 18)
3224                        mp0.s.cl = 0x9;
3225                if (cl > 20)
3226                        mp0.s.cl = 0xA;
3227                if (cl > 24)
3228                        mp0.s.cl = 0xB;
3229        } else {
3230                mp0.s.cl = 0x2;
3231                if (cl > 5)
3232                        mp0.s.cl = 0x4;
3233                if (cl > 6)
3234                        mp0.s.cl = 0x6;
3235                if (cl > 7)
3236                        mp0.s.cl = 0x8;
3237                if (cl > 8)
3238                        mp0.s.cl = 0xA;
3239                if (cl > 9)
3240                        mp0.s.cl = 0xC;
3241                if (cl > 10)
3242                        mp0.s.cl = 0xE;
3243                if (cl > 11)
3244                        mp0.s.cl = 0x1;
3245                if (cl > 12)
3246                        mp0.s.cl = 0x3;
3247                if (cl > 13)
3248                        mp0.s.cl = 0x5;
3249                if (cl > 14)
3250                        mp0.s.cl = 0x7;
3251                if (cl > 15)
3252                        mp0.s.cl = 0x9;
3253        }
3254
3255        mp0.s.rbt = 0;          /* Read Only. */
3256        mp0.s.tm = 0;
3257        mp0.s.dllr = 0;
3258
3259        param = divide_roundup(twr, tclk_psecs);
3260
3261        if (ddr_type == DDR4_DRAM) {    /* DDR4 */
3262                mp0.s.wrp = 1;
3263                if (param > 12)
3264                        mp0.s.wrp = 2;
3265                if (param > 14)
3266                        mp0.s.wrp = 3;
3267                if (param > 16)
3268                        mp0.s.wrp = 4;
3269                if (param > 18)
3270                        mp0.s.wrp = 5;
3271                if (param > 20)
3272                        mp0.s.wrp = 6;
3273                if (param > 24) /* RESERVED in DDR4 spec */
3274                        mp0.s.wrp = 7;
3275        } else {                /* DDR3 */
3276                mp0.s.wrp = 1;
3277                if (param > 5)
3278                        mp0.s.wrp = 2;
3279                if (param > 6)
3280                        mp0.s.wrp = 3;
3281                if (param > 7)
3282                        mp0.s.wrp = 4;
3283                if (param > 8)
3284                        mp0.s.wrp = 5;
3285                if (param > 10)
3286                        mp0.s.wrp = 6;
3287                if (param > 12)
3288                        mp0.s.wrp = 7;
3289        }
3290
3291        mp0.s.ppd = 0;
3292
3293        s = lookup_env(priv, "ddr_wrp");
3294        if (s)
3295                mp0.s.wrp = simple_strtoul(s, NULL, 0);
3296
3297        debug("%-45s : %d, [0x%x]\n",
3298              "Write recovery for auto precharge WRP, [CSR]", param, mp0.s.wrp);
3299
3300        s = lookup_env_ull(priv, "ddr_modereg_params0");
3301        if (s)
3302                mp0.u64 = simple_strtoull(s, NULL, 0);
3303
3304        debug("MODEREG_PARAMS0                               : 0x%016llx\n",
3305              mp0.u64);
3306        lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
3307}
3308
3309static void lmc_modereg_params1(struct ddr_priv *priv)
3310{
3311        union cvmx_lmcx_modereg_params1 mp1;
3312        char *s;
3313        int i;
3314
3315        mp1.u64 = odt_config[odt_idx].modereg_params1.u64;
3316
3317        /*
3318         * Special request: mismatched DIMM support. Slot 0: 2-Rank,
3319         * Slot 1: 1-Rank
3320         */
3321        if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */
3322                mp1.s.rtt_nom_00 = 0;
3323                mp1.s.rtt_nom_01 = 3;   /* rttnom_40ohm */
3324                mp1.s.rtt_nom_10 = 3;   /* rttnom_40ohm */
3325                mp1.s.rtt_nom_11 = 0;
3326                dyn_rtt_nom_mask = 0x6;
3327        }
3328
3329        s = lookup_env(priv, "ddr_rtt_nom_mask");
3330        if (s)
3331                dyn_rtt_nom_mask = simple_strtoul(s, NULL, 0);
3332
3333        /*
3334         * Save the original rtt_nom settings before sweeping through
3335         * settings.
3336         */
3337        default_rtt_nom[0] = mp1.s.rtt_nom_00;
3338        default_rtt_nom[1] = mp1.s.rtt_nom_01;
3339        default_rtt_nom[2] = mp1.s.rtt_nom_10;
3340        default_rtt_nom[3] = mp1.s.rtt_nom_11;
3341
3342        ddr_rtt_nom_auto = c_cfg->ddr_rtt_nom_auto;
3343
3344        for (i = 0; i < 4; ++i) {
3345                u64 value;
3346
3347                s = lookup_env(priv, "ddr_rtt_nom_%1d%1d", !!(i & 2),
3348                               !!(i & 1));
3349                if (!s)
3350                        s = lookup_env(priv, "ddr%d_rtt_nom_%1d%1d", if_num,
3351                                       !!(i & 2), !!(i & 1));
3352                if (s) {
3353                        value = simple_strtoul(s, NULL, 0);
3354                        mp1.u64 &= ~((u64)0x7 << (i * 12 + 9));
3355                        mp1.u64 |= ((value & 0x7) << (i * 12 + 9));
3356                        default_rtt_nom[i] = value;
3357                        ddr_rtt_nom_auto = 0;
3358                }
3359        }
3360
3361        s = lookup_env(priv, "ddr_rtt_nom");
3362        if (!s)
3363                s = lookup_env(priv, "ddr%d_rtt_nom", if_num);
3364        if (s) {
3365                u64 value;
3366
3367                value = simple_strtoul(s, NULL, 0);
3368
3369                if (dyn_rtt_nom_mask & 1) {
3370                        default_rtt_nom[0] = value;
3371                        mp1.s.rtt_nom_00 = value;
3372                }
3373                if (dyn_rtt_nom_mask & 2) {
3374                        default_rtt_nom[1] = value;
3375                        mp1.s.rtt_nom_01 = value;
3376                }
3377                if (dyn_rtt_nom_mask & 4) {
3378                        default_rtt_nom[2] = value;
3379                        mp1.s.rtt_nom_10 = value;
3380                }
3381                if (dyn_rtt_nom_mask & 8) {
3382                        default_rtt_nom[3] = value;
3383                        mp1.s.rtt_nom_11 = value;
3384                }
3385
3386                ddr_rtt_nom_auto = 0;
3387        }
3388
3389        for (i = 0; i < 4; ++i) {
3390                u64 value;
3391
3392                s = lookup_env(priv, "ddr_rtt_wr_%1d%1d", !!(i & 2), !!(i & 1));
3393                if (!s)
3394                        s = lookup_env(priv, "ddr%d_rtt_wr_%1d%1d", if_num,
3395                                       !!(i & 2), !!(i & 1));
3396                if (s) {
3397                        value = simple_strtoul(s, NULL, 0);
3398                        insrt_wr(&mp1.u64, i, value);
3399                }
3400        }
3401
3402        // Make sure 78XX pass 1 has valid RTT_WR settings, because
3403                // configuration files may be set up for later chips, and
3404        // 78XX pass 1 supports no RTT_WR extension bits
3405        if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
3406                for (i = 0; i < 4; ++i) {
3407                        // if 80 or undefined
3408                        if (extr_wr(mp1.u64, i) > 3) {
3409                                // FIXME? always insert 120
3410                                insrt_wr(&mp1.u64, i, 1);
3411                                debug("RTT_WR_%d%d set to 120 for CN78XX pass 1\n",
3412                                      !!(i & 2), i & 1);
3413                        }
3414                }
3415        }
3416
3417        s = lookup_env(priv, "ddr_dic");
3418        if (s) {
3419                u64 value = simple_strtoul(s, NULL, 0);
3420
3421                for (i = 0; i < 4; ++i) {
3422                        mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
3423                        mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
3424                }
3425        }
3426
3427        for (i = 0; i < 4; ++i) {
3428                u64 value;
3429
3430                s = lookup_env(priv, "ddr_dic_%1d%1d", !!(i & 2), !!(i & 1));
3431                if (s) {
3432                        value = simple_strtoul(s, NULL, 0);
3433                        mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
3434                        mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
3435                }
3436        }
3437
3438        s = lookup_env_ull(priv, "ddr_modereg_params1");
3439        if (s)
3440                mp1.u64 = simple_strtoull(s, NULL, 0);
3441
3442        debug("RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
3443              imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
3444              imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
3445              imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
3446              imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
3447              mp1.s.rtt_nom_11,
3448              mp1.s.rtt_nom_10, mp1.s.rtt_nom_01, mp1.s.rtt_nom_00);
3449
3450        debug("RTT_WR      %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
3451              imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
3452              imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
3453              imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
3454              imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
3455              extr_wr(mp1.u64, 3),
3456              extr_wr(mp1.u64, 2), extr_wr(mp1.u64, 1), extr_wr(mp1.u64, 0));
3457
3458        debug("DIC         %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
3459              imp_val->dic_ohms[mp1.s.dic_11],
3460              imp_val->dic_ohms[mp1.s.dic_10],
3461              imp_val->dic_ohms[mp1.s.dic_01],
3462              imp_val->dic_ohms[mp1.s.dic_00],
3463              mp1.s.dic_11, mp1.s.dic_10, mp1.s.dic_01, mp1.s.dic_00);
3464
3465        debug("MODEREG_PARAMS1                               : 0x%016llx\n",
3466              mp1.u64);
3467        lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), mp1.u64);
3468}
3469
3470static void lmc_modereg_params2(struct ddr_priv *priv)
3471{
3472        char *s;
3473        int i;
3474
3475        if (ddr_type == DDR4_DRAM) {
3476                union cvmx_lmcx_modereg_params2 mp2;
3477
3478                mp2.u64 = odt_config[odt_idx].modereg_params2.u64;
3479
3480                s = lookup_env(priv, "ddr_rtt_park");
3481                if (s) {
3482                        u64 value = simple_strtoul(s, NULL, 0);
3483
3484                        for (i = 0; i < 4; ++i) {
3485                                mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
3486                                mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
3487                        }
3488                }
3489
3490                for (i = 0; i < 4; ++i) {
3491                        u64 value;
3492
3493                        s = lookup_env(priv, "ddr_rtt_park_%1d%1d", !!(i & 2),
3494                                       !!(i & 1));
3495                        if (s) {
3496                                value = simple_strtoul(s, NULL, 0);
3497                                mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
3498                                mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
3499                        }
3500                }
3501
3502                s = lookup_env_ull(priv, "ddr_modereg_params2");
3503                if (s)
3504                        mp2.u64 = simple_strtoull(s, NULL, 0);
3505
3506                debug("RTT_PARK    %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
3507                      imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
3508                      imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
3509                      imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
3510                      imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
3511                      mp2.s.rtt_park_11, mp2.s.rtt_park_10, mp2.s.rtt_park_01,
3512                      mp2.s.rtt_park_00);
3513
3514                debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
3515                      mp2.s.vref_range_11,
3516                      mp2.s.vref_range_10,
3517                      mp2.s.vref_range_01, mp2.s.vref_range_00);
3518
3519                debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
3520                      mp2.s.vref_value_11,
3521                      mp2.s.vref_value_10,
3522                      mp2.s.vref_value_01, mp2.s.vref_value_00);
3523
3524                debug("MODEREG_PARAMS2                               : 0x%016llx\n",
3525                      mp2.u64);
3526                lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num), mp2.u64);
3527        }
3528}
3529
3530static void lmc_modereg_params3(struct ddr_priv *priv)
3531{
3532        char *s;
3533
3534        if (ddr_type == DDR4_DRAM) {
3535                union cvmx_lmcx_modereg_params3 mp3;
3536
3537                mp3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num));
3538                /* Disable as workaround to Errata 20547 */
3539                mp3.s.rd_dbi = 0;
3540                mp3.s.tccd_l = max(divide_roundup(ddr4_tccd_lmin, tclk_psecs),
3541                                   5ull) - 4;
3542
3543                s = lookup_env(priv, "ddr_rd_preamble");
3544                if (s)
3545                        mp3.s.rd_preamble = !!simple_strtoul(s, NULL, 0);
3546
3547                if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
3548                        int delay = 0;
3549
3550                        if (lranks_per_prank == 4 && ddr_hertz >= 1000000000)
3551                                delay = 1;
3552
3553                        mp3.s.xrank_add_tccd_l = delay;
3554                        mp3.s.xrank_add_tccd_s = delay;
3555                }
3556
3557                lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num), mp3.u64);
3558                debug("MODEREG_PARAMS3                               : 0x%016llx\n",
3559                      mp3.u64);
3560        }
3561}
3562
3563static void lmc_nxm(struct ddr_priv *priv)
3564{
3565        union cvmx_lmcx_nxm lmc_nxm;
3566        int num_bits = row_lsb + row_bits + lranks_bits - 26;
3567        char *s;
3568
3569        lmc_nxm.u64 = lmc_rd(priv, CVMX_LMCX_NXM(if_num));
3570
3571        /* .cn78xx. */
3572        if (rank_mask & 0x1)
3573                lmc_nxm.cn78xx.mem_msb_d0_r0 = num_bits;
3574        if (rank_mask & 0x2)
3575                lmc_nxm.cn78xx.mem_msb_d0_r1 = num_bits;
3576        if (rank_mask & 0x4)
3577                lmc_nxm.cn78xx.mem_msb_d1_r0 = num_bits;
3578        if (rank_mask & 0x8)
3579                lmc_nxm.cn78xx.mem_msb_d1_r1 = num_bits;
3580
3581        /* Set the mask for non-existent ranks. */
3582        lmc_nxm.cn78xx.cs_mask = ~rank_mask & 0xff;
3583
3584        s = lookup_env_ull(priv, "ddr_nxm");
3585        if (s)
3586                lmc_nxm.u64 = simple_strtoull(s, NULL, 0);
3587
3588        debug("LMC_NXM                                       : 0x%016llx\n",
3589              lmc_nxm.u64);
3590        lmc_wr(priv, CVMX_LMCX_NXM(if_num), lmc_nxm.u64);
3591}
3592
3593static void lmc_wodt_mask(struct ddr_priv *priv)
3594{
3595        union cvmx_lmcx_wodt_mask wodt_mask;
3596        char *s;
3597
3598        wodt_mask.u64 = odt_config[odt_idx].odt_mask;
3599
3600        s = lookup_env_ull(priv, "ddr_wodt_mask");
3601        if (s)
3602                wodt_mask.u64 = simple_strtoull(s, NULL, 0);
3603
3604        debug("WODT_MASK                                     : 0x%016llx\n",
3605              wodt_mask.u64);
3606        lmc_wr(priv, CVMX_LMCX_WODT_MASK(if_num), wodt_mask.u64);
3607}
3608
3609static void lmc_rodt_mask(struct ddr_priv *priv)
3610{
3611        union cvmx_lmcx_rodt_mask rodt_mask;
3612        int rankx;
3613        char *s;
3614
3615        rodt_mask.u64 = odt_config[odt_idx].rodt_ctl;
3616
3617        s = lookup_env_ull(priv, "ddr_rodt_mask");
3618        if (s)
3619                rodt_mask.u64 = simple_strtoull(s, NULL, 0);
3620
3621        debug("%-45s : 0x%016llx\n", "RODT_MASK", rodt_mask.u64);
3622        lmc_wr(priv, CVMX_LMCX_RODT_MASK(if_num), rodt_mask.u64);
3623
3624        dyn_rtt_nom_mask = 0;
3625        for (rankx = 0; rankx < dimm_count * 4; rankx++) {
3626                if (!(rank_mask & (1 << rankx)))
3627                        continue;
3628                dyn_rtt_nom_mask |= ((rodt_mask.u64 >> (8 * rankx)) & 0xff);
3629        }
3630        if (num_ranks == 4) {
3631                /*
3632                 * Normally ODT1 is wired to rank 1. For quad-ranked DIMMs
3633                 * ODT1 is wired to the third rank (rank 2).  The mask,
3634                 * dyn_rtt_nom_mask, is used to indicate for which ranks
3635                 * to sweep RTT_NOM during read-leveling. Shift the bit
3636                 * from the ODT1 position over to the "ODT2" position so
3637                 * that the read-leveling analysis comes out right.
3638                 */
3639                int odt1_bit = dyn_rtt_nom_mask & 2;
3640
3641                dyn_rtt_nom_mask &= ~2;
3642                dyn_rtt_nom_mask |= odt1_bit << 1;
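                /*
                 * Example (illustrative): a mask of 0b0010 (ODT1 position)
                 * becomes 0b0100, so rank 2 is the one swept during
                 * read-leveling on a quad-rank DIMM.
                 */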
3643        }
3644        debug("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask);
3645}
3646
3647static void lmc_comp_ctl2(struct ddr_priv *priv)
3648{
3649        union cvmx_lmcx_comp_ctl2 cc2;
3650        char *s;
3651
3652        cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
3653
3654        cc2.cn78xx.dqx_ctl = odt_config[odt_idx].odt_ena;
3655        /* Default 4=34.3 ohm */
3656        cc2.cn78xx.ck_ctl = (c_cfg->ck_ctl == 0) ? 4 : c_cfg->ck_ctl;
3657        /* Default 4=34.3 ohm */
3658        cc2.cn78xx.cmd_ctl = (c_cfg->cmd_ctl == 0) ? 4 : c_cfg->cmd_ctl;
3659        /* Default 4=34.3 ohm */
3660        cc2.cn78xx.control_ctl = (c_cfg->ctl_ctl == 0) ? 4 : c_cfg->ctl_ctl;
3661
3662        ddr_rodt_ctl_auto = c_cfg->ddr_rodt_ctl_auto;
3663        s = lookup_env(priv, "ddr_rodt_ctl_auto");
3664        if (s)
3665                ddr_rodt_ctl_auto = !!simple_strtoul(s, NULL, 0);
3666
3667        default_rodt_ctl = odt_config[odt_idx].qs_dic;
3668        s = lookup_env(priv, "ddr_rodt_ctl");
3669        if (!s)
3670                s = lookup_env(priv, "ddr%d_rodt_ctl", if_num);
3671        if (s) {
3672                default_rodt_ctl = simple_strtoul(s, NULL, 0);
3673                ddr_rodt_ctl_auto = 0;
3674        }
3675
3676        cc2.cn70xx.rodt_ctl = default_rodt_ctl;
3677
3678        // if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms,
3679        // and DCLK speed is 1 GHz or more...
3680        if (ddr_type == DDR4_DRAM && cc2.s.ck_ctl == ddr4_driver_34_ohm &&
3681            ddr_hertz >= 1000000000) {
3682                // lowest for DDR4 is 26 ohms
3683                cc2.s.ck_ctl = ddr4_driver_26_ohm;
3684                debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n",
3685                      node, if_num, cc2.s.ck_ctl,
3686                      imp_val->drive_strength[cc2.s.ck_ctl]);
3687        }
3688
3689        // if DDR4, 2DPC, UDIMM, force CONTROL_CTL and CMD_CTL to 26 ohms,
3690        // if DCLK speed is 1 GHz or more...
3691        if (ddr_type == DDR4_DRAM && dimm_count == 2 &&
3692            (spd_dimm_type == 2 || spd_dimm_type == 6) &&
3693            ddr_hertz >= 1000000000) {
3694                // lowest for DDR4 is 26 ohms
3695                cc2.cn78xx.control_ctl = ddr4_driver_26_ohm;
3696                // lowest for DDR4 is 26 ohms
3697                cc2.cn78xx.cmd_ctl = ddr4_driver_26_ohm;
3698                debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CONTROL_CTL,CMD_CTL] to %d, %d ohms\n",
3699                      node, if_num, ddr4_driver_26_ohm,
3700                      imp_val->drive_strength[ddr4_driver_26_ohm]);
3701        }
3702
3703        s = lookup_env(priv, "ddr_ck_ctl");
3704        if (s)
3705                cc2.cn78xx.ck_ctl = simple_strtoul(s, NULL, 0);
3706
3707        s = lookup_env(priv, "ddr_cmd_ctl");
3708        if (s)
3709                cc2.cn78xx.cmd_ctl = simple_strtoul(s, NULL, 0);
3710
3711        s = lookup_env(priv, "ddr_control_ctl");
3712        if (s)
3713                cc2.cn70xx.control_ctl = simple_strtoul(s, NULL, 0);
3714
3715        s = lookup_env(priv, "ddr_dqx_ctl");
3716        if (s)
3717                cc2.cn78xx.dqx_ctl = simple_strtoul(s, NULL, 0);
3718
3719        debug("%-45s : %d, %d ohms\n", "DQX_CTL           ", cc2.cn78xx.dqx_ctl,
3720              imp_val->drive_strength[cc2.cn78xx.dqx_ctl]);
3721        debug("%-45s : %d, %d ohms\n", "CK_CTL            ", cc2.cn78xx.ck_ctl,
3722              imp_val->drive_strength[cc2.cn78xx.ck_ctl]);
3723        debug("%-45s : %d, %d ohms\n", "CMD_CTL           ", cc2.cn78xx.cmd_ctl,
3724              imp_val->drive_strength[cc2.cn78xx.cmd_ctl]);
3725        debug("%-45s : %d, %d ohms\n", "CONTROL_CTL       ",
3726              cc2.cn78xx.control_ctl,
3727              imp_val->drive_strength[cc2.cn78xx.control_ctl]);
3728        debug("Read ODT_CTL                                  : 0x%x (%d ohms)\n",
3729              cc2.cn78xx.rodt_ctl, imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
3730
3731        debug("%-45s : 0x%016llx\n", "COMP_CTL2", cc2.u64);
3732        lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
3733}
3734
3735static void lmc_phy_ctl(struct ddr_priv *priv)
3736{
3737        union cvmx_lmcx_phy_ctl phy_ctl;
3738
3739        phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
3740        phy_ctl.s.ts_stagger = 0;
3741        // FIXME: are there others TBD?
3742        phy_ctl.s.dsk_dbg_overwrt_ena = 0;
3743
3744        if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
3745                // C0 is TEN, C1 is A17
3746                phy_ctl.s.c0_sel = 2;
3747                phy_ctl.s.c1_sel = 2;
3748                debug("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n",
3749                      node, if_num, phy_ctl.s.c1_sel);
3750        }
3751
3752        debug("PHY_CTL                                       : 0x%016llx\n",
3753              phy_ctl.u64);
3754        lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
3755}
3756
3757static void lmc_ext_config(struct ddr_priv *priv)
3758{
3759        union cvmx_lmcx_ext_config ext_cfg;
3760        char *s;
3761
3762        ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
3763        ext_cfg.s.vrefint_seq_deskew = 0;
3764        ext_cfg.s.read_ena_bprch = 1;
3765        ext_cfg.s.read_ena_fprch = 1;
3766        ext_cfg.s.drive_ena_fprch = 1;
3767        ext_cfg.s.drive_ena_bprch = 1;
3768        // make sure this is OFF for all current chips
3769        ext_cfg.s.invert_data = 0;
3770
3771        s = lookup_env(priv, "ddr_read_fprch");
3772        if (s)
3773                ext_cfg.s.read_ena_fprch = strtoul(s, NULL, 0);
3774
3775        s = lookup_env(priv, "ddr_read_bprch");
3776        if (s)
3777                ext_cfg.s.read_ena_bprch = strtoul(s, NULL, 0);
3778
3779        s = lookup_env(priv, "ddr_drive_fprch");
3780        if (s)
3781                ext_cfg.s.drive_ena_fprch = strtoul(s, NULL, 0);
3782
3783        s = lookup_env(priv, "ddr_drive_bprch");
3784        if (s)
3785                ext_cfg.s.drive_ena_bprch = strtoul(s, NULL, 0);
3786
3787        if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
3788                ext_cfg.s.dimm0_cid = lranks_bits;
3789                ext_cfg.s.dimm1_cid = lranks_bits;
3790                debug("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n",
3791                      node, if_num, ext_cfg.s.dimm0_cid);
3792        }
3793
3794        lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
3795        debug("%-45s : 0x%016llx\n", "EXT_CONFIG", ext_cfg.u64);
3796}
3797
3798static void lmc_ext_config2(struct ddr_priv *priv)
3799{
3800        char *s;
3801
3802        // NOTE: all chips have this register, but not necessarily the
3803        // fields we modify...
3804        if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
3805            !octeon_is_cpuid(OCTEON_CN73XX)) {
3806                union cvmx_lmcx_ext_config2 ext_cfg2;
3807                int value = 1;  // default to 1
3808
3809                ext_cfg2.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG2(if_num));
3810
3811                s = lookup_env(priv, "ddr_ext2_delay_unload");
3812                if (s)
3813                        value = !!simple_strtoul(s, NULL, 0);
3814
3815                ext_cfg2.s.delay_unload_r0 = value;
3816                ext_cfg2.s.delay_unload_r1 = value;
3817                ext_cfg2.s.delay_unload_r2 = value;
3818                ext_cfg2.s.delay_unload_r3 = value;
3819
3820                lmc_wr(priv, CVMX_LMCX_EXT_CONFIG2(if_num), ext_cfg2.u64);
3821                debug("%-45s : 0x%016llx\n", "EXT_CONFIG2", ext_cfg2.u64);
3822        }
3823}
3824
3825static void lmc_dimm01_params_loop(struct ddr_priv *priv)
3826{
3827        union cvmx_lmcx_dimmx_params dimm_p;
3828        int dimmx = didx;
3829        char *s;
3830        int rc;
3831        int i;
3832
3833        dimm_p.u64 = lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num));
3834
3835        if (ddr_type == DDR4_DRAM) {
3836                union cvmx_lmcx_dimmx_ddr4_params0 ddr4_p0;
3837                union cvmx_lmcx_dimmx_ddr4_params1 ddr4_p1;
3838                union cvmx_lmcx_ddr4_dimm_ctl ddr4_ctl;
3839
3840                dimm_p.s.rc0 = 0;
3841                dimm_p.s.rc1 = 0;
3842                dimm_p.s.rc2 = 0;
3843
3844                rc = read_spd(&dimm_config_table[didx], 0,
3845                              DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL);
3846                dimm_p.s.rc3 = (rc >> 4) & 0xf;
3847                dimm_p.s.rc4 = ((rc >> 0) & 0x3) << 2;
3848                dimm_p.s.rc4 |= ((rc >> 2) & 0x3) << 0;
3849
3850                rc = read_spd(&dimm_config_table[didx], 0,
3851                              DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK);
3852                dimm_p.s.rc5 = ((rc >> 0) & 0x3) << 2;
3853                dimm_p.s.rc5 |= ((rc >> 2) & 0x3) << 0;
3854
3855                dimm_p.s.rc6 = 0;
3856                dimm_p.s.rc7 = 0;
3857                dimm_p.s.rc8 = 0;
3858                dimm_p.s.rc9 = 0;
3859
3860                /*
3861                 * rc10               DDR4 RDIMM Operating Speed
3862                 * ===  ===================================================
3863                 *  0               tclk_psecs >= 1250 psec DDR4-1600 (1250 ps)
3864                 *  1   1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps)
3865                 *  2   1071 psec > tclk_psecs >=  938 psec DDR4-2133 ( 938 ps)
3866                 *  3    938 psec > tclk_psecs >=  833 psec DDR4-2400 ( 833 ps)
3867                 *  4    833 psec > tclk_psecs >=  750 psec DDR4-2666 ( 750 ps)
3868                 *  5    750 psec > tclk_psecs >=  625 psec DDR4-3200 ( 625 ps)
3869                 */
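                /*
                 * Example (illustrative): a 938 ps clock fails the
                 * "tclk_psecs < 938" test, so the chain below stops at
                 * rc10 = 2, the DDR4-2133 operating-speed code from the
                 * table above.
                 */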
3870                dimm_p.s.rc10 = 0;
3871                if (tclk_psecs < 1250)
3872                        dimm_p.s.rc10 = 1;
3873                if (tclk_psecs < 1071)
3874                        dimm_p.s.rc10 = 2;
3875                if (tclk_psecs < 938)
3876                        dimm_p.s.rc10 = 3;
3877                if (tclk_psecs < 833)
3878                        dimm_p.s.rc10 = 4;
3879                if (tclk_psecs < 750)
3880                        dimm_p.s.rc10 = 5;
3881
3882                dimm_p.s.rc11 = 0;
3883                dimm_p.s.rc12 = 0;
3884                /* 0=LRDIMM, 1=RDIMM */
3885                dimm_p.s.rc13 = (spd_dimm_type == 4) ? 0 : 4;
3886                dimm_p.s.rc13 |= (ddr_type == DDR4_DRAM) ?
3887                        (spd_addr_mirror << 3) : 0;
3888                dimm_p.s.rc14 = 0;
3889                dimm_p.s.rc15 = 0;      /* 1 nCK latency adder */
3890
3891                ddr4_p0.u64 = 0;
3892
3893                ddr4_p0.s.rc8x = 0;
3894                ddr4_p0.s.rc7x = 0;
3895                ddr4_p0.s.rc6x = 0;
3896                ddr4_p0.s.rc5x = 0;
3897                ddr4_p0.s.rc4x = 0;
3898
3899                ddr4_p0.s.rc3x = compute_rc3x(tclk_psecs);
3900
3901                ddr4_p0.s.rc2x = 0;
3902                ddr4_p0.s.rc1x = 0;
3903
3904                ddr4_p1.u64 = 0;
3905
3906                ddr4_p1.s.rcbx = 0;
3907                ddr4_p1.s.rcax = 0;
3908                ddr4_p1.s.rc9x = 0;
3909
3910                ddr4_ctl.u64 = 0;
3911                ddr4_ctl.cn70xx.ddr4_dimm0_wmask = 0x004;
3912                ddr4_ctl.cn70xx.ddr4_dimm1_wmask =
3913                    (dimm_count > 1) ? 0x004 : 0x0000;
3914
3915                /*
3916                 * Handle any overrides from envvars here...
3917                 */
3918                s = lookup_env(priv, "ddr_ddr4_params0");
3919                if (s)
3920                        ddr4_p0.u64 = simple_strtoul(s, NULL, 0);
3921
3922                s = lookup_env(priv, "ddr_ddr4_params1");
3923                if (s)
3924                        ddr4_p1.u64 = simple_strtoul(s, NULL, 0);
3925
3926                s = lookup_env(priv, "ddr_ddr4_dimm_ctl");
3927                if (s)
3928                        ddr4_ctl.u64 = simple_strtoul(s, NULL, 0);
3929
3930                for (i = 0; i < 11; ++i) {
3931                        u64 value;
3932
3933                        s = lookup_env(priv, "ddr_ddr4_rc%1xx", i + 1);
3934                        if (s) {
3935                                value = simple_strtoul(s, NULL, 0);
3936                                if (i < 8) {
3937                                        ddr4_p0.u64 &= ~((u64)0xff << (i * 8));
3938                                        ddr4_p0.u64 |= (value << (i * 8));
3939                                } else {
3940                                        ddr4_p1.u64 &=
3941                                            ~((u64)0xff << ((i - 8) * 8));
3942                                        ddr4_p1.u64 |= (value << ((i - 8) * 8));
3943                                }
3944                        }
3945                }
3946
3947                /*
3948                 * write the final CSR values
3949                 */
3950                lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS0(dimmx, if_num),
3951                       ddr4_p0.u64);
3952
3953                lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), ddr4_ctl.u64);
3954
3955                lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS1(dimmx, if_num),
3956                       ddr4_p1.u64);
3957
3958                debug("DIMM%d Register Control Words        RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n",
3959                      dimmx, ddr4_p1.s.rcbx, ddr4_p1.s.rcax,
3960                      ddr4_p1.s.rc9x, ddr4_p0.s.rc8x,
3961                      ddr4_p0.s.rc7x, ddr4_p0.s.rc6x,
3962                      ddr4_p0.s.rc5x, ddr4_p0.s.rc4x,
3963                      ddr4_p0.s.rc3x, ddr4_p0.s.rc2x, ddr4_p0.s.rc1x);
3964
3965        } else {
3966                rc = read_spd(&dimm_config_table[didx], 0, 69);
3967                dimm_p.s.rc0 = (rc >> 0) & 0xf;
3968                dimm_p.s.rc1 = (rc >> 4) & 0xf;
3969
3970                rc = read_spd(&dimm_config_table[didx], 0, 70);
3971                dimm_p.s.rc2 = (rc >> 0) & 0xf;
3972                dimm_p.s.rc3 = (rc >> 4) & 0xf;
3973
3974                rc = read_spd(&dimm_config_table[didx], 0, 71);
3975                dimm_p.s.rc4 = (rc >> 0) & 0xf;
3976                dimm_p.s.rc5 = (rc >> 4) & 0xf;
3977
3978                rc = read_spd(&dimm_config_table[didx], 0, 72);
3979                dimm_p.s.rc6 = (rc >> 0) & 0xf;
3980                dimm_p.s.rc7 = (rc >> 4) & 0xf;
3981
3982                rc = read_spd(&dimm_config_table[didx], 0, 73);
3983                dimm_p.s.rc8 = (rc >> 0) & 0xf;
3984                dimm_p.s.rc9 = (rc >> 4) & 0xf;
3985
3986                rc = read_spd(&dimm_config_table[didx], 0, 74);
3987                dimm_p.s.rc10 = (rc >> 0) & 0xf;
3988                dimm_p.s.rc11 = (rc >> 4) & 0xf;
3989
3990                rc = read_spd(&dimm_config_table[didx], 0, 75);
3991                dimm_p.s.rc12 = (rc >> 0) & 0xf;
3992                dimm_p.s.rc13 = (rc >> 4) & 0xf;
3993
3994                rc = read_spd(&dimm_config_table[didx], 0, 76);
3995                dimm_p.s.rc14 = (rc >> 0) & 0xf;
3996                dimm_p.s.rc15 = (rc >> 4) & 0xf;
3997
3998                s = ddr_getenv_debug(priv, "ddr_clk_drive");
3999                if (s) {
4000                        if (strcmp(s, "light") == 0)
4001                                dimm_p.s.rc5 = 0x0;     /* Light Drive */
4002                        if (strcmp(s, "moderate") == 0)
4003                                dimm_p.s.rc5 = 0x5;     /* Moderate Drive */
4004                        if (strcmp(s, "strong") == 0)
4005                                dimm_p.s.rc5 = 0xA;     /* Strong Drive */
4006                        printf("Parameter found in environment. ddr_clk_drive = %s\n",
4007                               s);
4008                }
4009
4010                s = ddr_getenv_debug(priv, "ddr_cmd_drive");
4011                if (s) {
4012                        if (strcmp(s, "light") == 0)
4013                                dimm_p.s.rc3 = 0x0;     /* Light Drive */
4014                        if (strcmp(s, "moderate") == 0)
4015                                dimm_p.s.rc3 = 0x5;     /* Moderate Drive */
4016                        if (strcmp(s, "strong") == 0)
4017                                dimm_p.s.rc3 = 0xA;     /* Strong Drive */
4018                        printf("Parameter found in environment. ddr_cmd_drive = %s\n",
4019                               s);
4020                }
4021
4022                s = ddr_getenv_debug(priv, "ddr_ctl_drive");
4023                if (s) {
4024                        if (strcmp(s, "light") == 0)
4025                                dimm_p.s.rc4 = 0x0;     /* Light Drive */
4026                        if (strcmp(s, "moderate") == 0)
4027                                dimm_p.s.rc4 = 0x5;     /* Moderate Drive */
4028                        printf("Parameter found in environment. ddr_ctl_drive = %s\n",
4029                               s);
4030                }
4031
4032                /*
4033                 * rc10               DDR3 RDIMM Operating Speed
4034                 * ==   =====================================================
4035                 *  0               tclk_psecs >= 2500 psec DDR3/DDR3L-800 def
4036                 *  1   2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066
4037                 *  2   1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333
4038                 *  3   1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600
4039                 *  4   1250 psec > tclk_psecs >= 1071 psec DDR3-1866
4040                 */
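                        /*
                         * Worked example: at tclk_psecs == 1250 (DDR3-1600)
                         * the chain below leaves rc10 == 3, since 1250 is
                         * below 1500 but not below 1250; at 1200 psec it
                         * would end up as 4.
                         */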
4041                dimm_p.s.rc10 = 0;
4042                if (tclk_psecs < 2500)
4043                        dimm_p.s.rc10 = 1;
4044                if (tclk_psecs < 1875)
4045                        dimm_p.s.rc10 = 2;
4046                if (tclk_psecs < 1500)
4047                        dimm_p.s.rc10 = 3;
4048                if (tclk_psecs < 1250)
4049                        dimm_p.s.rc10 = 4;
4050        }
4051
4052        s = lookup_env(priv, "ddr_dimmx_params", i);
4053        if (s)
4054                dimm_p.u64 = simple_strtoul(s, NULL, 0);
4055
4056        for (i = 0; i < 16; ++i) {
4057                u64 value;
4058
4059                s = lookup_env(priv, "ddr_rc%d", i);
4060                if (s) {
4061                        value = simple_strtoul(s, NULL, 0);
4062                        dimm_p.u64 &= ~((u64)0xf << (i * 4));
4063                        dimm_p.u64 |= (value << (i * 4));
4064                }
4065        }
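        /*
         * Illustrative example (hypothetical env setting): a variable
         * "ddr_rc5=0xa" matches i == 5 above and patches the RC5 nibble
         * of DIMMX_PARAMS:
         *
         *   dimm_p.u64 &= ~((u64)0xf << 20);
         *   dimm_p.u64 |= (0xaULL << 20);
         */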
4066
4067        lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num), dimm_p.u64);
4068
4069        debug("DIMM%d Register Control Words         RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",
4070              dimmx, dimm_p.s.rc15, dimm_p.s.rc14, dimm_p.s.rc13,
4071              dimm_p.s.rc12, dimm_p.s.rc11, dimm_p.s.rc10,
4072              dimm_p.s.rc9, dimm_p.s.rc8, dimm_p.s.rc7,
4073              dimm_p.s.rc6, dimm_p.s.rc5, dimm_p.s.rc4,
4074              dimm_p.s.rc3, dimm_p.s.rc2, dimm_p.s.rc1, dimm_p.s.rc0);
4075
4076        // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 registers,
4077        // and treat it specially
4078        if (ddr_type == DDR3_DRAM && num_ranks == 4 &&
4079            spd_rdimm_registers == 2 && dimmx == 0) {
4080                debug("DDR3: Copying DIMM0_PARAMS to DIMM1_PARAMS for pseudo-DIMM #1...\n");
4081                lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(1, if_num), dimm_p.u64);
4082        }
4083}
4084
4085static void lmc_dimm01_params(struct ddr_priv *priv)
4086{
4087        union cvmx_lmcx_dimm_ctl dimm_ctl;
4088        char *s;
4089
4090        if (spd_rdimm) {
4091                for (didx = 0; didx < (unsigned int)dimm_count; ++didx)
4092                        lmc_dimm01_params_loop(priv);
4093
4094                if (ddr_type == DDR4_DRAM) {
4095                        /* LMC0_DIMM_CTL */
4096                        dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4097                        dimm_ctl.s.dimm0_wmask = 0xdf3f;
4098                        dimm_ctl.s.dimm1_wmask =
4099                            (dimm_count > 1) ? 0xdf3f : 0x0000;
4100                        dimm_ctl.s.tcws = 0x4e0;
4101                        dimm_ctl.s.parity = c_cfg->parity;
4102
4103                        s = lookup_env(priv, "ddr_dimm0_wmask");
4104                        if (s) {
4105                                dimm_ctl.s.dimm0_wmask =
4106                                    simple_strtoul(s, NULL, 0);
4107                        }
4108
4109                        s = lookup_env(priv, "ddr_dimm1_wmask");
4110                        if (s) {
4111                                dimm_ctl.s.dimm1_wmask =
4112                                    simple_strtoul(s, NULL, 0);
4113                        }
4114
4115                        s = lookup_env(priv, "ddr_dimm_ctl_parity");
4116                        if (s)
4117                                dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
4118
4119                        s = lookup_env(priv, "ddr_dimm_ctl_tcws");
4120                        if (s)
4121                                dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
4122
4123                        debug("LMC DIMM_CTL                                  : 0x%016llx\n",
4124                              dimm_ctl.u64);
4125                        lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4126
4127                        /* Init RCW */
4128                        oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
4129
4130                        /* Write RC0D last */
4131                        dimm_ctl.s.dimm0_wmask = 0x2000;
4132                        dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ?
4133                                0x2000 : 0x0000;
4134                        debug("LMC DIMM_CTL                                  : 0x%016llx\n",
4135                              dimm_ctl.u64);
4136                        lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4137
4138                        /*
4139                         * Don't write any extended registers the second time
4140                         */
4141                        lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), 0);
4142
4143                        /* Init RCW */
4144                        oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
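                                /*
                                 * Recap of the DDR4 RDIMM flow above: the
                                 * first RCW sequence writes the control words
                                 * selected by wmask 0xdf3f (RC06, RC07 and
                                 * RC0D excluded), then RC0D alone is written
                                 * via wmask 0x2000, with the extended-register
                                 * writes disabled for that second sequence.
                                 */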
4145                } else {
4146                        /* LMC0_DIMM_CTL */
4147                        dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4148                        dimm_ctl.s.dimm0_wmask = 0xffff;
4149                        // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2
4150                        // registers, and treat it specially
4151                        if (num_ranks == 4 && spd_rdimm_registers == 2) {
4152                                debug("DDR3: Activating DIMM_CTL[dimm1_mask] bits...\n");
4153                                dimm_ctl.s.dimm1_wmask = 0xffff;
4154                        } else {
4155                                dimm_ctl.s.dimm1_wmask =
4156                                    (dimm_count > 1) ? 0xffff : 0x0000;
4157                        }
4158                        dimm_ctl.s.tcws = 0x4e0;
4159                        dimm_ctl.s.parity = c_cfg->parity;
4160
4161                        s = lookup_env(priv, "ddr_dimm0_wmask");
4162                        if (s) {
4163                                dimm_ctl.s.dimm0_wmask =
4164                                    simple_strtoul(s, NULL, 0);
4165                        }
4166
4167                        s = lookup_env(priv, "ddr_dimm1_wmask");
4168                        if (s) {
4169                                dimm_ctl.s.dimm1_wmask =
4170                                    simple_strtoul(s, NULL, 0);
4171                        }
4172
4173                        s = lookup_env(priv, "ddr_dimm_ctl_parity");
4174                        if (s)
4175                                dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
4176
4177                        s = lookup_env(priv, "ddr_dimm_ctl_tcws");
4178                        if (s)
4179                                dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
4180
4181                        debug("LMC DIMM_CTL                                  : 0x%016llx\n",
4182                              dimm_ctl.u64);
4183                        lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4184
4185                        /* Init RCW */
4186                        oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
4187                }
4188
4189        } else {
4190                /* Disable register control writes for unbuffered */
4191                union cvmx_lmcx_dimm_ctl dimm_ctl;
4192
4193                dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4194                dimm_ctl.s.dimm0_wmask = 0;
4195                dimm_ctl.s.dimm1_wmask = 0;
4196                lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4197        }
4198}
4199
4200static int lmc_rank_init(struct ddr_priv *priv)
4201{
4202        char *s;
4203
4204        if (enable_by_rank_init) {
4205                by_rank = 3;
4206                saved_rank_mask = rank_mask;
4207        }
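        /*
         * By-rank init flow (when enabled): ranks 3, 2 and 1 are trained
         * individually first, then the final pass with by_rank == 0 uses
         * the full saved rank mask; the per-rank DAC and deskew results
         * are merged afterwards via process_by_rank_dac() and
         * process_by_rank_dsk().
         */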
4208
4209start_by_rank_init:
4210
4211        if (enable_by_rank_init) {
4212                rank_mask = (1 << by_rank);
4213                if (!(rank_mask & saved_rank_mask))
4214                        goto end_by_rank_init;
4215                if (by_rank == 0)
4216                        rank_mask = saved_rank_mask;
4217
4218                debug("\n>>>>> BY_RANK: starting rank %d with mask 0x%02x\n\n",
4219                      by_rank, rank_mask);
4220        }
4221
4222        /*
4223         * Comments (steps 3 through 5) continue in oct3_ddr3_seq()
4224         */
4225        union cvmx_lmcx_modereg_params0 mp0;
4226
4227        if (ddr_memory_preserved(priv)) {
4228                /*
4229                 * Contents are being preserved. Take DRAM out of self-refresh
4230                 * first. Then init steps can proceed normally.
4231                 */
4232                /* self-refresh exit */
4233                oct3_ddr3_seq(priv, rank_mask, if_num, 3);
4234        }
4235
4236        mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
4237        mp0.s.dllr = 1;         /* Set during first init sequence */
4238        lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
4239
4240        ddr_init_seq(priv, rank_mask, if_num);
4241
4242        mp0.s.dllr = 0;         /* Clear for normal operation */
4243        lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
4244
4245        if (spd_rdimm && ddr_type == DDR4_DRAM &&
4246            octeon_is_cpuid(OCTEON_CN7XXX)) {
4247                debug("Running init sequence 1\n");
4248                change_rdimm_mpr_pattern(priv, rank_mask, if_num, dimm_count);
4249        }
4250
4251        memset(lanes, 0, sizeof(lanes));
4252        for (lane = 0; lane < last_lane; lane++) {
4253                // init all lanes to reset value
4254                dac_settings[lane] = 127;
4255        }
4256
4257        // FIXME: disable internal VREF if deskew is disabled?
4258        if (disable_deskew_training) {
4259                debug("N%d.LMC%d: internal VREF Training disabled, leaving them in RESET.\n",
4260                      node, if_num);
4261                num_samples = 0;
4262        } else if (ddr_type == DDR4_DRAM &&
4263                   !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
4264                num_samples = DEFAULT_DAC_SAMPLES;
4265        } else {
4266                // if DDR3 or no ability to write DAC values
4267                num_samples = 1;
4268        }
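        /*
         * Sampling strategy: DDR4 on parts that can reload DAC values takes
         * DEFAULT_DAC_SAMPLES training passes and averages them per byte
         * lane further below; DDR3 (or CN78XX pass 1.x) keeps the single
         * trained value.
         */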
4269
4270perform_internal_vref_training:
4271
4272        total_dac_eval_retries = 0;
4273        dac_eval_exhausted = 0;
4274
4275        for (sample = 0; sample < num_samples; sample++) {
4276                dac_eval_retries = 0;
4277
4278                // make offset and internal vref training repeatable
4279                do {
4280                        /*
4281                         * 6.9.8 LMC Offset Training
4282                         * LMC requires input-receiver offset training.
4283                         */
4284                        perform_offset_training(priv, rank_mask, if_num);
4285
4286                        /*
4287                         * 6.9.9 LMC Internal vref Training
4288                         * LMC requires input-reference-voltage training.
4289                         */
4290                        perform_internal_vref_training(priv, rank_mask, if_num);
4291
4292                        // read and maybe display the DAC values for a sample
4293                        read_dac_dbi_settings(priv, if_num, /*DAC*/ 1,
4294                                              dac_settings);
4295                        if (num_samples == 1 || ddr_verbose(priv)) {
4296                                display_dac_dbi_settings(if_num, /*DAC*/ 1,
4297                                                         use_ecc, dac_settings,
4298                                                         "Internal VREF");
4299                        }
4300
4301                        // for DDR4, evaluate the DAC settings and retry
4302                        // if any issues
4303                        if (ddr_type == DDR4_DRAM) {
4304                                if (evaluate_dac_settings
4305                                    (if_64b, use_ecc, dac_settings)) {
4306                                        dac_eval_retries += 1;
4307                                        if (dac_eval_retries >
4308                                            DAC_RETRIES_LIMIT) {
4309                                                debug("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n",
4310                                                      node, if_num);
4311                                                dac_eval_exhausted += 1;
4312                                        } else {
4313                                                debug("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n",
4314                                                      node, if_num);
4315                                                total_dac_eval_retries += 1;
4316                                                // try another sample
4317                                                continue;
4318                                        }
4319                                }
4320
4321                                // only when taking multiple samples; otherwise do nothing
4322                                if (num_samples > 1) {
4323                                        // good sample or exhausted retries,
4324                                        // record it
4325                                        for (lane = 0; lane < last_lane;
4326                                             lane++) {
4327                                                lanes[lane].bytes[sample] =
4328                                                    dac_settings[lane];
4329                                        }
4330                                }
4331                        }
4332                        // done if DDR3, or good sample, or exhausted retries
4333                        break;
4334                } while (1);
4335        }
4336
4337        if (ddr_type == DDR4_DRAM && dac_eval_exhausted > 0) {
4338                debug("N%d.LMC%d: DDR internal VREF DAC settings: total retries %d, exhausted %d\n",
4339                      node, if_num, total_dac_eval_retries, dac_eval_exhausted);
4340        }
4341
4342        if (num_samples > 1) {
4343                debug("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n",
4344                      node, if_num);
4345
4346                for (lane = 0; lane < last_lane; lane++) {
4347                        dac_settings[lane] =
4348                            process_samples_average(&lanes[lane].bytes[0],
4349                                                    num_samples, if_num, lane);
4350                }
4351                display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
4352                                         dac_settings, "Averaged VREF");
4353
4354                // finally, write the final DAC values
4355                for (lane = 0; lane < last_lane; lane++) {
4356                        load_dac_override(priv, if_num, dac_settings[lane],
4357                                          lane);
4358                }
4359        }
4360
4361        // allow override of any byte-lane internal VREF
4362        int overrode_vref_dac = 0;
4363
4364        for (lane = 0; lane < last_lane; lane++) {
4365                s = lookup_env(priv, "ddr%d_vref_dac_byte%d", if_num, lane);
4366                if (s) {
4367                        dac_settings[lane] = simple_strtoul(s, NULL, 0);
4368                        overrode_vref_dac = 1;
4369                        // finally, write the new DAC value
4370                        load_dac_override(priv, if_num, dac_settings[lane],
4371                                          lane);
4372                }
4373        }
4374        if (overrode_vref_dac) {
4375                display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
4376                                         dac_settings, "Override VREF");
4377        }
4378
4379        // as a second step, after internal VREF training, before starting
4380        // deskew training:
4381        // for DDR3 and OCTEON3 not O78 pass 1.x, override the DAC setting
4382        // to 127
4383        if (ddr_type == DDR3_DRAM && !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
4384            !disable_deskew_training) {
4385                load_dac_override(priv, if_num, 127, /* all */ 0x0A);
4386                debug("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127.\n",
4387                      node, if_num);
4388        }
4389
4390        /*
4391         * 4.8.8 LMC Deskew Training
4392         *
4393         * LMC requires input-read-data deskew training.
4394         */
4395        if (!disable_deskew_training) {
4396                deskew_training_errors =
4397                    perform_deskew_training(priv, rank_mask, if_num,
4398                                            spd_rawcard_aorb);
4399
4400                // All the Deskew lock and saturation retries (may) have
4401                // been done, but we ended up with nibble errors; so,
4402                // as a last ditch effort, try the Internal vref
4403                // Training again...
4404                if (deskew_training_errors) {
4405                        if (internal_retries <
4406                            DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) {
4407                                internal_retries++;
4408                                debug("N%d.LMC%d: Deskew training results still unsettled - retrying internal vref training (%d)\n",
4409                                      node, if_num, internal_retries);
4410                                goto perform_internal_vref_training;
4411                        } else {
4412                                if (restart_if_dsk_incomplete) {
4413                                        debug("N%d.LMC%d: INFO: Deskew training incomplete - %d retries exhausted, Restarting LMC init...\n",
4414                                              node, if_num, internal_retries);
4415                                        return -EAGAIN;
4416                                }
4417                                debug("N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n",
4418                                      node, if_num, internal_retries);
4419                        }
4420                }               /* if (deskew_training_errors) */
4421
4422                // FIXME: treat this as the final DSK print from now on,
4423                // and print if VBL_NORM or above; also, save the results
4424                // of the original training in case we want them later
4425                validate_deskew_training(priv, rank_mask, if_num,
4426                                         &deskew_training_results, 1);
4427        } else {                /* if (! disable_deskew_training) */
4428                debug("N%d.LMC%d: Deskew Training disabled, printing settings before HWL.\n",
4429                      node, if_num);
4430                validate_deskew_training(priv, rank_mask, if_num,
4431                                         &deskew_training_results, 1);
4432        }                       /* if (! disable_deskew_training) */
4433
4434        if (enable_by_rank_init) {
4435                read_dac_dbi_settings(priv, if_num, /*dac */ 1,
4436                                      &rank_dac[by_rank].bytes[0]);
4437                get_deskew_settings(priv, if_num, &rank_dsk[by_rank]);
4438                debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);
4439        }
4440
4441end_by_rank_init:
4442
4443        if (enable_by_rank_init) {
4444                //debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);
4445
4446                by_rank--;
4447                if (by_rank >= 0)
4448                        goto start_by_rank_init;
4449
4450                rank_mask = saved_rank_mask;
4451                ddr_init_seq(priv, rank_mask, if_num);
4452
4453                process_by_rank_dac(priv, if_num, rank_mask, rank_dac);
4454                process_by_rank_dsk(priv, if_num, rank_mask, rank_dsk);
4455
4456                // FIXME: set this to prevent later checking!!!
4457                disable_deskew_training = 1;
4458
4459                debug("\n>>>>> BY_RANK: FINISHED!!\n\n");
4460        }
4461
4462        return 0;
4463}
4464
4465static void lmc_config_2(struct ddr_priv *priv)
4466{
4467        union cvmx_lmcx_config lmc_config;
4468        int save_ref_zqcs_int;
4469        u64 temp_delay_usecs;
4470
4471        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4472
4473        /*
4474         * Temporarily select the minimum ZQCS interval and wait
4475         * long enough for a few ZQCS calibrations to occur.  This
4476         * should ensure that the calibration circuitry is
4477         * stabilized before read/write leveling occurs.
4478         */
4479        if (octeon_is_cpuid(OCTEON_CN7XXX)) {
4480                save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
4481                /* set smallest interval */
4482                lmc_config.cn78xx.ref_zqcs_int = 1 | (32 << 7);
4483        } else {
4484                save_ref_zqcs_int = lmc_config.cn63xx.ref_zqcs_int;
4485                /* set smallest interval */
4486                lmc_config.cn63xx.ref_zqcs_int = 1 | (32 << 7);
4487        }
4488        lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
4489        lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4490
4491        /*
4492         * Compute an appropriate delay based on the current ZQCS
4493         * interval. The delay should be long enough for the
4494         * current ZQCS delay counter to expire plus ten of the
4495         * minimum intervals to ensure that some calibrations
4496         * occur.
4497         */
4498        temp_delay_usecs = (((u64)save_ref_zqcs_int >> 7) * tclk_psecs *
4499                            100 * 512 * 128) / (10000 * 10000) + 10 *
4500                ((u64)32 * tclk_psecs * 100 * 512 * 128) / (10000 * 10000);
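        /*
         * Worked example: with tclk_psecs == 1250, one minimum ZQCS
         * interval of 32 * 512 * 128 CK is roughly 2.6 ms, so the
         * "+10 intervals" term alone waits about 26 ms on top of whatever
         * remains of the previously programmed interval.
         */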
4501
4502        debug("Waiting %lld usecs for ZQCS calibrations to start\n",
4503              temp_delay_usecs);
4504        udelay(temp_delay_usecs);
4505
4506        if (octeon_is_cpuid(OCTEON_CN7XXX)) {
4507                /* Restore computed interval */
4508                lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
4509        } else {
4510                /* Restore computed interval */
4511                lmc_config.cn63xx.ref_zqcs_int = save_ref_zqcs_int;
4512        }
4513
4514        lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
4515        lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4516}
4517
4518static union cvmx_lmcx_wlevel_ctl wl_ctl __section(".data");
4519static union cvmx_lmcx_wlevel_rankx wl_rank __section(".data");
4520static union cvmx_lmcx_modereg_params1 mp1 __section(".data");
4521
4522static int wl_mask[9] __section(".data");
4523static int byte_idx __section(".data");
4524static int ecc_ena __section(".data");
4525static int wl_roundup __section(".data");
4526static int save_mode32b __section(".data");
4527static int disable_hwl_validity __section(".data");
4528static int default_wl_rtt_nom __section(".data");
4529static int wl_pbm_pump __section(".data");
4530
4531static void lmc_write_leveling_loop(struct ddr_priv *priv, int rankx)
4532{
4533        int wloop = 0;
4534        // retries per sample for HW-related issues with bitmasks or values
4535        int wloop_retries = 0;
4536        int wloop_retries_total = 0;
4537        int wloop_retries_exhausted = 0;
4538#define WLOOP_RETRIES_DEFAULT 5
4539        int wl_val_err;
4540        int wl_mask_err_rank = 0;
4541        int wl_val_err_rank = 0;
4542        // array to collect counts of byte-lane values
4543        // assume low-order 3 bits and even, so really only 2-bit values
4544        struct wlevel_bitcnt wl_bytes[9], wl_bytes_extra[9];
4545        int extra_bumps, extra_mask;
4546        int rank_nom = 0;
4547
4548        if (!(rank_mask & (1 << rankx)))
4549                return;
4550
4551        if (match_wl_rtt_nom) {
4552                if (rankx == 0)
4553                        rank_nom = mp1.s.rtt_nom_00;
4554                if (rankx == 1)
4555                        rank_nom = mp1.s.rtt_nom_01;
4556                if (rankx == 2)
4557                        rank_nom = mp1.s.rtt_nom_10;
4558                if (rankx == 3)
4559                        rank_nom = mp1.s.rtt_nom_11;
4560
4561                debug("N%d.LMC%d.R%d: Setting WLEVEL_CTL[rtt_nom] to %d (%d)\n",
4562                      node, if_num, rankx, rank_nom,
4563                      imp_val->rtt_nom_ohms[rank_nom]);
4564        }
4565
4566        memset(wl_bytes, 0, sizeof(wl_bytes));
4567        memset(wl_bytes_extra, 0, sizeof(wl_bytes_extra));
4568
4569        // restructure the looping so we can keep trying until we get the
4570        // samples we want
4571        while (wloop < wl_loops) {
4572                wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));
4573
4574                wl_ctl.cn78xx.rtt_nom =
4575                    (default_wl_rtt_nom > 0) ? (default_wl_rtt_nom - 1) : 7;
4576
4577                if (match_wl_rtt_nom) {
4578                        wl_ctl.cn78xx.rtt_nom =
4579                            (rank_nom > 0) ? (rank_nom - 1) : 7;
4580                }
4581
4582                /* Clear write-level delays */
4583                lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), 0);
4584
4585                wl_mask_err = 0;        /* Reset error counters */
4586                wl_val_err = 0;
4587
4588                for (byte_idx = 0; byte_idx < 9; ++byte_idx)
4589                        wl_mask[byte_idx] = 0;  /* Reset bitmasks */
4590
4591                // do all the byte-lanes at the same time
4592                wl_ctl.cn78xx.lanemask = 0x1ff;
4593
4594                lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);
4595
4596                /*
4597                 * Read and write values back in order to update the
4598                 * status field. This ensures that we read the updated
4599                 * values after write-leveling has completed.
4600                 */
4601                lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
4602                       lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)));
4603
4604                /* write-leveling */
4605                oct3_ddr3_seq(priv, 1 << rankx, if_num, 6);
4606
4607                do {
4608                        wl_rank.u64 = lmc_rd(priv,
4609                                             CVMX_LMCX_WLEVEL_RANKX(rankx,
4610                                                                    if_num));
4611                } while (wl_rank.cn78xx.status != 3);
4612
4613                wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
4614                                                                  if_num));
4615
4616                for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
4617                        wl_mask[byte_idx] = lmc_ddr3_wl_dbg_read(priv,
4618                                                                 if_num,
4619                                                                 byte_idx);
4620                        if (wl_mask[byte_idx] == 0)
4621                                ++wl_mask_err;
4622                }
4623
4624                // check validity only if no bitmask errors
4625                if (wl_mask_err == 0) {
4626                        if ((spd_dimm_type == 1 || spd_dimm_type == 2) &&
4627                            dram_width != 16 && if_64b &&
4628                            !disable_hwl_validity) {
4629                                // bypass if [mini|SO]-[RU]DIMM or x16 or
4630                                // 32-bit
4631                                wl_val_err =
4632                                    validate_hw_wl_settings(if_num,
4633                                                            &wl_rank,
4634                                                            spd_rdimm, ecc_ena);
4635                                wl_val_err_rank += (wl_val_err != 0);
4636                        }
4637                } else {
4638                        wl_mask_err_rank++;
4639                }
4640
4641                // before we print, if we had bitmask or validity errors,
4642                // do a retry...
4643                if (wl_mask_err != 0 || wl_val_err != 0) {
4644                        if (wloop_retries < WLOOP_RETRIES_DEFAULT) {
4645                                wloop_retries++;
4646                                wloop_retries_total++;
4647                                // this printout is per-retry: only when VBL
4648                                // is high enough (DEV?)
4649                                // FIXME: do we want to show the bad bitmaps
4650                                // or delays here also?
4651                                debug("N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n",
4652                                      node, if_num, rankx,
4653                                      (wl_mask_err) ? "Bitmask" : "Validity");
4654                                // this takes us back to the top without
4655                                // counting a sample
4656                                continue;
4657                        }
4658
4659                        // retries exhausted, do not print at normal VBL
4660                        debug("N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n",
4661                              node, if_num, rankx,
4662                              (wl_mask_err) ? "Bitmask" : "Validity");
4663                        wloop_retries_exhausted++;
4664                }
4665                // no errors or exhausted retries, use this sample
4666                wloop_retries = 0;      //reset for next sample
4667
4668                // when only 1 sample or forced, print the bitmasks then
4669                // current HW WL
4670                if (wl_loops == 1 || wl_print) {
4671                        if (wl_print > 1)
4672                                display_wl_bm(if_num, rankx, wl_mask);
4673                        display_wl(if_num, wl_rank, rankx);
4674                }
4675
4676                if (wl_roundup) {       /* Round up odd bitmask delays */
4677                        for (byte_idx = 0; byte_idx < (8 + ecc_ena);
4678                             ++byte_idx) {
4679                                if (!(if_bytemask & (1 << byte_idx)))
4680                                        continue;
4681                                upd_wl_rank(&wl_rank, byte_idx,
4682                                            roundup_ddr3_wlevel_bitmask
4683                                            (wl_mask[byte_idx]));
4684                        }
4685                        lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
4686                               wl_rank.u64);
4687                        display_wl(if_num, wl_rank, rankx);
4688                }
4689
4690                // OK, we have a decent sample, no bitmask or validity errors
4691                extra_bumps = 0;
4692                extra_mask = 0;
4693                for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
4694                        int ix;
4695
4696                        if (!(if_bytemask & (1 << byte_idx)))
4697                                continue;
4698
4699                        // increment count of byte-lane value
4700                        // only 4 values
4701                        ix = (get_wl_rank(&wl_rank, byte_idx) >> 1) & 3;
4702                        wl_bytes[byte_idx].bitcnt[ix]++;
4703                        wl_bytes_extra[byte_idx].bitcnt[ix]++;
4704                        // if perfect...
4705                        if (__builtin_popcount(wl_mask[byte_idx]) == 4) {
4706                                wl_bytes_extra[byte_idx].bitcnt[ix] +=
4707                                    wl_pbm_pump;
4708                                extra_bumps++;
4709                                extra_mask |= 1 << byte_idx;
4710                        }
4711                }
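                /*
                 * Scoring note: a byte lane with a "perfect" bitmask
                 * (popcount == 4) gets wl_pbm_pump extra votes in the
                 * wl_bytes_extra tally, e.g. with wl_pbm_pump == 4 one
                 * perfect sample counts as 5 votes and can outweigh a
                 * majority of ordinary samples.
                 */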
4712
4713                if (extra_bumps) {
4714                        if (wl_print > 1) {
4715                                debug("N%d.LMC%d.R%d: HWL sample had %d bumps (0x%02x).\n",
4716                                      node, if_num, rankx, extra_bumps,
4717                                      extra_mask);
4718                        }
4719                }
4720
4721                // if we get here, we have taken a decent sample
4722                wloop++;
4723
4724        }                       /* while (wloop < wl_loops) */
4725
4726        // if we did sample more than once, try to pick a majority vote
4727        if (wl_loops > 1) {
4728                // look for the majority in each byte-lane
4729                for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
4730                        int mx, mc, xc, cc;
4731                        int ix, alts;
4732                        int maj, xmaj, xmx, xmc, xxc, xcc;
4733
4734                        if (!(if_bytemask & (1 << byte_idx)))
4735                                continue;
4736                        maj = find_wl_majority(&wl_bytes[byte_idx], &mx,
4737                                               &mc, &xc, &cc);
4738                        xmaj = find_wl_majority(&wl_bytes_extra[byte_idx],
4739                                                &xmx, &xmc, &xxc, &xcc);
4740                        if (maj != xmaj) {
4741                                if (wl_print) {
4742                                        debug("N%d.LMC%d.R%d: Byte %d: HWL maj %d(%d), USING xmaj %d(%d)\n",
4743                                              node, if_num, rankx,
4744                                              byte_idx, maj, xc, xmaj, xxc);
4745                                }
4746                                mx = xmx;
4747                                mc = xmc;
4748                                xc = xxc;
4749                                cc = xcc;
4750                        }
4751
4752                        // see if there was an alternate
4753                        // take out the majority choice
4754                        alts = (mc & ~(1 << mx));
4755                        if (alts != 0) {
4756                                for (ix = 0; ix < 4; ix++) {
4757                                        // FIXME: could be done multiple times?
4758                                        // bad if so
4759                                        if (alts & (1 << ix)) {
4760                                                // set the mask
4761                                                hwl_alts[rankx].hwl_alt_mask |=
4762                                                        (1 << byte_idx);
4763                                                // record the value
4764                                                hwl_alts[rankx].hwl_alt_delay[byte_idx] =
4765                                                        ix << 1;
4766                                                if (wl_print > 1) {
4767                                                        debug("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n",
4768                                                              node,
4769                                                              if_num,
4770                                                              rankx,
4771                                                              byte_idx,
4772                                                              mx << 1,
4773                                                              xc,
4774                                                              ix << 1,
4775                                                              wl_bytes
4776                                                              [byte_idx].bitcnt
4777                                                              [ix]);
4778                                                }
4779                                        }
4780                                }
4781                        }
4782
4783                        if (cc > 2) {   // unlikely, but...
4784                                // assume: counts for 3 indices are all 1
4785                                // possibilities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6
4786                                // and the desired?:   2  ,   4  ,     6, 0
4787                                // we choose the middle, assuming one of the
4788                                // outliers is bad
4789                                // NOTE: this is an ugly hack at the moment;
4790                                // there must be a better way
4791                                switch (mc) {
4792                                case 0x7:
4793                                        mx = 1;
4794                                        break;  // was 0/2/4, choose 2
4795                                case 0xb:
4796                                        mx = 0;
4797                                        break;  // was 0/2/6, choose 0
4798                                case 0xd:
4799                                        mx = 3;
4800                                        break;  // was 0/4/6, choose 6
4801                                case 0xe:
4802                                        mx = 2;
4803                                        break;  // was 2/4/6, choose 4
4804                                default:
4805                                case 0xf:
4806                                        mx = 1;
4807                                        break;  // was 0/2/4/6, choose 2?
4808                                }
4809                                printf("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n",
4810                                       node, if_num, rankx, byte_idx, mc,
4811                                       mx << 1);
4812                        }
4813                        upd_wl_rank(&wl_rank, byte_idx, mx << 1);
4814                }
4815
4816                lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
4817                       wl_rank.u64);
4818                display_wl_with_final(if_num, wl_rank, rankx);
4819
4820                // FIXME: does this help make the output a little easier
4821                // to focus?
4822                if (wl_print > 0)
4823                        debug("-----------\n");
4824
4825        }                       /* if (wl_loops > 1) */
4826
4827        // maybe print an error summary for the rank
4828        if (wl_mask_err_rank != 0 || wl_val_err_rank != 0) {
4829                debug("N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n",
4830                      node, if_num, rankx, wl_mask_err_rank,
4831                      wl_val_err_rank, wloop_retries_total,
4832                      wloop_retries_exhausted);
4833        }
4834}
4835
4836static void lmc_write_leveling(struct ddr_priv *priv)
4837{
4838        union cvmx_lmcx_config cfg;
4839        int rankx;
4840        char *s;
4841
4842        /*
4843         * 4.8.9 LMC Write Leveling
4844         *
4845         * LMC supports an automatic write leveling like that described in the
4846         * JEDEC DDR3 specifications separately per byte-lane.
4847         *
4848         * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations
4849         * must be completed prior to starting this LMC write-leveling sequence.
4850         *
4851         * There are many possible procedures that will write-level all the
4852         * attached DDR3 DRAM parts. One possibility is for software to simply
4853         * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section
4854         * describes one possible sequence that uses LMC's autowrite-leveling
4855         * capabilities.
4856         *
4857         * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD
4858         *    delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this
4859         *    point.
4860         *
4861         * Do the remaining steps 2-7 separately for each rank i with attached
4862         * DRAM.
4863         *
4864         * 2. Write LMC(0)_WLEVEL_RANKi = 0.
4865         *
4866         * 3. For x8 parts:
4867         *
4868         *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4869         *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached
4870         *    DRAM.
4871         *
4872         *    For x16 parts:
4873         *
4874         *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4875         *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with
4876         *    attached DRAM.
4877         *
4878         * 4. Without changing any other fields in LMC(0)_CONFIG,
4879         *
4880         *    o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling
4881         *
4882         *    o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
4883         *
4884         *    o write LMC(0)_SEQ_CTL[INIT_START] = 1
4885         *
4886         *    LMC will initiate write-leveling at this point. Assuming
4887         *    LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on
4888         *    the selected DRAM rank via a DDR3 MR1 write, then sequences
4889         *    through
4890         *    and accumulates write-leveling results for eight different delay
4891         *    settings twice, starting at a delay of zero in this case since
4892         *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each
4893         *    setting, covering a total distance of one CK, then disables the
4894         *    write-leveling via another DDR3 MR1 write.
4895         *
4896         *    After the sequence through 16 delay settings is complete:
4897         *
4898         *    o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3
4899         *
4900         *    o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected
4901         *      by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write
4902         *      leveling result of 1 that followed result of 0 during the
4903         *      sequence, except that the LMC always writes
4904         *      LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0.
4905         *
4906         *    o Software can read the eight write-leveling results from the
4907         *      first pass through the delay settings by reading
4908         *      LMC(0)_WLEVEL_DBG[BITMASK] (after writing
4909         *      LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the writeleveling
4910         *      results from the second pass through the eight delay
4911         *      settings. They should often be identical to the
4912         *      LMC(0)_WLEVEL_DBG[BITMASK] results, though.)
4913         *
4914         * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2.
4915         *
4916         *    LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte
4917         *    lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point.
4918         *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that
4919         *    software wrote in substep 2 above, which is 0.
4920         *
4921         * 6. For x16 parts:
4922         *
4923         *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4924         *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with
4925         *    attached DRAM.
4926         *
4927         *    Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK]
4928         *    setting. Skip to substep 7 if this has already been done.
4929         *
4930         *    For x8 parts:
4931         *
4932         *    Skip this substep. Go to substep 7.
4933         *
4934         * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte
4935         *    lanes on all ranks with attached DRAM.
4936         *
4937         *    At this point, all byte lanes on rank i with attached DRAM should
4938         *    have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has
4939         *    the result for each byte lane.
4940         *
4941         *    But note that the DDR3 write-leveling sequence will only determine
4942         *    the delay modulo the CK cycle time, and cannot determine how many
4943         *    additional CK cycles of delay are present. Software must calculate
4944         *    the number of CK cycles, or equivalently, the
4945         *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings.
4946         *
4947         *    This BYTE*<4:3> calculation is system/board specific.
4948         *
4949         * Many techniques can be used to calculate write-leveling BYTE*<4:3>
4950         * values, including:
4951         *
4952         *    o Known values for some byte lanes.
4953         *
4954         *    o Relative values for some byte lanes relative to others.
4955         *
4956         *    For example, suppose lane X is likely to require a larger
4957         *    write-leveling delay than lane Y. A BYTEX<2:0> value that is much
4958         *    smaller than the BYTEY<2:0> value may then indicate that the
4959         *    required lane X delay wrapped into the next CK, so BYTEX<4:3>
4960         *    should be set to BYTEY<4:3>+1.
4961         *
4962         *    When ECC DRAM is not present (i.e. when DRAM is not attached to
4963         *    the DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the
4964         *    DDR_DQS_<4>_* and DDR_DQ<35:32> chip signals), write
4965         *    LMC(0)_WLEVEL_RANK*[BYTE8] = LMC(0)_WLEVEL_RANK*[BYTE0],
4966         *    using the final calculated BYTE0 value.
4967         *    Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0],
4968         *    using the final calculated BYTE0 value.
4969         *
4970         * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks.
4971         *
4972         *    Let rank i be a rank with attached DRAM.
4973         *
4974         *    For all ranks j that do not have attached DRAM, set
4975         *    LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi.
4976         */
4977
4978        rankx = 0;
4979        wl_roundup = 0;
4980        disable_hwl_validity = 0;
4981
4982        // wl_pbm_pump: weight for write-leveling PBMs...
4983        // 0 causes original behavior
4984        // 1 allows a minority of 2 pbms to outscore a majority of 3 non-pbms
4985        // 4 would allow a minority of 1 pbm to outscore a majority of 4
4986        // non-pbms
4987        wl_pbm_pump = 4;        // FIXME: is 4 too much?
4988
4989        if (wl_loops) {
4990                debug("N%d.LMC%d: Performing Hardware Write-Leveling\n", node,
4991                      if_num);
4992        } else {
4993                /* Force software write-leveling to run */
4994                wl_mask_err = 1;
4995                debug("N%d.LMC%d: Forcing software Write-Leveling\n", node,
4996                      if_num);
4997        }
4998
4999        default_wl_rtt_nom = (ddr_type == DDR3_DRAM) ?
5000                rttnom_20ohm : ddr4_rttnom_40ohm;
5001
5002        cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5003        ecc_ena = cfg.s.ecc_ena;
5004        save_mode32b = cfg.cn78xx.mode32b;
5005        cfg.cn78xx.mode32b = (!if_64b);
5006        lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5007        debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
5008
5009        s = lookup_env(priv, "ddr_wlevel_roundup");
5010        if (s)
5011                wl_roundup = simple_strtoul(s, NULL, 0);
5012
5013        s = lookup_env(priv, "ddr_wlevel_printall");
5014        if (s)
5015                wl_print = strtoul(s, NULL, 0);
5016
5017        s = lookup_env(priv, "ddr_wlevel_pbm_bump");
5018        if (s)
5019                wl_pbm_pump = strtoul(s, NULL, 0);
5020
5021        // default to disable when RL sequential delay check is disabled
5022        disable_hwl_validity = disable_sequential_delay_check;
5023        s = lookup_env(priv, "ddr_disable_hwl_validity");
5024        if (s)
5025                disable_hwl_validity = !!strtoul(s, NULL, 0);
5026
5027        s = lookup_env(priv, "ddr_wl_rtt_nom");
5028        if (s)
5029                default_wl_rtt_nom = simple_strtoul(s, NULL, 0);
5030
5031        s = lookup_env(priv, "ddr_match_wl_rtt_nom");
5032        if (s)
5033                match_wl_rtt_nom = !!simple_strtoul(s, NULL, 0);
5034
5035        if (match_wl_rtt_nom)
5036                mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
5037
5038        // For DDR3, we do not touch WLEVEL_CTL fields OR_DIS or BITMASK
5039        // For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here
5040        if (ddr_type == DDR4_DRAM) {
5041                int default_or_dis = 1;
5042                int default_bitmask = 0xff;
5043
5044                // when x4, use only the lower nibble
5045                if (dram_width == 4) {
5046                        default_bitmask = 0x0f;
5047                        if (wl_print) {
5048                                debug("N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%02x for DDR4 x4\n",
5049                                      node, if_num, default_bitmask);
5050                        }
5051                }
5052
5053                wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));
5054                wl_ctl.s.or_dis = default_or_dis;
5055                wl_ctl.s.bitmask = default_bitmask;
5056
5057                // allow overrides
5058                s = lookup_env(priv, "ddr_wlevel_ctl_or_dis");
5059                if (s)
5060                        wl_ctl.s.or_dis = !!strtoul(s, NULL, 0);
5061
5062                s = lookup_env(priv, "ddr_wlevel_ctl_bitmask");
5063                if (s)
5064                        wl_ctl.s.bitmask = simple_strtoul(s, NULL, 0);
5065
5066                // print only if not defaults
5067                if (wl_ctl.s.or_dis != default_or_dis ||
5068                    wl_ctl.s.bitmask != default_bitmask) {
5069                        debug("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
5070                              node, if_num, wl_ctl.s.or_dis, wl_ctl.s.bitmask);
5071                }
5072
5073                // always write
5074                lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);
5075        }
5076
5077        // Start the hardware write-leveling loop per rank
5078        for (rankx = 0; rankx < dimm_count * 4; rankx++)
5079                lmc_write_leveling_loop(priv, rankx);
5080
5081        cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5082        cfg.cn78xx.mode32b = save_mode32b;
5083        lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5084        debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
5085
5086        // At the end of HW Write Leveling, check on some DESKEW things...
5087        if (!disable_deskew_training) {
5088                struct deskew_counts dsk_counts;
5089                int retry_count = 0;
5090
5091                debug("N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n",
5092                      node, if_num);
5093
5094                do {
5095                        validate_deskew_training(priv, rank_mask, if_num,
5096                                                 &dsk_counts, 1);
5097
5098                        // only RAWCARD A or B will not benefit from
5099                        // retraining when there is only saturation;
5100                        // any rawcard benefits when there is a nibble error
5101                        if ((!spd_rawcard_aorb && dsk_counts.saturated > 0) ||
5102                            (dsk_counts.nibrng_errs != 0 ||
5103                             dsk_counts.nibunl_errs != 0)) {
5104                                retry_count++;
5105                                debug("N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n",
5106                                      node, if_num, retry_count);
5107                                perform_deskew_training(priv, rank_mask, if_num,
5108                                                        spd_rawcard_aorb);
5109                        } else {
5110                                break;
5111                        }
5112                } while (retry_count < 5);
5113        }
5114}
5115
5116static void lmc_workaround(struct ddr_priv *priv)
5117{
5118        /* Workaround Trcd overflow by using Additive latency. */
5119        if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
5120                union cvmx_lmcx_modereg_params0 mp0;
5121                union cvmx_lmcx_timing_params1 tp1;
5122                union cvmx_lmcx_control ctrl;
5123                int rankx;
5124
5125                tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
5126                mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
5127                ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
5128
5129                if (tp1.cn78xx.trcd == 0) {
5130                        debug("Workaround Trcd overflow by using Additive latency.\n");
5131                        /* Hard code this to 12 and enable additive latency */
5132                        tp1.cn78xx.trcd = 12;
5133                        mp0.s.al = 2;   /* CL-2 */
5134                        ctrl.s.pocas = 1;
5135
5136                        debug("MODEREG_PARAMS0                               : 0x%016llx\n",
5137                              mp0.u64);
5138                        lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
5139                               mp0.u64);
5140                        debug("TIMING_PARAMS1                                : 0x%016llx\n",
5141                              tp1.u64);
5142                        lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
5143
5144                        debug("LMC_CONTROL                                   : 0x%016llx\n",
5145                              ctrl.u64);
5146                        lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
5147
5148                        for (rankx = 0; rankx < dimm_count * 4; rankx++) {
5149                                if (!(rank_mask & (1 << rankx)))
5150                                        continue;
5151
5152                                /* MR1 */
5153                                ddr4_mrw(priv, if_num, rankx, -1, 1, 0);
5154                        }
5155                }
5156        }
5157
5158        // this is here just for output, to allow check of the Deskew
5159        // settings one last time...
5160        if (!disable_deskew_training) {
5161                struct deskew_counts dsk_counts;
5162
5163                debug("N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n",
5164                      node, if_num);
5165                validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
5166                                         3);
5167        }
5168
5169        /*
5170         * Workaround Errata 26304 (T88@2.0, O75@1.x, O78@2.x)
5171         *
5172         * When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND
5173         * LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set
5174         * LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1.
5175         */
5176        if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
5177            octeon_is_cpuid(OCTEON_CNF75XX_PASS1_X)) {
5178                union cvmx_lmcx_dll_ctl3 dll_ctl3;
5179                union cvmx_lmcx_phy_ctl2 phy_ctl2;
5180                union cvmx_lmcx_ext_config ext_cfg;
5181                int increased_dsk_adj = 0;
5182                int byte;
5183
5184                phy_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL2(if_num));
5185                ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
5186                dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
5187
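                    // PHY_CTL2 packs one 3-bit DQS_DSK_ADJ field per byte
                    // lane; note if any enabled lane is adjusted by more
                    // than 4, the trigger condition for this erratum.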
5188                for (byte = 0; byte < 8; ++byte) {
5189                        if (!(if_bytemask & (1 << byte)))
5190                                continue;
5191                        increased_dsk_adj |=
5192                            (((phy_ctl2.u64 >> (byte * 3)) & 0x7) > 4);
5193                }
5194
5195                if (dll_ctl3.s.wr_deskew_ena == 1 && increased_dsk_adj) {
5196                        ext_cfg.s.drive_ena_bprch = 1;
5197                        lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
5198                        debug("LMC%d: Forcing DRIVE_ENA_BPRCH for Workaround Errata 26304.\n",
5199                              if_num);
5200                }
5201        }
5202}
5203
5204// Software Write-Leveling block
5205
5206#define VREF_RANGE1_LIMIT 0x33  // range1 is valid for 0x00 - 0x32
5207#define VREF_RANGE2_LIMIT 0x18  // range2 is valid for 0x00 - 0x17
5208// full window is valid for 0x00 to 0x4A
5209// let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1
5210#define VREF_LIMIT        (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT)
5211#define VREF_FINAL        (VREF_LIMIT - 1)
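    /*
     * The software Vref sweep below uses a single linear index,
     * 0..VREF_FINAL: indices 0x00-0x17 map to DDR4 VrefDQ Range 2
     * (range field 1) with value = index; indices 0x18 and up map to
     * Range 1 (range field 0) with value = index - 0x18, e.g. index
     * 0x20 -> Range 1, value 0x08.  The sweep runs one extra iteration
     * at VREF_FINAL to program the chosen final setting.
     */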
5212
5213enum sw_wl_status {
5214        WL_ESTIMATED = 0, /* HW/SW wleveling failed. Result estimated */
5215        WL_HARDWARE = 1,        /* H/W wleveling succeeded */
5216        WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous settings */
5217        WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal setting */
5218};
5219
5220static u64 rank_addr __section(".data");
5221static int vref_val __section(".data");
5222static int final_vref_val __section(".data");
5223static int final_vref_range __section(".data");
5224static int start_vref_val __section(".data");
5225static int computed_final_vref_val __section(".data");
5226static char best_vref_val_count __section(".data");
5227static char vref_val_count __section(".data");
5228static char best_vref_val_start __section(".data");
5229static char vref_val_start __section(".data");
5230static int bytes_failed __section(".data");
5231static enum sw_wl_status byte_test_status[9] __section(".data");
5232static enum sw_wl_status sw_wl_rank_status __section(".data");
5233static int sw_wl_failed __section(".data");
5234static int sw_wl_hw __section(".data");
5235static int measured_vref_flag __section(".data");
5236
5237static void ddr4_vref_loop(struct ddr_priv *priv, int rankx)
5238{
5239        char *s;
5240
5241        if (vref_val < VREF_FINAL) {
5242                int vrange, vvalue;
5243
5244                if (vref_val < VREF_RANGE2_LIMIT) {
5245                        vrange = 1;
5246                        vvalue = vref_val;
5247                } else {
5248                        vrange = 0;
5249                        vvalue = vref_val - VREF_RANGE2_LIMIT;
5250                }
5251
5252                set_vref(priv, if_num, rankx, vrange, vvalue);
5253        } else {                /* if (vref_val < VREF_FINAL) */
5254                /* Print the final vref value first. */
5255
5256                /* Always print the computed value first if it's valid */
5257                if (computed_final_vref_val >= 0) {
5258                        debug("N%d.LMC%d.R%d: vref Computed Summary                 :              %2d (0x%02x)\n",
5259                              node, if_num, rankx,
5260                              computed_final_vref_val, computed_final_vref_val);
5261                }
5262
5263                if (!measured_vref_flag) {      // setup to use the computed
5264                        best_vref_val_count = 1;
5265                        final_vref_val = computed_final_vref_val;
5266                } else {        // setup to use the measured
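                            // Using measured results: pick the midpoint of the
                            // widest passing window (forced to span at least
                            // two settings), then convert that linear index
                            // back into a DDR4 range/value pair.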
5267                        if (best_vref_val_count > 0) {
5268                                best_vref_val_count =
5269                                    max(best_vref_val_count, (char)2);
5270                                final_vref_val = best_vref_val_start +
5271                                        divide_nint(best_vref_val_count - 1, 2);
5272
5273                                if (final_vref_val < VREF_RANGE2_LIMIT) {
5274                                        final_vref_range = 1;
5275                                } else {
5276                                        final_vref_range = 0;
5277                                        final_vref_val -= VREF_RANGE2_LIMIT;
5278                                }
5279
5280                                int vvlo = best_vref_val_start;
5281                                int vrlo;
5282                                int vvhi = best_vref_val_start +
5283                                        best_vref_val_count - 1;
5284                                int vrhi;
5285
5286                                if (vvlo < VREF_RANGE2_LIMIT) {
5287                                        vrlo = 2;
5288                                } else {
5289                                        vrlo = 1;
5290                                        vvlo -= VREF_RANGE2_LIMIT;
5291                                }
5292
5293                                if (vvhi < VREF_RANGE2_LIMIT) {
5294                                        vrhi = 2;
5295                                } else {
5296                                        vrhi = 1;
5297                                        vvhi -= VREF_RANGE2_LIMIT;
5298                                }
5299                                debug("N%d.LMC%d.R%d: vref Training Summary                 :  0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n",
5300                                      node, if_num, rankx, vvlo, vrlo,
5301                                      final_vref_val,
5302                                      final_vref_range + 1, vvhi, vrhi,
5303                                      best_vref_val_count - 1);
5304
5305                        } else {
5306                                /*
5307                                 * If nothing passed use the default vref
5308                                 * value for this rank
5309                                 */
5310                                union cvmx_lmcx_modereg_params2 mp2;
5311
5312                                mp2.u64 =
5313                                        lmc_rd(priv,
5314                                               CVMX_LMCX_MODEREG_PARAMS2(if_num));
5315                                final_vref_val = (mp2.u64 >>
5316                                                  (rankx * 10 + 3)) & 0x3f;
5317                                final_vref_range = (mp2.u64 >>
5318                                                    (rankx * 10 + 9)) & 0x01;
5319
5320                                debug("N%d.LMC%d.R%d: vref Using Default                    :    %2d <----- %2d (0x%02x) -----> %2d, range%1d\n",
5321                                      node, if_num, rankx, final_vref_val,
5322                                      final_vref_val, final_vref_val,
5323                                      final_vref_val, final_vref_range + 1);
5324                        }
5325                }
5326
5327                // allow override
5328                s = lookup_env(priv, "ddr%d_vref_val_%1d%1d",
5329                               if_num, !!(rankx & 2), !!(rankx & 1));
5330                if (s)
5331                        final_vref_val = strtoul(s, NULL, 0);
5332
5333                set_vref(priv, if_num, rankx, final_vref_range, final_vref_val);
5334        }
5335}
5336
5337#define WL_MIN_NO_ERRORS_COUNT 3        // FIXME? three passes without errors
5338
5339static int errors __section(".data");
5340static int byte_delay[9] __section(".data");
5341static u64 bytemask __section(".data");
5342static int bytes_todo __section(".data");
5343static int no_errors_count __section(".data");
5344static u64 bad_bits[2] __section(".data");
5345static u64 sum_dram_dclk __section(".data");
5346static u64 sum_dram_ops __section(".data");
5347static u64 start_dram_dclk __section(".data");
5348static u64 stop_dram_dclk __section(".data");
5349static u64 start_dram_ops __section(".data");
5350static u64 stop_dram_ops __section(".data");
5351
5352static void lmc_sw_write_leveling_loop(struct ddr_priv *priv, int rankx)
5353{
5354        int delay;
5355        int b;
5356
5357        // write the current set of WL delays
5358        lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), wl_rank.u64);
5359        wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));
5360
5361        // do the test
5362        if (sw_wl_hw) {
5363                errors = run_best_hw_patterns(priv, if_num, rank_addr,
5364                                              DBTRAIN_TEST, bad_bits);
5365                errors &= bytes_todo;   // keep only the ones we are still doing
5366        } else {
5367                start_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
5368                start_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
5369                errors = test_dram_byte64(priv, if_num, rank_addr, bytemask,
5370                                          bad_bits);
5371
5372                stop_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
5373                stop_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
5374                sum_dram_dclk += stop_dram_dclk - start_dram_dclk;
5375                sum_dram_ops += stop_dram_ops - start_dram_ops;
5376        }
5377
5378        debug("WL pass1: test_dram_byte returned 0x%x\n", errors);
5379
5380        // remember, errors will not be returned for byte-lanes that have
5381        // maxed out...
5382        if (errors == 0) {
5383                no_errors_count++;      // bump
5384                // bypass check/update completely
5385                if (no_errors_count > 1)
5386                        return; // to end of do-while
5387        } else {
5388                no_errors_count = 0;    // reset
5389        }
5390
5391        // check errors by byte
5392        for (b = 0; b < 9; ++b) {
5393                if (!(bytes_todo & (1 << b)))
5394                        continue;
5395
5396                delay = byte_delay[b];
5397                // yes, an error in this byte lane
5398                if (errors & (1 << b)) {
5399                        debug("        byte %d delay %2d Errors\n", b, delay);
5400                        // since this byte had an error, we move to the next
5401                        // delay value, unless done with it
5402                        delay += 8;     // incr by 8 to do delay high-order bits
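                            // (+8 keeps the hardware-trained low-order phase
                            //  bits and tries the next full CK cycle)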
5403                        if (delay < 32) {
5404                                upd_wl_rank(&wl_rank, b, delay);
5405                                debug("        byte %d delay %2d New\n",
5406                                      b, delay);
5407                                byte_delay[b] = delay;
5408                        } else {
5409                                // reached max delay, maybe really done with
5410                                // this byte
5411                                // consider an alternate only for computed VREF and...
5412                                if (!measured_vref_flag &&
5413                                    (hwl_alts[rankx].hwl_alt_mask & (1 << b))) {
5414                                        // if an alt exists...
5415                                        // just orig low-3 bits
5416                                        int bad_delay = delay & 0x6;
5417
5418                                        // yes, use it
5419                                        delay = hwl_alts[rankx].hwl_alt_delay[b];
5420                                        // clear that flag
5421                                        hwl_alts[rankx].hwl_alt_mask &=
5422                                                ~(1 << b);
5423                                        upd_wl_rank(&wl_rank, b, delay);
5424                                        byte_delay[b] = delay;
5425                                        debug("        byte %d delay %2d ALTERNATE\n",
5426                                              b, delay);
5427                                        debug("N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n",
5428                                              node, if_num,
5429                                              rankx, b, bad_delay, delay);
5430
5431                                } else {
5432                                        unsigned int bits_bad;
5433
5434                                        if (b < 8) {
5435                                                // test no longer, remove from
5436                                                // byte mask
5437                                                bytemask &=
5438                                                        ~(0xffULL << (8 * b));
5439                                                bits_bad = (unsigned int)
5440                                                        ((bad_bits[0] >>
5441                                                          (8 * b)) & 0xffUL);
5442                                        } else {
5443                                                bits_bad = (unsigned int)
5444                                                    (bad_bits[1] & 0xffUL);
5445                                        }
5446
5447                                        // remove from bytes to do
5448                                        bytes_todo &= ~(1 << b);
5449                                        // make sure this is set for this case
5450                                        byte_test_status[b] = WL_ESTIMATED;
5451                                        debug("        byte %d delay %2d Exhausted\n",
5452                                              b, delay);
5453                                        if (!measured_vref_flag) {
5454                                                // this is too noisy when doing
5455                                                // measured VREF
5456                                                debug("N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED\n",
5457                                                      node, if_num, rankx,
5458                                                      b, bits_bad, delay);
5459                                        }
5460                                }
5461                        }
5462                } else {
5463                        // no error, stay with current delay, but keep testing
5464                        // it...
5465                        debug("        byte %d delay %2d Passed\n", b, delay);
5466                        byte_test_status[b] = WL_HARDWARE;      // change status
5467                }
5468        }                       /* for (b = 0; b < 9; ++b) */
5469}
5470
5471static void sw_write_lvl_use_ecc(struct ddr_priv *priv, int rankx)
5472{
5473        int save_byte8 = wl_rank.s.byte8;
5474
5475        byte_test_status[8] = WL_HARDWARE;      /* H/W delay value */
5476
5477        if (save_byte8 != wl_rank.s.byte3 &&
5478            save_byte8 != wl_rank.s.byte4) {
5479                int test_byte8 = save_byte8;
5480                int test_byte8_error;
5481                int byte8_error = 0x1f;
5482                int adder;
5483                int avg_bytes = divide_nint(wl_rank.s.byte3 + wl_rank.s.byte4,
5484                                            2);
5485
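                    // Try the HW byte8 delay plus 0..4 whole cycles (steps
                    // of 8) and keep the candidate closest to the average of
                    // bytes 3 and 4, which bracket the ECC lane on typical
                    // DIMM routing.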
5486                for (adder = 0; adder <= 32; adder += 8) {
5487                        test_byte8_error = abs((adder + save_byte8) -
5488                                               avg_bytes);
5489                        if (test_byte8_error < byte8_error) {
5490                                byte8_error = test_byte8_error;
5491                                test_byte8 = save_byte8 + adder;
5492                        }
5493                }
5494
5495                // only do the check if we are not using measured VREF
5496                if (!measured_vref_flag) {
5497                        /* Use only even settings, rounding down... */
5498                        test_byte8 &= ~1;
5499
5500                        // do validity check on the calculated ECC delay value
5501                        // this depends on the DIMM type
5502                        if (spd_rdimm) {        // RDIMM
5503                                // but not mini-RDIMM
5504                                if (spd_dimm_type != 5) {
5505                                        // it can be > byte4, but should never
5506                                        // be > byte3
5507                                        if (test_byte8 > wl_rank.s.byte3) {
5508                                                /* say it is still estimated */
5509                                                byte_test_status[8] =
5510                                                        WL_ESTIMATED;
5511                                        }
5512                                }
5513                        } else {        // UDIMM
5514                                if (test_byte8 < wl_rank.s.byte3 ||
5515                                    test_byte8 > wl_rank.s.byte4) {
5516                                        // should never be outside the
5517                                        // byte 3-4 range
5518                                        /* say it is still estimated */
5519                                        byte_test_status[8] = WL_ESTIMATED;
5520                                }
5521                        }
5522                        /*
5523                         * Report whenever the calculation appears bad.
5524                         * This happens if some of the original values were off,
5525                         * or unexpected geometry from DIMM type, or custom
5526                         * circuitry (NIC225E, I am looking at you!).
5527                         * We will trust the calculated value, and depend on
5528                         * later testing to catch any instances when that
5529                         * value is truly bad.
5530                         */
5531                        // ESTIMATED means there may be an issue
5532                        if (byte_test_status[8] == WL_ESTIMATED) {
5533                                debug("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n",
5534                                      node, if_num, rankx,
5535                                      (spd_rdimm ? 'R' : 'U'), wl_rank.s.byte4,
5536                                      test_byte8, wl_rank.s.byte3);
5537                                byte_test_status[8] = WL_HARDWARE;
5538                        }
5539                }
5540                /* Use only even settings */
5541                wl_rank.s.byte8 = test_byte8 & ~1;
5542        }
5543
5544        if (wl_rank.s.byte8 != save_byte8) {
5545                /* Change the status if s/w adjusted the delay */
5546                byte_test_status[8] = WL_SOFTWARE;      /* Estimated delay */
5547        }
5548}
5549
5550static __maybe_unused void parallel_wl_block_delay(struct ddr_priv *priv,
5551                                                   int rankx)
5552{
5553        int errors;
5554        int byte_delay[8];
5555        int byte_passed[8];
5556        u64 bytemask;
5557        u64 bitmask;
5558        int wl_offset;
5559        int bytes_todo;
5560        int sw_wl_offset = 1;
5561        int delay;
5562        int b;
5563
5564        for (b = 0; b < 8; ++b)
5565                byte_passed[b] = 0;
5566
5567        bytes_todo = if_bytemask;
5568
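            // Two passes: the first requires (sw_wl_offset + 1) = 2
            // consecutive passing delay settings per byte (WL_SOFTWARE);
            // bytes still failing get a second pass that accepts a single
            // marginal setting (WL_SOFTWARE1).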
5569        for (wl_offset = sw_wl_offset; wl_offset >= 0; --wl_offset) {
5570                debug("Starting wl_offset for-loop: %d\n", wl_offset);
5571
5572                bytemask = 0;
5573
5574                for (b = 0; b < 8; ++b) {
5575                        byte_delay[b] = 0;
5576                        // this does not contain fully passed bytes
5577                        if (!(bytes_todo & (1 << b)))
5578                                continue;
5579
5580                        // reset across passes if not fully passed
5581                        byte_passed[b] = 0;
5582                        upd_wl_rank(&wl_rank, b, 0);    // all delays start at 0
5583                        bitmask = ((!if_64b) && (b == 4)) ? 0x0f : 0xff;
5584                        // set the bytes bits in the bytemask
5585                        bytemask |= bitmask << (8 * b);
5586                }               /* for (b = 0; b < 8; ++b) */
5587
5588                // start a pass if there is any byte lane to test
5589                while (bytemask != 0) {
5590                        debug("Starting bytemask while-loop: 0x%llx\n",
5591                              bytemask);
5592
5593                        // write this set of WL delays
5594                        lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
5595                               wl_rank.u64);
5596                        wl_rank.u64 = lmc_rd(priv,
5597                                             CVMX_LMCX_WLEVEL_RANKX(rankx,
5598                                                                    if_num));
5599
5600                        // do the test
5601                        if (sw_wl_hw) {
5602                                errors = run_best_hw_patterns(priv, if_num,
5603                                                              rank_addr,
5604                                                              DBTRAIN_TEST,
5605                                                              NULL) & 0xff;
5606                        } else {
5607                                errors = test_dram_byte64(priv, if_num,
5608                                                          rank_addr, bytemask,
5609                                                          NULL);
5610                        }
5611
5612                        debug("test_dram_byte returned 0x%x\n", errors);
5613
5614                        // check errors by byte
5615                        for (b = 0; b < 8; ++b) {
5616                                if (!(bytes_todo & (1 << b)))
5617                                        continue;
5618
5619                                delay = byte_delay[b];
5620                                if (errors & (1 << b)) {        // yes, an error
5621                                        debug("        byte %d delay %2d Errors\n",
5622                                              b, delay);
5623                                        byte_passed[b] = 0;
5624                                } else {        // no error
5625                                        byte_passed[b] += 1;
5626                                        // Look for consecutive working settings
5627                                        if (byte_passed[b] == (1 + wl_offset)) {
5628                                                debug("        byte %d delay %2d FULLY Passed\n",
5629                                                      b, delay);
5630                                                if (wl_offset == 1) {
5631                                                        byte_test_status[b] =
5632                                                                WL_SOFTWARE;
5633                                                } else if (wl_offset == 0) {
5634                                                        byte_test_status[b] =
5635                                                                WL_SOFTWARE1;
5636                                                }
5637
5638                                                // test no longer, remove
5639                                                // from byte mask this pass
5640                                                bytemask &= ~(0xffULL <<
5641                                                              (8 * b));
5642                                                // remove completely from
5643                                                // concern
5644                                                bytes_todo &= ~(1 << b);
5645                                                // on to the next byte, bypass
5646                                                // delay updating!!
5647                                                continue;
5648                                        } else {
5649                                                debug("        byte %d delay %2d Passed\n",
5650                                                      b, delay);
5651                                        }
5652                                }
5653
5654                                // error or not, move to the next delay
5655                                // value for this byte, unless all delays
5656                                // are done; only a byte that has "fully
5657                                // passed" bypasses this via the continue
5658                                delay += 2;
5659                                if (delay < 32) {
5660                                        upd_wl_rank(&wl_rank, b, delay);
5661                                        debug("        byte %d delay %2d New\n",
5662                                              b, delay);
5663                                        byte_delay[b] = delay;
5664                                } else {
5665                                        // reached max delay, done with this
5666                                        // byte
5667                                        debug("        byte %d delay %2d Exhausted\n",
5668                                              b, delay);
5669                                        // test no longer, remove from byte
5670                                        // mask this pass
5671                                        bytemask &= ~(0xffULL << (8 * b));
5672                                }
5673                        }       /* for (b = 0; b < 8; ++b) */
5674                        debug("End of for-loop: bytemask 0x%llx\n", bytemask);
5675                }               /* while (bytemask != 0) */
5676        }
5677
5678        for (b = 0; b < 8; ++b) {
5679                // any bytes left in bytes_todo did not pass
5680                if (bytes_todo & (1 << b)) {
5681                        union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank;
5682
5683                        /*
5684                         * Last resort. Use Rlevel settings to estimate
5685                         * Wlevel if software write-leveling fails
5686                         */
5687                        debug("Using RLEVEL as WLEVEL estimate for byte %d\n",
5688                              b);
5689                        lmc_rlevel_rank.u64 =
5690                                lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
5691                                                                    if_num));
5692                        rlevel_to_wlevel(&lmc_rlevel_rank, &wl_rank, b);
5693                }
5694        }                       /* for (b = 0; b < 8; ++b) */
5695}
5696
5697static int lmc_sw_write_leveling(struct ddr_priv *priv)
5698{
5699        /* Try to determine/optimize write-level delays experimentally. */
5700        union cvmx_lmcx_wlevel_rankx wl_rank_hw_res;
5701        union cvmx_lmcx_config cfg;
5702        int rankx;
5703        int byte;
5704        char *s;
5705        int i;
5706
5707        int active_rank;
5708        int sw_wl_enable = 1;   /* FIX... Should be customizable. */
5709        int interfaces;
5710
5711        static const char * const wl_status_strings[] = {
5712                "(e)",
5713                "   ",
5714                "   ",
5715                "(1)"
5716        };
5717
5718        // FIXME: make HW-assist the default now?
5719        int sw_wl_hw_default = SW_WLEVEL_HW_DEFAULT;
5720        int dram_connection = c_cfg->dram_connection;
5721
5722        s = lookup_env(priv, "ddr_sw_wlevel_hw");
5723        if (s)
5724                sw_wl_hw_default = !!strtoul(s, NULL, 0);
5725        if (!if_64b)            // must use SW algo if 32-bit mode
5726                sw_wl_hw_default = 0;
5727
5728        // CN78XX pass 1.x can never use HW-assist
5729        if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
5730                sw_wl_hw_default = 0;
5731
5732        s = lookup_env(priv, "ddr_software_wlevel");
5733        if (s)
5734                sw_wl_enable = strtoul(s, NULL, 0);
5735
5736        s = lookup_env(priv, "ddr%d_dram_connection", if_num);
5737        if (s)
5738                dram_connection = !!strtoul(s, NULL, 0);
5739
5740        cvmx_rng_enable();
5741
5742        /*
5743         * Get the measured_vref setting from the config, check for an
5744         * override...
5745         */
5746        /* NOTE: measured_vref=1 (ON) means force use of MEASURED vref... */
5747        // NOTE: measured VREF can only be done for DDR4
5748        if (ddr_type == DDR4_DRAM) {
5749                measured_vref_flag = c_cfg->measured_vref;
5750                s = lookup_env(priv, "ddr_measured_vref");
5751                if (s)
5752                        measured_vref_flag = !!strtoul(s, NULL, 0);
5753        } else {
5754                measured_vref_flag = 0; // OFF for DDR3
5755        }
5756
5757        /*
5758         * Ensure ECC is disabled for the DRAM tests when using the SW
5759         * algorithm; otherwise leave it untouched
5760         */
5761        if (!sw_wl_hw_default) {
5762                cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5763                cfg.cn78xx.ecc_ena = 0;
5764                lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5765        }
5766
5767        /*
5768         * We need to track absolute rank number, as well as how many
5769         * active ranks we have.  Two single rank DIMMs show up as
5770         * ranks 0 and 2, but only 2 ranks are active.
5771         */
5772        active_rank = 0;
5773
5774        interfaces = __builtin_popcount(if_mask);
5775
5776        for (rankx = 0; rankx < dimm_count * 4; rankx++) {
5777                final_vref_range = 0;
5778                start_vref_val = 0;
5779                computed_final_vref_val = -1;
5780                sw_wl_rank_status = WL_HARDWARE;
5781                sw_wl_failed = 0;
5782                sw_wl_hw = sw_wl_hw_default;
5783
5784                if (!sw_wl_enable)
5785                        break;
5786
5787                if (!(rank_mask & (1 << rankx)))
5788                        continue;
5789
5790                debug("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n",
5791                      node, if_num, rankx,
5792                      (sw_wl_hw) ? "with H/W assist" :
5793                      "with S/W algorithm");
5794
5795                if (ddr_type == DDR4_DRAM && num_ranks != 4) {
5796                        // always compute when we can...
5797                        computed_final_vref_val =
5798                            compute_vref_val(priv, if_num, rankx, dimm_count,
5799                                             num_ranks, imp_val,
5800                                             is_stacked_die, dram_connection);
5801
5802                        // but only use it if allowed
5803                        if (!measured_vref_flag) {
5804                                // skip all the measured vref processing,
5805                                // just the final setting
5806                                start_vref_val = VREF_FINAL;
5807                        }
5808                }
5809
5810                /* Save off the h/w wl results */
5811                wl_rank_hw_res.u64 = lmc_rd(priv,
5812                                            CVMX_LMCX_WLEVEL_RANKX(rankx,
5813                                                                   if_num));
5814
5815                vref_val_count = 0;
5816                vref_val_start = 0;
5817                best_vref_val_count = 0;
5818                best_vref_val_start = 0;
5819
5820                /* Loop one extra time using the Final vref value. */
5821                for (vref_val = start_vref_val; vref_val < VREF_LIMIT;
5822                     ++vref_val) {
5823                        if (ddr_type == DDR4_DRAM)
5824                                ddr4_vref_loop(priv, rankx);
5825
5826                        /* Restore the saved value */
5827                        wl_rank.u64 = wl_rank_hw_res.u64;
5828
5829                        for (byte = 0; byte < 9; ++byte)
5830                                byte_test_status[byte] = WL_ESTIMATED;
5831
5832                        if (wl_mask_err == 0) {
5833                                /*
5834                                 * Determine address of DRAM to test for
5835                                 * pass 1 of software write leveling.
5836                                 */
5837                                rank_addr = active_rank *
5838                                        (1ull << (pbank_lsb - bunk_enable +
5839                                                  (interfaces / 2)));
5840
5841                                /*
5842                                 * Adjust address for boot bus hole in memory
5843                                 * map.
5844                                 */
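                                    // (assumes a 256 MB hole starting at
                                    //  0x10000000)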
5845                                if (rank_addr > 0x10000000)
5846                                        rank_addr += 0x10000000;
5847
5848                                debug("N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n",
5849                                      node, if_num, rankx, active_rank,
5850                                      rank_addr);
5851
5852                                // start parallel write-leveling block for
5853                                // delay high-order bits
5854                                errors = 0;
5855                                no_errors_count = 0;
5856                                sum_dram_dclk = 0;
5857                                sum_dram_ops = 0;
5858
5859                                if (if_64b) {
5860                                        bytes_todo = (sw_wl_hw) ?
5861                                                if_bytemask : 0xFF;
5862                                        bytemask = ~0ULL;
5863                                } else {
5864                                        // 32-bit, must be using SW algo,
5865                                        // only data bytes
5866                                        bytes_todo = 0x0f;
5867                                        bytemask = 0x00000000ffffffffULL;
5868                                }
5869
5870                                for (byte = 0; byte < 9; ++byte) {
5871                                        if (!(bytes_todo & (1 << byte))) {
5872                                                byte_delay[byte] = 0;
5873                                        } else {
5874                                                byte_delay[byte] =
5875                                                    get_wl_rank(&wl_rank, byte);
5876                                        }
5877                                }       /* for (byte = 0; byte < 9; ++byte) */
5878
5879                                do {
5880                                        lmc_sw_write_leveling_loop(priv, rankx);
5881                                } while (no_errors_count <
5882                                         WL_MIN_NO_ERRORS_COUNT);
5883
5884                                if (!sw_wl_hw) {
5885                                        u64 percent_x10;
5886
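                                            // bus utilization for the test,
                                            // in tenths of a percent:
                                            // ops * 1000 / DCLK cycles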
5887                                        if (sum_dram_dclk == 0)
5888                                                sum_dram_dclk = 1;
5889                                        percent_x10 = sum_dram_ops * 1000 /
5890                                                sum_dram_dclk;
5891                                        debug("N%d.LMC%d.R%d: ops %llu, cycles %llu, used %llu.%llu%%\n",
5892                                              node, if_num, rankx, sum_dram_ops,
5893                                              sum_dram_dclk, percent_x10 / 10,
5894                                              percent_x10 % 10);
5895                                }
5896                                if (errors) {
5897                                        debug("End WLEV_64 while loop: vref_val %d(0x%x), errors 0x%02x\n",
5898                                              vref_val, vref_val, errors);
5899                                }
5900                                // end parallel write-leveling block for
5901                                // delay high-order bits
5902
5903                                // if we used HW-assist, the ECC byte was
5904                                // already handled when appropriate
5905                                if (sw_wl_hw) {
5906                                        if (wl_print) {
5907                                                debug("N%d.LMC%d.R%d: HW-assisted SWL - ECC estimate not needed.\n",
5908                                                      node, if_num, rankx);
5909                                        }
5910                                        goto no_ecc_estimate;
5911                                }
5912
5913                                if ((if_bytemask & 0xff) == 0xff) {
5914                                        if (use_ecc) {
5915                                                sw_write_lvl_use_ecc(priv,
5916                                                                     rankx);
5917                                        } else {
5918                                                /* H/W delay value */
5919                                                byte_test_status[8] =
5920                                                        WL_HARDWARE;
5921                                                /* ECC is not used */
5922                                                wl_rank.s.byte8 =
5923                                                        wl_rank.s.byte0;
5924                                        }
5925                                } else {
5926                                        if (use_ecc) {
5927                                                /* Estimate the ECC byte dly */
5928                                                // add hi-order to b4
5929                                                wl_rank.s.byte4 |=
5930                                                        (wl_rank.s.byte3 &
5931                                                         0x38);
5932                                                if ((wl_rank.s.byte4 & 0x06) <
5933                                                    (wl_rank.s.byte3 & 0x06)) {
5934                                                        // must be next clock
5935                                                        wl_rank.s.byte4 += 8;
5936                                                }
5937                                        } else {
5938                                                /* ECC is not used */
5939                                                wl_rank.s.byte4 =
5940                                                        wl_rank.s.byte0;
5941                                        }
5942
5943                                        /*
5944                                         * Change the status if s/w adjusted
5945                                         * the delay
5946                                         */
5947                                        /* Estimated delay */
5948                                        byte_test_status[4] = WL_SOFTWARE;
5949                                }       /* if ((if_bytemask & 0xff) == 0xff) */
5950                        }       /* if (wl_mask_err == 0) */
5951
5952no_ecc_estimate:
5953
5954                        bytes_failed = 0;
5955                        for (byte = 0; byte < 9; ++byte) {
5956                                /* Don't accumulate errors for untested bytes */
5957                                if (!(if_bytemask & (1 << byte)))
5958                                        continue;
5959                                bytes_failed +=
5960                                    (byte_test_status[byte] == WL_ESTIMATED);
5961                        }
5962
5963                        /* vref training loop is only used for DDR4  */
5964                        if (ddr_type != DDR4_DRAM)
5965                                break;
5966
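                            // Track runs of consecutive Vref settings where
                            // every byte passed; the longest run so far is
                            // the candidate window for the final Vref choice.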
5967                        if (bytes_failed == 0) {
5968                                if (vref_val_count == 0)
5969                                        vref_val_start = vref_val;
5970
5971                                ++vref_val_count;
5972                                if (vref_val_count > best_vref_val_count) {
5973                                        best_vref_val_count = vref_val_count;
5974                                        best_vref_val_start = vref_val_start;
5975                                        debug("N%d.LMC%d.R%d: vref Training                    (%2d) :    0x%02x <----- ???? -----> 0x%02x\n",
5976                                              node, if_num, rankx, vref_val,
5977                                              best_vref_val_start,
5978                                              best_vref_val_start +
5979                                              best_vref_val_count - 1);
5980                                }
5981                        } else {
5982                                vref_val_count = 0;
5983                                debug("N%d.LMC%d.R%d: vref Training                    (%2d) :    failed\n",
5984                                      node, if_num, rankx, vref_val);
5985                        }
5986                }
5987
5988                /*
5989                 * Determine address of DRAM to test for software write
5990                 * leveling.
5991                 */
5992                rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable +
5993                                                    (interfaces / 2)));
5994                /* Adjust address for boot bus hole in memory map. */
5995                if (rank_addr > 0x10000000)
5996                        rank_addr += 0x10000000;
5997
5998                debug("Rank Address: 0x%llx\n", rank_addr);
5999
6000                if (bytes_failed) {
6001                        // FIXME? the big hammer, did not even try SW WL pass2,
6002                        // assume only chip reset will help
6003                        debug("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n",
6004                              node, if_num, rankx);
6005                        sw_wl_failed = 1;
6006                } else {        /* if (bytes_failed) */
6007                        // SW WL pass 1 was OK, write the settings
6008                        lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
6009                               wl_rank.u64);
6010                        wl_rank.u64 = lmc_rd(priv,
6011                                             CVMX_LMCX_WLEVEL_RANKX(rankx,
6012                                                                    if_num));
6013
6014                        // do validity check on the delay values by running
6015                        // the test 1 more time...
6016                        // FIXME: we really need to check the ECC byte setting
6017                        // here as well, so we need to enable ECC for this test!
6018                        // if there are any errors, claim SW WL failure
6019                        u64 datamask = (if_64b) ? 0xffffffffffffffffULL :
6020                                0x00000000ffffffffULL;
6021                        int errors;
6022
6023                        // do the test
6024                        if (sw_wl_hw) {
6025                                errors = run_best_hw_patterns(priv, if_num,
6026                                                              rank_addr,
6027                                                              DBTRAIN_TEST,
6028                                                              NULL) & 0xff;
6029                        } else {
6030                                errors = test_dram_byte64(priv, if_num,
6031                                                          rank_addr, datamask,
6032                                                          NULL);
6033                        }
6034
6035                        if (errors) {
6036                                debug("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%03x\n",
6037                                      node, if_num, rankx, errors);
6038                                sw_wl_failed = 1;
6039                        }
6040                }               /* if (bytes_failed) */
6041
6042                // FIXME? dump the WL settings, so we get more of a clue
6043                // as to what happened where
6044                debug("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016llX  : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n",
6045                      node, if_num, rankx, wl_rank.s.status, wl_rank.u64,
6046                      wl_rank.s.byte8, wl_status_strings[byte_test_status[8]],
6047                      wl_rank.s.byte7, wl_status_strings[byte_test_status[7]],
6048                      wl_rank.s.byte6, wl_status_strings[byte_test_status[6]],
6049                      wl_rank.s.byte5, wl_status_strings[byte_test_status[5]],
6050                      wl_rank.s.byte4, wl_status_strings[byte_test_status[4]],
6051                      wl_rank.s.byte3, wl_status_strings[byte_test_status[3]],
6052                      wl_rank.s.byte2, wl_status_strings[byte_test_status[2]],
6053                      wl_rank.s.byte1, wl_status_strings[byte_test_status[1]],
6054                      wl_rank.s.byte0, wl_status_strings[byte_test_status[0]],
6055                      (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)");
6056
6057                // finally, check for fatal conditions: either chip reset
6058                // right here, or return error flag
6059                if ((ddr_type == DDR4_DRAM && best_vref_val_count == 0) ||
6060                    sw_wl_failed) {
6061                        if (!ddr_disable_chip_reset) {  // do chip RESET
6062                                printf("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Resetting node...\n",
6063                                       node, if_num, rankx);
6064                                mdelay(500);
6065                                do_reset(NULL, 0, 0, NULL);
6066                        } else {
6067                                // return error flag so LMC init can be retried.
6068                                debug("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Restarting LMC init...\n",
6069                                      node, if_num, rankx);
6070                                return -EAGAIN; // tell the caller to restart LMC init
6071                        }
6072                }
6073                active_rank++;
6074        }
6075
6076        for (rankx = 0; rankx < dimm_count * 4; rankx++) {
6077                int parameter_set = 0;
6078                u64 value;
6079
6080                if (!(rank_mask & (1 << rankx)))
6081                        continue;
6082
6083                wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
6084                                                                  if_num));
6085
6086                for (i = 0; i < 9; ++i) {
6087                        s = lookup_env(priv, "ddr%d_wlevel_rank%d_byte%d",
6088                                       if_num, rankx, i);
6089                        if (s) {
6090                                parameter_set |= 1;
6091                                value = strtoul(s, NULL, 0);
6092
6093                                upd_wl_rank(&wl_rank, i, value);
6094                        }
6095                }
6096
6097                s = lookup_env_ull(priv, "ddr%d_wlevel_rank%d", if_num, rankx);
6098                if (s) {
6099                        parameter_set |= 1;
6100                        value = strtoull(s, NULL, 0);
6101                        wl_rank.u64 = value;
6102                }
6103
6104                if (parameter_set) {
6105                        lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
6106                               wl_rank.u64);
6107                        wl_rank.u64 =
6108                            lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));
6109                        display_wl(if_num, wl_rank, rankx);
6110                }
6111                // if there are unused entries to be filled
6112                if ((rank_mask & 0x0F) != 0x0F) {
6113                        if (rankx < 3) {
6114                                debug("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n",
6115                                      node, if_num, rankx);
6116
6117                                // if rank 0, write ranks 1 and 2 here if empty
6118                                if (rankx == 0) {
6119                                        // check that rank 1 is empty
6120                                        if (!(rank_mask & (1 << 1))) {
6121                                                debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6122                                                      node, if_num, rankx, 1);
6123                                                lmc_wr(priv,
6124                                                       CVMX_LMCX_WLEVEL_RANKX(1,
6125                                                                if_num),
6126                                                       wl_rank.u64);
6127                                        }
6128
6129                                        // check that rank 2 is empty
6130                                        if (!(rank_mask & (1 << 2))) {
6131                                                debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6132                                                      node, if_num, rankx, 2);
6133                                                lmc_wr(priv,
6134                                                       CVMX_LMCX_WLEVEL_RANKX(2,
6135                                                                if_num),
6136                                                       wl_rank.u64);
6137                                        }
6138                                }
6139
6140                                // if rank 0, 1 or 2, write rank 3 here if empty
6141                                // check that rank 3 is empty
6142                                if (!(rank_mask & (1 << 3))) {
6143                                        debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6144                                              node, if_num, rankx, 3);
6145                                        lmc_wr(priv,
6146                                               CVMX_LMCX_WLEVEL_RANKX(3,
6147                                                                      if_num),
6148                                               wl_rank.u64);
6149                                }
6150                        }
6151                }
6152        }
6153
6154        /* Enable 32-bit mode if required. */
6155        cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
6156        cfg.cn78xx.mode32b = (!if_64b);
6157        debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
6158
6159        /* Restore the ECC configuration */
6160        if (!sw_wl_hw_default)
6161                cfg.cn78xx.ecc_ena = use_ecc;
6162
6163        lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
6164
6165        return 0;
6166}
6167
6168static void lmc_dll(struct ddr_priv *priv)
6169{
6170        union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
6171        int setting[9];
6172        int i;
6173
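            // Select each byte lane in turn via DLL90_BYTE_SEL and read back
            // its DLL90 setting for the summary below, then apply any custom
            // DLL write/read offsets from the board config or environment.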
6174        ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6175
6176        for (i = 0; i < 9; ++i) {
6177                SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i));
6178                lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
6179                lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6180                ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6181                setting[i] = GET_DDR_DLL_CTL3(dll90_setting);
6182                debug("%d. LMC%d_DLL_CTL3[%d] = %016llx %d\n", i, if_num,
6183                      GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u64,
6184                      setting[i]);
6185        }
6186
6187        debug("N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
6188              node, if_num, "DLL90 Setting 8:0",
6189              setting[8], setting[7], setting[6], setting[5], setting[4],
6190              setting[3], setting[2], setting[1], setting[0]);
6191
6192        process_custom_dll_offsets(priv, if_num, "ddr_dll_write_offset",
6193                                   c_cfg->dll_write_offset,
6194                                   "ddr%d_dll_write_offset_byte%d", 1);
6195        process_custom_dll_offsets(priv, if_num, "ddr_dll_read_offset",
6196                                   c_cfg->dll_read_offset,
6197                                   "ddr%d_dll_read_offset_byte%d", 2);
6198}
6199
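/*
 * Saturating-increment helpers for the 6-bit SLOT_CTL timing fields:
 * add 'incr' to the field but clamp the result at the field maximum
 * of 63 so it cannot wrap.
 */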
6200#define SLOT_CTL_INCR(csr, chip, field, incr)                           \
6201        csr.chip.field = (csr.chip.field < (64 - incr)) ?               \
6202                (csr.chip.field + incr) : 63
6203
6204#define INCR(csr, chip, field, incr)                                    \
6205        csr.chip.field = (csr.chip.field < (64 - incr)) ?               \
6206                (csr.chip.field + incr) : 63
6207
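/*
 * Post-leveling hardware workarounds: errata 21063 widens selected
 * read-to-write / write-to-read SLOT_CTL gaps on CN78XX and CN70XX
 * pass 1.x, and errata 21216 raises the cross-rank and cross-DIMM
 * write-to-write init values to at least 10 on CN78XX pass 1.x and
 * CN70XX pass 1.x.
 */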
6208static void lmc_workaround_2(struct ddr_priv *priv)
6209{
6210        /* Workaround Errata 21063 */
6211        if (octeon_is_cpuid(OCTEON_CN78XX) ||
6212            octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
6213                union cvmx_lmcx_slot_ctl0 slot_ctl0;
6214                union cvmx_lmcx_slot_ctl1 slot_ctl1;
6215                union cvmx_lmcx_slot_ctl2 slot_ctl2;
6216                union cvmx_lmcx_ext_config ext_cfg;
6217
6218                slot_ctl0.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL0(if_num));
6219                slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
6220                slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));
6221
6222                ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
6223
6224                /* When ext_cfg.s.read_ena_bprch is set add 1 */
6225                if (ext_cfg.s.read_ena_bprch) {
6226                        SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_init, 1);
6227                        SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_l_init, 1);
6228                        SLOT_CTL_INCR(slot_ctl1, cn78xx, r2w_xrank_init, 1);
6229                        SLOT_CTL_INCR(slot_ctl2, cn78xx, r2w_xdimm_init, 1);
6230                }
6231
6232                /* Always add 2 */
6233                SLOT_CTL_INCR(slot_ctl1, cn78xx, w2r_xrank_init, 2);
6234                SLOT_CTL_INCR(slot_ctl2, cn78xx, w2r_xdimm_init, 2);
6235
6236                lmc_wr(priv, CVMX_LMCX_SLOT_CTL0(if_num), slot_ctl0.u64);
6237                lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);
6238                lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
6239        }
6240
6241        /* Workaround Errata 21216 */
6242        if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) ||
6243            octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
6244                union cvmx_lmcx_slot_ctl1 slot_ctl1;
6245                union cvmx_lmcx_slot_ctl2 slot_ctl2;
6246
6247                slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
6248                slot_ctl1.cn78xx.w2w_xrank_init =
6249                    max(10, (int)slot_ctl1.cn78xx.w2w_xrank_init);
6250                lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);
6251
6252                slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));
6253                slot_ctl2.cn78xx.w2w_xdimm_init =
6254                    max(10, (int)slot_ctl2.cn78xx.w2w_xdimm_init);
6255                lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
6256        }
6257}
6258
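/*
 * Final LMC cleanup: acknowledge any ECC/interrupt state latched during
 * leveling by write-clearing LMC_INT and writing back the per-TAD and
 * per-MCI L2C interrupt registers (the TAD/MCI counts depend on the SoC).
 */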
6259static void lmc_final(struct ddr_priv *priv)
6260{
6261        /*
6262         * 4.8.11 Final LMC Initialization
6263         *
6264         * Early LMC initialization, LMC write-leveling, and LMC read-leveling
6265         * must be completed prior to starting this final LMC initialization.
6266         *
6267         * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1,
6268         * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected
6269         * read-leveling and write-leveling settings. Software should not write
6270         * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2
6271         * values until after the final read-leveling and write-leveling
6272         * settings are written.
6273         *
6274         * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and
6275         * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs
6276         * select the minimum gaps between read operations and write operations
6277         * of various types.
6278         *
6279         * Software must not reduce the values in these CSR fields below the
6280         * values previously selected by the LMC hardware (during write-leveling
6281         * and read-leveling steps above).
6282         *
6283         * All sections in this chapter may be used to derive proper settings
6284         * for these registers.
6285         *
6286         * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed
6287         * properly. This should be done prior to the first read.
6288         */
6289
6290        /* Clear any residual ECC errors */
6291        int num_tads = 1;
6292        int tad;
6293        int num_mcis = 1;
6294        int mci;
6295
6296        if (octeon_is_cpuid(OCTEON_CN78XX)) {
6297                num_tads = 8;
6298                num_mcis = 4;
6299        } else if (octeon_is_cpuid(OCTEON_CN70XX)) {
6300                num_tads = 1;
6301                num_mcis = 1;
6302        } else if (octeon_is_cpuid(OCTEON_CN73XX) ||
6303                   octeon_is_cpuid(OCTEON_CNF75XX)) {
6304                num_tads = 4;
6305                num_mcis = 3;
6306        }
6307
6308        lmc_wr(priv, CVMX_LMCX_INT(if_num), -1ULL);
6309        lmc_rd(priv, CVMX_LMCX_INT(if_num));
6310
6311        for (tad = 0; tad < num_tads; tad++) {
6312                l2c_wr(priv, CVMX_L2C_TADX_INT_REL(tad),
6313                       l2c_rd(priv, CVMX_L2C_TADX_INT_REL(tad)));
6314                debug("%-45s : (%d) 0x%08llx\n", "CVMX_L2C_TAD_INT", tad,
6315                      l2c_rd(priv, CVMX_L2C_TADX_INT_REL(tad)));
6316        }
6317
6318        for (mci = 0; mci < num_mcis; mci++) {
6319                l2c_wr(priv, CVMX_L2C_MCIX_INT_REL(mci),
6320                       l2c_rd(priv, CVMX_L2C_MCIX_INT_REL(mci)));
6321                debug("%-45s : (%d) 0x%08llx\n", "L2C_MCI_INT", mci,
6322                      l2c_rd(priv, CVMX_L2C_MCIX_INT_REL(mci)));
6323        }
6324
6325        debug("%-45s : 0x%08llx\n", "LMC_INT",
6326              lmc_rd(priv, CVMX_LMCX_INT(if_num)));
6327}
6328
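/*
 * Optional memory scrambling setup. Scrambling stays disabled unless the
 * "ddr_use_scramble" environment variable requests it, in which case the
 * scramble keys are seeded from the hardware RNG. Individual keys and the
 * NS_CTL value can also be forced via "ddr_scramble_cfg0/1/2" and
 * "ddr_ns_ctl".
 */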
6329static void lmc_scrambling(struct ddr_priv *priv)
6330{
6331        // Make sure scrambling is disabled during init...
6332        union cvmx_lmcx_control ctrl;
6333        union cvmx_lmcx_scramble_cfg0 lmc_scramble_cfg0;
6334        union cvmx_lmcx_scramble_cfg1 lmc_scramble_cfg1;
6335        union cvmx_lmcx_scramble_cfg2 lmc_scramble_cfg2;
6336        union cvmx_lmcx_ns_ctl lmc_ns_ctl;
6337        int use_scramble = 0;   // default OFF
6338        char *s;
6339
6340        ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
6341        lmc_scramble_cfg0.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num));
6342        lmc_scramble_cfg1.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num));
6343        lmc_scramble_cfg2.u64 = 0;      // quiet compiler
6344        if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
6345                lmc_scramble_cfg2.u64 =
6346                    lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num));
6347        }
6348        lmc_ns_ctl.u64 = lmc_rd(priv, CVMX_LMCX_NS_CTL(if_num));
6349
6350        s = lookup_env_ull(priv, "ddr_use_scramble");
6351        if (s)
6352                use_scramble = simple_strtoull(s, NULL, 0);
6353
6354        /* Generate random values if scrambling is needed */
6355        if (use_scramble) {
6356                lmc_scramble_cfg0.u64 = cvmx_rng_get_random64();
6357                lmc_scramble_cfg1.u64 = cvmx_rng_get_random64();
6358                lmc_scramble_cfg2.u64 = cvmx_rng_get_random64();
6359                lmc_ns_ctl.s.ns_scramble_dis = 0;
6360                lmc_ns_ctl.s.adr_offset = 0;
6361                ctrl.s.scramble_ena = 1;
6362        }
6363
6364        s = lookup_env_ull(priv, "ddr_scramble_cfg0");
6365        if (s) {
6366                lmc_scramble_cfg0.u64 = simple_strtoull(s, NULL, 0);
6367                ctrl.s.scramble_ena = 1;
6368        }
6369        debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG0",
6370              lmc_scramble_cfg0.u64);
6371
6372        lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), lmc_scramble_cfg0.u64);
6373
6374        s = lookup_env_ull(priv, "ddr_scramble_cfg1");
6375        if (s) {
6376                lmc_scramble_cfg1.u64 = simple_strtoull(s, NULL, 0);
6377                ctrl.s.scramble_ena = 1;
6378        }
6379        debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG1",
6380              lmc_scramble_cfg1.u64);
6381        lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), lmc_scramble_cfg1.u64);
6382
6383        if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
6384                s = lookup_env_ull(priv, "ddr_scramble_cfg2");
6385                if (s) {
6386                        lmc_scramble_cfg2.u64 = simple_strtoull(s, NULL, 0);
6387                        ctrl.s.scramble_ena = 1;
6388                }
6389                debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG2",
6390                      lmc_scramble_cfg2.u64);
6391                lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num),
6392                       lmc_scramble_cfg2.u64);
6393        }
6394
6395        s = lookup_env_ull(priv, "ddr_ns_ctl");
6396        if (s)
6397                lmc_ns_ctl.u64 = simple_strtoull(s, NULL, 0);
6398        debug("%-45s : 0x%016llx\n", "LMC_NS_CTL", lmc_ns_ctl.u64);
6399        lmc_wr(priv, CVMX_LMCX_NS_CTL(if_num), lmc_ns_ctl.u64);
6400
6401        lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
6402}
6403
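/*
 * Read-leveling state. rl_score[][][] records, for every RTT_NOM /
 * RODT_CTL / rank combination, the best RLEVEL_RANK setting found and
 * its error score. The working variables below are forced into .data
 * (rather than BSS), presumably so they are usable in the early
 * pre-relocation environment this init code runs in.
 */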
6404struct rl_score {
6405        u64 setting;
6406        int score;
6407};
6408
6409static union cvmx_lmcx_rlevel_rankx rl_rank __section(".data");
6410static union cvmx_lmcx_rlevel_ctl rl_ctl __section(".data");
6411static unsigned char rodt_ctl __section(".data");
6412
6413static int rl_rodt_err __section(".data");
6414static unsigned char rtt_nom __section(".data");
6415static unsigned char rtt_idx __section(".data");
6416static char min_rtt_nom_idx __section(".data");
6417static char max_rtt_nom_idx __section(".data");
6418static char min_rodt_ctl __section(".data");
6419static char max_rodt_ctl __section(".data");
6420static int rl_dbg_loops __section(".data");
6421static unsigned char save_ddr2t __section(".data");
6422static int rl_samples __section(".data");
6423static char rl_compute __section(".data");
6424static char saved_ddr__ptune __section(".data");
6425static char saved_ddr__ntune __section(".data");
6426static char rl_comp_offs __section(".data");
6427static char saved_int_zqcs_dis __section(".data");
6428static int max_adj_rl_del_inc __section(".data");
6429static int print_nom_ohms __section(".data");
6430static int rl_print __section(".data");
6431
6432#ifdef ENABLE_HARDCODED_RLEVEL
6433static char part_number[21] __section(".data");
6434#endif /* ENABLE_HARDCODED_RLEVEL */
6435
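/*
 * "Perfect bitmask" (PBM) bookkeeping for DDR4: for each byte lane,
 * count how often every read-leveling delay produced a perfect bitmask,
 * both per rank (rank_perf[]) and per RODT setting
 * (rodt_perfect_counts), so consistently perfect delays can be
 * identified later.
 */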
6436struct perfect_counts {
6437        u16 count[9][32]; // 8+ECC, counts per delay value (0..31)
6438        u32 mask[9];      // 8+ECC, bitmask of perfect delays
6439};
6440
6441static struct perfect_counts rank_perf[4] __section(".data");
6442static struct perfect_counts rodt_perfect_counts __section(".data");
6443static int pbm_lowsum_limit __section(".data");
6444// FIXME: PBM skip for RODT 240 and 34
6445static u32 pbm_rodt_skip __section(".data");
6446
6447// control rank majority processing
6448static int disable_rank_majority __section(".data");
6449
6450// default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE
6451// for DDR3
6452static int enable_rldelay_bump __section(".data");
6453static int rldelay_bump_incr __section(".data");
6454static int disable_rlv_bump_this_byte __section(".data");
6455static u64 value_mask __section(".data");
6456
6457static struct rlevel_byte_data rl_byte[9] __section(".data");
6458static int sample_loops __section(".data");
6459static int max_samples __section(".data");
6460static int rl_rank_errors __section(".data");
6461static int rl_mask_err __section(".data");
6462static int rl_nonseq_err __section(".data");
6463static struct rlevel_bitmask rl_mask[9] __section(".data");
6464static int rl_best_rank_score __section(".data");
6465
6466static int rodt_row_skip_mask __section(".data");
6467
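/*
 * Run hardware read-leveling for one rank at the current RTT_NOM /
 * RODT_CTL operating point. Performs up to max_samples leveling passes
 * (extra passes when rodt_ctl is the default), scores each result from
 * the per-byte leveling bitmasks plus a sequential-delay check, keeps
 * the best-scoring delays, and records the final score and RLEVEL_RANK
 * setting in rl_score[][][]. For DDR4 RDIMMs the register A and B sides
 * are leveled separately via MPR page 0 locations 0 and 3.
 */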
6468static void rodt_loop(struct ddr_priv *priv, int rankx, struct rl_score
6469                      rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
6470{
6471        union cvmx_lmcx_comp_ctl2 cc2;
6472        const int rl_separate_ab = 1;
6473        int i;
6474
6475        rl_best_rank_score = DEFAULT_BEST_RANK_SCORE;
6476        rl_rodt_err = 0;
6477        cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
6478        cc2.cn78xx.rodt_ctl = rodt_ctl;
6479        lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
6480        cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
6481        udelay(1); /* Give it a little time to take effect */
6482        if (rl_print > 1) {
6483                debug("Read ODT_CTL                                  : 0x%x (%d ohms)\n",
6484                      cc2.cn78xx.rodt_ctl,
6485                      imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
6486        }
6487
6488        memset(rl_byte, 0, sizeof(rl_byte));
6489        memset(&rodt_perfect_counts, 0, sizeof(rodt_perfect_counts));
6490
6491                // when the RODT being iterated is the target RODT, take more samples...
6492        max_samples = rl_samples;
6493        if (rodt_ctl == default_rodt_ctl)
6494                max_samples += rl_samples + 1;
6495
6496        for (sample_loops = 0; sample_loops < max_samples; sample_loops++) {
6497                int redoing_nonseq_errs = 0;
6498
6499                rl_mask_err = 0;
6500
6501                if (!(rl_separate_ab && spd_rdimm &&
6502                      ddr_type == DDR4_DRAM)) {
6503                        /* Clear read-level delays */
6504                        lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6505
6506                        /* read-leveling */
6507                        oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6508
6509                        do {
6510                                rl_rank.u64 =
6511                                        lmc_rd(priv,
6512                                               CVMX_LMCX_RLEVEL_RANKX(rankx,
6513                                                                      if_num));
6514                        } while (rl_rank.cn78xx.status != 3);
6515                }
6516
6517                rl_rank.u64 =
6518                        lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
6519
6520                // start bitmask interpretation block
6521
6522                memset(rl_mask, 0, sizeof(rl_mask));
6523
6524                if (rl_separate_ab && spd_rdimm && ddr_type == DDR4_DRAM) {
6525                        union cvmx_lmcx_rlevel_rankx rl_rank_aside;
6526                        union cvmx_lmcx_modereg_params0 mp0;
6527
6528                        /* A-side */
6529                        mp0.u64 =
6530                                lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6531                        mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6532                        lmc_wr(priv,
6533                               CVMX_LMCX_MODEREG_PARAMS0(if_num),
6534                               mp0.u64);
6535
6536                        /* Clear read-level delays */
6537                        lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6538
6539                        /* read-leveling */
6540                        oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6541
6542                        do {
6543                                rl_rank.u64 =
6544                                        lmc_rd(priv,
6545                                               CVMX_LMCX_RLEVEL_RANKX(rankx,
6546                                                                      if_num));
6547                        } while (rl_rank.cn78xx.status != 3);
6548
6549                        rl_rank.u64 =
6550                                lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
6551                                                                    if_num));
6552
6553                        rl_rank_aside.u64 = rl_rank.u64;
6554
6555                        rl_mask[0].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 0);
6556                        rl_mask[1].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 1);
6557                        rl_mask[2].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 2);
6558                        rl_mask[3].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 3);
6559                        rl_mask[8].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 8);
6560                        /* A-side complete */
6561
6562                        /* B-side */
6563                        mp0.u64 =
6564                                lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6565                        mp0.s.mprloc = 3; /* MPR Page 0 Location 3 */
6566                        lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
6567                               mp0.u64);
6568
6569                        /* Clear read-level delays */
6570                        lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6571
6572                        /* read-leveling */
6573                        oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6574
6575                        do {
6576                                rl_rank.u64 =
6577                                        lmc_rd(priv,
6578                                               CVMX_LMCX_RLEVEL_RANKX(rankx,
6579                                                                      if_num));
6580                        } while (rl_rank.cn78xx.status != 3);
6581
6582                        rl_rank.u64 =
6583                                lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
6584                                                                    if_num));
6585
6586                        rl_mask[4].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 4);
6587                        rl_mask[5].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 5);
6588                        rl_mask[6].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 6);
6589                        rl_mask[7].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 7);
6590                        /* B-side complete */
6591
6592                        upd_rl_rank(&rl_rank, 0, rl_rank_aside.s.byte0);
6593                        upd_rl_rank(&rl_rank, 1, rl_rank_aside.s.byte1);
6594                        upd_rl_rank(&rl_rank, 2, rl_rank_aside.s.byte2);
6595                        upd_rl_rank(&rl_rank, 3, rl_rank_aside.s.byte3);
6596                        /* ECC A-side */
6597                        upd_rl_rank(&rl_rank, 8, rl_rank_aside.s.byte8);
6598
6599                        mp0.u64 =
6600                                lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6601                        mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6602                        lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
6603                               mp0.u64);
6604                }
6605
6606                /*
6607                 * Evaluate the quality of the read-leveling delays from the
6608                 * bitmasks. Also save off a software computed read-leveling
6609                 * mask that may be used later to qualify the delay results
6610                 * from Octeon.
6611                 */
6612                for (i = 0; i < (8 + ecc_ena); ++i) {
6613                        int bmerr;
6614
6615                        if (!(if_bytemask & (1 << i)))
6616                                continue;
6617                        if (!(rl_separate_ab && spd_rdimm &&
6618                              ddr_type == DDR4_DRAM)) {
6619                                rl_mask[i].bm =
6620                                        lmc_ddr3_rl_dbg_read(priv, if_num, i);
6621                        }
6622                        bmerr = validate_ddr3_rlevel_bitmask(&rl_mask[i],
6623                                                             ddr_type);
6624                        rl_mask[i].errs = bmerr;
6625                        rl_mask_err += bmerr;
6626                        // count only the "perfect" bitmasks
6627                        if (ddr_type == DDR4_DRAM && !bmerr) {
6628                                int delay;
6629                                // FIXME: for now, simple filtering:
6630                                // do NOT count PBMs for RODTs in skip mask
6631                                if ((1U << rodt_ctl) & pbm_rodt_skip)
6632                                        continue;
6633                                // FIXME: could optimize this a bit?
6634                                delay = get_rl_rank(&rl_rank, i);
6635                                rank_perf[rankx].count[i][delay] += 1;
6636                                rank_perf[rankx].mask[i] |=
6637                                        (1ULL << delay);
6638                                rodt_perfect_counts.count[i][delay] += 1;
6639                                rodt_perfect_counts.mask[i] |= (1ULL << delay);
6640                        }
6641                }
6642
6643                /* Set delays for unused bytes to match byte 0. */
6644                for (i = 0; i < 9; ++i) {
6645                        if (if_bytemask & (1 << i))
6646                                continue;
6647                        upd_rl_rank(&rl_rank, i, rl_rank.s.byte0);
6648                }
6649
6650                /*
6651                 * Save a copy of the byte delays in physical
6652                 * order for sequential evaluation.
6653                 */
6654                unpack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, rl_rank);
6655
6656        redo_nonseq_errs:
6657
6658                rl_nonseq_err  = 0;
6659                if (!disable_sequential_delay_check) {
6660                        for (i = 0; i < 9; ++i)
6661                                rl_byte[i].sqerrs = 0;
6662
6663                        if ((if_bytemask & 0xff) == 0xff) {
6664                                /*
6665                                 * Evaluate delay sequence across the whole
6666                                 * range of bytes for standard dimms.
6667                                 */
6668                                /* 1=RDIMM, 5=Mini-RDIMM */
6669                                if (spd_dimm_type == 1 || spd_dimm_type == 5) {
6670                                        int reg_adj_del = abs(rl_byte[4].delay -
6671                                                              rl_byte[5].delay);
6672
6673                                        /*
6674                                         * Registered dimm topology routes
6675                                         * from the center.
6676                                         */
6677                                        rl_nonseq_err +=
6678                                                nonseq_del(rl_byte, 0,
6679                                                           3 + ecc_ena,
6680                                                           max_adj_rl_del_inc);
6681                                        rl_nonseq_err +=
6682                                                nonseq_del(rl_byte, 5,
6683                                                           7 + ecc_ena,
6684                                                           max_adj_rl_del_inc);
6685                                        // byte 5 sqerrs never gets cleared
6686                                        // for RDIMMs
6687                                        rl_byte[5].sqerrs = 0;
6688                                        if (reg_adj_del > 1) {
6689                                                /*
6690                                                 * Assess proximity of bytes on
6691                                                 * opposite sides of register
6692                                                 */
6693                                                rl_nonseq_err += (reg_adj_del -
6694                                                                  1) *
6695                                                        RLEVEL_ADJACENT_DELAY_ERROR;
6696                                                // update byte 5 error
6697                                                rl_byte[5].sqerrs +=
6698                                                        (reg_adj_del - 1) *
6699                                                        RLEVEL_ADJACENT_DELAY_ERROR;
6700                                        }
6701                                }
6702
6703                                /* 2=UDIMM, 6=Mini-UDIMM */
6704                                if (spd_dimm_type == 2 || spd_dimm_type == 6) {
6705                                        /*
6706                                         * Unbuffered dimm topology routes
6707                                         * from end to end.
6708                                         */
6709                                        rl_nonseq_err += nonseq_del(rl_byte, 0,
6710                                                                    7 + ecc_ena,
6711                                                                    max_adj_rl_del_inc);
6712                                }
6713                        } else {
6714                                rl_nonseq_err += nonseq_del(rl_byte, 0,
6715                                                            3 + ecc_ena,
6716                                                            max_adj_rl_del_inc);
6717                        }
6718                } /* if (! disable_sequential_delay_check) */
6719
6720                rl_rank_errors = rl_mask_err + rl_nonseq_err;
6721
6722                // print original sample here only if we are not really
6723                // averaging or picking best
6724                // also do not print if we were redoing the NONSEQ score
6725                // for using COMPUTED
6726                if (!redoing_nonseq_errs && rl_samples < 2) {
6727                        if (rl_print > 1) {
6728                                display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
6729                                display_rl_bm_scores(if_num, rankx, rl_mask,
6730                                                     ecc_ena);
6731                                display_rl_seq_scores(if_num, rankx, rl_byte,
6732                                                      ecc_ena);
6733                        }
6734                        display_rl_with_score(if_num, rl_rank, rankx,
6735                                              rl_rank_errors);
6736                }
6737
6738                if (rl_compute) {
6739                        if (!redoing_nonseq_errs) {
6740                                /* Recompute the delays based on the bitmask */
6741                                for (i = 0; i < (8 + ecc_ena); ++i) {
6742                                        if (!(if_bytemask & (1 << i)))
6743                                                continue;
6744
6745                                        upd_rl_rank(&rl_rank, i,
6746                                                    compute_ddr3_rlevel_delay(
6747                                                            rl_mask[i].mstart,
6748                                                            rl_mask[i].width,
6749                                                            rl_ctl));
6750                                }
6751
6752                                /*
6753                                 * Override the copy of byte delays with the
6754                                 * computed results.
6755                                 */
6756                                unpack_rlevel_settings(if_bytemask, ecc_ena,
6757                                                       rl_byte, rl_rank);
6758
6759                                redoing_nonseq_errs = 1;
6760                                goto redo_nonseq_errs;
6761
6762                        } else {
6763                                /*
6764                                 * now print this if already printed the
6765                                 * original sample
6766                                 */
6767                                if (rl_samples < 2 || rl_print) {
6768                                        display_rl_with_computed(if_num,
6769                                                                 rl_rank, rankx,
6770                                                                 rl_rank_errors);
6771                                }
6772                        }
6773                } /* if (rl_compute) */
6774
6775                // end bitmask interpretation block
6776
6777                // if it is a better (lower) score, then keep it
6778                if (rl_rank_errors < rl_best_rank_score) {
6779                        rl_best_rank_score = rl_rank_errors;
6780
6781                        // save the new best delays and best errors
6782                        for (i = 0; i < (8 + ecc_ena); ++i) {
6783                                rl_byte[i].best = rl_byte[i].delay;
6784                                rl_byte[i].bestsq = rl_byte[i].sqerrs;
6785                                // save bitmasks and their scores as well
6786                                // xlate UNPACKED index to PACKED index to
6787                                // get from rl_mask
6788                                rl_byte[i].bm = rl_mask[XUP(i, !!ecc_ena)].bm;
6789                                rl_byte[i].bmerrs =
6790                                        rl_mask[XUP(i, !!ecc_ena)].errs;
6791                        }
6792                }
6793
6794                rl_rodt_err += rl_rank_errors;
6795        }
6796
6797        /* We recorded the best score across the averaging loops */
6798        rl_score[rtt_nom][rodt_ctl][rankx].score = rl_best_rank_score;
6799
6800        /*
6801         * Restore the delays from the best fields that go with the best
6802         * score
6803         */
6804        for (i = 0; i < 9; ++i) {
6805                rl_byte[i].delay = rl_byte[i].best;
6806                rl_byte[i].sqerrs = rl_byte[i].bestsq;
6807        }
6808
6809        rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
6810
6811        pack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, &rl_rank);
6812
6813        if (rl_samples > 1) {
6814                // restore the "best" bitmasks and their scores for printing
6815                for (i = 0; i < 9; ++i) {
6816                        if ((if_bytemask & (1 << i)) == 0)
6817                                continue;
6818                        // xlate PACKED index to UNPACKED index to get from
6819                        // rl_byte
6820                        rl_mask[i].bm   = rl_byte[XPU(i, !!ecc_ena)].bm;
6821                        rl_mask[i].errs = rl_byte[XPU(i, !!ecc_ena)].bmerrs;
6822                }
6823
6824                // maybe print bitmasks/scores here
6825                if (rl_print > 1) {
6826                        display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
6827                        display_rl_bm_scores(if_num, rankx, rl_mask, ecc_ena);
6828                        display_rl_seq_scores(if_num, rankx, rl_byte, ecc_ena);
6829
6830                        display_rl_with_rodt(if_num, rl_rank, rankx,
6831                                             rl_score[rtt_nom][rodt_ctl][rankx].score,
6832                                             print_nom_ohms,
6833                                             imp_val->rodt_ohms[rodt_ctl],
6834                                             WITH_RODT_BESTSCORE);
6835
6836                        debug("-----------\n");
6837                }
6838        }
6839
6840        rl_score[rtt_nom][rodt_ctl][rankx].setting = rl_rank.u64;
6841
6842        // print out the PBMs for the current RODT
6843        if (ddr_type == DDR4_DRAM && rl_print > 1) { // verbosity?
6844                // FIXME: change verbosity level after debug complete...
6845
6846                for (i = 0; i < 9; i++) {
6847                        u64 temp_mask;
6848                        int num_values, bit; /* don't clobber byte index i */
6849
6850                        // FIXME: PBM skip for RODTs in mask
6851                        if ((1U << rodt_ctl) & pbm_rodt_skip)
6852                                continue;
6853
6854                        temp_mask = rodt_perfect_counts.mask[i];
6855                        num_values = __builtin_popcountll(temp_mask);
6856                        bit = __builtin_ffsll(temp_mask) - 1;
6857
6858                        debug("N%d.LMC%d.R%d: PERFECT: RODT %3d: Byte %d: mask 0x%02llx (%d): ",
6859                              node, if_num, rankx,
6860                              imp_val->rodt_ohms[rodt_ctl],
6861                              i, temp_mask >> bit, num_values);
6862
6863                        while (temp_mask != 0) {
6864                                bit = __builtin_ffsll(temp_mask) - 1;
6865                                debug("%2d(%2d) ", bit,
6866                                      rodt_perfect_counts.count[i][bit]);
6867                                temp_mask &= ~(1ULL << bit);
6868                        } /* while (temp_mask != 0) */
6869                        debug("\n");
6870                }
6871        }
6872}
6873
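/*
 * Final read-level selection for one rank: scan the RTT_NOM / RODT_CTL
 * rows scored by rodt_loop(), pick the lowest score for this DIMM
 * (preferring higher RODT ohms and the target rank on ties), write that
 * setting to RLEVEL_RANK, then refine each byte delay using the
 * byte-column average, the count of nearby values, a neighbor-byte
 * sanity check and, optionally, a per-rank majority vote.
 */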
6874static void rank_major_loop(struct ddr_priv *priv, int rankx, struct rl_score
6875                            rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
6876{
6877        /* Start with an arbitrarily high score */
6878        int best_rank_score = DEFAULT_BEST_RANK_SCORE;
6879        int best_rank_rtt_nom = 0;
6880        int best_rank_ctl = 0;
6881        int best_rank_ohms = 0;
6882        int best_rankx = 0;
6883        int dimm_rank_mask;
6884        int max_rank_score;
6885        union cvmx_lmcx_rlevel_rankx saved_rl_rank;
6886        int next_ohms;
6887        int orankx;
6888        int next_score = 0;
6889        int best_byte, new_byte, temp_byte, orig_best_byte;
6890        int rank_best_bytes[9];
6891        int byte_sh;
6892        int avg_byte;
6893        int avg_diff;
6894        int i;
6895
6896        if (!(rank_mask & (1 << rankx)))
6897                return;
6898
6899        // some of the rank-related loops below need to operate only on
6900        // the ranks of a single DIMM,
6901        // so create a mask for their use here
6902        if (num_ranks == 4) {
6903                dimm_rank_mask = rank_mask; // should be 1111
6904        } else {
6905                dimm_rank_mask = rank_mask & 3; // should be 01 or 11
6906                if (rankx >= 2) {
6907                        // doing a rank on the second DIMM, should be
6908                        // 0100 or 1100
6909                        dimm_rank_mask <<= 2;
6910                }
6911        }
6912        debug("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n",
6913              dimm_rank_mask, rank_mask, rankx);
6914
6915        // this is the start of the BEST ROW SCORE LOOP
6916
6917        for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6918                rtt_nom = imp_val->rtt_nom_table[rtt_idx];
6919
6920                debug("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n",
6921                      node, if_num, rankx, rtt_nom,
6922                      imp_val->rtt_nom_ohms[rtt_nom]);
6923
6924                for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
6925                     --rodt_ctl) {
6926                        next_ohms = imp_val->rodt_ohms[rodt_ctl];
6927
6928                        // skip RODT rows in mask, but *NOT* rows with too
6929                        // high a score;
6930                        // we will not use the skipped ones for printing or
6931                        // evaluating, but we need to allow all the
6932                        // non-skipped ones to be candidates for "best"
6933                        if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) {
6934                                debug("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d) with rank_score:%d\n",
6935                                      node, if_num, rankx, rodt_ctl,
6936                                      next_ohms, next_score);
6937                                continue;
6938                        }
6939
6940                        // this is ROFFIX-0528
6941                        for (orankx = 0; orankx < dimm_count * 4; orankx++) {
6942                                // stay on the same DIMM
6943                                if (!(dimm_rank_mask & (1 << orankx)))
6944                                        continue;
6945
6946                                next_score = rl_score[rtt_nom][rodt_ctl][orankx].score;
6947
6948                                // always skip a higher score
6949                                if (next_score > best_rank_score)
6950                                        continue;
6951
6952                                // if scores are equal
6953                                if (next_score == best_rank_score) {
6954                                        // always skip lower ohms
6955                                        if (next_ohms < best_rank_ohms)
6956                                                continue;
6957
6958                                        // if same ohms
6959                                        if (next_ohms == best_rank_ohms) {
6960                                                // always skip the other rank(s)
6961                                                if (orankx != rankx)
6962                                                        continue;
6963                                        }
6964                                        // else next_ohms are greater,
6965                                        // always choose it
6966                                }
6967                                // else next_score is less than current best,
6968                                // so always choose it
6969                                debug("N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n",
6970                                      node, if_num, rankx, orankx, rodt_ctl, next_ohms, next_score,
6971                                      best_rank_score, best_rank_ohms);
6972                                best_rank_score     = next_score;
6973                                best_rank_rtt_nom   = rtt_nom;
6974                                //best_rank_nom_ohms  = rtt_nom_ohms;
6975                                best_rank_ctl       = rodt_ctl;
6976                                best_rank_ohms      = next_ohms;
6977                                best_rankx          = orankx;
6978                                rl_rank.u64 =
6979                                        rl_score[rtt_nom][rodt_ctl][orankx].setting;
6980                        }
6981                }
6982        }
6983
6984        // this is the end of the BEST ROW SCORE LOOP
6985
6986        // DANGER, Will Robinson!! Abort now if we did not find a best
6987        // score at all...
6988        if (best_rank_score == DEFAULT_BEST_RANK_SCORE) {
6989                printf("N%d.LMC%d.R%d: WARNING: no best rank score found - resetting node...\n",
6990                       node, if_num, rankx);
6991                mdelay(500);
6992                do_reset(NULL, 0, 0, NULL);
6993        }
6994
6995        // FIXME: relative now, but still arbitrary...
6996        max_rank_score = best_rank_score;
6997        if (ddr_type == DDR4_DRAM) {
6998                // halve the range if 2 DIMMs unless they are single rank...
6999                max_rank_score += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ?
7000                                                           dimm_count : 1));
7001        } else {
7002                // Since DDR3 typically has a wider score range,
7003                // keep more of them always
7004                max_rank_score += MAX_RANK_SCORE_LIMIT;
7005        }
7006
7007        if (!ecc_ena) {
7008                /* ECC is not used */
7009                rl_rank.s.byte8 = rl_rank.s.byte0;
7010        }
7011
7012        // at the end, write the best row settings to the current rank
7013        lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), rl_rank.u64);
7014        rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
7015
7016        saved_rl_rank.u64 = rl_rank.u64;
7017
7018        // this is the start of the PRINT LOOP
7019        int pass;
7020
7021        // for pass==0, print current rank, pass==1 print other rank(s)
7022        // this is done because we want to show each rank's RODT values
7023        // together, not interlaced
7024        // keep separate counters per pass - pass=0 target rank, pass=1
7025        // other rank(s) on the DIMM
7026        int mask_skipped[2] = {0, 0};
7027        int score_skipped[2] = {0, 0};
7028        int selected_rows[2] = {0, 0};
7029        int zero_scores[2] = {0, 0};
7030        for (pass = 0; pass < 2; pass++) {
7031                for (orankx = 0; orankx < dimm_count * 4; orankx++) {
7032                        // stay on the same DIMM
7033                        if (!(dimm_rank_mask & (1 << orankx)))
7034                                continue;
7035
7036                        if ((pass == 0 && orankx != rankx) ||
7037                            (pass != 0 && orankx == rankx))
7038                                continue;
7039
7040                        for (rtt_idx = min_rtt_nom_idx;
7041                             rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
7042                                rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7043                                if (dyn_rtt_nom_mask == 0) {
7044                                        print_nom_ohms = -1;
7045                                } else {
7046                                        print_nom_ohms =
7047                                                imp_val->rtt_nom_ohms[rtt_nom];
7048                                }
7049
7050                                // cycle through all the RODT values...
7051                                for (rodt_ctl = max_rodt_ctl;
7052                                     rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
7053                                        union cvmx_lmcx_rlevel_rankx
7054                                                temp_rl_rank;
7055                                        int temp_score =
7056                                                rl_score[rtt_nom][rodt_ctl][orankx].score;
7057                                        int skip_row;
7058
7059                                        temp_rl_rank.u64 =
7060                                                rl_score[rtt_nom][rodt_ctl][orankx].setting;
7061
7062                                        // skip RODT rows in mask, or rows
7063                                        // with too high a score;
7064                                        // we will not use them for printing
7065                                        // or evaluating...
7066                                        if ((1 << rodt_ctl) &
7067                                            rodt_row_skip_mask) {
7068                                                skip_row = WITH_RODT_SKIPPING;
7069                                                ++mask_skipped[pass];
7070                                        } else if (temp_score >
7071                                                   max_rank_score) {
7072                                                skip_row = WITH_RODT_SKIPPING;
7073                                                ++score_skipped[pass];
7074                                        } else {
7075                                                skip_row = WITH_RODT_BLANK;
7076                                                ++selected_rows[pass];
7077                                                if (temp_score == 0)
7078                                                        ++zero_scores[pass];
7079                                        }
7080
7081                                        // identify and print the BEST ROW
7082                                        // when it comes up
7083                                        if (skip_row == WITH_RODT_BLANK &&
7084                                            best_rankx == orankx &&
7085                                            best_rank_rtt_nom == rtt_nom &&
7086                                            best_rank_ctl == rodt_ctl)
7087                                                skip_row = WITH_RODT_BESTROW;
7088
7089                                        if (rl_print) {
7090                                                display_rl_with_rodt(if_num,
7091                                                                     temp_rl_rank, orankx, temp_score,
7092                                                                     print_nom_ohms,
7093                                                                     imp_val->rodt_ohms[rodt_ctl],
7094                                                                     skip_row);
7095                                        }
7096                                }
7097                        }
7098                }
7099        }
7100        debug("N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n",
7101              node, if_num, rankx, selected_rows[0], selected_rows[1],
7102              zero_scores[0], zero_scores[1], mask_skipped[0], mask_skipped[1],
7103              score_skipped[0], score_skipped[1]);
7104        // this is the end of the PRINT LOOP
7105
7106        // now evaluate which bytes need adjusting
7107        // collect the new byte values; first init with current best for
7108        // neighbor use
7109        for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
7110                rank_best_bytes[i] = (int)(rl_rank.u64 >> byte_sh) &
7111                        RLEVEL_BYTE_MSK;
7112        }
7113
7114        // this is the start of the BEST BYTE LOOP
7115
7116        for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
7117                int sum = 0, count = 0;
7118                int count_less = 0, count_same = 0, count_more = 0;
7119                int count_byte; // save the value we counted around
7120                // for rank majority use
7121                int rank_less = 0, rank_same = 0, rank_more = 0;
7122                int neighbor;
7123                int neigh_byte;
7124
7125                best_byte = rank_best_bytes[i];
7126                orig_best_byte = rank_best_bytes[i];
7127
7128                // this is the start of the BEST BYTE AVERAGING LOOP
7129
7130                // validate the initial "best" byte by looking at the
7131                // average of the unskipped byte-column entries
7132                // we want to do this before we go further, so we can
7133                // try to start with a better initial value
7134                // this is the so-called "BESTBUY" patch set
7135
7136                for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
7137                     ++rtt_idx) {
7138                        rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7139
7140                        for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
7141                             --rodt_ctl) {
7142                                union cvmx_lmcx_rlevel_rankx temp_rl_rank;
7143                                int temp_score;
7144
7145                                // average over all the ranks
7146                                for (orankx = 0; orankx < dimm_count * 4;
7147                                     orankx++) {
7148                                        // stay on the same DIMM
7149                                        if (!(dimm_rank_mask & (1 << orankx)))
7150                                                continue;
7151
7152                                        temp_score =
7153                                                rl_score[rtt_nom][rodt_ctl][orankx].score;
7154                                        // skip RODT rows in mask, or rows with
7155                                        // too high a score;
7156                                        // we will not use them for printing or
7157                                        // evaluating...
7158
7159                                        if (!((1 << rodt_ctl) &
7160                                              rodt_row_skip_mask) &&
7161                                            temp_score <= max_rank_score) {
7162                                                temp_rl_rank.u64 =
7163                                                        rl_score[rtt_nom][rodt_ctl][orankx].setting;
7164                                                temp_byte =
7165                                                        (int)(temp_rl_rank.u64 >> byte_sh) &
7166                                                        RLEVEL_BYTE_MSK;
7167                                                sum += temp_byte;
7168                                                count++;
7169                                        }
7170                                }
7171                        }
7172                }
7173
7174                // this is the end of the BEST BYTE AVERAGING LOOP
7175
7176                // FIXME: validate count and sum??
7177                avg_byte = (int)divide_nint(sum, count);
7178                avg_diff = best_byte - avg_byte;
7179                new_byte = best_byte;
7180                if (avg_diff != 0) {
7181                        // bump best up/dn by 1, not necessarily all the
7182                        // way to avg
7183                        new_byte = best_byte + ((avg_diff > 0) ? -1 : 1);
7184                }
7185
7186                if (rl_print) {
7187                        debug("N%d.LMC%d.R%d: START:   Byte %d: best %d is different by %d from average %d, using %d.\n",
7188                              node, if_num, rankx,
7189                              i, best_byte, avg_diff, avg_byte, new_byte);
7190                }
7191                best_byte = new_byte;
7192                count_byte = new_byte; // save the value we will count around
7193
7194                // At this point best_byte is either:
7195                // 1. the original byte-column value from the best scoring
7196                //    RODT row, OR
7197                // 2. that value bumped toward the average of all the
7198                //    byte-column values
7199                //
7200                // best_byte will not change from here on...
7201
7202                // this is the start of the BEST BYTE COUNTING LOOP
7203
7204                // NOTE: we do this next loop separately from above, because
7205                // we count relative to "best_byte"
7206                // which may have been modified by the above averaging
7207                // operation...
7208
7209                for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
7210                     ++rtt_idx) {
7211                        rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7212
7213                        for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
7214                             --rodt_ctl) {
7215                                union cvmx_lmcx_rlevel_rankx temp_rl_rank;
7216                                int temp_score;
7217
7218                                for (orankx = 0; orankx < dimm_count * 4;
7219                                     orankx++) { // count over all the ranks
7220                                        // stay on the same DIMM
7221                                        if (!(dimm_rank_mask & (1 << orankx)))
7222                                                continue;
7223
7224                                        temp_score =
7225                                                rl_score[rtt_nom][rodt_ctl][orankx].score;
7226                                        // skip RODT rows in mask, or rows
7227                                        // with too high a score;
7228                                        // we will not use them for printing
7229                                        // or evaluating...
7230                                        if (((1 << rodt_ctl) &
7231                                             rodt_row_skip_mask) ||
7232                                            temp_score > max_rank_score)
7233                                                continue;
7234
7235                                        temp_rl_rank.u64 =
7236                                                rl_score[rtt_nom][rodt_ctl][orankx].setting;
7237                                        temp_byte = (temp_rl_rank.u64 >>
7238                                                     byte_sh) & RLEVEL_BYTE_MSK;
7239
7240                                        if (temp_byte == 0)
7241                                                ;  // do not count it if illegal
7242                                        else if (temp_byte == best_byte)
7243                                                count_same++;
7244                                        else if (temp_byte == best_byte - 1)
7245                                                count_less++;
7246                                        else if (temp_byte == best_byte + 1)
7247                                                count_more++;
7248                                        // else do not count anything more
7249                                        // than 1 away from the best
7250
7251                                        // no rank counting if disabled
7252                                        if (disable_rank_majority)
7253                                                continue;
7254
7255                                        // FIXME? count is relative to
7256                                        // best_byte; should it be rank-based?
7257                                        // rank counts only on main rank
7258                                        if (orankx != rankx)
7259                                                continue;
7260                                        else if (temp_byte == best_byte)
7261                                                rank_same++;
7262                                        else if (temp_byte == best_byte - 1)
7263                                                rank_less++;
7264                                        else if (temp_byte == best_byte + 1)
7265                                                rank_more++;
7266                                }
7267                        }
7268                }
7269
7270                if (rl_print) {
7271                        debug("N%d.LMC%d.R%d: COUNT:   Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n",
7272                              node, if_num, rankx,
7273                              i, orig_best_byte, best_byte,
7274                              count_more, count_same, count_less,
7275                              rank_more, rank_same, rank_less);
7276                }
7277
7278                // this is the end of the BEST BYTE COUNTING LOOP
7279
7280                // choose the new byte value
7281                // we need to check that there is no gap greater than 2
7282                // between adjacent bytes (adjacency depends on DIMM type)
7283                // use the neighbor value to help decide
7284                // initially, the rank_best_bytes[] will contain values from
7285                // the chosen lowest score rank
7286                new_byte = 0;
7287
7288                // neighbor is index-1 unless we are index 0 or index 8 (ECC)
7289                neighbor = (i == 8) ? 3 : ((i == 0) ? 1 : i - 1);
7290                neigh_byte = rank_best_bytes[neighbor];
7291
7292                // can go up or down or stay the same, so look at a numeric
7293                // average to help
7294                new_byte = (int)divide_nint(((count_more * (best_byte + 1)) +
7295                                             (count_same * (best_byte + 0)) +
7296                                             (count_less * (best_byte - 1))),
7297                                            max(1, (count_more + count_same +
7298                                                    count_less)));
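                // e.g. best_byte = 10, count_more = 3, count_same = 1,
                // count_less = 0 gives divide_nint(43, 4) = 11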
7299
7300                // use neighbor to help choose with average
7301                if (i > 0 && (abs(neigh_byte - new_byte) > 2) &&
7302                    !disable_sequential_delay_check) {
7303                        // but not for byte 0
7304                        int avg_pick = new_byte;
7305
7306                        if ((new_byte - best_byte) != 0) {
7307                                // back to best, average did not get better
7308                                new_byte = best_byte;
7309                        } else {
7310                                // avg was the same, still too far, now move
7311                                // it towards the neighbor
7312                                new_byte += (neigh_byte > new_byte) ? 1 : -1;
7313                        }
7314
7315                        if (rl_print) {
7316                                debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n",
7317                                      node, if_num, rankx,
7318                                      i, neighbor, neigh_byte, avg_pick,
7319                                      new_byte);
7320                        }
7321                } else {
7322                        // NOTE:
7323                        // For now, we let the neighbor processing above trump
7324                        // the new simple majority processing here.
7325                        // This is mostly because we have seen no smoking gun
7326                        // for a neighbor bad choice (yet?).
7327                        // Also note that we will ALWAYS be using byte 0
7328                        // majority, because of the if clause above.
7329
7330                        // majority is dependent on the counts, which are
7331                        // relative to best_byte, so start there
7332                        int maj_byte = best_byte;
7333                        int rank_maj;
7334                        int rank_sum;
7335
7336                        if (count_more > count_same &&
7337                            count_more > count_less) {
7338                                maj_byte++;
7339                        } else if (count_less > count_same &&
7340                                   count_less > count_more) {
7341                                maj_byte--;
7342                        }
7343
7344                        if (maj_byte != new_byte) {
7345                                // print only when majority choice is
7346                                // different from average
7347                                if (rl_print) {
7348                                        debug("N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n",
7349                                              node, if_num, rankx, i, maj_byte,
7350                                              new_byte);
7351                                }
7352                                new_byte = maj_byte;
7353                        } else {
7354                                if (rl_print) {
7355                                        debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
7356                                              node, if_num, rankx, i, new_byte);
7357                                }
7358                        }
7359
7360                        if (!disable_rank_majority) {
7361                                // rank majority is dependent on the rank
7362                                // counts, which are relative to best_byte,
7363                                // so start there, and adjust according to the
7364                                // rank counts majority
7365                                rank_maj = best_byte;
7366                                if (rank_more > rank_same &&
7367                                    rank_more > rank_less) {
7368                                        rank_maj++;
7369                                } else if (rank_less > rank_same &&
7370                                           rank_less > rank_more) {
7371                                        rank_maj--;
7372                                }
7373                                rank_sum = rank_more + rank_same + rank_less;
7374
7375                                // now, let rank majority possibly rule over
7376                                // the current new_byte however we got it
7377                                if (rank_maj != new_byte) { // only if different
7378                                        // Here is where we decide whether to
7379                                        // completely apply RANK_MAJORITY or not
7380                                        // ignore if less than
7381                                        if (rank_maj < new_byte) {
7382                                                if (rl_print) {
7383                                                        debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: LESS: NOT using %d over %d.\n",
7384                                                              node, if_num,
7385                                                              rankx, i,
7386                                                              rank_maj,
7387                                                              new_byte);
7388                                                }
7389                                        } else {
7390                                                // For the moment, we do it
7391                                                // ONLY when running 2-slot
7392                                                // configs
7393                                                //  OR when rank_sum is big
7394                                                // enough
7395                                                if (dimm_count > 1 ||
7396                                                    rank_sum > 2) {
7397                                                        // print only when rank
7398                                                        // majority choice is
7399                                                        // selected
7400                                                        if (rl_print) {
7401                                                                debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n",
7402                                                                      node,
7403                                                                      if_num,
7404                                                                      rankx,
7405                                                                      i,
7406                                                                      rank_maj,
7407                                                                      new_byte);
7408                                                        }
7409                                                        new_byte = rank_maj;
7410                                                } else {
7411                                                        // FIXME: print some
7412                                                        // info when we could
7413                                                        // have chosen RANKMAJ
7414                                                        // but did not
7415                                                        if (rl_print) {
7416                                                                debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n",
7417                                                                      node,
7418                                                                      if_num,
7419                                                                      rankx,
7420                                                                      i,
7421                                                                      rank_maj,
7422                                                                      new_byte,
7423                                                                      best_byte,
7424                                                                      rank_sum);
7425                                                        }
7426                                                }
7427                                        }
7428                                }
7429                        } /* if (!disable_rank_majority) */
7430                }
7431                // one last check:
7432                // if new_byte is still count_byte, BUT there was no count
7433                // for that value, DO SOMETHING!!!
7434                // FIXME: go back to original best byte from the best row
7435                if (new_byte == count_byte && count_same == 0) {
7436                        new_byte = orig_best_byte;
7437                        if (rl_print) {
7438                                debug("N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n",
7439                                      node, if_num, rankx, i, new_byte);
7440                        }
7441                }
7442                // Look at counts for "perfect" bitmasks (PBMs) if we had
7443                // any for this byte-lane.
7444                // Remember, we only counted for DDR4, so zero means none
7445                // or DDR3, and we bypass this...
7446                value_mask = rank_perf[rankx].mask[i];
7447                disable_rlv_bump_this_byte = 0;
7448
7449                if (value_mask != 0 && rl_ctl.cn78xx.offset == 1) {
7450                        int i, delay_count, delay_max = 0, del_val = 0;
7451                        int num_values = __builtin_popcountll(value_mask);
7452                        int sum_counts = 0;
7453                        u64 temp_mask = value_mask;
7454
7455                        disable_rlv_bump_this_byte = 1;
7456                        i = __builtin_ffsll(temp_mask) - 1;
7457                        if (rl_print)
7458                                debug("N%d.LMC%d.R%d: PERFECT: Byte %d: OFF1: mask 0x%02llx (%d): ",
7459                                      node, if_num, rankx, i, value_mask >> i,
7460                                      num_values);
7461
7462                        while (temp_mask != 0) {
7463                                i = __builtin_ffsll(temp_mask) - 1;
7464                                delay_count = rank_perf[rankx].count[i][i];
7465                                sum_counts += delay_count;
7466                                if (rl_print)
7467                                        debug("%2d(%2d) ", i, delay_count);
7468                                if (delay_count >= delay_max) {
7469                                        delay_max = delay_count;
7470                                        del_val = i;
7471                                }
7472                                temp_mask &= ~(1UL << i);
7473                        } /* while (temp_mask != 0) */
7474
7475                        // if sum_counts is small, just use NEW_BYTE
7476                        if (sum_counts < pbm_lowsum_limit) {
7477                                if (rl_print)
7478                                        debug(": LOWSUM (%2d), choose ORIG ",
7479                                              sum_counts);
7480                                del_val = new_byte;
7481                                delay_max = rank_perf[rankx].count[i][del_val];
7482                        }
7483
7484                        // finish printing here...
7485                        if (rl_print) {
7486                                debug(": USING %2d (%2d) D%d\n", del_val,
7487                                      delay_max, disable_rlv_bump_this_byte);
7488                        }
7489
7490                        new_byte = del_val; // override with best PBM choice
7491
7492                } else if ((value_mask != 0) && (rl_ctl.cn78xx.offset == 2)) {
7494                        int i, delay_count, del_val;
7495                        int num_values = __builtin_popcountll(value_mask);
7496                        int sum_counts = 0;
7497                        u64 temp_mask = value_mask;
7498
7499                        i = __builtin_ffsll(temp_mask) - 1;
7500                        if (rl_print)
7501                                debug("N%d.LMC%d.R%d: PERFECT: Byte %d: mask 0x%02llx (%d): ",
7502                                      node, if_num, rankx, i, value_mask >> i,
7503                                      num_values);
7504                        while (temp_mask != 0) {
7505                                i = __builtin_ffsll(temp_mask) - 1;
7506                                delay_count = rank_perf[rankx].count[i][i];
7507                                sum_counts += delay_count;
7508                                if (rl_print)
7509                                        debug("%2d(%2d) ", i, delay_count);
7510                                temp_mask &= ~(1UL << i);
7511                        } /* while (temp_mask != 0) */
7512
7513                        del_val = __builtin_ffsll(value_mask) - 1;
7514                        delay_count =
7515                                rank_perf[rankx].count[i][del_val];
7516
7517                        // overkill, normally only 1-4 bits
7518                        i = (value_mask >> del_val) & 0x1F;
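                            // editor's note: after the shift, bit 0 of the
                            // normalized mask corresponds to del_val (the lowest
                            // perfect delay), so the switch below can pattern-match
                            // the low 5 bits directly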
7519
7520                        // if sum_counts is small, treat as special and use
7521                        // NEW_BYTE
7522                        if (sum_counts < pbm_lowsum_limit) {
7523                                if (rl_print)
7524                                        debug(": LOWSUM (%2d), choose ORIG",
7525                                              sum_counts);
7526                                i = 99; // SPECIAL case...
7527                        }
7528
7529                        switch (i) {
7530                        case 0x01 /* 00001b */:
7531                                // allow BUMP
7532                                break;
7533
7534                        case 0x13 /* 10011b */:
7535                        case 0x0B /* 01011b */:
7536                        case 0x03 /* 00011b */:
7537                                del_val += 1; // take the second
7538                                disable_rlv_bump_this_byte = 1; // allow no BUMP
7539                                break;
7540
7541                        case 0x0D /* 01101b */:
7542                        case 0x05 /* 00101b */:
7543                                // test count of lowest and all
7544                                if (delay_count >= 5 || sum_counts <= 5)
7545                                        del_val += 1; // take the hole
7546                                else
7547                                        del_val += 2; // take the next set
7548                                disable_rlv_bump_this_byte = 1; // allow no BUMP
7549                                break;
7550
7551                        case 0x0F /* 01111b */:
7552                        case 0x17 /* 10111b */:
7553                        case 0x07 /* 00111b */:
7554                                del_val += 1; // take the second
7555                                if (delay_count < 5) { // lowest count is small
7556                                        int second =
7557                                                rank_perf[rankx].count[i][del_val];
7558                                        int third =
7559                                                rank_perf[rankx].count[i][del_val + 1];
7560                                        // test if middle is more than 1 OR
7561                                        // top is more than 1;
7562                                        // this means if they are BOTH 1,
7563                                        // then we keep the second...
7564                                        if (second > 1 || third > 1) {
7565                                                // if middle is small OR top
7566                                                // is large
7567                                                if (second < 5 ||
7568                                                    third > 1) {
7569                                                        // take the top
7570                                                        del_val += 1;
7571                                                        if (rl_print)
7572                                                                debug(": TOP7 ");
7573                                                }
7574                                        }
7575                                }
7576                                disable_rlv_bump_this_byte = 1; // allow no BUMP
7577                                break;
7578
7579                        default: // all others...
7580                                if (rl_print)
7581                                        debug(": ABNORMAL, choose ORIG");
7582                                /* fall through */
7583                        case 99: // special
7584                                 // FIXME: choose original choice?
7585                                del_val = new_byte;
7586                                disable_rlv_bump_this_byte = 1; // allow no BUMP
7587                                break;
7588                        }
7589                        delay_count =
7590                                rank_perf[rankx].count[i][del_val];
7591
7592                        // finish printing here...
7593                        if (rl_print)
7594                                debug(": USING %2d (%2d) D%d\n", del_val,
7595                                      delay_count, disable_rlv_bump_this_byte);
7596                        new_byte = del_val; // override with best PBM choice
7597                } else {
7598                        if (ddr_type == DDR4_DRAM) { // only report when DDR4
7599                                // FIXME: remove or increase VBL for this
7600                                // output...
7601                                if (rl_print)
7602                                        debug("N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO PBMs, USING %d\n",
7603                                              node, if_num, rankx, i,
7604                                              new_byte);
7605                                // prevent ODD bump, rely on original
7606                                disable_rlv_bump_this_byte = 1;
7607                        }
7608                } /* if (value_mask != 0) */
7609
7610                // optionally bump the delay value
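                    // editor's note: enable_rldelay_bump doubles as a bit mask;
                    // the bump is applied only when the low bits of new_byte
                    // match it (e.g. mask 3 bumps only values whose two low
                    // bits are both set)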
7611                if (enable_rldelay_bump && !disable_rlv_bump_this_byte) {
7612                        if ((new_byte & enable_rldelay_bump) ==
7613                            enable_rldelay_bump) {
7614                                int bump_value = new_byte + rldelay_bump_incr;
7615
7616                                if (rl_print) {
7617                                        debug("N%d.LMC%d.R%d: RLVBUMP: Byte %d: CHANGING %d to %d (%s)\n",
7618                                              node, if_num, rankx, i,
7619                                              new_byte, bump_value,
7620                                              (value_mask &
7621                                               (1 << bump_value)) ?
7622                                              "PBM" : "NOPBM");
7623                                }
7624                                new_byte = bump_value;
7625                        }
7626                }
7627
7628                // last checks for count-related purposes
7629                if (new_byte == best_byte && count_more > 0 &&
7630                    count_less == 0) {
7631                        // we really should take best_byte + 1
7632                        if (rl_print) {
7633                                debug("N%d.LMC%d.R%d: CADJMOR: Byte %d: CHANGING %d to %d\n",
7634                                      node, if_num, rankx, i,
7635                                      new_byte, best_byte + 1);
7636                        }
7637                        new_byte = best_byte + 1;
7638                } else if ((new_byte < best_byte) && (count_same > 0)) {
7639                        // we really should take best_byte
7640                        if (rl_print) {
7641                                debug("N%d.LMC%d.R%d: CADJSAM: Byte %d: CHANGING %d to %d\n",
7642                                      node, if_num, rankx, i,
7643                                      new_byte, best_byte);
7644                        }
7645                        new_byte = best_byte;
7646                } else if (new_byte > best_byte) {
7647                        if ((new_byte == (best_byte + 1)) &&
7648                            count_more == 0 && count_less > 0) {
7649                                // we really should take best_byte
7650                                if (rl_print) {
7651                                        debug("N%d.LMC%d.R%d: CADJLE1: Byte %d: CHANGING %d to %d\n",
7652                                              node, if_num, rankx, i,
7653                                              new_byte, best_byte);
7654                                }
7655                                new_byte = best_byte;
7656                        } else if ((new_byte >= (best_byte + 2)) &&
7657                                   ((count_more > 0) || (count_same > 0))) {
7658                                if (rl_print) {
7659                                        debug("N%d.LMC%d.R%d: CADJLE2: Byte %d: CHANGING %d to %d\n",
7660                                              node, if_num, rankx, i,
7661                                              new_byte, best_byte + 1);
7662                                }
7663                                new_byte = best_byte + 1;
7664                        }
7665                }
7666
7667                if (rl_print) {
7668                        debug("N%d.LMC%d.R%d: SUMMARY: Byte %d: orig %d now %d, more %d same %d less %d, using %d\n",
7669                              node, if_num, rankx, i, orig_best_byte,
7670                              best_byte, count_more, count_same, count_less,
7671                              new_byte);
7672                }
7673
7674                // update the byte with the new value (NOTE: orig value in
7675                // the CSR may not be current "best")
7676                upd_rl_rank(&rl_rank, i, new_byte);
7677
7678                // save new best for neighbor use
7679                rank_best_bytes[i] = new_byte;
7680        } /* for (i = 0; i < 8+ecc_ena; i++) */
7681
7682        ////////////////// this is the end of the BEST BYTE LOOP
7683
7684        if (saved_rl_rank.u64 != rl_rank.u64) {
7685                lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
7686                       rl_rank.u64);
7687                rl_rank.u64 = lmc_rd(priv,
7688                                     CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
7689                debug("Adjusting Read-Leveling per-RANK settings.\n");
7690        } else {
7691                debug("Not Adjusting Read-Leveling per-RANK settings.\n");
7692        }
7693        display_rl_with_final(if_num, rl_rank, rankx);
7694
7695        // FIXME: does this help make the output a little easier to focus?
7696        if (rl_print > 0)
7697                debug("-----------\n");
7698
7699#define RLEVEL_RANKX_EXTRAS_INCR  0
7700        // if there are unused entries to be filled
7701        if ((rank_mask & 0x0f) != 0x0f) {
7702                // copy the current rank
7703                union cvmx_lmcx_rlevel_rankx temp_rl_rank = rl_rank;
7704
7705                if (rankx < 3) {
7706#if RLEVEL_RANKX_EXTRAS_INCR > 0
7707                        int byte, delay;
7708
7709                        // modify the copy in prep for writing to empty slot(s)
7710                        for (byte = 0; byte < 9; byte++) {
7711                                delay = get_rl_rank(&temp_rl_rank, byte) +
7712                                        RLEVEL_RANKX_EXTRAS_INCR;
7713                                if (delay > RLEVEL_BYTE_MSK)
7714                                        delay = RLEVEL_BYTE_MSK;
7715                                upd_rl_rank(&temp_rl_rank, byte, delay);
7716                        }
7717#endif
7718
7719                        // if rank 0, write rank 1 and rank 2 here if empty
7720                        if (rankx == 0) {
7721                                // check that rank 1 is empty
7722                                if (!(rank_mask & (1 << 1))) {
7723                                        debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7724                                              node, if_num, rankx, 1);
7725                                        lmc_wr(priv,
7726                                               CVMX_LMCX_RLEVEL_RANKX(1,
7727                                                                      if_num),
7728                                               temp_rl_rank.u64);
7729                                }
7730
7731                                // check that rank 2 is empty
7732                                if (!(rank_mask & (1 << 2))) {
7733                                        debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7734                                              node, if_num, rankx, 2);
7735                                        lmc_wr(priv,
7736                                               CVMX_LMCX_RLEVEL_RANKX(2,
7737                                                                      if_num),
7738                                               temp_rl_rank.u64);
7739                                }
7740                        }
7741
7742                        // if ranks 0, 1 or 2, write rank 3 here if empty
7743                        // check that rank 3 is empty
7744                        if (!(rank_mask & (1 << 3))) {
7745                                debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7746                                      node, if_num, rankx, 3);
7747                                lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(3, if_num),
7748                                       temp_rl_rank.u64);
7749                        }
7750                }
7751        }
7752}
7753
7754static void lmc_read_leveling(struct ddr_priv *priv)
7755{
7756        struct rl_score rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4];
7757        union cvmx_lmcx_control ctl;
7758        union cvmx_lmcx_config cfg;
7759        int rankx;
7760        char *s;
7761        int i;
7762
7763        /*
7764         * 4.8.10 LMC Read Leveling
7765         *
7766         * LMC supports an automatic read-leveling separately per byte-lane
7767         * using the DDR3 multipurpose register predefined pattern for system
7768         * calibration defined in the JEDEC DDR3 specifications.
7769         *
7770         * All of the DDR PLL, LMC CK, LMC DRESET, and early LMC initializations
7771         * must be completed prior to starting this LMC read-leveling sequence.
7772         *
7773         * Software could simply write the desired read-leveling values into
7774         * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses
7775         * LMC's auto read-leveling capabilities.
7776         *
7777         * When LMC does the read-leveling sequence for a rank, it first enables
7778         * the DDR3 multipurpose register predefined pattern for system
7779         * calibration on the selected DRAM rank via a DDR3 MR3 write, then
7780         * executes 64 RD operations at different internal delay settings, then
7781         * disables the predefined pattern via another DDR3 MR3 write
7782         * operation. LMC determines the pass or fail of each of the 64 settings
7783         * independently for each byte lane, then writes appropriate
7784         * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank.
7785         *
7786         * After read-leveling for a rank, software can read the 64 pass/fail
7787         * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK].
7788         * Software can observe all pass/fail results for all byte lanes in a
7789         * rank via separate read-leveling sequences on the rank with different
7790         * LMC(0)_RLEVEL_CTL[BYTE] values.
7791         *
7792         * The 64 pass/fail results will typically have failures for the low
7793         * delays, followed by a run of some passing settings, followed by more
7794         * failures in the remaining high delays.  LMC sets
7795         * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings.
7796         * First, LMC selects the longest run of successes in the 64 results.
7797         * (In the unlikely event that there is more than one longest run, LMC
7798         * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and
7799         * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes,
7800         * LMC selects the last passing setting in the run minus
7801         * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting
7802         * in the run (rounding earlier when necessary). We expect the
7803         * read-leveling sequence to produce good results with the reset values
7804         * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2.
7805         *
7806         * The read-leveling sequence has the following steps:
7807         *
7808         * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings.
7809         *    Do the remaining substeps 2-4 separately for each rank i with
7810         *    attached DRAM.
7811         *
7812         * 2. Without changing any other fields in LMC(0)_CONFIG,
7813         *
7814         *    o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling
7815         *
7816         *    o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
7817         *
7818         *    o write LMC(0)_SEQ_CTL[INIT_START] = 1
7819         *
7820         *    This initiates the previously-described read-leveling.
7821         *
7822         * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2
7823         *
7824         *    LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte
7825         *    lanes at this point.
7826         *
7827         *    If ECC DRAM is not present (i.e. when DRAM is not attached to the
7828         *    DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
7829         *    DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] =
7830         *    LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] =
7831         *    LMC(0)_RLEVEL_RANK*[BYTE0].
7832         *
7833         * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to
7834         *    LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by
7835         *    LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify
7836         *    LMC(0)_RLEVEL_CTL[BYTE] to a new value and repeat so that all
7837         *    BITMASKs can be observed.
7838         *
7839         * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks.
7840         *
7841         *    Let rank i be a rank with attached DRAM.
7842         *
7843         *    For all ranks j that do not have attached DRAM, set
7844         *    LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi.
7845         *
7846         * This read-leveling sequence can help select the proper CN70XX ODT
7847         * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated
7848         * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is
7849         * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk]
7850         * (for a used byte lane k) can indicate that the CN70XX ODT value is
7851         * bad. It is possible to simultaneously optimize both
7852         * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by
7853         * performing this read-leveling sequence for several
7854         * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the
7855         * best LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks.
7856         */
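            /*
             * Illustrative example of the selection rule above (editor's
             * note, not part of the original text): if the longest passing
             * run spans delays 14 through 25, then with OFFSET_EN = 1 and
             * OFFSET = 2 LMC picks 25 - 2 = 23; with OFFSET_EN = 0 it picks
             * the middle of the run, 19 (rounding earlier, i.e. down).
             */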
7857
7858        rl_rodt_err = 0;
7859        rl_dbg_loops = 1;
7860        saved_int_zqcs_dis = 0;
7861        max_adj_rl_del_inc = 0;
7862        rl_print = RLEVEL_PRINTALL_DEFAULT;
7863
7864#ifdef ENABLE_HARDCODED_RLEVEL
7865        char part_number[21] = {0};
7866#endif /* ENABLE_HARDCODED_RLEVEL */
7867
7868        pbm_lowsum_limit = 5; // FIXME: is this a good default?
7869        // FIXME: PBM skip for RODT 240 and 34
7870        pbm_rodt_skip = (1U << ddr4_rodt_ctl_240_ohm) |
7871                (1U << ddr4_rodt_ctl_34_ohm);
7872
7873        disable_rank_majority = 0; // control rank majority processing
7874
7875        // default to bump mask 11b (3) for DDR4, or 01b (1) on 73xx;
7876        // disable (0) for DDR3
7877        rldelay_bump_incr = 0;
7878        disable_rlv_bump_this_byte = 0;
7879
7880        enable_rldelay_bump = (ddr_type == DDR4_DRAM) ?
7881                ((octeon_is_cpuid(OCTEON_CN73XX)) ? 1 : 3) : 0;
7882
7883        s = lookup_env(priv, "ddr_disable_rank_majority");
7884        if (s)
7885                disable_rank_majority = !!simple_strtoul(s, NULL, 0);
7886
7887        s = lookup_env(priv, "ddr_pbm_lowsum_limit");
7888        if (s)
7889                pbm_lowsum_limit = simple_strtoul(s, NULL, 0);
7890
7891        s = lookup_env(priv, "ddr_pbm_rodt_skip");
7892        if (s)
7893                pbm_rodt_skip = simple_strtoul(s, NULL, 0);
7894        memset(rank_perf, 0, sizeof(rank_perf));
7895
7896        ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
7897        save_ddr2t = ctl.cn78xx.ddr2t;
7898
7899        cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
7900        ecc_ena = cfg.cn78xx.ecc_ena;
7901
7902        s = lookup_env(priv, "ddr_rlevel_2t");
7903        if (s)
7904                ctl.cn78xx.ddr2t = simple_strtoul(s, NULL, 0);
7905
7906        lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
7907
7908        debug("LMC%d: Performing Read-Leveling\n", if_num);
7909
7910        rl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
7911
7912        rl_samples = c_cfg->rlevel_average_loops;
7913        if (rl_samples == 0) {
7914                rl_samples = RLEVEL_SAMPLES_DEFAULT;
7915                // up the samples for these cases
7916                if (dimm_count == 1 || num_ranks == 1)
7917                        rl_samples = rl_samples * 2 + 1;
7918        }
7919
7920        rl_compute = c_cfg->rlevel_compute;
7921        rl_ctl.cn78xx.offset_en = c_cfg->offset_en;
7922        rl_ctl.cn78xx.offset    = spd_rdimm
7923                ? c_cfg->offset_rdimm
7924                : c_cfg->offset_udimm;
7925
7926        int value = 1; // should ALWAYS be set
7927
7928        s = lookup_env(priv, "ddr_rlevel_delay_unload");
7929        if (s)
7930                value = !!simple_strtoul(s, NULL, 0);
7931        rl_ctl.cn78xx.delay_unload_0 = value;
7932        rl_ctl.cn78xx.delay_unload_1 = value;
7933        rl_ctl.cn78xx.delay_unload_2 = value;
7934        rl_ctl.cn78xx.delay_unload_3 = value;
7935
7936        // use OR_DIS=1 to try for better results
7937        rl_ctl.cn78xx.or_dis = 1;
7938
7939        /*
7940         * If we will be switching to 32-bit mode, level based on only
7941         * four bits, because there are only 4 ECC bits.
7942         */
7943        rl_ctl.cn78xx.bitmask = (if_64b) ? 0xFF : 0x0F;
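            // editor's note: presumably 0xFF includes all 8 DQ bits of each
            // byte lane in the leveling feedback, while 0x0F restricts it to
            // the 4 bits that carry ECC data in 32-bit mode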
7944
7945        // allow overrides
7946        s = lookup_env(priv, "ddr_rlevel_ctl_or_dis");
7947        if (s)
7948                rl_ctl.cn78xx.or_dis = simple_strtoul(s, NULL, 0);
7949
7950        s = lookup_env(priv, "ddr_rlevel_ctl_bitmask");
7951        if (s)
7952                rl_ctl.cn78xx.bitmask = simple_strtoul(s, NULL, 0);
7953
7954        rl_comp_offs = spd_rdimm
7955                ? c_cfg->rlevel_comp_offset_rdimm
7956                : c_cfg->rlevel_comp_offset_udimm;
7957        s = lookup_env(priv, "ddr_rlevel_comp_offset");
7958        if (s)
7959                rl_comp_offs = strtoul(s, NULL, 0);
7960
7961        s = lookup_env(priv, "ddr_rlevel_offset");
7962        if (s)
7963                rl_ctl.cn78xx.offset   = simple_strtoul(s, NULL, 0);
7964
7965        s = lookup_env(priv, "ddr_rlevel_offset_en");
7966        if (s)
7967                rl_ctl.cn78xx.offset_en   = simple_strtoul(s, NULL, 0);
7968
7969        s = lookup_env(priv, "ddr_rlevel_ctl");
7970        if (s)
7971                rl_ctl.u64   = simple_strtoul(s, NULL, 0);
7972
7973        lmc_wr(priv,
7974               CVMX_LMCX_RLEVEL_CTL(if_num),
7975               rl_ctl.u64);
7976
7977        // do this here so we can look at final RLEVEL_CTL[offset] setting...
7978        s = lookup_env(priv, "ddr_enable_rldelay_bump");
7979        if (s) {
7980                // also use as mask bits
7981                enable_rldelay_bump = strtoul(s, NULL, 0);
7982        }
7983
7984        if (enable_rldelay_bump != 0)
7985                rldelay_bump_incr = (rl_ctl.cn78xx.offset == 1) ? -1 : 1;
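        // editor's note: per the sequence description above, OFFSET = 1 makes
        // the hardware pick a setting near the upper edge of the passing
        // window, so the bump direction is presumably reversed to step back
        // toward the window rather than past it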
7986
7987        s = lookup_env(priv, "ddr%d_rlevel_debug_loops", if_num);
7988        if (s)
7989                rl_dbg_loops = simple_strtoul(s, NULL, 0);
7990
7991        s = lookup_env(priv, "ddr_rtt_nom_auto");
7992        if (s)
7993                ddr_rtt_nom_auto = !!simple_strtoul(s, NULL, 0);
7994
7995        s = lookup_env(priv, "ddr_rlevel_average");
7996        if (s)
7997                rl_samples = simple_strtoul(s, NULL, 0);
7998
7999        s = lookup_env(priv, "ddr_rlevel_compute");
8000        if (s)
8001                rl_compute = simple_strtoul(s, NULL, 0);
8002
8003        s = lookup_env(priv, "ddr_rlevel_printall");
8004        if (s)
8005                rl_print = simple_strtoul(s, NULL, 0);
8006
8007        debug("RLEVEL_CTL                                    : 0x%016llx\n",
8008              rl_ctl.u64);
8009        debug("RLEVEL_OFFSET                                 : %6d\n",
8010              rl_ctl.cn78xx.offset);
8011        debug("RLEVEL_OFFSET_EN                              : %6d\n",
8012              rl_ctl.cn78xx.offset_en);
8013
8014        /*
8015         * The purpose for the indexed table is to sort the settings
8016         * by the ohm value to simplify the testing when incrementing
8017         * through the settings.  (index => ohms) 1=120, 2=60, 3=40,
8018         * 4=30, 5=20
8019         */
8020        min_rtt_nom_idx = (c_cfg->min_rtt_nom_idx == 0) ?
8021                1 : c_cfg->min_rtt_nom_idx;
8022        max_rtt_nom_idx = (c_cfg->max_rtt_nom_idx == 0) ?
8023                5 : c_cfg->max_rtt_nom_idx;
8024
8025        min_rodt_ctl = (c_cfg->min_rodt_ctl == 0) ? 1 : c_cfg->min_rodt_ctl;
8026        max_rodt_ctl = (c_cfg->max_rodt_ctl == 0) ? 5 : c_cfg->max_rodt_ctl;
8027
8028        s = lookup_env(priv, "ddr_min_rodt_ctl");
8029        if (s)
8030                min_rodt_ctl = simple_strtoul(s, NULL, 0);
8031
8032        s = lookup_env(priv, "ddr_max_rodt_ctl");
8033        if (s)
8034                max_rodt_ctl = simple_strtoul(s, NULL, 0);
8035
8036        s = lookup_env(priv, "ddr_min_rtt_nom_idx");
8037        if (s)
8038                min_rtt_nom_idx = simple_strtoul(s, NULL, 0);
8039
8040        s = lookup_env(priv, "ddr_max_rtt_nom_idx");
8041        if (s)
8042                max_rtt_nom_idx = simple_strtoul(s, NULL, 0);
8043
8044#ifdef ENABLE_HARDCODED_RLEVEL
8045        if (c_cfg->rl_tbl) {
8046                /* Check for hard-coded read-leveling settings */
8047                get_dimm_part_number(part_number, &dimm_config_table[0],
8048                                     0, ddr_type);
8049                for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8050                        if (!(rank_mask & (1 << rankx)))
8051                                continue;
8052
8053                        rl_rank.u64 = lmc_rd(priv,
8054                                             CVMX_LMCX_RLEVEL_RANKX(rankx,
8055                                                                    if_num));
8056
8057                        i = 0;
8058                        while (c_cfg->rl_tbl[i].part) {
8059                                debug("DIMM part number:\"%s\", SPD: \"%s\"\n",
8060                                      c_cfg->rl_tbl[i].part, part_number);
8061                                if ((strcmp(part_number,
8062                                            c_cfg->rl_tbl[i].part) == 0) &&
8063                                    (abs(c_cfg->rl_tbl[i].speed -
8064                                         2 * ddr_hertz / (1000 * 1000)) < 10)) {
8065                                        debug("Using hard-coded read leveling for DIMM part number: \"%s\"\n",
8066                                              part_number);
8067                                        rl_rank.u64 =
8068                                                c_cfg->rl_tbl[i].rl_rank[if_num][rankx];
8069                                        lmc_wr(priv,
8070                                               CVMX_LMCX_RLEVEL_RANKX(rankx,
8071                                                                      if_num),
8072                                               rl_rank.u64);
8073                                        rl_rank.u64 =
8074                                                lmc_rd(priv,
8075                                                       CVMX_LMCX_RLEVEL_RANKX(rankx,
8076                                                                              if_num));
8077                                        display_rl(if_num, rl_rank, rankx);
8078                                        /* Disable h/w read-leveling */
8079                                        rl_dbg_loops = 0;
8080                                        break;
8081                                }
8082                                ++i;
8083                        }
8084                }
8085        }
8086#endif /* ENABLE_HARDCODED_RLEVEL */
8087
8088        max_adj_rl_del_inc = c_cfg->maximum_adjacent_rlevel_delay_increment;
8089        s = lookup_env(priv, "ddr_maximum_adjacent_rlevel_delay_increment");
8090        if (s)
8091                max_adj_rl_del_inc = strtoul(s, NULL, 0);
8092
8093        while (rl_dbg_loops--) {
8094                union cvmx_lmcx_modereg_params1 mp1;
8095                union cvmx_lmcx_comp_ctl2 cc2;
8096
8097                /* Initialize the error scoreboard */
8098                memset(rl_score, 0, sizeof(rl_score));
8099
8100                cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8101                saved_ddr__ptune = cc2.cn78xx.ddr__ptune;
8102                saved_ddr__ntune = cc2.cn78xx.ddr__ntune;
8103
8104                /* Disable dynamic compensation settings */
8105                if (rl_comp_offs != 0) {
8106                        cc2.cn78xx.ptune = saved_ddr__ptune;
8107                        cc2.cn78xx.ntune = saved_ddr__ntune;
8108
8109                        /*
8110                         * Round up the ptune calculation to bias the odd
8111                         * cases toward ptune
8112                         */
8113                        cc2.cn78xx.ptune += divide_roundup(rl_comp_offs, 2);
8114                        cc2.cn78xx.ntune -= rl_comp_offs / 2;
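                            // editor's note, illustrative only: with
                            // rl_comp_offs = 3 this adds 2 to ptune and subtracts
                            // 1 from ntune, biasing the odd half toward ptune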
8115
8116                        ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8117                        saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
8118                        /* Disable ZQCS while in bypass. */
8119                        ctl.s.int_zqcs_dis = 1;
8120                        lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8121
8122                        cc2.cn78xx.byp = 1; /* Enable bypass mode */
8123                        lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8124                        lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8125                        /* Read again */
8126                        cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8127                        debug("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
8128                              cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
8129                }
8130
8131                mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
8132
8133                for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
8134                     ++rtt_idx) {
8135                        rtt_nom = imp_val->rtt_nom_table[rtt_idx];
8136
8137                        /*
8138                         * When the read ODT mask is zero, the dyn_rtt_nom_mask
8139                         * is also zero, and RTT_NOM will not be changing during
8140                         * read-leveling.  Since the value is fixed we only need
8141                         * to test it once.
8142                         */
8143                        if (dyn_rtt_nom_mask == 0) {
8144                                // flag not to print NOM ohms
8145                                print_nom_ohms = -1;
8146                        } else {
8147                                if (dyn_rtt_nom_mask & 1)
8148                                        mp1.s.rtt_nom_00 = rtt_nom;
8149                                if (dyn_rtt_nom_mask & 2)
8150                                        mp1.s.rtt_nom_01 = rtt_nom;
8151                                if (dyn_rtt_nom_mask & 4)
8152                                        mp1.s.rtt_nom_10 = rtt_nom;
8153                                if (dyn_rtt_nom_mask & 8)
8154                                        mp1.s.rtt_nom_11 = rtt_nom;
8155                                // FIXME? rank 0 ohms always?
8156                                print_nom_ohms =
8157                                        imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00];
8158                        }
8159
8160                        lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
8161                               mp1.u64);
8162
8163                        if (print_nom_ohms >= 0 && rl_print > 1) {
8164                                debug("\n");
8165                                debug("RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
8166                                      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
8167                                      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
8168                                      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
8169                                      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
8170                                      mp1.s.rtt_nom_11,
8171                                      mp1.s.rtt_nom_10,
8172                                      mp1.s.rtt_nom_01,
8173                                      mp1.s.rtt_nom_00);
8174                        }
8175
8176                        ddr_init_seq(priv, rank_mask, if_num);
8177
8178                        // Try RANK outside RODT to rearrange the output...
8179                        for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8180                                if (!(rank_mask & (1 << rankx)))
8181                                        continue;
8182
8183                                for (rodt_ctl = max_rodt_ctl;
8184                                     rodt_ctl >= min_rodt_ctl; --rodt_ctl)
8185                                        rodt_loop(priv, rankx, rl_score);
8186                        }
8187                }
8188
8189                /* Re-enable dynamic compensation settings. */
8190                if (rl_comp_offs != 0) {
8191                        cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8192
8193                        cc2.cn78xx.ptune = 0;
8194                        cc2.cn78xx.ntune = 0;
8195                        cc2.cn78xx.byp = 0; /* Disable bypass mode */
8196                        lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8197                        /* Read once */
8198                        lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8199
8200                        /* Read again */
8201                        cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8202                        debug("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
8203                              cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
8204
8205                        ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8206                        /* Restore original setting */
8207                        ctl.s.int_zqcs_dis = saved_int_zqcs_dis;
8208                        lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8209                }
8210
8211                int override_compensation = 0;
8212
8213                s = lookup_env(priv, "ddr__ptune");
8214                if (s)
8215                        saved_ddr__ptune = strtoul(s, NULL, 0);
8216
8217                s = lookup_env(priv, "ddr__ntune");
8218                if (s) {
8219                        saved_ddr__ntune = strtoul(s, NULL, 0);
8220                        override_compensation = 1;
8221                }
8222
8223                if (override_compensation) {
8224                        cc2.cn78xx.ptune = saved_ddr__ptune;
8225                        cc2.cn78xx.ntune = saved_ddr__ntune;
8226
8227                        ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8228                        saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
8229                        /* Disable ZQCS while in bypass. */
8230                        ctl.s.int_zqcs_dis = 1;
8231                        lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8232
8233                        cc2.cn78xx.byp = 1; /* Enable bypass mode */
8234                        lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8235                        /* Read again */
8236                        cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8237
8238                        debug("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
8239                              cc2.cn78xx.ptune, cc2.cn78xx.ntune);
8240                }
8241
8242                /* Evaluation block */
8243                /* Still at initial value? */
8244                int best_rodt_score = DEFAULT_BEST_RANK_SCORE;
8245                int auto_rodt_ctl = 0;
8246                int auto_rtt_nom  = 0;
8247                int rodt_score;
8248
8249                rodt_row_skip_mask = 0;
8250
8251                // just add specific RODT rows to the skip mask for DDR4
8252                // at this time...
8253                if (ddr_type == DDR4_DRAM) {
8254                        // skip RODT row 34 ohms for all DDR4 types
8255                        rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm);
8256                        // skip RODT row 40 ohms for all DDR4 types
8257                        rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm);
8258                        // For now, do not skip RODT row 40 or 48 ohm when
8259                        // ddr_hertz is above 1075 MHz
8260                        if (ddr_hertz > 1075000000) {
8261                                // noskip RODT row 40 ohms
8262                                rodt_row_skip_mask &=
8263                                        ~(1 << ddr4_rodt_ctl_40_ohm);
8264                                // noskip RODT row 48 ohms
8265                                rodt_row_skip_mask &=
8266                                        ~(1 << ddr4_rodt_ctl_48_ohm);
8267                        }
8268                        // For now, do not skip RODT row 48 ohm for 2Rx4
8269                        // stacked die DIMMs
8270                        if (is_stacked_die && num_ranks == 2 &&
8271                            dram_width == 4) {
8272                                // noskip RODT row 48 ohms
8273                                rodt_row_skip_mask &=
8274                                        ~(1 << ddr4_rodt_ctl_48_ohm);
8275                        }
8276                        // for now, leave all rows eligible when we have
8277                        // mini-DIMMs (SPD module type 5 = mini-RDIMM, 6 = mini-UDIMM)...
8278                        if (spd_dimm_type == 5 || spd_dimm_type == 6)
8279                                rodt_row_skip_mask = 0;
8280                        // for now, leave all rows eligible when we have
8281                        // a 2-slot 1-rank config
8282                        if (dimm_count == 2 && num_ranks == 1)
8283                                rodt_row_skip_mask = 0;
8284
8285                        debug("Evaluating Read-Leveling Scoreboard for AUTO settings.\n");
8286                        for (rtt_idx = min_rtt_nom_idx;
8287                             rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
8288                                rtt_nom = imp_val->rtt_nom_table[rtt_idx];
8289
8290                                for (rodt_ctl = max_rodt_ctl;
8291                                     rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
8292                                        rodt_score = 0;
8293                                        for (rankx = 0; rankx < dimm_count * 4;
8294                                             rankx++) {
8295                                                if (!(rank_mask & (1 << rankx)))
8296                                                        continue;
8297
8298                                                debug("rl_score[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n",
8299                                                      rtt_nom, rodt_ctl, rankx,
8300                                                      rl_score[rtt_nom][rodt_ctl][rankx].score);
8301                                                rodt_score +=
8302                                                        rl_score[rtt_nom][rodt_ctl][rankx].score;
8303                                        }
8304                                        // FIXME: do we need to skip RODT rows
8305                                        // here, like we do below in the
8306                                        // by-RANK settings?
8307
8308                                        /*
8309                                         * When using automatic ODT settings use
8310                                         * the ODT settings associated with the
8311                                         * best score for all of the tested ODT
8312                                         * combinations.
8313                                         */
8314
8315                                        if (rodt_score < best_rodt_score ||
8316                                            (rodt_score == best_rodt_score &&
8317                                             (imp_val->rodt_ohms[rodt_ctl] >
8318                                              imp_val->rodt_ohms[auto_rodt_ctl]))) {
8319                                                debug("AUTO: new best score for rodt:%d (%d), new score:%d, previous score:%d\n",
8320                                                      rodt_ctl,
8321                                                      imp_val->rodt_ohms[rodt_ctl],
8322                                                      rodt_score,
8323                                                      best_rodt_score);
8324                                                best_rodt_score = rodt_score;
8325                                                auto_rodt_ctl   = rodt_ctl;
8326                                                auto_rtt_nom    = rtt_nom;
8327                                        }
8328                                }
8329                        }
8330
8331                        mp1.u64 = lmc_rd(priv,
8332                                         CVMX_LMCX_MODEREG_PARAMS1(if_num));
8333
8334                        if (ddr_rtt_nom_auto) {
8335                                /* Store the automatically set RTT_NOM value */
8336                                if (dyn_rtt_nom_mask & 1)
8337                                        mp1.s.rtt_nom_00 = auto_rtt_nom;
8338                                if (dyn_rtt_nom_mask & 2)
8339                                        mp1.s.rtt_nom_01 = auto_rtt_nom;
8340                                if (dyn_rtt_nom_mask & 4)
8341                                        mp1.s.rtt_nom_10 = auto_rtt_nom;
8342                                if (dyn_rtt_nom_mask & 8)
8343                                        mp1.s.rtt_nom_11 = auto_rtt_nom;
8344                        } else {
8345                                /*
8346                                 * restore the manual settings to the register
8347                                 */
8348                                mp1.s.rtt_nom_00 = default_rtt_nom[0];
8349                                mp1.s.rtt_nom_01 = default_rtt_nom[1];
8350                                mp1.s.rtt_nom_10 = default_rtt_nom[2];
8351                                mp1.s.rtt_nom_11 = default_rtt_nom[3];
8352                        }
8353
8354                        lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
8355                               mp1.u64);
8356                        debug("RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
8357                              imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
8358                              imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
8359                              imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
8360                              imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
8361                              mp1.s.rtt_nom_11,
8362                              mp1.s.rtt_nom_10,
8363                              mp1.s.rtt_nom_01,
8364                              mp1.s.rtt_nom_00);
8365
8366                        debug("RTT_WR      %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
8367                              imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
8368                              imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
8369                              imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
8370                              imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
8371                              extr_wr(mp1.u64, 3),
8372                              extr_wr(mp1.u64, 2),
8373                              extr_wr(mp1.u64, 1),
8374                              extr_wr(mp1.u64, 0));
8375
8376                        debug("DIC         %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
8377                              imp_val->dic_ohms[mp1.s.dic_11],
8378                              imp_val->dic_ohms[mp1.s.dic_10],
8379                              imp_val->dic_ohms[mp1.s.dic_01],
8380                              imp_val->dic_ohms[mp1.s.dic_00],
8381                              mp1.s.dic_11,
8382                              mp1.s.dic_10,
8383                              mp1.s.dic_01,
8384                              mp1.s.dic_00);
8385
8386                        if (ddr_type == DDR4_DRAM) {
8387                                union cvmx_lmcx_modereg_params2 mp2;
8388                                /*
8389                                 * We must read the CSR, and not depend on
8390                                 * odt_config[odt_idx].odt_mask2, since we could
8391                                 * have overridden values with envvars.
8392                                 * NOTE: this corrects the printout, since the
8393                                 * CSR is not written with the old values...
8394                                 */
8395                                mp2.u64 = lmc_rd(priv,
8396                                                 CVMX_LMCX_MODEREG_PARAMS2(if_num));
8397
8398                                debug("RTT_PARK    %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
8399                                      imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
8400                                      imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
8401                                      imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
8402                                      imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
8403                                      mp2.s.rtt_park_11,
8404                                      mp2.s.rtt_park_10,
8405                                      mp2.s.rtt_park_01,
8406                                      mp2.s.rtt_park_00);
8407
8408                                debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n",
8409                                      "VREF_RANGE",
8410                                      mp2.s.vref_range_11,
8411                                      mp2.s.vref_range_10,
8412                                      mp2.s.vref_range_01,
8413                                      mp2.s.vref_range_00);
8414
8415                                debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n",
8416                                      "VREF_VALUE",
8417                                      mp2.s.vref_value_11,
8418                                      mp2.s.vref_value_10,
8419                                      mp2.s.vref_value_01,
8420                                      mp2.s.vref_value_00);
8421                        }
8422
8423                        cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8424                        if (ddr_rodt_ctl_auto) {
8425                                cc2.cn78xx.rodt_ctl = auto_rodt_ctl;
8426                        } else {
8427                                // back to the original setting
8428                                cc2.cn78xx.rodt_ctl = default_rodt_ctl;
8429                        }
8430                        lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8431                        cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8432                        debug("Read ODT_CTL                                  : 0x%x (%d ohms)\n",
8433                              cc2.cn78xx.rodt_ctl,
8434                              imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
8435
8436                        /*
8437                         * Use the delays associated with the best score for
8438                         * each individual rank
8439                         */
8440                        debug("Evaluating Read-Leveling Scoreboard for per-RANK settings.\n");
8441
8442                        // this is the RANK MAJOR LOOP
8443                        for (rankx = 0; rankx < dimm_count * 4; rankx++)
8444                                rank_major_loop(priv, rankx, rl_score);
8445                }  /* Evaluation block */
8446        } /* while(rl_dbg_loops--) */
8447
8448        ctl.cn78xx.ddr2t = save_ddr2t;
8449        lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8450        ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8451        /* Display final 2T value */
8452        debug("DDR2T                                         : %6d\n",
8453              ctl.cn78xx.ddr2t);
8454
8455        ddr_init_seq(priv, rank_mask, if_num);
8456
8457        for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8458                u64 value;
8459                int parameter_set = 0;
8460
8461                if (!(rank_mask & (1 << rankx)))
8462                        continue;
8463
8464                rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
8465                                                                  if_num));
8466
8467                for (i = 0; i < 9; ++i) {
8468                        s = lookup_env(priv, "ddr%d_rlevel_rank%d_byte%d",
8469                                       if_num, rankx, i);
8470                        if (s) {
8471                                parameter_set |= 1;
8472                                value = simple_strtoul(s, NULL, 0);
8473
8474                                upd_rl_rank(&rl_rank, i, value);
8475                        }
8476                }
8477
8478                s = lookup_env_ull(priv, "ddr%d_rlevel_rank%d", if_num, rankx);
8479                if (s) {
8480                        parameter_set |= 1;
8481                        value = simple_strtoull(s, NULL, 0);
8482                        rl_rank.u64 = value;
8483                }
8484
8485                if (parameter_set) {
8486                        lmc_wr(priv,
8487                               CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
8488                               rl_rank.u64);
8489                        rl_rank.u64 = lmc_rd(priv,
8490                                             CVMX_LMCX_RLEVEL_RANKX(rankx,
8491                                                                    if_num));
8492                        display_rl(if_num, rl_rank, rankx);
8493                }
8494        }
8495}
8496
8497int init_octeon3_ddr3_interface(struct ddr_priv *priv,
8498                                struct ddr_conf *_ddr_conf, u32 _ddr_hertz,
8499                                u32 cpu_hertz, u32 ddr_ref_hertz, int _if_num,
8500                                u32 _if_mask)
8501{
8502        union cvmx_lmcx_control ctrl;
8503        int ret;
8504        char *s;
8505        int i;
8506
8507        if_num = _if_num;
8508        ddr_hertz = _ddr_hertz;
8509        ddr_conf = _ddr_conf;
8510        if_mask = _if_mask;
8511        odt_1rank_config = ddr_conf->odt_1rank_config;
8512        odt_2rank_config = ddr_conf->odt_2rank_config;
8513        odt_4rank_config = ddr_conf->odt_4rank_config;
8514        dimm_config_table = ddr_conf->dimm_config_table;
8515        c_cfg = &ddr_conf->custom_lmc_config;
8516
8517        /*
8518         * Compute clock rates to the nearest picosecond.
8519         */
8520        tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */
8521        eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */
8522
8523        dimm_count = 0;
8524        /* Accumulate and report all the errors before giving up */
8525        fatal_error = 0;
8526
8527        /* Flag that indicates safe DDR settings should be used */
8528        safe_ddr_flag = 0;
8529        if_64b = 1;             /* Default: 64-bit interface width */
8530        mem_size_mbytes = 0;
8531        bank_bits = 0;
8532        column_bits_start = 1;
8533        use_ecc = 1;
8534        min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0;
8535        spd_package = 0;
8536        spd_rawcard = 0;
8537        spd_rawcard_aorb = 0;
8538        spd_rdimm_registers = 0;
8539        is_stacked_die = 0;
8540        is_3ds_dimm = 0;        // 3DS
8541        lranks_per_prank = 1;   // 3DS: logical ranks per package rank
8542        lranks_bits = 0;        // 3DS: logical ranks bits
8543        die_capacity = 0;       // in Mbits; only used for 3DS
8544
8545        wl_mask_err = 0;
8546        dyn_rtt_nom_mask = 0;
8547        ddr_disable_chip_reset = 1;
8548        match_wl_rtt_nom = 0;
8549
8550        internal_retries = 0;
8551
8552        disable_deskew_training = 0;
8553        restart_if_dsk_incomplete = 0;
8554        last_lane = ((if_64b) ? 8 : 4) + use_ecc;
8555
8556        disable_sequential_delay_check = 0;
8557        wl_print = WLEVEL_PRINTALL_DEFAULT;
8558
8559        enable_by_rank_init = 1;        // FIXME: default by-rank ON
8560        saved_rank_mask = 0;
8561
8562        node = 0;
8563
8564        memset(hwl_alts, 0, sizeof(hwl_alts));
8565
8566        /*
8567         * Initialize these to shut up the compiler. They are configured
8568         * and used only for DDR4
8569         */
8570        ddr4_trrd_lmin = 6000;
8571        ddr4_tccd_lmin = 6000;
8572
8573        debug("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d, CPUID 0x%08x\n",
8574              node, if_num, ddr_hertz, ddr_ref_hertz, read_c0_prid());
8575
8576        if (dimm_config_table[0].spd_addrs[0] == 0 &&
8577            !dimm_config_table[0].spd_ptrs[0]) {
8578                printf("ERROR: No dimms specified in the dimm_config_table.\n");
8579                return -1;
8580        }
8581
8582        // allow some overrides to be done
8583
8584        // this one controls several things related to DIMM geometry: HWL and RL
8585        disable_sequential_delay_check = c_cfg->disable_sequential_delay_check;
8586        s = lookup_env(priv, "ddr_disable_sequential_delay_check");
8587        if (s)
8588                disable_sequential_delay_check = strtoul(s, NULL, 0);
8589
8590        // this one controls whether chip RESET is done, or LMC init restarted
8591        // from step 6.9.6
8592        s = lookup_env(priv, "ddr_disable_chip_reset");
8593        if (s)
8594                ddr_disable_chip_reset = !!strtoul(s, NULL, 0);
8595
8596        // this one controls whether Deskew Training is performed
8597        s = lookup_env(priv, "ddr_disable_deskew_training");
8598        if (s)
8599                disable_deskew_training = !!strtoul(s, NULL, 0);
8600
8601        if (ddr_verbose(priv)) {
8602                printf("DDR SPD Table:");
8603                for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
8604                        if (dimm_config_table[didx].spd_addrs[0] == 0)
8605                                break;
8606
8607                        printf(" --ddr%dspd=0x%02x", if_num,
8608                               dimm_config_table[didx].spd_addrs[0]);
8609                        if (dimm_config_table[didx].spd_addrs[1] != 0)
8610                                printf(",0x%02x",
8611                                       dimm_config_table[didx].spd_addrs[1]);
8612                }
8613                printf("\n");
8614        }
8615
8616        /*
8617         * Walk the DRAM Socket Configuration Table to see what is installed.
8618         */
8619        for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
8620                /* Check for lower DIMM socket populated */
8621                if (validate_dimm(priv, &dimm_config_table[didx], 0)) {
8622                        if (ddr_verbose(priv))
8623                                report_dimm(&dimm_config_table[didx], 0,
8624                                            dimm_count, if_num);
8625                        ++dimm_count;
8626                } else {
8627                        break;
8628                }               /* Finished when there is no lower DIMM */
8629        }
8630
8631        initialize_ddr_clock(priv, ddr_conf, cpu_hertz, ddr_hertz,
8632                             ddr_ref_hertz, if_num, if_mask);
8633
8634        if (!odt_1rank_config)
8635                odt_1rank_config = disable_odt_config;
8636        if (!odt_2rank_config)
8637                odt_2rank_config = disable_odt_config;
8638        if (!odt_4rank_config)
8639                odt_4rank_config = disable_odt_config;
8640
8641        s = env_get("ddr_safe");
8642        if (s) {
8643                safe_ddr_flag = !!simple_strtoul(s, NULL, 0);
8644                printf("Parameter found in environment. ddr_safe = %d\n",
8645                       safe_ddr_flag);
8646        }
8647
8648        if (dimm_count == 0) {
8649                printf("ERROR: DIMM 0 not detected.\n");
8650                return (-1);
8651        }
8652
8653        if (c_cfg->mode32b)
8654                if_64b = 0;
8655
8656        s = lookup_env(priv, "if_64b");
8657        if (s)
8658                if_64b = !!simple_strtoul(s, NULL, 0);
8659
8660        if (if_64b == 1) {
8661                if (octeon_is_cpuid(OCTEON_CN70XX)) {
8662                        printf("64-bit interface width is not supported for this Octeon model\n");
8663                        ++fatal_error;
8664                }
8665        }
8666
8667        /* ddr_type only indicates DDR4 or DDR3 */
8668        ddr_type = (read_spd(&dimm_config_table[0], 0,
8669                             DDR4_SPD_KEY_BYTE_DEVICE_TYPE) == 0x0C) ? 4 : 3;
8670        debug("DRAM Device Type: DDR%d\n", ddr_type);
8671
8672        if (ddr_type == DDR4_DRAM) {
8673                int spd_module_type;
8674                int asymmetric;
8675                const char *signal_load[4] = { "", "MLS", "3DS", "RSV" };
8676
8677                imp_val = &ddr4_impedence_val;
8678
8679                spd_addr =
8680                    read_spd(&dimm_config_table[0], 0,
8681                             DDR4_SPD_ADDRESSING_ROW_COL_BITS);
8682                spd_org =
8683                    read_spd(&dimm_config_table[0], 0,
8684                             DDR4_SPD_MODULE_ORGANIZATION);
8685                spd_banks =
8686                    0xFF & read_spd(&dimm_config_table[0], 0,
8687                                    DDR4_SPD_DENSITY_BANKS);
8688
8689                bank_bits =
8690                    (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3);
8691                /* Controller can only address 4 bits. */
8692                bank_bits = min((int)bank_bits, 4);
8693
8694                spd_package =
8695                    0xFF & read_spd(&dimm_config_table[0], 0,
8696                                    DDR4_SPD_PACKAGE_TYPE);
8697                if (spd_package & 0x80) {       // non-monolithic device
8698                        is_stacked_die = ((spd_package & 0x73) == 0x11);
8699                        debug("DDR4: Package Type 0x%02x (%s), %d die\n",
8700                              spd_package, signal_load[(spd_package & 3)],
8701                              ((spd_package >> 4) & 7) + 1);
8702                        is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS?
8703                        if (is_3ds_dimm) {      // is it 3DS?
8704                                lranks_per_prank = ((spd_package >> 4) & 7) + 1;
8705                                // FIXME: should make sure it is only 2H or 4H
8706                                // or 8H?
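                                    // Worked through the decode below: a 2H
                                    // stack gives lranks_per_prank=2 -> 1 bit,
                                    // 4H -> 2 bits, 8H -> 8>>1=4, corrected to
                                    // 3 bits.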
8707                                lranks_bits = lranks_per_prank >> 1;
8708                                if (lranks_bits == 4)
8709                                        lranks_bits = 3;
8710                        }
8711                } else if (spd_package != 0) {
8712                        // FIXME: print non-zero monolithic device definition
8713                        debug("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n",
8714                              ((spd_package >> 4) & 7) + 1, (spd_package & 3));
8715                }
8716
8717                asymmetric = (spd_org >> 6) & 1;
8718                if (asymmetric) {
8719                        int spd_secondary_pkg =
8720                            read_spd(&dimm_config_table[0], 0,
8721                                     DDR4_SPD_SECONDARY_PACKAGE_TYPE);
8722                        debug("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%02x\n",
8723                              spd_secondary_pkg);
8724                } else {
8725                        u64 bus_width =
8726                                8 << (0x07 &
8727                                read_spd(&dimm_config_table[0], 0,
8728                                         DDR4_SPD_MODULE_MEMORY_BUS_WIDTH));
8729                        u64 ddr_width = 4 << ((spd_org >> 0) & 0x7);
8730                        u64 module_cap;
8731                        int shift = (spd_banks & 0x0F);
8732
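                            /*
                             * The SPD density code 'shift' decodes below as
                             * 256 Mbit << shift for codes 0..7 (256 Mbit up
                             * to 32 Gbit); codes 8 and 9 map to the
                             * non-power-of-2 12 Gbit and 24 Gbit densities.
                             */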
8733                        die_capacity = (shift < 8) ? (256UL << shift) :
8734                                ((12UL << (shift & 1)) << 10);
8735                        debug("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n",
8736                              (die_capacity > 512) ? (die_capacity >> 10) :
8737                              die_capacity, (die_capacity > 512) ? 'G' : 'M');
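                            /*
                             * For example (illustrative values only): 8 Gbit
                             * dies on a 64-bit primary bus of x8 devices with
                             * 2 package ranks give
                             * (8 Gbit / 8) * (64 / 8) * 2 = 16 GB per module.
                             */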
8738                        module_cap = ((u64)die_capacity << 20) / 8UL *
8739                                bus_width / ddr_width *
8740                                (1UL + ((spd_org >> 3) & 0x7));
8741
8742                        // is it 3DS?
8743                        if (is_3ds_dimm) {
8744                                module_cap *= (u64)(((spd_package >> 4) & 7) +
8745                                                    1);
8746                        }
8747                        debug("DDR4: Module Organization: SYMMETRICAL: capacity per module %lld GB\n",
8748                              module_cap >> 30);
8749                }
8750
8751                spd_rawcard =
8752                    0xFF & read_spd(&dimm_config_table[0], 0,
8753                                    DDR4_SPD_REFERENCE_RAW_CARD);
8754                debug("DDR4: Reference Raw Card 0x%02x\n", spd_rawcard);
8755
8756                spd_module_type =
8757                    read_spd(&dimm_config_table[0], 0,
8758                             DDR4_SPD_KEY_BYTE_MODULE_TYPE);
8759                if (spd_module_type & 0x80) {   // HYBRID module
8760                        debug("DDR4: HYBRID module, type %s\n",
8761                              ((spd_module_type & 0x70) ==
8762                               0x10) ? "NVDIMM" : "UNKNOWN");
8763                }
8764                spd_thermal_sensor =
8765                    read_spd(&dimm_config_table[0], 0,
8766                             DDR4_SPD_MODULE_THERMAL_SENSOR);
8767                spd_dimm_type = spd_module_type & 0x0F;
8768                spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
8769                        (spd_dimm_type == 8);
8770                if (spd_rdimm) {
8771                        u16 spd_mfgr_id, spd_register_rev, spd_mod_attr;
8772                        static const u16 manu_ids[4] = {
8773                                0xb380, 0x3286, 0x9780, 0xb304
8774                        };
8775                        static const char *manu_names[4] = {
8776                                "XXX", "XXXXXXX", "XX", "XXXXX"
8777                        };
8778                        int mc;
8779
8780                        spd_mfgr_id =
8781                            (0xFFU &
8782                             read_spd(&dimm_config_table[0], 0,
8783                                      DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
8784                            ((0xFFU &
8785                              read_spd(&dimm_config_table[0], 0,
8786                                       DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB))
8787                             << 8);
8788                        spd_register_rev =
8789                            0xFFU & read_spd(&dimm_config_table[0], 0,
8790                                             DDR4_SPD_REGISTER_REVISION_NUMBER);
8791                        for (mc = 0; mc < 4; mc++)
8792                                if (manu_ids[mc] == spd_mfgr_id)
8793                                        break;
8794
8795                        debug("DDR4: RDIMM Register Manufacturer ID: %s, Revision: 0x%02x\n",
8796                              (mc >= 4) ? "UNKNOWN" : manu_names[mc],
8797                              spd_register_rev);
8798
8799                        // RAWCARD A or B must be bit 7=0 and bits 4-0
8800                        // either 00000(A) or 00001(B)
8801                        spd_rawcard_aorb = ((spd_rawcard & 0x9fUL) <= 1);
8802                        // RDIMM Module Attributes
8803                        spd_mod_attr =
8804                            0xFFU & read_spd(&dimm_config_table[0], 0,
8805                                        DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE);
8806                        spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
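                            // The '(1 << (attr & 3)) >> 1' decode maps the SPD
                            // code 0/1/2/3 to 0/1/2/4 registers; the same
                            // decode covers the DRAM-rows field in the debug
                            // output below.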
8807                        debug("DDR4: RDIMM Module Attributes (0x%02x): Register Type DDR4RCD%02d, DRAM rows %d, Registers %d\n",
8808                              spd_mod_attr, (spd_mod_attr >> 4) + 1,
8809                              ((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
8810                              spd_rdimm_registers);
8811                }
8812                dimm_type_name = ddr4_dimm_types[spd_dimm_type];
8813        } else {                /* if (ddr_type == DDR4_DRAM) */
8814                const char *signal_load[4] = { "UNK", "MLS", "SLS", "RSV" };
8815
8816                imp_val = &ddr3_impedence_val;
8817
8818                spd_addr =
8819                    read_spd(&dimm_config_table[0], 0,
8820                             DDR3_SPD_ADDRESSING_ROW_COL_BITS);
8821                spd_org =
8822                    read_spd(&dimm_config_table[0], 0,
8823                             DDR3_SPD_MODULE_ORGANIZATION);
8824                spd_banks =
8825                    read_spd(&dimm_config_table[0], 0,
8826                             DDR3_SPD_DENSITY_BANKS) & 0xff;
8827
8828                bank_bits = 3 + ((spd_banks >> 4) & 0x7);
8829                /* Controller can only address 3 bits. */
8830                bank_bits = min((int)bank_bits, 3);
8831                spd_dimm_type =
8832                    0x0f & read_spd(&dimm_config_table[0], 0,
8833                                    DDR3_SPD_KEY_BYTE_MODULE_TYPE);
8834                spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
8835                        (spd_dimm_type == 9);
8836
8837                spd_package =
8838                    0xFF & read_spd(&dimm_config_table[0], 0,
8839                                    DDR3_SPD_SDRAM_DEVICE_TYPE);
8840                if (spd_package & 0x80) {       // non-standard device
8841                        debug("DDR3: Device Type 0x%02x (%s), %d die\n",
8842                              spd_package, signal_load[(spd_package & 3)],
8843                              ((1 << ((spd_package >> 4) & 7)) >> 1));
8844                } else if (spd_package != 0) {
8845                        // FIXME: print non-zero monolithic device definition
8846                        debug("DDR3: Device Type MONOLITHIC: %d die, signal load %d\n",
8847                              ((1 << ((spd_package >> 4) & 7)) >> 1),
8848                              (spd_package & 3));
8849                }
8850
8851                spd_rawcard =
8852                    0xFF & read_spd(&dimm_config_table[0], 0,
8853                                    DDR3_SPD_REFERENCE_RAW_CARD);
8854                debug("DDR3: Reference Raw Card 0x%02x\n", spd_rawcard);
8855                spd_thermal_sensor =
8856                    read_spd(&dimm_config_table[0], 0,
8857                             DDR3_SPD_MODULE_THERMAL_SENSOR);
8858
8859                if (spd_rdimm) {
8860                        int spd_mfgr_id, spd_register_rev, spd_mod_attr;
8861
8862                        spd_mfgr_id =
8863                            (0xFFU &
8864                             read_spd(&dimm_config_table[0], 0,
8865                                      DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
8866                            ((0xFFU &
8867                              read_spd(&dimm_config_table[0], 0,
8868                                       DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB))
8869                             << 8);
8870                        spd_register_rev =
8871                            0xFFU & read_spd(&dimm_config_table[0], 0,
8872                                             DDR3_SPD_REGISTER_REVISION_NUMBER);
8873                        debug("DDR3: RDIMM Register Manufacturer ID 0x%x Revision 0x%02x\n",
8874                              spd_mfgr_id, spd_register_rev);
8875                        // Module Attributes
8876                        spd_mod_attr =
8877                            0xFFU & read_spd(&dimm_config_table[0], 0,
8878                                             DDR3_SPD_ADDRESS_MAPPING);
8879                        spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
8880                        debug("DDR3: RDIMM Module Attributes (0x%02x): DRAM rows %d, Registers %d\n",
8881                              spd_mod_attr,
8882                              ((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
8883                              spd_rdimm_registers);
8884                }
8885                dimm_type_name = ddr3_dimm_types[spd_dimm_type];
8886        }
8887
8888        if (spd_thermal_sensor & 0x80) {
8889                debug("DDR%d: SPD: Thermal Sensor PRESENT\n",
8890                      (ddr_type == DDR4_DRAM) ? 4 : 3);
8891        }
8892
8893        debug("spd_addr        : %#06x\n", spd_addr);
8894        debug("spd_org         : %#06x\n", spd_org);
8895        debug("spd_banks       : %#06x\n", spd_banks);
8896
8897        row_bits = 12 + ((spd_addr >> 3) & 0x7);
8898        col_bits = 9 + ((spd_addr >> 0) & 0x7);
8899
8900        num_ranks = 1 + ((spd_org >> 3) & 0x7);
8901        dram_width = 4 << ((spd_org >> 0) & 0x7);
8902        num_banks = 1 << bank_bits;
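            /*
             * Example decode (illustrative values): spd_addr 0x19 gives
             * 12 + 3 = 15 row bits and 9 + 1 = 10 column bits; spd_org 0x09
             * gives 1 + 1 = 2 ranks of x8 (4 << 1) devices.
             */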
8903
8904        s = lookup_env(priv, "ddr_num_ranks");
8905        if (s)
8906                num_ranks = simple_strtoul(s, NULL, 0);
8907
8908        s = lookup_env(priv, "ddr_enable_by_rank_init");
8909        if (s)
8910                enable_by_rank_init = !!simple_strtoul(s, NULL, 0);
8911
8912        // FIXME: for now, we can only handle a DDR4 2rank-1slot config
8913        // FIXME: also, by-rank init does not work correctly in 32-bit mode...
8914        if (enable_by_rank_init && (ddr_type != DDR4_DRAM ||
8915                                    dimm_count != 1 || if_64b != 1 ||
8916                                    num_ranks != 2))
8917                enable_by_rank_init = 0;
8918
8919        if (enable_by_rank_init) {
8920                struct dimm_odt_config *odt_config;
8921                union cvmx_lmcx_modereg_params1 mp1;
8922                union cvmx_lmcx_modereg_params2 modereg_params2;
8923                int by_rank_rodt, by_rank_wr, by_rank_park;
8924
8925                // Apply the ODT settings changes that work best for 2R-1S configs
8926                debug("DDR4: 2R-1S special BY-RANK init ODT settings updated\n");
8927
8928                // setup for modifying config table values - 2 ranks and 1 DIMM
8929                odt_config =
8930                    (struct dimm_odt_config *)&ddr_conf->odt_2rank_config[0];
8931
8932                // original was 80, first try was 60
8933                by_rank_rodt = ddr4_rodt_ctl_48_ohm;
8934                s = lookup_env(priv, "ddr_by_rank_rodt");
8935                if (s)
8936                        by_rank_rodt = strtoul(s, NULL, 0);
8937
8938                odt_config->qs_dic = /*RODT_CTL */ by_rank_rodt;
8939
8940                // this is for MODEREG_PARAMS1 fields
8941                // fetch the original settings
8942                mp1.u64 = odt_config->modereg_params1.u64;
8943
8944                by_rank_wr = ddr4_rttwr_80ohm;  // originals were 240
8945                s = lookup_env(priv, "ddr_by_rank_wr");
8946                if (s)
8947                        by_rank_wr = simple_strtoul(s, NULL, 0);
8948
8949                // change specific settings here...
8950                insrt_wr(&mp1.u64, /*rank */ 0, by_rank_wr);
8951                insrt_wr(&mp1.u64, /*rank */ 1, by_rank_wr);
8952
8953                // save final settings
8954                odt_config->modereg_params1.u64 = mp1.u64;
8955
8956                // this is for MODEREG_PARAMS2 fields
8957                // fetch the original settings
8958                modereg_params2.u64 = odt_config->modereg_params2.u64;
8959
8960                by_rank_park = ddr4_rttpark_none;       // originals were 120
8961                s = lookup_env(priv, "ddr_by_rank_park");
8962                if (s)
8963                        by_rank_park = simple_strtoul(s, NULL, 0);
8964
8965                // change specific settings here...
8966                modereg_params2.s.rtt_park_00 = by_rank_park;
8967                modereg_params2.s.rtt_park_01 = by_rank_park;
8968
8969                // save final settings
8970                odt_config->modereg_params2.u64 = modereg_params2.u64;
8971        }
8972
8973        /*
8974         * FIX
8975         * Check that values are within some theoretical limits.
8976         * col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) =
8977         *   14 - 3 - 4 = 7
8978         * col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) =
8979         *   18 - 2 - 3 = 13
8980         */
8981        if (col_bits > 13 || col_bits < 7) {
8982                printf("Unsupported number of Col Bits: %d\n", col_bits);
8983                ++fatal_error;
8984        }
8985
8986        /*
8987         * FIX
8988         * Check that values are within some theoretical limits.
8989         * row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits =
8990         *   26 - 18 - 1 = 7
8991         * row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits =
8992         *   33 - 14 - 1 = 18
8993         */
8994        if (row_bits > 18 || row_bits < 7) {
8995                printf("Unsupported number of Row Bits: %d\n", row_bits);
8996                ++fatal_error;
8997        }
8998
8999        s = lookup_env(priv, "ddr_rdimm_ena");
9000        if (s)
9001                spd_rdimm = !!simple_strtoul(s, NULL, 0);
9002
9003        wl_loops = WLEVEL_LOOPS_DEFAULT;
9004        // accept generic or interface-specific override
9005        s = lookup_env(priv, "ddr_wlevel_loops");
9006        if (!s)
9007                s = lookup_env(priv, "ddr%d_wlevel_loops", if_num);
9008
9009        if (s)
9010                wl_loops = strtoul(s, NULL, 0);
9011
9012        s = lookup_env(priv, "ddr_ranks");
9013        if (s)
9014                num_ranks = simple_strtoul(s, NULL, 0);
9015
9016        bunk_enable = (num_ranks > 1);
9017
9018        if (octeon_is_cpuid(OCTEON_CN7XXX))
9019                column_bits_start = 3;
9020        else
9021                printf("ERROR: Unsupported Octeon model: 0x%x\n",
9022                       read_c0_prid());
9023
9024        row_lsb = column_bits_start + col_bits + bank_bits - (!if_64b);
9025        debug("row_lsb = column_bits_start + col_bits + bank_bits = %d\n",
9026              row_lsb);
9027
9028        pbank_lsb = row_lsb + row_bits + bunk_enable;
9029        debug("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb);
9030
9031        if (lranks_per_prank > 1) {
9032                pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable;
9033                debug("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n",
9034                      row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb);
9035        }
9036
9037        mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20);
9038        if (num_ranks == 4) {
9039                /*
9040                 * Quad rank dimm capacity is equivalent to two dual-rank
9041                 * dimms.
9042                 */
9043                mem_size_mbytes *= 2;
9044        }
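            /*
             * Worked example (illustrative values): with a 64-bit interface,
             * 10 column bits, 4 bank bits, 15 row bits and a dual-rank DIMM,
             * row_lsb is 3 + 10 + 4 = 17 and pbank_lsb is 17 + 15 + 1 = 33,
             * so each DIMM contributes (1 << 33) >> 20 = 8192 MB.
             */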
9045
9046        /*
9047         * Mask with one bit set for each active rank, allowing 2 bits
9048         * per dimm. This makes later calculations simpler, as a variety
9049         * of CSRs use this layout. This init needs to be updated for dual
9050         * configs (i.e., non-identical DIMMs).
9051         *
9052         * Bit 0 = dimm0, rank 0
9053         * Bit 1 = dimm0, rank 1
9054         * Bit 2 = dimm1, rank 0
9055         * Bit 3 = dimm1, rank 1
9056         * ...
9057         */
9058        rank_mask = 0x1;
9059        if (num_ranks > 1)
9060                rank_mask = 0x3;
9061        if (num_ranks > 2)
9062                rank_mask = 0xf;
9063
9064        for (i = 1; i < dimm_count; i++)
9065                rank_mask |= ((rank_mask & 0x3) << (2 * i));
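            // E.g. two dual-rank DIMMs: rank_mask starts at 0x3 and the loop
            // above ORs in (0x3 << 2), giving 0xf.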
9066
9067        /*
9068         * If we are booting from RAM, the DRAM controller is
9069         * already set up.  Just return the memory size
9070         */
9071        if (priv->flags & FLAG_RAM_RESIDENT) {
9072                debug("Ram Boot: Skipping LMC config\n");
9073                return mem_size_mbytes;
9074        }
9075
9076        if (ddr_type == DDR4_DRAM) {
9077                spd_ecc =
9078                    !!(read_spd
9079                       (&dimm_config_table[0], 0,
9080                        DDR4_SPD_MODULE_MEMORY_BUS_WIDTH) & 8);
9081        } else {
9082                spd_ecc =
9083                    !!(read_spd
9084                       (&dimm_config_table[0], 0,
9085                        DDR3_SPD_MEMORY_BUS_WIDTH) & 8);
9086        }
9087
9088        char rank_spec[8];
9089
9090        printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package);
9091        debug("Summary: %d %s%s %s %s, row bits=%d, col bits=%d, bank bits=%d\n",
9092              dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "",
9093              rank_spec,
9094              (spd_ecc) ? "ECC" : "non-ECC", row_bits, col_bits, bank_bits);
9095
9096        if (ddr_type == DDR4_DRAM) {
9097                spd_cas_latency =
9098                    ((0xff &
9099                      read_spd(&dimm_config_table[0], 0,
9100                               DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0);
9101                spd_cas_latency |=
9102                    ((0xff &
9103                      read_spd(&dimm_config_table[0], 0,
9104                               DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8);
9105                spd_cas_latency |=
9106                    ((0xff &
9107                      read_spd(&dimm_config_table[0], 0,
9108                               DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16);
9109                spd_cas_latency |=
9110                    ((0xff &
9111                      read_spd(&dimm_config_table[0], 0,
9112                               DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24);
9113        } else {
9114                spd_cas_latency =
9115                    0xff & read_spd(&dimm_config_table[0], 0,
9116                                    DDR3_SPD_CAS_LATENCIES_LSB);
9117                spd_cas_latency |=
9118                    ((0xff &
9119                      read_spd(&dimm_config_table[0], 0,
9120                               DDR3_SPD_CAS_LATENCIES_MSB)) << 8);
9121        }
9122        debug("spd_cas_latency : %#06x\n", spd_cas_latency);
9123
9124        if (ddr_type == DDR4_DRAM) {
9125                /*
9126                 * No other values for DDR4 MTB and FTB are specified at the
9127                 * current time so don't bother reading them. Can't speculate
9128                 * how new values will be represented.
9129                 */
9130                int spdmtb = 125;
9131                int spdftb = 1;
9132
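                    /*
                     * Each DDR4 timing below is the coarse SPD byte scaled by
                     * the 125 ps MTB plus a signed fine-correction byte scaled
                     * by the 1 ps FTB; e.g. a tAAmin byte of 110 with a zero
                     * fine byte (illustrative values) yields 110 * 125 =
                     * 13750 ps.
                     */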
9133                taamin = spdmtb * read_spd(&dimm_config_table[0], 0,
9134                                           DDR4_SPD_MIN_CAS_LATENCY_TAAMIN) +
9135                         spdftb * (signed char)read_spd(&dimm_config_table[0],
9136                         0, DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN);
9137
9138                ddr4_tckavgmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9139                        DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN) +
9140                        spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9141                        DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);
9142
9143                ddr4_tckavgmax = spdmtb * read_spd(&dimm_config_table[0], 0,
9144                        DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX) +
9145                        spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9146                        DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX);
9147
9148                ddr4_trdcmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9149                        DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN) +
9150                        spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9151                        DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN);
9152
9153                ddr4_trpmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9154                        DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN) +
9155                        spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9156                        DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN);
9157
9158                ddr4_trasmin = spdmtb *
9159                        (((read_spd
9160                           (&dimm_config_table[0], 0,
9161                            DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) +
9162                         (read_spd
9163                          (&dimm_config_table[0], 0,
9164                           DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff));
9165
9166                ddr4_trcmin = spdmtb *
9167                        ((((read_spd
9168                            (&dimm_config_table[0], 0,
9169                             DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) <<
9170                          8) + (read_spd
9171                                (&dimm_config_table[0], 0,
9172                                 DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) &
9173                                0xff))
9174                        + spdftb * (signed char)read_spd(&dimm_config_table[0],
9175                                                         0,
9176                        DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN);
9177
9178                ddr4_trfc1min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9179                        DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) <<
9180                        8) + (read_spd(&dimm_config_table[0], 0,
9181                        DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff));
9182
9183                ddr4_trfc2min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9184                        DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) <<
9185                        8) + (read_spd(&dimm_config_table[0], 0,
9186                        DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff));
9187
9188                ddr4_trfc4min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9189                        DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) <<
9190                        8) + (read_spd(&dimm_config_table[0], 0,
9191                        DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff));
9192
9193                ddr4_tfawmin = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9194                        DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) <<
9195                        8) + (read_spd(&dimm_config_table[0], 0,
9196                        DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff));
9197
9198                ddr4_trrd_smin = spdmtb * read_spd(&dimm_config_table[0], 0,
9199                        DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN) +
9200                        spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9201                        DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN);
9202
9203                ddr4_trrd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9204                        DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN) +
9205                        spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9206                        DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN);
9207
9208                ddr4_tccd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9209                        DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN) +
9210                        spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9211                        DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN);
9212
9213                debug("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdmtb);
9214                debug("%-45s : %6d ps\n", "Fine Timebase   (FTB)", spdftb);
9215
9216                debug("%-45s : %6d ps (%ld MT/s)\n",
9217                      "SDRAM Minimum Cycle Time (tCKAVGmin)", ddr4_tckavgmin,
9218                      pretty_psecs_to_mts(ddr4_tckavgmin));
9219                debug("%-45s : %6d ps\n",
9220                      "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tckavgmax);
9221                debug("%-45s : %6d ps\n", "Minimum CAS Latency Time (taamin)",
9222                      taamin);
9223                debug("%-45s : %6d ps\n",
9224                      "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_trdcmin);
9225                debug("%-45s : %6d ps\n",
9226                      "Minimum Row Precharge Delay Time (tRPmin)", ddr4_trpmin);
9227                debug("%-45s : %6d ps\n",
9228                      "Minimum Active to Precharge Delay (tRASmin)",
9229                      ddr4_trasmin);
9230                debug("%-45s : %6d ps\n",
9231                      "Minimum Active to Active/Refr. Delay (tRCmin)",
9232                      ddr4_trcmin);
9233                debug("%-45s : %6d ps\n",
9234                      "Minimum Refresh Recovery Delay (tRFC1min)",
9235                      ddr4_trfc1min);
9236                debug("%-45s : %6d ps\n",
9237                      "Minimum Refresh Recovery Delay (tRFC2min)",
9238                      ddr4_trfc2min);
9239                debug("%-45s : %6d ps\n",
9240                      "Minimum Refresh Recovery Delay (tRFC4min)",
9241                      ddr4_trfc4min);
9242                debug("%-45s : %6d ps\n",
9243                      "Minimum Four Activate Window Time (tFAWmin)",
9244                      ddr4_tfawmin);
9245                debug("%-45s : %6d ps\n",
9246                      "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_trrd_smin);
9247                debug("%-45s : %6d ps\n",
9248                      "Minimum Act. to Act. Delay (tRRD_Lmin)", ddr4_trrd_lmin);
9249                debug("%-45s : %6d ps\n",
9250                      "Minimum CAS to CAS Delay Time (tCCD_Lmin)",
9251                      ddr4_tccd_lmin);
9252
9253#define DDR4_TWR 15000
9254#define DDR4_TWTR_S 2500
9255
9256                tckmin = ddr4_tckavgmin;
9257                twr = DDR4_TWR;
9258                trcd = ddr4_trdcmin;
9259                trrd = ddr4_trrd_smin;
9260                trp = ddr4_trpmin;
9261                tras = ddr4_trasmin;
9262                trc = ddr4_trcmin;
9263                trfc = ddr4_trfc1min;
9264                twtr = DDR4_TWTR_S;
9265                tfaw = ddr4_tfawmin;
9266
9267                if (spd_rdimm) {
9268                        spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
9269                        DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) &
9270                        0x1;
9271                } else {
9272                        spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
9273                                DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1;
9274                }
9275                debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
9276        } else {
9277                spd_mtb_dividend =
9278                    0xff & read_spd(&dimm_config_table[0], 0,
9279                                    DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
9280                spd_mtb_divisor =
9281                    0xff & read_spd(&dimm_config_table[0], 0,
9282                                    DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
9283                spd_tck_min =
9284                    0xff & read_spd(&dimm_config_table[0], 0,
9285                                    DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
9286                spd_taa_min =
9287                    0xff & read_spd(&dimm_config_table[0], 0,
9288                                    DDR3_SPD_MIN_CAS_LATENCY_TAAMIN);
9289
9290                spd_twr =
9291                    0xff & read_spd(&dimm_config_table[0], 0,
9292                                    DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN);
9293                spd_trcd =
9294                    0xff & read_spd(&dimm_config_table[0], 0,
9295                                    DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN);
9296                spd_trrd =
9297                    0xff & read_spd(&dimm_config_table[0], 0,
9298                                    DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN);
9299                spd_trp =
9300                    0xff & read_spd(&dimm_config_table[0], 0,
9301                                    DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN);
9302                spd_tras =
9303                    0xff & read_spd(&dimm_config_table[0], 0,
9304                                    DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN);
9305                spd_tras |=
9306                    ((0xff &
9307                      read_spd(&dimm_config_table[0], 0,
9308                               DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8);
9309                spd_trc =
9310                    0xff & read_spd(&dimm_config_table[0], 0,
9311                                    DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN);
9312                spd_trc |=
9313                    ((0xff &
9314                      read_spd(&dimm_config_table[0], 0,
9315                               DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf0) << 4);
9316                spd_trfc =
9317                    0xff & read_spd(&dimm_config_table[0], 0,
9318                                    DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN);
9319                spd_trfc |=
9320                    ((0xff &
9321                      read_spd(&dimm_config_table[0], 0,
9322                               DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) <<
9323                     8);
9324                spd_twtr =
9325                    0xff & read_spd(&dimm_config_table[0], 0,
9326                                DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN);
9327                spd_trtp =
9328                    0xff & read_spd(&dimm_config_table[0], 0,
9329                        DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN);
9330                spd_tfaw =
9331                    0xff & read_spd(&dimm_config_table[0], 0,
9332                                    DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN);
9333                spd_tfaw |=
9334                    ((0xff &
9335                      read_spd(&dimm_config_table[0], 0,
9336                               DDR3_SPD_UPPER_NIBBLE_TFAW) & 0xf) << 8);
9337                spd_addr_mirror =
9338                    0xff & read_spd(&dimm_config_table[0], 0,
9339                                    DDR3_SPD_ADDRESS_MAPPING) & 0x1;
9340                /* Only enable address mirroring on unbuffered DIMMs. */
9341                spd_addr_mirror = spd_addr_mirror && !spd_rdimm;
9342                ftb_dividend =
9343                    read_spd(&dimm_config_table[0], 0,
9344                             DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
9345                ftb_divisor =
9346                    read_spd(&dimm_config_table[0], 0,
9347                             DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
9348                /* Make sure that it is not 0 */
9349                ftb_divisor = (ftb_divisor == 0) ? 1 : ftb_divisor;
9350
9351                debug("spd_twr         : %#06x\n", spd_twr);
9352                debug("spd_trcd        : %#06x\n", spd_trcd);
9353                debug("spd_trrd        : %#06x\n", spd_trrd);
9354                debug("spd_trp         : %#06x\n", spd_trp);
9355                debug("spd_tras        : %#06x\n", spd_tras);
9356                debug("spd_trc         : %#06x\n", spd_trc);
9357                debug("spd_trfc        : %#06x\n", spd_trfc);
9358                debug("spd_twtr        : %#06x\n", spd_twtr);
9359                debug("spd_trtp        : %#06x\n", spd_trtp);
9360                debug("spd_tfaw        : %#06x\n", spd_tfaw);
9361                debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
9362
9363                mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor;
9364                taamin = mtb_psec * spd_taa_min;
9365                taamin += ftb_dividend *
9366                        (signed char)read_spd(&dimm_config_table[0],
9367                                0, DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) /
9368                        ftb_divisor;
9369                tckmin = mtb_psec * spd_tck_min;
9370                tckmin += ftb_dividend *
9371                        (signed char)read_spd(&dimm_config_table[0],
9372                                0, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) /
9373                        ftb_divisor;
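                    /*
                     * Illustrative DDR3 example: MTB dividend 1 / divisor 8
                     * gives mtb_psec = 125 ps, so an SPD tCKmin byte of 10
                     * yields 1250 ps (DDR3-1600); the signed FTB byte then
                     * adds a sub-MTB correction scaled by
                     * ftb_dividend / ftb_divisor.
                     */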
9374
9375                twr = spd_twr * mtb_psec;
9376                trcd = spd_trcd * mtb_psec;
9377                trrd = spd_trrd * mtb_psec;
9378                trp = spd_trp * mtb_psec;
9379                tras = spd_tras * mtb_psec;
9380                trc = spd_trc * mtb_psec;
9381                trfc = spd_trfc * mtb_psec;
9382                if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) && trfc < 260000) {
9383                        // default to this - because it works...
9384                        int new_trfc = 260000;
9385
9386                        s = env_get("ddr_trfc");
9387                        if (s) {
9388                                new_trfc = simple_strtoul(s, NULL, 0);
9389                                printf("Parameter found in environment. ddr_trfc = %d\n",
9390                                       new_trfc);
9391                                if (new_trfc < 160000 || new_trfc > 260000) {
9392                                        // back to default if out of range
9393                                        new_trfc = 260000;
9394                                }
9395                        }
9396                        debug("N%d.LMC%d: Adjusting tRFC from %d to %d, for CN78XX Pass 2.x\n",
9397                              node, if_num, trfc, new_trfc);
9398                        trfc = new_trfc;
9399                }
9400
9401                twtr = spd_twtr * mtb_psec;
9402                trtp = spd_trtp * mtb_psec;
9403                tfaw = spd_tfaw * mtb_psec;
9404
9405                debug("Medium Timebase (MTB)                         : %6d ps\n",
9406                      mtb_psec);
9407                debug("Minimum Cycle Time (tckmin)                   : %6d ps (%ld MT/s)\n",
9408                      tckmin, pretty_psecs_to_mts(tckmin));
9409                debug("Minimum CAS Latency Time (taamin)             : %6d ps\n",
9410                      taamin);
9411                debug("Write Recovery Time (tWR)                     : %6d ps\n",
9412                      twr);
9413                debug("Minimum RAS to CAS delay (tRCD)               : %6d ps\n",
9414                      trcd);
9415                debug("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n",
9416                      trrd);
9417                debug("Minimum Row Precharge Delay (tRP)             : %6d ps\n",
9418                      trp);
9419                debug("Minimum Active to Precharge (tRAS)            : %6d ps\n",
9420                      tras);
9421                debug("Minimum Active to Active/Refresh Delay (tRC)  : %6d ps\n",
9422                      trc);
9423                debug("Minimum Refresh Recovery Delay (tRFC)         : %6d ps\n",
9424                      trfc);
9425                debug("Internal write to read command delay (tWTR)   : %6d ps\n",
9426                      twtr);
9427                debug("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n",
9428                      trtp);
9429                debug("Minimum Four Activate Window Delay (tFAW)     : %6d ps\n",
9430                      tfaw);
9431        }
9432
9433        /*
9434         * When the cycle time is within 1 psec of the minimum, accept it
9435         * as a slight rounding error and adjust it to exactly the minimum
9436         * cycle time. This avoids an unnecessary warning.
9437         */
9438        if (abs(tclk_psecs - tckmin) < 2)
9439                tclk_psecs = tckmin;
9440
9441        if (tclk_psecs < (u64)tckmin) {
9442                printf("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin: %ld)!!!!\n",
9443                       tclk_psecs, (ulong)tckmin);
9444        }
9445
9446        debug("DDR Clock Rate (tCLK)                         : %6ld ps\n",
9447              tclk_psecs);
9448        debug("Core Clock Rate (eCLK)                        : %6ld ps\n",
9449              eclk_psecs);
9450
9451        s = env_get("ddr_use_ecc");
9452        if (s) {
9453                use_ecc = !!simple_strtoul(s, NULL, 0);
9454                printf("Parameter found in environment. ddr_use_ecc = %d\n",
9455                       use_ecc);
9456        }
9457        use_ecc = use_ecc && spd_ecc;
9458
9459        if_bytemask = if_64b ? (use_ecc ? 0x1ff : 0xff)
9460            : (use_ecc ? 0x01f : 0x0f);
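            // 64-bit + ECC uses 9 byte lanes (0x1ff); 32-bit + ECC uses 5 (0x01f).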
9461
9462        debug("DRAM Interface width: %d bits %s bytemask 0x%03x\n",
9463              if_64b ? 64 : 32, use_ecc ? "+ECC" : "", if_bytemask);
9464
9465        debug("\n------ Board Custom Configuration Settings ------\n");
9466        debug("%-45s : %d\n", "MIN_RTT_NOM_IDX   ", c_cfg->min_rtt_nom_idx);
9467        debug("%-45s : %d\n", "MAX_RTT_NOM_IDX   ", c_cfg->max_rtt_nom_idx);
9468        debug("%-45s : %d\n", "MIN_RODT_CTL      ", c_cfg->min_rodt_ctl);
9469        debug("%-45s : %d\n", "MAX_RODT_CTL      ", c_cfg->max_rodt_ctl);
9470        debug("%-45s : %d\n", "MIN_CAS_LATENCY   ", c_cfg->min_cas_latency);
9471        debug("%-45s : %d\n", "OFFSET_EN         ", c_cfg->offset_en);
9472        debug("%-45s : %d\n", "OFFSET_UDIMM      ", c_cfg->offset_udimm);
9473        debug("%-45s : %d\n", "OFFSET_RDIMM      ", c_cfg->offset_rdimm);
9474        debug("%-45s : %d\n", "DDR_RTT_NOM_AUTO  ", c_cfg->ddr_rtt_nom_auto);
9475        debug("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", c_cfg->ddr_rodt_ctl_auto);
9476        if (spd_rdimm)
9477                debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
9478                      c_cfg->rlevel_comp_offset_rdimm);
9479        else
9480                debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
9481                      c_cfg->rlevel_comp_offset_udimm);
9482        debug("%-45s : %d\n", "RLEVEL_COMPUTE    ", c_cfg->rlevel_compute);
9483        debug("%-45s : %d\n", "DDR2T_UDIMM       ", c_cfg->ddr2t_udimm);
9484        debug("%-45s : %d\n", "DDR2T_RDIMM       ", c_cfg->ddr2t_rdimm);
9485        debug("%-45s : %d\n", "FPRCH2            ", c_cfg->fprch2);
9486        debug("%-45s : %d\n", "PTUNE_OFFSET      ", c_cfg->ptune_offset);
9487        debug("%-45s : %d\n", "NTUNE_OFFSET      ", c_cfg->ntune_offset);
9488        debug("-------------------------------------------------\n");
9489
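             /*
              * The desired CAS latency is the smallest whole number of
              * DDR clocks that covers tAAmin: CL = ceil(tAAmin / tCK).
              */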
9490        cl = divide_roundup(taamin, tclk_psecs);
9491
9492        debug("Desired CAS Latency                           : %6d\n", cl);
9493
9494        min_cas_latency = c_cfg->min_cas_latency;
9495
9496        s = lookup_env(priv, "ddr_min_cas_latency");
9497        if (s)
9498                min_cas_latency = simple_strtoul(s, NULL, 0);
9499
9500        debug("CAS Latencies supported in DIMM               :");
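             /* Bit 0 of the SPD CAS-latency bitmap is CL=7 for DDR4, CL=4 for DDR3 */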
9501        base_cl = (ddr_type == DDR4_DRAM) ? 7 : 4;
9502        for (i = 0; i < 32; ++i) {
9503                if ((spd_cas_latency >> i) & 1) {
9504                        debug(" %d", i + base_cl);
9505                        max_cas_latency = i + base_cl;
9506                        if (min_cas_latency == 0)
9507                                min_cas_latency = i + base_cl;
9508                }
9509        }
9510        debug("\n");
9511
9512        /*
9513         * Use relaxed timing when running slower than the minimum
9514         * supported speed.  Adjust timing to match the smallest supported
9515         * CAS Latency.
9516         */
9517        if (min_cas_latency > cl) {
9518                ulong adjusted_tclk = taamin / min_cas_latency;
9519
9520                cl = min_cas_latency;
9521                debug("Slow clock speed. Adjusting timing: tClk = %ld, Adjusted tClk = %ld\n",
9522                      tclk_psecs, adjusted_tclk);
9523                tclk_psecs = adjusted_tclk;
9524        }
9525
9526        s = env_get("ddr_cas_latency");
9527        if (s) {
9528                override_cas_latency = simple_strtoul(s, NULL, 0);
9529                printf("Parameter found in environment. ddr_cas_latency = %d\n",
9530                       override_cas_latency);
9531        }
9532
9533        /* Make sure that the selected cas latency is legal */
9534        for (i = (cl - base_cl); i < 32; ++i) {
9535                if ((spd_cas_latency >> i) & 1) {
9536                        cl = i + base_cl;
9537                        break;
9538                }
9539        }
9540
9541        if (max_cas_latency < cl)
9542                cl = max_cas_latency;
9543
9544        if (override_cas_latency != 0)
9545                cl = override_cas_latency;
9546
9547        debug("CAS Latency                                   : %6d\n", cl);
9548
9549        if ((cl * tckmin) > 20000) {
9550                debug("(CLactual * tckmin) = %d exceeds 20 ns\n",
9551                      (cl * tckmin));
9552        }
9553
9554        if (tclk_psecs < (ulong)tckmin) {
9555                printf("WARNING!!!!!!: DDR3 Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin:%ld)!!!!!!!!\n",
9556                       tclk_psecs, (ulong)tckmin);
9557        }
9558
9559        if (num_banks != 4 && num_banks != 8 && num_banks != 16) {
 9560                printf("Unsupported number of banks %d. Must be 4, 8 or 16.\n",
9561                       num_banks);
9562                ++fatal_error;
9563        }
9564
9565        if (num_ranks != 1 && num_ranks != 2 && num_ranks != 4) {
9566                printf("Unsupported number of ranks: %d\n", num_ranks);
9567                ++fatal_error;
9568        }
9569
9570        if (octeon_is_cpuid(OCTEON_CN78XX) ||
9571            octeon_is_cpuid(OCTEON_CN73XX) ||
9572            octeon_is_cpuid(OCTEON_CNF75XX)) {
9573                if (dram_width != 8 && dram_width != 16 && dram_width != 4) {
9574                        printf("Unsupported SDRAM Width, %d.  Must be 4, 8 or 16.\n",
9575                               dram_width);
9576                        ++fatal_error;
9577                }
9578        } else if (dram_width != 8 && dram_width != 16) {
9579                printf("Unsupported SDRAM Width, %d.  Must be 8 or 16.\n",
9580                       dram_width);
9581                ++fatal_error;
9582        }
9583
9584        /*
 9585         ** Bail out here if things are not copacetic.
9586         */
9587        if (fatal_error)
9588                return (-1);
9589
9590        /*
9591         * 4.8.4 LMC RESET Initialization
9592         *
9593         * The purpose of this step is to assert/deassert the RESET# pin at the
9594         * DDR3/DDR4 parts.
9595         *
9596         * This LMC RESET step is done for all enabled LMCs.
9597         */
9598        perform_lmc_reset(priv, node, if_num);
9599
9600        // Make sure scrambling is disabled during init...
9601        ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
9602        ctrl.s.scramble_ena = 0;
9603        lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
9604
9605        lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), 0);
9606        lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), 0);
9607        if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
9608                lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), 0);
9609
9610        odt_idx = min(dimm_count - 1, 3);
9611
9612        switch (num_ranks) {
9613        case 1:
9614                odt_config = odt_1rank_config;
9615                break;
9616        case 2:
9617                odt_config = odt_2rank_config;
9618                break;
9619        case 4:
9620                odt_config = odt_4rank_config;
9621                break;
9622        default:
9623                odt_config = disable_odt_config;
9624                printf("Unsupported number of ranks: %d\n", num_ranks);
9625                ++fatal_error;
9626        }
9627
9628        /*
9629         * 4.8.5 Early LMC Initialization
9630         *
9631         * All of DDR PLL, LMC CK, and LMC DRESET initializations must be
9632         * completed prior to starting this LMC initialization sequence.
9633         *
9634         * Perform the following five substeps for early LMC initialization:
9635         *
9636         * 1. Software must ensure there are no pending DRAM transactions.
9637         *
9638         * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0,
9639         *    LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0,
9640         *    LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM,
9641         *    LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2,
9642         *    LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with
9643         *    appropriate values. All sections in this chapter can be used to
9644         *    derive proper register settings.
9645         */
9646
9647        /* LMC(0)_CONFIG */
9648        lmc_config(priv);
9649
9650        /* LMC(0)_CONTROL */
9651        lmc_control(priv);
9652
9653        /* LMC(0)_TIMING_PARAMS0 */
9654        lmc_timing_params0(priv);
9655
9656        /* LMC(0)_TIMING_PARAMS1 */
9657        lmc_timing_params1(priv);
9658
9659        /* LMC(0)_TIMING_PARAMS2 */
9660        lmc_timing_params2(priv);
9661
9662        /* LMC(0)_MODEREG_PARAMS0 */
9663        lmc_modereg_params0(priv);
9664
9665        /* LMC(0)_MODEREG_PARAMS1 */
9666        lmc_modereg_params1(priv);
9667
9668        /* LMC(0)_MODEREG_PARAMS2 */
9669        lmc_modereg_params2(priv);
9670
9671        /* LMC(0)_MODEREG_PARAMS3 */
9672        lmc_modereg_params3(priv);
9673
9674        /* LMC(0)_NXM */
9675        lmc_nxm(priv);
9676
9677        /* LMC(0)_WODT_MASK */
9678        lmc_wodt_mask(priv);
9679
9680        /* LMC(0)_RODT_MASK */
9681        lmc_rodt_mask(priv);
9682
9683        /* LMC(0)_COMP_CTL2 */
9684        lmc_comp_ctl2(priv);
9685
9686        /* LMC(0)_PHY_CTL */
9687        lmc_phy_ctl(priv);
9688
9689        /* LMC(0)_EXT_CONFIG */
9690        lmc_ext_config(priv);
9691
9692        /* LMC(0)_EXT_CONFIG2 */
9693        lmc_ext_config2(priv);
9694
9695        /* LMC(0)_DIMM0/1_PARAMS */
9696        lmc_dimm01_params(priv);
9697
9698        ret = lmc_rank_init(priv);
9699        if (ret < 0)
9700                return 0;       /* 0 indicates problem */
9701
9702        lmc_config_2(priv);
9703
9704        lmc_write_leveling(priv);
9705
9706        lmc_read_leveling(priv);
9707
9708        lmc_workaround(priv);
9709
9710        ret = lmc_sw_write_leveling(priv);
9711        if (ret < 0)
9712                return 0;       /* 0 indicates problem */
9713
9714        // this sometimes causes stack overflow crashes..
9715        // display only for DDR4 RDIMMs.
9716        if (ddr_type == DDR4_DRAM && spd_rdimm) {
9717                int i;
9718
9719                for (i = 0; i < 3; i += 2)      // just pages 0 and 2 for now..
9720                        display_mpr_page(priv, rank_mask, if_num, i);
9721        }
9722
9723        lmc_dll(priv);
9724
9725        lmc_workaround_2(priv);
9726
9727        lmc_final(priv);
9728
9729        lmc_scrambling(priv);
9730
9731        return mem_size_mbytes;
9732}
9733
9734/////    HW-assist byte DLL offset tuning   //////
9735
9736static int cvmx_dram_get_num_lmc(struct ddr_priv *priv)
9737{
9738        union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
9739
9740        if (octeon_is_cpuid(OCTEON_CN70XX))
9741                return 1;
9742
9743        if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) {
9744                // sample LMC1
9745                lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(1));
9746                if (lmcx_dll_ctl2.cn78xx.intf_en)
9747                        return 2;
9748                else
9749                        return 1;
9750        }
9751
9752        // for CN78XX, LMCs are always active in pairs, and always LMC0/1
9753        // so, we sample LMC2 to see if 2 and 3 are active
9754        lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(2));
9755        if (lmcx_dll_ctl2.cn78xx.intf_en)
9756                return 4;
9757        else
9758                return 2;
9759}
9760
9761// got to do these here, even though already defined in BDK
9762
9763// all DDR3, and DDR4 x16 today, use only 3 bank bits;
9764// DDR4 x4 and x8 always have 4 bank bits
9765// NOTE: this will change in the future, when DDR4 x16 devices can
9766// come with 16 banks!! FIXME!!
9767static int cvmx_dram_get_num_bank_bits(struct ddr_priv *priv, int lmc)
9768{
9769        union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
9770        union cvmx_lmcx_config lmcx_config;
9771        union cvmx_lmcx_ddr_pll_ctl lmcx_ddr_pll_ctl;
9772        int bank_width;
9773
9774        // can always read this
9775        lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc));
9776
9777        if (lmcx_dll_ctl2.cn78xx.dreset)        // check LMCn
9778                return 0;
9779
 9780        lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
9781        lmcx_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(lmc));
9782
9783        bank_width = ((lmcx_ddr_pll_ctl.s.ddr4_mode != 0) &&
9784                      (lmcx_config.s.bg2_enable)) ? 4 : 3;
9785
9786        return bank_width;
9787}
9788
9789#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1))
9790#define ADDRESS_HOLE 0x10000000ULL
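     /* example: EXTRACT(addr, 7, 2) yields address bits [8:7] */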
9791
9792static void cvmx_dram_address_extract_info(struct ddr_priv *priv, u64 address,
9793                                           int *node, int *lmc, int *dimm,
9794                                           int *prank, int *lrank, int *bank,
9795                                           int *row, int *col)
9796{
9797        int bank_lsb, xbits;
9798        union cvmx_l2c_ctl l2c_ctl;
9799        union cvmx_lmcx_config lmcx_config;
9800        union cvmx_lmcx_control lmcx_control;
9801        union cvmx_lmcx_ext_config ext_config;
9802        int bitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
9803        int bank_width;
9804        int dimm_lsb;
9805        int dimm_width;
9806        int prank_lsb, lrank_lsb;
9807        int prank_width, lrank_width;
9808        int row_lsb;
9809        int row_width;
9810        int col_hi_lsb;
9811        int col_hi_width;
9812        int col_hi;
9813
9814        if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
9815                bitno = 18;
9816
9817        *node = EXTRACT(address, 40, 2);        /* Address bits [41:40] */
9818
9819        address &= (1ULL << 40) - 1;    // lop off any node bits or above
9820        if (address >= ADDRESS_HOLE)    // adjust down if at HOLE or above
9821                address -= ADDRESS_HOLE;
9822
9823        /* Determine the LMC controllers */
9824        l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL);
9825
9826        /* xbits depends on number of LMCs */
9827        xbits = cvmx_dram_get_num_lmc(priv) >> 1;       // 4->2, 2->1, 1->0
9828        bank_lsb = 7 + xbits;
9829
9830        /* LMC number is probably aliased */
9831        if (l2c_ctl.s.disidxalias) {
9832                *lmc = EXTRACT(address, 7, xbits);
 9833        } else {
9834                *lmc = EXTRACT(address, 7, xbits) ^
9835                        EXTRACT(address, bitno, xbits) ^
9836                        EXTRACT(address, 12, xbits);
9837        }
9838
9839        /* Figure out the bank field width */
9840        lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(*lmc));
9841        ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(*lmc));
9842        bank_width = cvmx_dram_get_num_bank_bits(priv, *lmc);
9843
9844        /* Extract additional info from the LMC_CONFIG CSR */
9845        dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits;
9846        dimm_width = 40 - dimm_lsb;
9847        prank_lsb = dimm_lsb - lmcx_config.s.rank_ena;
9848        prank_width = dimm_lsb - prank_lsb;
9849        lrank_lsb = prank_lsb - ext_config.s.dimm0_cid;
9850        lrank_width = prank_lsb - lrank_lsb;
9851        row_lsb = 14 + lmcx_config.s.row_lsb + xbits;
9852        row_width = lrank_lsb - row_lsb;
9853        col_hi_lsb = bank_lsb + bank_width;
9854        col_hi_width = row_lsb - col_hi_lsb;
9855
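             /*
              * Resulting layout, low to high, using the widths computed
              * above: byte[2:0], column low bits [6:3], LMC select, bank,
              * column high bits, row, logical rank, physical rank, DIMM.
              */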
9856        /* Extract the parts of the address */
9857        *dimm = EXTRACT(address, dimm_lsb, dimm_width);
9858        *prank = EXTRACT(address, prank_lsb, prank_width);
9859        *lrank = EXTRACT(address, lrank_lsb, lrank_width);
9860        *row = EXTRACT(address, row_lsb, row_width);
9861
9862        /* bank calculation may be aliased... */
9863        lmcx_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(*lmc));
9864        if (lmcx_control.s.xor_bank) {
9865                *bank = EXTRACT(address, bank_lsb, bank_width) ^
9866                        EXTRACT(address, 12 + xbits, bank_width);
9867        } else {
9868                *bank = EXTRACT(address, bank_lsb, bank_width);
9869        }
9870
9871        /* LMC number already extracted */
9872        col_hi = EXTRACT(address, col_hi_lsb, col_hi_width);
9873        *col = EXTRACT(address, 3, 4) | (col_hi << 4);
9874        /* Bus byte is address bits [2:0]. Unused here */
9875}
9876
9877// end of added workarounds
9878
9879// NOTE: "mode" argument:
9880//         DBTRAIN_TEST: for testing using GP patterns, includes ECC
9881//         DBTRAIN_DBI:  for DBI deskew training behavior (uses GP patterns)
9882//         DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC
9883// NOTE: trust the caller to specify the correct/supported mode
9884//
9885static int test_dram_byte_hw(struct ddr_priv *priv, int if_num, u64 p,
9886                             int mode, u64 *xor_data)
9887{
9888        u64 p1;
9889        u64 k;
9890        int errors = 0;
9891
9892        u64 mpr_data0, mpr_data1;
9893        u64 bad_bits[2] = { 0, 0 };
9894
9895        int node_address, lmc, dimm;
9896        int prank, lrank;
9897        int bank, row, col;
9898        int save_or_dis;
9899        int byte;
9900        int ba_loop, ba_bits;
9901
9902        union cvmx_lmcx_rlevel_ctl rlevel_ctl;
9903        union cvmx_lmcx_dbtrain_ctl dbtrain_ctl;
9904        union cvmx_lmcx_phy_ctl phy_ctl;
9905
9906        int biter_errs;
9907
9908        // FIXME: K iterations set to 4 for now.
 9909        // FIXME: decrement to increase iterations.
9910        // FIXME: must be no less than 22 to stay above an LMC hash field.
9911        int kshift = 27;
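             // with kshift = 27 the loop below makes (1 << 29) / (1 << 27) = 4
             // passes, stepping 128MB at a time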
9912
9913        const char *s;
9914        int node = 0;
9915
9916        // allow override default setting for kshift
9917        s = env_get("ddr_tune_set_kshift");
9918        if (s) {
9919                int temp = simple_strtoul(s, NULL, 0);
9920
9921                if (temp < 22 || temp > 28) {
9922                        debug("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n",
9923                              node, if_num, temp, kshift);
9924                } else {
9925                        debug("N%d.LMC%d: overriding kshift (%d) to %d\n",
9926                              node, if_num, kshift, temp);
9927                        kshift = temp;
9928                }
9929        }
9930
9931        /*
9932         * 1) Make sure that RLEVEL_CTL[OR_DIS] = 0.
9933         */
9934        rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
9935        save_or_dis = rlevel_ctl.s.or_dis;
9936        /* or_dis must be disabled for this sequence */
9937        rlevel_ctl.s.or_dis = 0;
9938        lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);
9939
9940        /*
9941         * NOTE: this step done in the calling routine(s)...
9942         * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
9943         * of choice.
9944         * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower
9945         * (rising edge) 64 bits of data.
9946         * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper
9947         * (falling edge) 64 bits of data.
9948         * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower
9949         * (rising edge <7:0>) and upper (falling edge <15:8>) ECC data.
9950         */
9951
9952        // final address must include LMC and node
9953        p |= (if_num << 7);     /* Map address into proper interface */
9954        p |= (u64)node << CVMX_NODE_MEM_SHIFT;  // map to node
9955
9956        /*
9957         * Add base offset to both test regions to not clobber u-boot stuff
9958         * when running from L2 for NAND boot.
9959         */
9960        p += 0x20000000;        // offset to 512MB, ie above THE HOLE!!!
9961        p |= 1ull << 63;        // needed for OCTEON
9962
9963        errors = 0;
9964
9965        cvmx_dram_address_extract_info(priv, p, &node_address, &lmc, &dimm,
9966                                       &prank, &lrank, &bank, &row, &col);
9967        debug("%s: START at A:0x%012llx, N%d L%d D%d/%d R%d B%1x Row:%05x Col:%05x\n",
9968              __func__, p, node_address, lmc, dimm, prank, lrank, bank,
9969              row, col);
9970
9971        // only check once per call, and ignore if no match...
9972        if ((int)node != node_address) {
9973                printf("ERROR: Node address mismatch\n");
9974                return 0;
9975        }
9976        if (lmc != if_num) {
9977                printf("ERROR: LMC address mismatch\n");
9978                return 0;
9979        }
9980
9981        /*
9982         * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as
9983         * it’s a one-shot operation). This is to get into the habit of
9984         * resetting PHY’s SILO to the original 0 location.
9985         */
9986        phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
9987        phy_ctl.s.phy_reset = 1;
9988        lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
9989
9990        /*
9991         * Walk through a range of addresses avoiding bits that alias
9992         * interfaces on the CN88XX.
9993         */
9994
9995        // FIXME: want to try to keep the K increment from affecting the
 9996        // LMC via hash, so keep it above bit 21; we also want to keep k
9997        // less than the base offset of bit 29 (512MB)
9998
9999        for (k = 0; k < (1UL << 29); k += (1UL << kshift)) {
10000                // FIXME: the sequence will iterate over 1/2 cacheline
10001                // FIXME: for each unit specified in "read_cmd_count",
10002                // FIXME: so we set up each sequence to do the max cachelines
10003                // it can
10004
10005                p1 = p + k;
10006
10007                cvmx_dram_address_extract_info(priv, p1, &node_address, &lmc,
10008                                               &dimm, &prank, &lrank, &bank,
10009                                               &row, &col);
10010
10011                /*
10012                 * 2) Setup the fields of the CSR DBTRAIN_CTL as follows:
10013                 * a. COL, ROW, BA, BG, PRANK points to the starting point
10014                 * of the address.
10015                 * You can just set them to all 0.
10016                 * b. RW_TRAIN – set this to 1.
10017                 * c. TCCD_L – set this to 0.
10018                 * d. READ_CMD_COUNT – instructs the sequence how many
10019                 * writes/reads to perform.
10020                 * It is a 5-bit field, so set it to 31 for the maximum
10021                 * number of reads/writes.
10021                 */
10022                dbtrain_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DBTRAIN_CTL(if_num));
10023                dbtrain_ctl.s.column_a = col;
10024                dbtrain_ctl.s.row_a = row;
10025                dbtrain_ctl.s.bg = (bank >> 2) & 3;
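                     // bank bits [3:2] select the (DDR4) bank group; the low
                     // two bank bits are cycled through BA in the loop below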
10026                dbtrain_ctl.s.prank = (dimm * 2) + prank;       // FIXME?
10027                dbtrain_ctl.s.lrank = lrank;    // FIXME?
10028                dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI);
10029                dbtrain_ctl.s.write_ena = 1;
10030                dbtrain_ctl.s.read_cmd_count = 31;      // max count pass 1.x
10031                if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
10032                    octeon_is_cpuid(OCTEON_CNF75XX)) {
10033                        // max count on chips that support it
10034                        dbtrain_ctl.s.cmd_count_ext = 3;
10035                } else {
10036                        // max count pass 1.x
10037                        dbtrain_ctl.s.cmd_count_ext = 0;
10038                }
10039
10040                dbtrain_ctl.s.rw_train = 1;
10041                dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI);
10042                // LFSR should only be on when chip supports it...
10043                dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0;
10044
10045                biter_errs = 0;
10046
10047                // for each address, iterate over the 4 "banks" in the BA
10048                for (ba_loop = 0, ba_bits = bank & 3;
10049                     ba_loop < 4; ba_loop++, ba_bits = (ba_bits + 1) & 3) {
10050                        dbtrain_ctl.s.ba = ba_bits;
10051                        lmc_wr(priv, CVMX_LMCX_DBTRAIN_CTL(if_num),
10052                               dbtrain_ctl.u64);
10053
10054                        /*
10055                         * We will use the RW_TRAINING sequence (14) for
10056                         * this task.
10057                         *
10058                         * 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14,
10059                         *    SEQ_CTL[INIT_START] = 1).
10060                         * 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion.
10061                         */
10062                        oct3_ddr3_seq(priv, prank, if_num, 14);
10063
10064                        /*
10065                         * 6) Read MPR_DATA0 and MPR_DATA1 for results.
10066                         * a. MPR_DATA0[MPR_DATA<63:0>] – comparison results
10067                         *    for DQ63:DQ0. (1 means MATCH, 0 means FAIL).
10068                         * b. MPR_DATA1[MPR_DATA<7:0>] – comparison results
10069                         *    for ECC bit7:0.
10070                         */
10071                        mpr_data0 = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
10072                        mpr_data1 = lmc_rd(priv, CVMX_LMCX_MPR_DATA1(if_num));
10073
10074                        /*
10075                         * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically
10076                         * clears this as it’s a one-shot operation).
10077                         * This is to get into the habit of resetting PHY’s
10078                         * SILO to the original 0 location.
10079                         */
10080                        phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
10081                        phy_ctl.s.phy_reset = 1;
10082                        lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
10083
10084                        // bypass any error checking or updating when DBI mode
10085                        if (mode == DBTRAIN_DBI)
10086                                continue;
10087
10088                        // data bytes
10089                        if (~mpr_data0) {
10090                                for (byte = 0; byte < 8; byte++) {
10091                                        if ((~mpr_data0 >> (8 * byte)) & 0xffUL)
10092                                                biter_errs |= (1 << byte);
10093                                }
10094                                // accumulate bad bits
10095                                bad_bits[0] |= ~mpr_data0;
10096                        }
10097
10098                        // include ECC byte errors
10099                        if (~mpr_data1 & 0xffUL) {
10100                                biter_errs |= (1 << 8);
10101                                bad_bits[1] |= ~mpr_data1 & 0xffUL;
10102                        }
10103                }
10104
10105                errors |= biter_errs;
10106        }                       /* end for (k=...) */
10107
10108        rlevel_ctl.s.or_dis = save_or_dis;
10109        lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);
10110
10111        // send the bad bits back...
10112        if (mode != DBTRAIN_DBI && xor_data) {
10113                xor_data[0] = bad_bits[0];
10114                xor_data[1] = bad_bits[1];
10115        }
10116
10117        return errors;
10118}
10119
10120// setup default for byte test pattern array
10121// take these from the HRM section 6.9.13
10122static const u64 byte_pattern_0[] = {
10123        0xFFAAFFFFFF55FFFFULL,  // GP0
10124        0x55555555AAAAAAAAULL,  // GP1
10125        0xAA55AAAAULL,          // GP2
10126};
10127
10128static const u64 byte_pattern_1[] = {
10129        0xFBF7EFDFBF7FFEFDULL,  // GP0
10130        0x0F1E3C78F0E1C387ULL,  // GP1
10131        0xF0E1BF7FULL,          // GP2
10132};
10133
10134// this is from Andrew via LFSR with PRBS=0xFFFFAAAA
10135static const u64 byte_pattern_2[] = {
10136        0xEE55AADDEE55AADDULL,  // GP0
10137        0x55AADDEE55AADDEEULL,  // GP1
10138        0x55EEULL,              // GP2
10139};
10140
10141// this is from Mike via LFSR with PRBS=0x4A519909
10142static const u64 byte_pattern_3[] = {
10143        0x0088CCEE0088CCEEULL,  // GP0
10144        0xBB552211BB552211ULL,  // GP1
10145        0xBB00ULL,              // GP2
10146};
10147
10148static const u64 *byte_patterns[4] = {
10149        byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3
10150};
10151
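     // PRBS seed values; setup_lfsr_pattern() programs the selected seed
     // into CHAR_CTL[PRBS] when the LFSR pattern mode is used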
10152static const u32 lfsr_patterns[4] = {
10153        0xFFFFAAAAUL, 0x06000000UL, 0xAAAAFFFFUL, 0x4A519909UL
10154};
10155
10156#define NUM_BYTE_PATTERNS 4
10157
10158#define DEFAULT_BYTE_BURSTS 32  // compromise between time and rigor
10159
10160static void setup_hw_pattern(struct ddr_priv *priv, int lmc,
10161                             const u64 *pattern_p)
10162{
10163        /*
10164         * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
10165         * of choice.
10166         * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower
10167         *    (rising edge) 64 bits of data.
10168         * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper
10169         *    (falling edge) 64 bits of data.
10170         * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower
10171         *    (rising edge <7:0>) and upper
10172         * (falling edge <15:8>) ECC data.
10173         */
10174        lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]);
10175        lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]);
10176        lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]);
10177}
10178
10179static void setup_lfsr_pattern(struct ddr_priv *priv, int lmc, u32 data)
10180{
10181        union cvmx_lmcx_char_ctl char_ctl;
10182        u32 prbs;
10183        const char *s;
10184
10185        s = env_get("ddr_lfsr_prbs");
10186        if (s)
10187                prbs = simple_strtoul(s, NULL, 0);
10188        else
10189                prbs = data;
10190
10191        /*
10192         * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
10193         * here data comes from the LFSR generating a PRBS pattern
10194         * CHAR_CTL.EN = 0
10195         * CHAR_CTL.SEL = 0; // for PRBS
10196         * CHAR_CTL.DR = 1;
10197         * CHAR_CTL.PRBS = setup for whatever type of PRBS to send
10198         * CHAR_CTL.SKEW_ON = 1;
10199         */
10200        char_ctl.u64 = lmc_rd(priv, CVMX_LMCX_CHAR_CTL(lmc));
10201        char_ctl.s.en = 0;
10202        char_ctl.s.sel = 0;
10203        char_ctl.s.dr = 1;
10204        char_ctl.s.prbs = prbs;
10205        char_ctl.s.skew_on = 1;
10206        lmc_wr(priv, CVMX_LMCX_CHAR_CTL(lmc), char_ctl.u64);
10207}
10208
10209static int choose_best_hw_patterns(int lmc, int mode)
10210{
10211        int new_mode = mode;
10212        const char *s;
10213
10214        switch (mode) {
10215        case DBTRAIN_TEST:      // always choose LFSR if chip supports it
10216                if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
10217                        int lfsr_enable = 1;
10218
10219                        s = env_get("ddr_allow_lfsr");
10220                        if (s) {
10221                                // override?
10222                                lfsr_enable = !!strtoul(s, NULL, 0);
10223                        }
10224
10225                        if (lfsr_enable)
10226                                new_mode = DBTRAIN_LFSR;
10227                }
10228                break;
10229
10230        case DBTRAIN_DBI:       // possibly can allow LFSR use?
10231                break;
10232
10233        case DBTRAIN_LFSR:      // forced already
10234                if (!octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
10235                        debug("ERROR: illegal HW assist mode %d\n", mode);
10236                        new_mode = DBTRAIN_TEST;
10237                }
10238                break;
10239
10240        default:
10241                debug("ERROR: unknown HW assist mode %d\n", mode);
10242        }
10243
10244        if (new_mode != mode)
10245                debug("%s: changing mode %d to %d\n", __func__, mode, new_mode);
10246
10247        return new_mode;
10248}
10249
10250int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
10251                         int mode, u64 *xor_data)
10252{
10253        int pattern;
10254        const u64 *pattern_p;
10255        int errs, errors = 0;
10256
10257        // FIXME? always choose LFSR if chip supports it???
10258        mode = choose_best_hw_patterns(lmc, mode);
10259
10260        for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
10261                if (mode == DBTRAIN_LFSR) {
10262                        setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);
10263                } else {
10264                        pattern_p = byte_patterns[pattern];
10265                        setup_hw_pattern(priv, lmc, pattern_p);
10266                }
10267                errs = test_dram_byte_hw(priv, lmc, phys_addr, mode, xor_data);
10268
10269                debug("%s: PATTERN %d at A:0x%012llx errors 0x%x\n",
10270                      __func__, pattern, phys_addr, errs);
10271
10272                errors |= errs;
10273        }
10274
10275        return errors;
10276}
10277
10278static void hw_assist_test_dll_offset(struct ddr_priv *priv,
10279                                      int dll_offset_mode, int lmc,
10280                                      int bytelane,
10281                                      int if_64b,
10282                                      u64 dram_tune_rank_offset,
10283                                      int dram_tune_byte_bursts)
10284{
10285        int byte_offset, new_best_offset[9];
10286        int rank_delay_start[4][9];
10287        int rank_delay_count[4][9];
10288        int rank_delay_best_start[4][9];
10289        int rank_delay_best_count[4][9];
10290        int errors[4], off_errors, tot_errors;
10291        int rank_mask, rankx, active_ranks;
10292        int pattern;
10293        const u64 *pattern_p;
10294        int byte;
10295        char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
10296        int pat_best_offset[9];
10297        u64 phys_addr;
10298        int pat_beg, pat_end;
10299        int rank_beg, rank_end;
10300        int byte_lo, byte_hi;
10301        union cvmx_lmcx_config lmcx_config;
10302        u64 hw_rank_offset;
10303        int num_lmcs = cvmx_dram_get_num_lmc(priv);
10304        // FIXME? always choose LFSR if chip supports it???
10305        int mode = choose_best_hw_patterns(lmc, DBTRAIN_TEST);
10306        int node = 0;
10307
10308        if (bytelane == 0x0A) { // all bytelanes
10309                byte_lo = 0;
10310                byte_hi = 8;
10311        } else {                // just 1
10312                byte_lo = bytelane;
10313                byte_hi = bytelane;
10314        }
10315
10316        lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10317        rank_mask = lmcx_config.s.init_status;
10318
10319        // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
10320        hw_rank_offset =
10321            1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena +
10322                     (num_lmcs / 2));
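             // rank stride: pbank_lsb places the DIMM select bit relative to
             // address bit 28, rank_ena steps down one bit to the rank select,
             // and num_lmcs/2 accounts for the LMC interleave bits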
10323
10324        debug("N%d: %s: starting LMC%d with rank offset 0x%016llx\n",
10325              node, __func__, lmc, (unsigned long long)hw_rank_offset);
10326
10327        // start of pattern loop
10328        // we do the set of tests for each pattern supplied...
10329
10330        memset(new_best_offset, 0, sizeof(new_best_offset));
10331        for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
10332                memset(pat_best_offset, 0, sizeof(pat_best_offset));
10333
10334                if (mode == DBTRAIN_TEST) {
10335                        pattern_p = byte_patterns[pattern];
10336                        setup_hw_pattern(priv, lmc, pattern_p);
10337                } else {
10338                        setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);
10339                }
10340
10341                // now loop through all legal values for the DLL byte offset...
10342
10343#define BYTE_OFFSET_INCR 3      // FIXME: make this tunable?
10344
10345                tot_errors = 0;
10346
10347                memset(rank_delay_count, 0, sizeof(rank_delay_count));
10348                memset(rank_delay_start, 0, sizeof(rank_delay_start));
10349                memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count));
10350                memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start));
10351
10352                for (byte_offset = -63; byte_offset < 64;
10353                     byte_offset += BYTE_OFFSET_INCR) {
10354                        // do the setup on the active LMC
10355                        // set the bytelanes DLL offsets
10356                        change_dll_offset_enable(priv, lmc, 0);
10357                        // FIXME? bytelane?
10358                        load_dll_offset(priv, lmc, dll_offset_mode,
10359                                        byte_offset, bytelane);
10360                        change_dll_offset_enable(priv, lmc, 1);
10361
10362                        //bdk_watchdog_poke();
10363
10364                        // run the test on each rank
10365                        // only 1 call per rank should be enough, let the
10366                        // bursts, loops, etc, control the load...
10367
10368                        // errors for this byte_offset, all ranks
10369                        off_errors = 0;
10370
10371                        active_ranks = 0;
10372
10373                        for (rankx = 0; rankx < 4; rankx++) {
10374                                if (!(rank_mask & (1 << rankx)))
10375                                        continue;
10376
10377                                phys_addr = hw_rank_offset * active_ranks;
10378                                // FIXME: now done by test_dram_byte_hw()
10379                                //phys_addr |= (lmc << 7);
10380                                //phys_addr |= (u64)node << CVMX_NODE_MEM_SHIFT;
10381
10382                                active_ranks++;
10383
10384                                // NOTE: return is now a bitmask of the
10385                                // erroring bytelanes.
10386                                errors[rankx] =
10387                                    test_dram_byte_hw(priv, lmc, phys_addr,
10388                                                      mode, NULL);
10389
10390                                // process any errors in the bytelane(s) that
10391                                // are being tested
10392                                for (byte = byte_lo; byte <= byte_hi; byte++) {
10393                                        // check errors
10394                                        // yes, an error in the byte lane in
10395                                        // this rank
10396                                        if (errors[rankx] & (1 << byte)) {
10397                                                off_errors |= (1 << byte);
10398
10399                                                debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012llx errors\n",
10400                                                      node, lmc, rankx, byte,
10401                                                      mode_str, byte_offset,
10402                                                      phys_addr);
10403
10404                                                // had started run
10405                                                if (rank_delay_count
10406                                                    [rankx][byte] > 0) {
10407                                                        debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n",
10408                                                              node, lmc, rankx,
10409                                                              byte, mode_str,
10410                                                              byte_offset);
10411                                                        // stop now
10412                                                        rank_delay_count
10413                                                                [rankx][byte] =
10414                                                                0;
10415                                                }
10416                                                // FIXME: else had not started
10417                                                // run - nothing else to do?
10418                                        } else {
10419                                                // no error in the byte lane
10420                                                // first success, set run start
10421                                                if (rank_delay_count[rankx]
10422                                                    [byte] == 0) {
10423                                                        debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n",
10424                                                              node, lmc, rankx,
10425                                                              byte, mode_str,
10426                                                              byte_offset);
10427                                                        rank_delay_start[rankx]
10428                                                                [byte] =
10429                                                                byte_offset;
10430                                                }
10431                                                // bump run length
10432                                                rank_delay_count[rankx][byte]
10433                                                        += BYTE_OFFSET_INCR;
10434
10435                                                // is this now the biggest
10436                                                // window?
10437                                                if (rank_delay_count[rankx]
10438                                                    [byte] >
10439                                                    rank_delay_best_count[rankx]
10440                                                    [byte]) {
10441                                                        rank_delay_best_count
10442                                                            [rankx][byte] =
10443                                                            rank_delay_count
10444                                                            [rankx][byte];
10445                                                        rank_delay_best_start
10446                                                            [rankx][byte] =
10447                                                            rank_delay_start
10448                                                            [rankx][byte];
10449                                                        debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n",
10450                                                              node, lmc, rankx,
10451                                                              byte, mode_str,
10452                                                              byte_offset,
10453                                                              rank_delay_best_start
10454                                                              [rankx][byte],
10455                                                              rank_delay_best_count
10456                                                              [rankx][byte]);
10457                                                }
10458                                        }
10459                                }
10460                        } /* for (rankx = 0; rankx < 4; rankx++) */
10461
10462                        tot_errors |= off_errors;
10463                }
10464
10465                // set the bytelanes DLL offsets all back to 0
10466                change_dll_offset_enable(priv, lmc, 0);
10467                load_dll_offset(priv, lmc, dll_offset_mode, 0, bytelane);
10468                change_dll_offset_enable(priv, lmc, 1);
10469
10470                // now choose the best byte_offsets for this pattern
10471                // according to the best windows of the tested ranks
10472                // calculate offset by constructing an average window
10473                // from the rank windows
10474                for (byte = byte_lo; byte <= byte_hi; byte++) {
10475                        pat_beg = -999;
10476                        pat_end = 999;
10477
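                             // intersect the per-rank passing windows: keep the
                             // latest start and the earliest end across ranks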
10478                        for (rankx = 0; rankx < 4; rankx++) {
10479                                if (!(rank_mask & (1 << rankx)))
10480                                        continue;
10481
10482                                rank_beg = rank_delay_best_start[rankx][byte];
10483                                pat_beg = max(pat_beg, rank_beg);
10484                                rank_end = rank_beg +
10485                                        rank_delay_best_count[rankx][byte] -
10486                                        BYTE_OFFSET_INCR;
10487                                pat_end = min(pat_end, rank_end);
10488
10489                                debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test:  Rank Window %3d:%3d\n",
10490                                      node, lmc, rankx, byte, mode_str,
10491                                      rank_beg, rank_end);
10492
10493                        }       /* for (rankx = 0; rankx < 4; rankx++) */
10494
10495                        pat_best_offset[byte] = (pat_end + pat_beg) / 2;
10496
10497                        // sum the pattern averages
10498                        new_best_offset[byte] += pat_best_offset[byte];
10499                }
10500
10501                // now print them on 1 line, descending order...
10502                debug("N%d.LMC%d: HW DLL %s Offset Pattern %d :",
10503                      node, lmc, mode_str, pattern);
10504                for (byte = byte_hi; byte >= byte_lo; --byte)
10505                        debug(" %4d", pat_best_offset[byte]);
10506                debug("\n");
10507        }
10508        // end of pattern loop
10509
10510        debug("N%d.LMC%d: HW DLL %s Offset Average  : ", node, lmc, mode_str);
10511
10512        // print in descending byte index order
10513        for (byte = byte_hi; byte >= byte_lo; --byte) {
10514                // create the new average NINT
10515                new_best_offset[byte] = divide_nint(new_best_offset[byte],
10516                                                    NUM_BYTE_PATTERNS);
10517
10518                // print the best offsets from all patterns
10519
10520                // print just the offset of all the bytes
10521                if (bytelane == 0x0A)
10522                        debug("%4d ", new_best_offset[byte]);
10523                else            // print the bytelanes also
10524                        debug("(byte %d) %4d ", byte, new_best_offset[byte]);
10525
10526                // done with testing, load up the best offsets we found...
10527                // disable offsets while we load...
10528                change_dll_offset_enable(priv, lmc, 0);
10529                load_dll_offset(priv, lmc, dll_offset_mode,
10530                                new_best_offset[byte], byte);
10531                // re-enable the offsets now that we are done loading
10532                change_dll_offset_enable(priv, lmc, 1);
10533        }
10534
10535        debug("\n");
10536}
10537
10538/*
10539 * Automatically adjust the DLL offset for the selected bytelane using
10540 * hardware-assist
10541 */
10542static int perform_HW_dll_offset_tuning(struct ddr_priv *priv,
10543                                        int dll_offset_mode, int bytelane)
10544{
10545        int if_64b;
10546        int save_ecc_ena[4];
10547        union cvmx_lmcx_config lmc_config;
10548        int lmc, num_lmcs = cvmx_dram_get_num_lmc(priv);
10549        const char *s;
10550        int loops = 1, loop;
10551        int by;
10552        u64 dram_tune_rank_offset;
10553        int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS;
10554        int node = 0;
10555
10556        // see if we want to do the tuning more than once per LMC...
10557        s = env_get("ddr_tune_ecc_loops");
10558        if (s)
10559                loops = strtoul(s, NULL, 0);
10560
10561        // allow override of the test repeats (bursts)
10562        s = env_get("ddr_tune_byte_bursts");
10563        if (s)
10564                dram_tune_byte_bursts = strtoul(s, NULL, 10);
10565
10566        // print current working values
10567        debug("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n",
10568              node, bytelane, loops, dram_tune_byte_bursts, NUM_BYTE_PATTERNS);
10569
10570        // FIXME? get flag from LMC0 only
10571        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0));
10572        if_64b = !lmc_config.s.mode32b;
10573
10574        // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
10575        dram_tune_rank_offset =
10576            1ull << (28 + lmc_config.s.pbank_lsb - lmc_config.s.rank_ena +
10577                     (num_lmcs / 2));
10578
10579        // do once for each active LMC
10580
10581        for (lmc = 0; lmc < num_lmcs; lmc++) {
10582                debug("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n",
10583                      node, lmc, bytelane);
10584
10585                /* Enable ECC for the HW tests */
10586                // NOTE: we do enable ECC, but the HW tests used will not
10587                // generate "visible" errors
10588                lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10589                save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
10590                lmc_config.s.ecc_ena = 1;
10591                lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
10592                lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10593
10594                // testing is done on a single LMC at a time
10595                // FIXME: for now, loop here to show what happens multiple times
10596                for (loop = 0; loop < loops; loop++) {
10597                        /* Perform DLL offset tuning */
10598                        hw_assist_test_dll_offset(priv, 2 /* 2=read */, lmc,
10599                                                  bytelane,
10600                                                  if_64b, dram_tune_rank_offset,
10601                                                  dram_tune_byte_bursts);
10602                }
10603
10604                // perform cleanup on active LMC
10605                debug("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n",
10606                      node, lmc, bytelane);
10607
10608                /* Restore ECC for DRAM tests */
10609                lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10610                lmc_config.s.ecc_ena = save_ecc_ena[lmc];
10611                lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
10612                lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10613
10614                // finally, see if there are any read offset overrides
10615                // after tuning
10616                for (by = 0; by < 9; by++) {
10617                        s = lookup_env(priv, "ddr%d_tune_byte%d", lmc, by);
10618                        if (s) {
10619                                int dllro = strtoul(s, NULL, 10);
10620
10621                                change_dll_offset_enable(priv, lmc, 0);
10622                                load_dll_offset(priv, lmc, 2, dllro, by);
10623                                change_dll_offset_enable(priv, lmc, 1);
10624                        }
10625                }
10626
10627        }                       /* for (lmc = 0; lmc < num_lmcs; lmc++) */
10628
10629        // finish up...
10630
10631        return 0;
10632
10633}                               /* perform_HW_dll_offset_tuning */
10634
10635// this routine simply makes the calls to the tuning routine and returns
10636// any errors
10637static int cvmx_tune_node(struct ddr_priv *priv)
10638{
10639        int errs, tot_errs;
10640        int do_dllwo = 0;       // default to NO
10641        const char *str;
10642        int node = 0;
10643
10644        // Automatically tune the data and ECC byte DLL read offsets
10645        debug("N%d: Starting DLL Read Offset Tuning for LMCs\n", node);
10646        errs = perform_HW_dll_offset_tuning(priv, 2, 0x0A /* all bytelanes */);
10647        debug("N%d: Finished DLL Read Offset Tuning for LMCs, %d errors\n",
10648              node, errs);
10649        tot_errs = errs;
10650
10651        // disabled by default for now, does not seem to be needed?
10652        // Automatically tune the data and ECC byte DLL write offsets
10653        // allow override of default setting
10654        str = env_get("ddr_tune_write_offsets");
10655        if (str)
10656                do_dllwo = !!strtoul(str, NULL, 0);
10657        if (do_dllwo) {
10658                debug("N%d: Starting DLL Write Offset Tuning for LMCs\n", node);
10659                errs =
10660                    perform_HW_dll_offset_tuning(priv, 1,
10661                                                 0x0A /* all bytelanes */);
10662                debug("N%d: Finished DLL Write Offset Tuning for LMCs, %d errors\n",
10663                      node, errs);
10664                tot_errs += errs;
10665        }
10666
10667        return tot_errs;
10668}
10669
10670// this routine makes the calls to the tuning routines when criteria are met
10671// intended to be called for automated tuning, to apply filtering...
10672
10673#define IS_DDR4  1
10674#define IS_DDR3  0
10675#define IS_RDIMM 1
10676#define IS_UDIMM 0
10677#define IS_1SLOT 1
10678#define IS_2SLOT 0
10679
10680// FIXME: DDR3 is not tuned
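     // entries are minimum DDR clock speeds in MHz (the caller scales Hz down
     // to MHz before comparing); tuning runs only above the threshold, and a
     // zero entry disables auto-tuning for that configuration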
10681static const u32 ddr_speed_filter[2][2][2] = {
10682        [IS_DDR4] = {
10683                     [IS_RDIMM] = {
10684                                   [IS_1SLOT] = 940,
10685                                   [IS_2SLOT] = 800},
10686                     [IS_UDIMM] = {
10687                                   [IS_1SLOT] = 1050,
10688                                   [IS_2SLOT] = 940},
10689                      },
10690        [IS_DDR3] = {
10691                     [IS_RDIMM] = {
10692                                   [IS_1SLOT] = 0,      // disabled
10693                                   [IS_2SLOT] = 0       // disabled
10694                                   },
10695                     [IS_UDIMM] = {
10696                                   [IS_1SLOT] = 0,      // disabled
10697                                   [IS_2SLOT] = 0       // disabled
10698                                }
10699                }
10700};
10701
10702void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed)
10703{
10704        const char *s;
10705        union cvmx_lmcx_config lmc_config;
10706        union cvmx_lmcx_control lmc_control;
10707        union cvmx_lmcx_ddr_pll_ctl lmc_ddr_pll_ctl;
10708        int is_ddr4;
10709        int is_rdimm;
10710        int is_1slot;
10711        int do_tune = 0;
10712        u32 ddr_min_speed;
10713        int node = 0;
10714
10715        // scale it down from Hz to MHz
10716        ddr_speed = divide_nint(ddr_speed, 1000000);
10717
10718        // FIXME: allow an override here so that all configs can be tuned
10719        // or none
10720        // If the envvar is defined, always either force it or avoid it
10721        // accordingly
10722        s = env_get("ddr_tune_all_configs");
10723        if (s) {
10724                do_tune = !!strtoul(s, NULL, 0);
10725                printf("N%d: DRAM auto-tuning %s.\n", node,
10726                       (do_tune) ? "forced" : "disabled");
10727                if (do_tune)
10728                        cvmx_tune_node(priv);
10729
10730                return;
10731        }
10732
10733        // filter the tuning calls here...
10734        // determine if we should/can run automatically for this configuration
10735        //
10736        // FIXME: tune only when the configuration indicates it will help:
10737        //    DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed
10738        //
10739        lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0));     // sample LMC0
10740        lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(0));   // sample LMC0
10741        // sample LMC0
10742        lmc_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
10743
10744        is_ddr4 = (lmc_ddr_pll_ctl.s.ddr4_mode != 0);
10745        is_rdimm = (lmc_control.s.rdimm_ena != 0);
10746        // HACK, should do better
10747        is_1slot = (lmc_config.s.init_status < 4);
10748
10749        ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot];
10750        do_tune = ((ddr_min_speed != 0) && (ddr_speed > ddr_min_speed));
10751
10752        debug("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n",
10753              node, (is_ddr4) ? 4 : 3, (is_rdimm) ? 'R' : 'U',
10754              (is_1slot) ? 1 : 2, ddr_speed, (do_tune) ? "is" : "is not");
10755
10756        // call the tuning routine, filtering is done...
10757        if (do_tune)
10758                cvmx_tune_node(priv);
10759}
10760
10761/*
10762 * first pattern example:
10763 * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
10764 * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
10765 * GENERAL_PURPOSE2.DATA == 16'h0000;
10766 */
10767
10768static const u64 dbi_pattern[3] = {
10769        0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL };
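/*
 * GENERAL_PURPOSE0/1 carry 64-bit data; GENERAL_PURPOSE2's DATA field is
 * only 16 bits wide (per the 16'h0000 above), hence the short third entry.
 */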
10770
10771// Perform switchover to DBI
10772static void cvmx_dbi_switchover_interface(struct ddr_priv *priv, int lmc)
10773{
10774        union cvmx_lmcx_modereg_params0 modereg_params0;
10775        union cvmx_lmcx_modereg_params3 modereg_params3;
10776        union cvmx_lmcx_phy_ctl phy_ctl;
10777        union cvmx_lmcx_config lmcx_config;
10778        union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl;
10779        int rank_mask, rankx, active_ranks;
10780        u64 phys_addr, rank_offset;
10781        int num_lmcs, errors;
10782        int dbi_settings[9], byte, unlocked, retries;
10783        int ecc_ena;
10784        int rank_max = 1;       // FIXME: make this 4 to try all the ranks
10785        int node = 0;
10786
10787        ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
10788
10789        lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10790        rank_mask = lmcx_config.s.init_status;
10791        ecc_ena = lmcx_config.s.ecc_ena;
10792
10793        // FIXME: must filter out any non-supported configs
10794        //        ie, no DDR3, no x4 devices
10795        if (ddr_pll_ctl.s.ddr4_mode == 0 || lmcx_config.s.mode_x4dev == 1) {
10796                debug("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n",
10797                      node, lmc);
10798                return;
10799        }
10800
10801        // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
10802        num_lmcs = cvmx_dram_get_num_lmc(priv);
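        /*
         * Rough address-map reasoning (an assumption, not from the HW
         * manual): bit (28 + PBANK_LSB) marks the per-DIMM boundary,
         * subtracting RANK_ENA drops one bit when two ranks share a DIMM,
         * and num_lmcs/2 adds roughly log2(num_lmcs) for LMC interleave.
         */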
10803        rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb -
10804                               lmcx_config.s.rank_ena + (num_lmcs / 2));
10805
10806        debug("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n",
10807              node, lmc, rank_mask, (unsigned long long)rank_offset);
10808
10809        /*
10810         * 1. conduct the current init sequence as usual, all the way
10811         * through software write leveling.
10812         */
10813
10814        read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
10815
10816        display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
10817                                 " INIT");
10818
10819        /*
10820         * 2. set DBI related CSRs as below and issue MR write.
10821         * MODEREG_PARAMS3.WR_DBI=1
10822         * MODEREG_PARAMS3.RD_DBI=1
10823         * PHY_CTL.DBI_MODE_ENA=1
10824         */
10825        modereg_params0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc));
10826
10827        modereg_params3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc));
10828        modereg_params3.s.wr_dbi = 1;
10829        modereg_params3.s.rd_dbi = 1;
10830        lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u64);
10831
10832        phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(lmc));
10833        phy_ctl.s.dbi_mode_ena = 1;
10834        lmc_wr(priv, CVMX_LMCX_PHY_CTL(lmc), phy_ctl.u64);
10835
10836        /*
10837         * there are two options for the data to send.  Let's start with (1)
10838         * and could move to (2) in the future:
10839         *
10840         * 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or, for older chips where
10841         * this field does not exist, set the data directly in these registers).
10842         * this will yield a clk/2 pattern:
10843         * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
10844         * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
10845         * GENERAL_PURPOSE2.DATA == 16'h0000;
10846         * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
10847         * here data comes from the LFSR generating a PRBS pattern
10848         * CHAR_CTL.EN = 0
10849         * CHAR_CTL.SEL = 0; // for PRBS
10850         * CHAR_CTL.DR = 1;
10851         * CHAR_CTL.PRBS = setup for whatever type of PRBS to send
10852         * CHAR_CTL.SKEW_ON = 1;
10853         */
10854        lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]);
10855        lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]);
10856        lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]);
10857
10858        /*
10859         * 3. adjust cas_latency (only necessary if RD_DBI is set).
10860         * here is my code for doing this:
10861         *
10862         * if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin
10863         * case (csr_model.MODEREG_PARAMS0.CL.value)
10864         * 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2;
10865         * // CL 9-13 -> 11-15
10866         * 5: begin
10867         * // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
10868         * if((csr_model.MODEREG_PARAMS0.CWL.value==1 ||
10869         * csr_model.MODEREG_PARAMS0.CWL.value==3))
10870         * csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16
10871         * else
10872         * csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17
10873         * end
10874         * 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18
10875         * 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19
10876         * 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21
10877         * default:
10878         * `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1,
10879         * I am not sure what to do.",
10880         * mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value))
10881         * endcase
10882         * end
10883         */
10884
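        /*
         * MODEREG_PARAMS0[CL] holds the JEDEC DDR4 MR0 CAS-latency encoding
         * (0..7 = CL 9..16, 8 = CL 18, 13/14/15 = CL 17/19/21), so adding
         * 2 (or 3) cycles means remapping through that table rather than
         * simply incrementing the field, as the switch below does.
         */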
10885        if (modereg_params3.s.rd_dbi == 1) {
10886                int old_cl, new_cl, old_cwl;
10887
10888                old_cl = modereg_params0.s.cl;
10889                old_cwl = modereg_params0.s.cwl;
10890
10891                switch (old_cl) {
10892                case 0:
10893                case 1:
10894                case 2:
10895                case 3:
10896                case 4:
10897                        new_cl = old_cl + 2;
10898                        break;  // 9-13->11-15
10899                        // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
10900                case 5:
10901                        new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13;
10902                        break;
10903                case 6:
10904                        new_cl = 8;
10905                        break;  // 15->18
10906                case 7:
10907                        new_cl = 14;
10908                        break;  // 16->19
10909                case 8:
10910                        new_cl = 15;
10911                        break;  // 18->21
10912                default:
10913                        printf("ERROR: Bad CL value (%d) for DBI switchover.\n",
10914                               old_cl);
10915                        // FIXME: need to error exit here...
10916                        old_cl = -1;
10917                        new_cl = -1;
10918                        break;
10919                }
10920                debug("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n",
10921                      node, lmc, old_cl, old_cwl, new_cl);
10922                modereg_params0.s.cl = new_cl;
10923                lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc),
10924                       modereg_params0.u64);
10925        }
10926
10927        /*
10928         * 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence
10929         * SEQ_CTL[SEQ_SEL] = MRW.
10930         */
10931        // Use the default values from the CSR fields
10932        // also, do B-sides for RDIMMs...
10933
10934        for (rankx = 0; rankx < 4; rankx++) {
10935                if (!(rank_mask & (1 << rankx)))
10936                        continue;
10937
10938                // for RDIMMs, B-side writes should get done automatically
10939                // when the A-side is written
10940                ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
10941                         0 /*MRreg */, 0 /*A-side */);  /* MR0 */
10942                ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
10943                         5 /*MRreg */, 0 /*A-side */);  /* MR5 */
10944        }
10945
10946        /*
10947         * 5. conduct DBI bit deskew training via the General Purpose
10948         * R/W sequence (dbtrain). may need to run this over and over to get
10949         * a lock (I need up to 5 in simulation):
10950         * SEQ_CTL[SEQ_SEL] = RW_TRAINING (15)
10951         * DBTRAIN_CTL.CMD_COUNT_EXT = all 1's
10952         * DBTRAIN_CTL.READ_CMD_COUNT = all 1's
10953         * DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L]
10954         * DBTRAIN_CTL.RW_TRAIN = 1
10955         * DBTRAIN_CTL.READ_DQ_COUNT = don't care
10956         * DBTRAIN_CTL.WRITE_ENA = 1;
10957         * DBTRAIN_CTL.ACTIVATE = 1;
10958         * DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a
10959         * valid address
10960         */
10961
10962        // NOW - do the training
10963        debug("N%d.LMC%d: DBI switchover: TRAINING begins...\n", node, lmc);
10964
10965        active_ranks = 0;
10966        for (rankx = 0; rankx < rank_max; rankx++) {
10967                if (!(rank_mask & (1 << rankx)))
10968                        continue;
10969
10970                phys_addr = rank_offset * active_ranks;
10971                // FIXME: now done by test_dram_byte_hw()
10972
10973                active_ranks++;
10974
10975                retries = 0;
10976
10977restart_training:
10978
10979                // NOTE: the return value is a bitmask of the erroring
10980                // byte lanes - we only print it
10981                errors =
10982                    test_dram_byte_hw(priv, lmc, phys_addr, DBTRAIN_DBI, NULL);
10983
10984                debug("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%llx, errors 0x%x.\n",
10985                      node, lmc, rankx, (unsigned long long)phys_addr, errors);
10986
10987                // NEXT - check for locking
10988                unlocked = 0;
10989                read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
10990
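                // Bit 0 of each per-byte DBI deskew setting is the lock
                // indicator; count the byte lanes (plus ECC, if enabled)
                // that have not locked yet.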
10991                for (byte = 0; byte < (8 + ecc_ena); byte++)
10992                        unlocked += (dbi_settings[byte] & 1) ^ 1;
10993
10994                // FIXME: print out the DBI settings array after each rank?
10995                if (rank_max > 1)       // only when doing more than 1 rank
10996                        display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena,
10997                                                 dbi_settings, " RANK");
10998
10999                if (unlocked > 0) {
11000                        debug("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n",
11001                              node, lmc, unlocked);
11002                        retries++;
11003                        if (retries < 10) {
11004                                goto restart_training;
11005                        } else {
11006                                debug("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n",
11007                                      node, lmc, retries);
11008                        }
11009                }
11010        }                       /* for (rankx = 0; rankx < rank_max; rankx++) */
11011
11012        // print out the final DBI settings array
11013        display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
11014                                 "FINAL");
11015}
11016
11017void cvmx_dbi_switchover(struct ddr_priv *priv)
11018{
11019        int lmc;
11020        int num_lmcs = cvmx_dram_get_num_lmc(priv);
11021
11022        for (lmc = 0; lmc < num_lmcs; lmc++)
11023                cvmx_dbi_switchover_interface(priv, lmc);
11024}
11025
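/*
 * Usage sketch (illustrative only; the real call sites live in the platform
 * DDR init code, and "ddr_hertz" is just a placeholder name): once leveling
 * has completed, the init path can run
 *
 *      cvmx_maybe_tune_node(priv, ddr_hertz);  // DDR clock in Hz
 *      cvmx_dbi_switchover(priv);              // only if DBI mode is wanted
 *
 * to auto-tune eligible configurations and then switch the interfaces over
 * to DBI.
 */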