uboot/drivers/ddr/altera/sequencer.c
/*
 * Copyright Altera Corporation (C) 2012-2015
 *
 * SPDX-License-Identifier:    BSD-3-Clause
 */

#include <common.h>
#include <asm/io.h>
#include <asm/arch/sdram.h>
#include <errno.h>
#include "sequencer.h"

static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
        (struct socfpga_sdr_rw_load_manager *)
                (SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);
static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
        (struct socfpga_sdr_rw_load_jump_manager *)
                (SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);
static struct socfpga_sdr_reg_file *sdr_reg_file =
        (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;
static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
        (struct socfpga_sdr_scc_mgr *)
                (SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);
static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
        (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;
static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
        (struct socfpga_phy_mgr_cfg *)
                (SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);
static struct socfpga_data_mgr *data_mgr =
        (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;
static struct socfpga_sdr_ctrl *sdr_ctrl =
        (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;

const struct socfpga_sdram_rw_mgr_config *rwcfg;
const struct socfpga_sdram_io_config *iocfg;
const struct socfpga_sdram_misc_config *misccfg;

#define DELTA_D         1

/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode, where
 * we skip everything except the bare minimum, we need a few of the steps to
 * be dynamic.  In those cases, we either use the DYNAMIC_CALIB_STEPS for the
 * check, which is based on the rtl-supplied value, or we dynamically compute
 * the value to use based on the dynamically-chosen calibration mode.
 */

#define DLEVEL 0
#define STATIC_IN_RTL_SIM 0
#define STATIC_SKIP_DELAY_LOOPS 0

#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
        STATIC_SKIP_DELAY_LOOPS)

/* calibration steps requested by the rtl */
static u16 dyn_calib_steps;

/*
 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
 * instead of static, we use boolean logic to select between
 * non-skip and skip values
 *
 * The mask is set to include all bits when not-skipping, but is
 * zero when skipping
 */

static u16 skip_delay_mask;     /* mask off bits when skipping/not-skipping */

#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
        ((non_skip_value) & skip_delay_mask)
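/*
 * Worked example (illustrative operand, not from the RTL): when delay
 * loops are skipped, skip_delay_mask is 0, so
 * SKIP_DELAY_LOOP_VALUE_OR_ZERO(0x42) evaluates to 0; when not skipping,
 * the mask includes all bits (0xffff for this u16) and the macro passes
 * 0x42 through unchanged.
 */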

static struct gbl_type *gbl;
static struct param_type *param;

static void set_failing_group_stage(u32 group, u32 stage,
        u32 substage)
{
        /*
         * Only set the global stage if there has not been any other
         * failing group
         */
        if (gbl->error_stage == CAL_STAGE_NIL) {
                gbl->error_substage = substage;
                gbl->error_stage = stage;
                gbl->error_group = group;
        }
}

static void reg_file_set_group(u16 set_group)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
}

static void reg_file_set_stage(u8 set_stage)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
}

static void reg_file_set_sub_stage(u8 set_sub_stage)
{
        set_sub_stage &= 0xff;
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
}

/**
 * phy_mgr_initialize() - Initialize PHY Manager
 *
 * Initialize PHY Manager.
 */
static void phy_mgr_initialize(void)
{
        u32 ratio;

        debug("%s:%d\n", __func__, __LINE__);
        /* Calibration has control over path to memory */
        /*
         * In Hard PHY this is a 2-bit control:
         * 0: AFI Mux Select
         * 1: DDIO Mux Select
         */
        writel(0x3, &phy_mgr_cfg->mux_sel);

        /* USER memory clock is not stable, we begin initialization. */
        writel(0, &phy_mgr_cfg->reset_mem_stbl);

        /* USER calibration status all set to zero */
        writel(0, &phy_mgr_cfg->cal_status);

        writel(0, &phy_mgr_cfg->cal_debug_info);

        /* Init params only if we do NOT skip calibration. */
        if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL)
                return;

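        /*
         * Example (hypothetical configuration): with 8 DQ per read DQS and
         * one virtual group per read DQS, ratio = 8 and the per-VG masks
         * below become (1 << 8) - 1 = 0xff; the full-width masks are
         * likewise (1 << DQ-count) - 1.
         */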
        ratio = rwcfg->mem_dq_per_read_dqs /
                rwcfg->mem_virtual_groups_per_read_dqs;
        param->read_correct_mask_vg = (1 << ratio) - 1;
        param->write_correct_mask_vg = (1 << ratio) - 1;
        param->read_correct_mask = (1 << rwcfg->mem_dq_per_read_dqs) - 1;
        param->write_correct_mask = (1 << rwcfg->mem_dq_per_write_dqs) - 1;
}

/**
 * set_rank_and_odt_mask() - Set Rank and ODT mask
 * @rank:       Rank mask
 * @odt_mode:   ODT mode, OFF or READ_WRITE
 *
 * Set Rank and ODT mask (On-Die Termination).
 */
static void set_rank_and_odt_mask(const u32 rank, const u32 odt_mode)
{
        u32 odt_mask_0 = 0;
        u32 odt_mask_1 = 0;
        u32 cs_and_odt_mask;

        if (odt_mode == RW_MGR_ODT_MODE_OFF) {
                odt_mask_0 = 0x0;
                odt_mask_1 = 0x0;
        } else {        /* RW_MGR_ODT_MODE_READ_WRITE */
                switch (rwcfg->mem_number_of_ranks) {
                case 1: /* 1 Rank */
                        /* Read: ODT = 0 ; Write: ODT = 1 */
                        odt_mask_0 = 0x0;
                        odt_mask_1 = 0x1;
                        break;
                case 2: /* 2 Ranks */
                        if (rwcfg->mem_number_of_cs_per_dimm == 1) {
                                /*
                                 * - Dual-Slot, Single-Rank (1 CS per DIMM)
                                 *   OR
                                 * - RDIMM, 4 total CS (2 CS per DIMM, 2 DIMM)
                                 *
                                 * Since MEM_NUMBER_OF_RANKS is 2, they
                                 * are both single rank with 2 CS each
                                 * (special for RDIMM).
                                 *
                                 * Read: Turn on ODT on the opposite rank
                                 * Write: Turn on ODT on all ranks
                                 */
                                odt_mask_0 = 0x3 & ~(1 << rank);
                                odt_mask_1 = 0x3;
                        } else {
                                /*
                                 * - Single-Slot, Dual-Rank (2 CS per DIMM)
                                 *
                                 * Read: Turn off ODT on all ranks
                                 * Write: Turn on ODT on active rank
                                 */
                                odt_mask_0 = 0x0;
                                odt_mask_1 = 0x3 & (1 << rank);
                        }
                        break;
                case 4: /* 4 Ranks */
                        /*
                         * Read:
                         * ----------+-----------------------+
                         *           |         ODT           |
                         * Read From +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  0  |
                         *     1     |  1  |  0  |  0  |  0  |
                         *     2     |  0  |  0  |  0  |  1  |
                         *     3     |  0  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *
                         * Write:
                         * ----------+-----------------------+
                         *           |         ODT           |
                         * Write To  +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  1  |
                         *     1     |  1  |  0  |  1  |  0  |
                         *     2     |  0  |  1  |  0  |  1  |
                         *     3     |  1  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         */
                        switch (rank) {
                        case 0:
                                odt_mask_0 = 0x4;
                                odt_mask_1 = 0x5;
                                break;
                        case 1:
                                odt_mask_0 = 0x8;
                                odt_mask_1 = 0xA;
                                break;
                        case 2:
                                odt_mask_0 = 0x1;
                                odt_mask_1 = 0x5;
                                break;
                        case 3:
                                odt_mask_0 = 0x2;
                                odt_mask_1 = 0xA;
                                break;
                        }
                        break;
                }
        }

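        /*
         * Worked example (hypothetical 4-rank configuration): accessing
         * rank 1, the CS field is 0xFF & ~(1 << 1) = 0xFD (the selected
         * rank's bit cleared), and the tables above give odt_mask_0 = 0x8,
         * odt_mask_1 = 0xA, so cs_and_odt_mask = 0xFD | (0x8 << 8) |
         * (0xA << 16) = 0x000A08FD.
         */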
        cs_and_odt_mask = (0xFF & ~(1 << rank)) |
                          ((0xFF & odt_mask_0) << 8) |
                          ((0xFF & odt_mask_1) << 16);
        writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
}

/**
 * scc_mgr_set() - Set SCC Manager register
 * @off:        Base offset in SCC Manager space
 * @grp:        Read/Write group
 * @val:        Value to be set
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register.
 */
static void scc_mgr_set(u32 off, u32 grp, u32 val)
{
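        /*
         * Addressing note (worked example, values illustrative): each group
         * gets its own 32-bit register, so @grp selects the word at byte
         * offset grp << 2; e.g. grp = 3 writes to
         * SDR_PHYGRP_SCCGRP_ADDRESS | off | 0xc.
         */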
        writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
}

/**
 * scc_mgr_initialize() - Initialize SCC Manager registers
 *
 * Initialize SCC Manager registers.
 */
static void scc_mgr_initialize(void)
{
        /*
         * Clear register file for HPS. 16 (2^4) is the size of the
         * full register file in the scc mgr:
         *      RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
         *                             MEM_IF_READ_DQS_WIDTH - 1);
         */
        int i;

        for (i = 0; i < 16; i++) {
                debug_cond(DLEVEL >= 1, "%s:%d: Clearing SCC RFILE index %u\n",
                           __func__, __LINE__, i);
                scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, i, 0);
        }
}

static void scc_mgr_set_dqdqs_output_phase(u32 write_group, u32 phase)
{
        scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
}

static void scc_mgr_set_dqs_bus_in_delay(u32 read_group, u32 delay)
{
        scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_en_phase(u32 read_group, u32 phase)
{
        scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
}

static void scc_mgr_set_dqs_en_delay(u32 read_group, u32 delay)
{
        scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dq_in_delay(u32 dq_in_group, u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_io_in_delay(u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, rwcfg->mem_dq_per_write_dqs,
                    delay);
}

static void scc_mgr_set_dm_in_delay(u32 dm, u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET,
                    rwcfg->mem_dq_per_write_dqs + 1 + dm,
                    delay);
}

static void scc_mgr_set_dq_out1_delay(u32 dq_in_group, u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_out1_delay(u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, rwcfg->mem_dq_per_write_dqs,
                    delay);
}

static void scc_mgr_set_dm_out1_delay(u32 dm, u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
                    rwcfg->mem_dq_per_write_dqs + 1 + dm,
                    delay);
}

/* load up dqs config settings */
static void scc_mgr_load_dqs(u32 dqs)
{
        writel(dqs, &sdr_scc_mgr->dqs_ena);
}

/* load up dqs io config settings */
static void scc_mgr_load_dqs_io(void)
{
        writel(0, &sdr_scc_mgr->dqs_io_ena);
}

/* load up dq config settings */
static void scc_mgr_load_dq(u32 dq_in_group)
{
        writel(dq_in_group, &sdr_scc_mgr->dq_ena);
}

/* load up dm config settings */
static void scc_mgr_load_dm(u32 dm)
{
        writel(dm, &sdr_scc_mgr->dm_ena);
}

/**
 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
 * @off:        Base offset in SCC Manager space
 * @grp:        Read/Write group
 * @val:        Value to be set
 * @update:     If non-zero, trigger SCC Manager update for all ranks
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register
 * and optionally triggers the SCC update for all ranks.
 */
static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val,
                                  const int update)
{
        u32 r;

        for (r = 0; r < rwcfg->mem_number_of_ranks;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_set(off, grp, val);

                if (update || (r == 0)) {
                        writel(grp, &sdr_scc_mgr->dqs_ena);
                        writel(0, &sdr_scc_mgr->update);
                }
        }
}

static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase)
{
        /*
         * USER although the h/w doesn't support different phases per
         * shadow register, for simplicity our scc manager modeling
         * keeps different phase settings per shadow reg, and it's
         * important for us to keep them in sync to match h/w.
         * for efficiency, the scan chain update should occur only
         * once to sr0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET,
                              read_group, phase, 0);
}

static void scc_mgr_set_dqdqs_output_phase_all_ranks(u32 write_group,
                                                     u32 phase)
{
        /*
         * USER although the h/w doesn't support different phases per
         * shadow register, for simplicity our scc manager modeling
         * keeps different phase settings per shadow reg, and it's
         * important for us to keep them in sync to match h/w.
         * for efficiency, the scan chain update should occur only
         * once to sr0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
                              write_group, phase, 0);
}

static void scc_mgr_set_dqs_en_delay_all_ranks(u32 read_group,
                                               u32 delay)
{
        /*
         * In shadow register mode, the T11 settings are stored in
         * registers in the core, which are updated by the DQS_ENA
         * signals. Not issuing the SCC_MGR_UPD command allows us to
         * save lots of rank switching overhead, by calling
         * select_shadow_regs_for_update with update_scan_chains
         * set to 0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET,
                              read_group, delay, 1);
}

/**
 * scc_mgr_set_oct_out1_delay() - Set OCT output delay
 * @write_group:        Write group
 * @delay:              Delay value
 *
 * This function sets the OCT output delay in SCC manager.
 */
static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay)
{
        const int ratio = rwcfg->mem_if_read_dqs_width /
                          rwcfg->mem_if_write_dqs_width;
        const int base = write_group * ratio;
        int i;
        /*
         * Load the setting in the SCC manager
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
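        /*
         * Worked example (hypothetical widths): mem_if_read_dqs_width = 8
         * and mem_if_write_dqs_width = 4 give ratio = 2, so write_group 3
         * maps to base = 6 and the loop below programs read groups 6 and 7.
         */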
        for (i = 0; i < ratio; i++)
                scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
}

/**
 * scc_mgr_set_hhp_extras() - Set HHP extras.
 *
 * Load the fixed setting in the SCC manager HHP extras.
 */
static void scc_mgr_set_hhp_extras(void)
{
        /*
         * Load the fixed setting in the SCC manager
         * bits: 0:0 = 1'b1     - DQS bypass
         * bits: 1:1 = 1'b1     - DQ bypass
         * bits: 4:2 = 3'b001   - rfifo_mode
         * bits: 6:5 = 2'b01    - rfifo clock_select
         * bits: 7:7 = 1'b0     - separate gating from ungating setting
         * bits: 8:8 = 1'b0     - separate OE from Output delay setting
         */
        const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
                          (1 << 2) | (1 << 1) | (1 << 0);
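        /* i.e. value == 0x20 | 0x04 | 0x02 | 0x01 == 0x27 */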
        const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
                         SCC_MGR_HHP_GLOBALS_OFFSET |
                         SCC_MGR_HHP_EXTRAS_OFFSET;

        debug_cond(DLEVEL >= 1, "%s:%d Setting HHP Extras\n",
                   __func__, __LINE__);
        writel(value, addr);
        debug_cond(DLEVEL >= 1, "%s:%d Done Setting HHP Extras\n",
                   __func__, __LINE__);
}

/**
 * scc_mgr_zero_all() - Zero all DQS config
 *
 * Zero all DQS config.
 */
static void scc_mgr_zero_all(void)
{
        int i, r;

        /*
         * USER Zero all DQS config settings, across all groups and all
         * shadow registers
         */
        for (r = 0; r < rwcfg->mem_number_of_ranks;
             r += NUM_RANKS_PER_SHADOW_REG) {
                for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
                        /*
                         * The phases actually don't exist on a per-rank basis,
                         * but there's no harm updating them several times, so
                         * let's keep the code simple.
                         */
                        scc_mgr_set_dqs_bus_in_delay(i, iocfg->dqs_in_reserve);
                        scc_mgr_set_dqs_en_phase(i, 0);
                        scc_mgr_set_dqs_en_delay(i, 0);
                }

                for (i = 0; i < rwcfg->mem_if_write_dqs_width; i++) {
                        scc_mgr_set_dqdqs_output_phase(i, 0);
                        /* Arria V/Cyclone V don't have out2. */
                        scc_mgr_set_oct_out1_delay(i, iocfg->dqs_out_reserve);
                }
        }

        /* Multicast to all DQS group enables. */
        writel(0xff, &sdr_scc_mgr->dqs_ena);
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
 * @write_group:        Write group
 *
 * Set bypass mode and trigger SCC update.
 */
static void scc_set_bypass_mode(const u32 write_group)
{
        /* Multicast to all DQ enables. */
        writel(0xff, &sdr_scc_mgr->dq_ena);
        writel(0xff, &sdr_scc_mgr->dm_ena);

        /* Update current DQS IO enable. */
        writel(0, &sdr_scc_mgr->dqs_io_ena);

        /* Update the DQS logic. */
        writel(write_group, &sdr_scc_mgr->dqs_ena);

        /* Hit update. */
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
 * @write_group:        Write group
 *
 * Load DQS settings for Write Group, do not trigger SCC update.
 */
static void scc_mgr_load_dqs_for_write_group(const u32 write_group)
{
        const int ratio = rwcfg->mem_if_read_dqs_width /
                          rwcfg->mem_if_write_dqs_width;
        const int base = write_group * ratio;
        int i;
        /*
         * Load the setting in the SCC manager
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
        for (i = 0; i < ratio; i++)
                writel(base + i, &sdr_scc_mgr->dqs_ena);
}

/**
 * scc_mgr_zero_group() - Zero all configs for a group
 * @write_group:        Write group
 * @out_only:           If set, only zero the output-side settings
 *
 * Zero DQ, DM, DQS and OCT configs for a group.
 */
static void scc_mgr_zero_group(const u32 write_group, const int out_only)
{
        int i, r;

        for (r = 0; r < rwcfg->mem_number_of_ranks;
             r += NUM_RANKS_PER_SHADOW_REG) {
                /* Zero all DQ config settings. */
                for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) {
                        scc_mgr_set_dq_out1_delay(i, 0);
                        if (!out_only)
                                scc_mgr_set_dq_in_delay(i, 0);
                }

                /* Multicast to all DQ enables. */
                writel(0xff, &sdr_scc_mgr->dq_ena);

                /* Zero all DM config settings. */
                for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
                        if (!out_only)
                                scc_mgr_set_dm_in_delay(i, 0);
                        scc_mgr_set_dm_out1_delay(i, 0);
                }

                /* Multicast to all DM enables. */
                writel(0xff, &sdr_scc_mgr->dm_ena);

                /* Zero all DQS IO settings. */
                if (!out_only)
                        scc_mgr_set_dqs_io_in_delay(0);

                /* Arria V/Cyclone V don't have out2. */
                scc_mgr_set_dqs_out1_delay(iocfg->dqs_out_reserve);
                scc_mgr_set_oct_out1_delay(write_group, iocfg->dqs_out_reserve);
                scc_mgr_load_dqs_for_write_group(write_group);

                /* Multicast to all DQS IO enables (only 1 in total). */
                writel(0, &sdr_scc_mgr->dqs_io_ena);

                /* Hit update to zero everything. */
                writel(0, &sdr_scc_mgr->update);
        }
}

/*
 * apply and load a particular input delay for the DQ pins in a group
 * group_bgn is the index of the first dq pin (in the write group)
 */
static void scc_mgr_apply_group_dq_in_delay(u32 group_bgn, u32 delay)
{
        u32 i, p;

        for (i = 0, p = group_bgn; i < rwcfg->mem_dq_per_read_dqs; i++, p++) {
                scc_mgr_set_dq_in_delay(p, delay);
                scc_mgr_load_dq(p);
        }
}

/**
 * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group
 * @delay:              Delay value
 *
 * Apply and load a particular output delay for the DQ pins in a group.
 */
static void scc_mgr_apply_group_dq_out1_delay(const u32 delay)
{
        int i;

        for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) {
                scc_mgr_set_dq_out1_delay(i, delay);
                scc_mgr_load_dq(i);
        }
}

/* apply and load a particular output delay for the DM pins in a group */
static void scc_mgr_apply_group_dm_out1_delay(u32 delay1)
{
        u32 i;

        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
                scc_mgr_set_dm_out1_delay(i, delay1);
                scc_mgr_load_dm(i);
        }
}

/* apply and load delay on both DQS and OCT out1 */
static void scc_mgr_apply_group_dqs_io_and_oct_out1(u32 write_group,
                                                    u32 delay)
{
        scc_mgr_set_dqs_out1_delay(delay);
        scc_mgr_load_dqs_io();

        scc_mgr_set_oct_out1_delay(write_group, delay);
        scc_mgr_load_dqs_for_write_group(write_group);
}

/**
 * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side: DQ, DM, DQS, OCT
 * @write_group:        Write group
 * @delay:              Delay value
 *
 * Apply a delay to the entire output side: DQ, DM, DQS, OCT.
 */
static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group,
                                                  const u32 delay)
{
        u32 i, new_delay;

        /* DQ shift */
        for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++)
                scc_mgr_load_dq(i);

        /* DM shift */
        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
                scc_mgr_load_dm(i);

        /* DQS shift */
        new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay;
        if (new_delay > iocfg->io_out2_delay_max) {
                debug_cond(DLEVEL >= 1,
                           "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
                           __func__, __LINE__, write_group, delay, new_delay,
                           iocfg->io_out2_delay_max,
                           new_delay - iocfg->io_out2_delay_max);
                new_delay -= iocfg->io_out2_delay_max;
                scc_mgr_set_dqs_out1_delay(new_delay);
        }

        scc_mgr_load_dqs_io();

        /* OCT shift */
        new_delay = READ_SCC_OCT_OUT2_DELAY + delay;
        if (new_delay > iocfg->io_out2_delay_max) {
                debug_cond(DLEVEL >= 1,
                           "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
                           __func__, __LINE__, write_group, delay,
                           new_delay, iocfg->io_out2_delay_max,
                           new_delay - iocfg->io_out2_delay_max);
                new_delay -= iocfg->io_out2_delay_max;
                scc_mgr_set_oct_out1_delay(write_group, new_delay);
        }

        scc_mgr_load_dqs_for_write_group(write_group);
}

/**
 * scc_mgr_apply_group_all_out_delay_add_all_ranks() - Apply a delay to the entire output side to all ranks
 * @write_group:        Write group
 * @delay:              Delay value
 *
 * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks.
 */
static void
scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group,
                                                const u32 delay)
{
        int r;

        for (r = 0; r < rwcfg->mem_number_of_ranks;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_apply_group_all_out_delay_add(write_group, delay);
                writel(0, &sdr_scc_mgr->update);
        }
}

/**
 * set_jump_as_return() - Return instruction optimization
 *
 * Optimization used to recover some slots in the DDR3 inst_rom; it could
 * be applied to other protocols if we wanted to.
 */
static void set_jump_as_return(void)
{
        /*
         * To save space, we replace return with a jump to a special shared
         * RETURN instruction, and we set the counter to a large value so
         * that we always take the jump.
         */
        writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
        writel(rwcfg->rreturn, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
}

/**
 * delay_for_n_mem_clocks() - Delay for N memory clocks
 * @clocks:     Length of the delay
 *
 * Delay for N memory clocks.
 */
static void delay_for_n_mem_clocks(const u32 clocks)
{
        u32 afi_clocks;
        u16 c_loop;
        u8 inner;
        u8 outer;

        debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);

        /* Scale (rounding up) to get afi clocks. */
        afi_clocks = DIV_ROUND_UP(clocks, misccfg->afi_rate_ratio);
        if (afi_clocks) /* Temporary underflow protection */
                afi_clocks--;

        /*
         * Note, we don't bother accounting for being off a little
         * bit because of a few extra instructions in outer loops.
         * Note, the loops have a test at the end, and do the test
         * before the decrement, and so always perform the loop
         * 1 time more than the counter value
         */
        c_loop = afi_clocks >> 16;
        outer = c_loop ? 0xff : (afi_clocks >> 8);
        inner = outer ? 0xff : afi_clocks;
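
        /*
         * Decomposition sketch (illustrative values): afi_clocks = 0x1234
         * yields c_loop = 0, outer = 0x12, inner = 0xff, so the delay runs
         * as nested 8-bit loops below; afi_clocks = 0x45 yields c_loop = 0,
         * outer = 0, inner = 0x45, and only the single inner loop is used.
         */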

        /*
         * rom instructions are structured as follows:
         *
         *    IDLE_LOOP2: jnz cntr0, TARGET_A
         *    IDLE_LOOP1: jnz cntr1, TARGET_B
         *                return
         *
         * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
         * TARGET_B is set to IDLE_LOOP2 as well
         *
         * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
         * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
         *
         * a little confusing, but it helps save precious space in the inst_rom
         * and sequencer rom and keeps the delays more accurate and reduces
         * overhead
         */
        if (afi_clocks < 0x100) {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                       &sdr_rw_load_mgr_regs->load_cntr1);

                writel(rwcfg->idle_loop1,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(rwcfg->idle_loop1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                          RW_MGR_RUN_SINGLE_GROUP_OFFSET);
        } else {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                       &sdr_rw_load_mgr_regs->load_cntr0);

                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
                       &sdr_rw_load_mgr_regs->load_cntr1);

                writel(rwcfg->idle_loop2,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(rwcfg->idle_loop2,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                do {
                        writel(rwcfg->idle_loop2,
                               SDR_PHYGRP_RWMGRGRP_ADDRESS |
                               RW_MGR_RUN_SINGLE_GROUP_OFFSET);
                } while (c_loop-- != 0);
        }
        debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
}

/**
 * rw_mgr_mem_init_load_regs() - Load instruction registers
 * @cntr0:      Counter 0 value
 * @cntr1:      Counter 1 value
 * @cntr2:      Counter 2 value
 * @jump:       Jump instruction value
 *
 * Load instruction registers.
 */
static void rw_mgr_mem_init_load_regs(u32 cntr0, u32 cntr1, u32 cntr2, u32 jump)
{
        u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                      RW_MGR_RUN_SINGLE_GROUP_OFFSET;

        /* Load counters */
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0),
               &sdr_rw_load_mgr_regs->load_cntr0);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1),
               &sdr_rw_load_mgr_regs->load_cntr1);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2),
               &sdr_rw_load_mgr_regs->load_cntr2);

        /* Load jump address */
        writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
        writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2);

        /* Execute count instruction */
        writel(jump, grpaddr);
}

/**
 * rw_mgr_mem_load_user() - Load user calibration values
 * @fin1:       Final instruction 1
 * @fin2:       Final instruction 2
 * @precharge:  If 1, precharge the banks at the end
 *
 * Load user calibration values and optionally precharge the banks.
 */
static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2,
                                 const int precharge)
{
        u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                      RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        u32 r;

        for (r = 0; r < rwcfg->mem_number_of_ranks; r++) {
                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

                /* precharge all banks ... */
                if (precharge)
                        writel(rwcfg->precharge_all, grpaddr);

                /*
                 * USER Use mirrored commands for odd ranks if address
                 * mirroring is on
                 */
                if ((rwcfg->mem_address_mirroring >> r) & 0x1) {
                        set_jump_as_return();
                        writel(rwcfg->mrs2_mirr, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(rwcfg->mrs3_mirr, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(rwcfg->mrs1_mirr, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(fin1, grpaddr);
                } else {
                        set_jump_as_return();
                        writel(rwcfg->mrs2, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(rwcfg->mrs3, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(rwcfg->mrs1, grpaddr);
                        set_jump_as_return();
                        writel(fin2, grpaddr);
                }

                if (precharge)
                        continue;

                set_jump_as_return();
                writel(rwcfg->zqcl, grpaddr);

                /* tZQinit = tDLLK = 512 ck cycles */
                delay_for_n_mem_clocks(512);
        }
}

/**
 * rw_mgr_mem_initialize() - Initialize RW Manager
 *
 * Initialize RW Manager.
 */
static void rw_mgr_mem_initialize(void)
{
        debug("%s:%d\n", __func__, __LINE__);

        /* The reset / cke part of initialization is broadcast to all ranks */
        writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);

        /*
         * Here's how you load registers for a loop:
         * Counters are located @ 0x800
         * Jump addresses are located @ 0xC00
         * For both, registers 0 to 3 are selected using bits 3 and 2, like
         * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
         * I know this ain't pretty, but the Avalon bus throws away the 2
         * least significant bits
         */

        /* Start with memory RESET activated */

        /* tINIT = 200us */

        /*
         * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
         * If a and b are the numbers of iterations in 2 nested loops
         * it takes the following number of cycles to complete the operation:
         * number_of_cycles = ((2 + n) * a + 2) * b
         * where n is the number of instructions in the inner loop
         * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
         * b = 6A
         */
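        /*
         * Sanity check of the values above (plain arithmetic): with n = 0,
         * a = 256, b = 106 the formula gives ((2 + 0) * 256 + 2) * 106 =
         * 514 * 106 = 54484 cycles, comfortably above the ~54000 cycles
         * that 200us needs at a 3.75 ns clock period.
         */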
        rw_mgr_mem_init_load_regs(misccfg->tinit_cntr0_val,
                                  misccfg->tinit_cntr1_val,
                                  misccfg->tinit_cntr2_val,
                                  rwcfg->init_reset_0_cke_0);

        /* Indicate that memory is stable. */
        writel(1, &phy_mgr_cfg->reset_mem_stbl);

        /*
         * transition the RESET to high
         * Wait for 500us
         */

        /*
         * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
         * If a and b are the numbers of iterations in 2 nested loops
         * it takes the following number of cycles to complete the operation
         * number_of_cycles = ((2 + n) * a + 2) * b
         * where n is the number of instructions in the inner loop
         * One possible solution is n = 2 , a = 131 , b = 256 => a = 83,
         * b = FF
         */
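        /*
         * Sanity check (plain arithmetic): with n = 2, a = 131, b = 256 the
         * formula gives ((2 + 2) * 131 + 2) * 256 = 526 * 256 = 134656
         * cycles, just above the ~134000 cycles that 500us needs at 3.75 ns.
         */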
        rw_mgr_mem_init_load_regs(misccfg->treset_cntr0_val,
                                  misccfg->treset_cntr1_val,
                                  misccfg->treset_cntr2_val,
                                  rwcfg->init_reset_1_cke_0);

        /* Bring up clock enable. */

        /* tXRP < 250 ck cycles */
        delay_for_n_mem_clocks(250);

        rw_mgr_mem_load_user(rwcfg->mrs0_dll_reset_mirr, rwcfg->mrs0_dll_reset,
                             0);
}

/**
 * rw_mgr_mem_handoff() - Hand off the memory to user
 *
 * At the end of calibration we have to program the user settings in
 * and hand off the memory to the user.
 */
static void rw_mgr_mem_handoff(void)
{
        rw_mgr_mem_load_user(rwcfg->mrs0_user_mirr, rwcfg->mrs0_user, 1);
        /*
         * Need to wait tMOD (12CK or 15ns) time before issuing other
         * commands, but we will have plenty of NIOS cycles before actual
         * handoff, so it's okay.
         */
}

/**
 * rw_mgr_mem_calibrate_write_test_issue() - Issue write test command
 * @group:      Write Group
 * @test_dm:    Test DM (data mask) functionality as well
 *
 * Issue write test command. Two variants are provided, one that just tests
 * a write pattern and another that tests datamask functionality.
 */
static void rw_mgr_mem_calibrate_write_test_issue(u32 group,
                                                  u32 test_dm)
{
        const u32 quick_write_mode =
                (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) &&
                misccfg->enable_super_quick_calibration;
        u32 mcc_instruction;
        u32 rw_wl_nop_cycles;

        /*
         * Set counter and jump addresses for the right
         * number of NOP cycles.
         * The number of supported NOP cycles can range from -1 to infinity.
         * Three different cases are handled:
         *
         * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
         *    mechanism will be used to insert the right number of NOPs.
         *
         * 2. For a number of NOP cycles equal to 0, the micro-instruction
         *    issuing the write command will jump straight to the
         *    micro-instruction that turns on DQS (for DDRx), or outputs write
         *    data (for RLD), skipping the NOP micro-instruction altogether.
         *
         * 3. A number of NOP cycles equal to -1 indicates that DQS must be
         *    turned on in the same micro-instruction that issues the write
         *    command. Then we need to directly jump to the micro-instruction
         *    that sends out the data.
         *
         * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
         *       (2 and 3). One jump-counter (0) is used to perform multiple
         *       write-read operations.
         *       One counter is left to issue this command in "multiple-group"
         *       mode.
         */

        rw_wl_nop_cycles = gbl->rw_wl_nop_cycles;

        if (rw_wl_nop_cycles == -1) {
                /*
                 * CNTR 2 - We want to execute the special write operation that
                 * turns on DQS right away and then skip directly to the
                 * instruction that sends out the data. We set the counter to a
                 * large number so that the jump is always taken.
                 */
                writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);

                /* CNTR 3 - Not used */
                if (test_dm) {
                        mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0_wl_1;
                        writel(rwcfg->lfsr_wr_rd_dm_bank_0_data,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                        writel(rwcfg->lfsr_wr_rd_dm_bank_0_nop,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                } else {
                        mcc_instruction = rwcfg->lfsr_wr_rd_bank_0_wl_1;
                        writel(rwcfg->lfsr_wr_rd_bank_0_data,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                        writel(rwcfg->lfsr_wr_rd_bank_0_nop,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                }
        } else if (rw_wl_nop_cycles == 0) {
                /*
                 * CNTR 2 - We want to skip the NOP operation and go straight
                 * to the DQS enable instruction. We set the counter to a large
                 * number so that the jump is always taken.
                 */
                writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);

                /* CNTR 3 - Not used */
                if (test_dm) {
                        mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0;
                        writel(rwcfg->lfsr_wr_rd_dm_bank_0_dqs,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                } else {
                        mcc_instruction = rwcfg->lfsr_wr_rd_bank_0;
                        writel(rwcfg->lfsr_wr_rd_bank_0_dqs,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                }
        } else {
                /*
                 * CNTR 2 - In this case we want to execute the next instruction
                 * and NOT take the jump. So we set the counter to 0. The jump
                 * address doesn't matter.
                 */
                writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
                writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);

                /*
                 * CNTR 3 - Set the nop counter to the number of cycles we
                 * need to loop for, minus 1.
                 */
                writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
                if (test_dm) {
                        mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0;
                        writel(rwcfg->lfsr_wr_rd_dm_bank_0_nop,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                } else {
                        mcc_instruction = rwcfg->lfsr_wr_rd_bank_0;
                        writel(rwcfg->lfsr_wr_rd_bank_0_nop,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                }
        }

        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                  RW_MGR_RESET_READ_DATAPATH_OFFSET);

        if (quick_write_mode)
                writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
        else
                writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);

        writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);

        /*
         * CNTR 1 - This is used to ensure enough time elapses
         * for read data to come back.
         */
        writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);

        if (test_dm) {
                writel(rwcfg->lfsr_wr_rd_dm_bank_0_wait,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        } else {
                writel(rwcfg->lfsr_wr_rd_bank_0_wait,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        }

        writel(mcc_instruction, (SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_RUN_SINGLE_GROUP_OFFSET) +
                                (group << 2));
}

/**
 * rw_mgr_mem_calibrate_write_test() - Test writes, check for single/multiple pass
 * @rank_bgn:           Rank number
 * @write_group:        Write Group
 * @use_dm:             Use DM
 * @all_correct:        All bits must be correct in the mask
 * @bit_chk:            Resulting bit mask after the test
 * @all_ranks:          Test all ranks
 *
 * Test writes, can check for a single bit pass or multiple bit pass.
 */
static int
rw_mgr_mem_calibrate_write_test(const u32 rank_bgn, const u32 write_group,
                                const u32 use_dm, const u32 all_correct,
                                u32 *bit_chk, const u32 all_ranks)
{
        const u32 rank_end = all_ranks ?
                                rwcfg->mem_number_of_ranks :
                                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        const u32 shift_ratio = rwcfg->mem_dq_per_write_dqs /
                                rwcfg->mem_virtual_groups_per_write_dqs;
        const u32 correct_mask_vg = param->write_correct_mask_vg;

        u32 tmp_bit_chk, base_rw_mgr;
        int vg, r;

        *bit_chk = param->write_correct_mask;

        for (r = rank_bgn; r < rank_end; r++) {
                /* Set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                tmp_bit_chk = 0;
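                /*
                 * Accumulation sketch (illustrative widths): with 8 DQ per
                 * write DQS and 2 virtual groups, shift_ratio = 4; each
                 * pass of the loop below shifts the mask gathered so far up
                 * by 4 bits and ORs in the new per-VG pass/fail bits,
                 * packing all virtual groups into one tmp_bit_chk word.
                 */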
                for (vg = rwcfg->mem_virtual_groups_per_write_dqs - 1;
                     vg >= 0; vg--) {
                        /* Reset the FIFOs to get pointers to known state. */
                        writel(0, &phy_mgr_cmd->fifo_reset);

                        rw_mgr_mem_calibrate_write_test_issue(
                                write_group *
                                rwcfg->mem_virtual_groups_per_write_dqs + vg,
                                use_dm);

                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
                        tmp_bit_chk <<= shift_ratio;
                        tmp_bit_chk |= (correct_mask_vg & ~(base_rw_mgr));
                }

                *bit_chk &= tmp_bit_chk;
        }

        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
        if (all_correct) {
                debug_cond(DLEVEL >= 2,
                           "write_test(%u,%u,ALL) : %u == %u => %i\n",
                           write_group, use_dm, *bit_chk,
                           param->write_correct_mask,
                           *bit_chk == param->write_correct_mask);
                return *bit_chk == param->write_correct_mask;
        } else {
                debug_cond(DLEVEL >= 2,
                           "write_test(%u,%u,ONE) : %u != %i => %i\n",
                           write_group, use_dm, *bit_chk, 0, *bit_chk != 0);
                return *bit_chk != 0x00;
        }
}

/**
 * rw_mgr_mem_calibrate_read_test_patterns() - Read back test patterns
 * @rank_bgn:   Rank number
 * @group:      Read/Write Group
 * @all_ranks:  Test all ranks
 *
 * Performs a guaranteed read on the patterns we are going to use during a
 * read test to ensure memory works.
 */
static int
rw_mgr_mem_calibrate_read_test_patterns(const u32 rank_bgn, const u32 group,
                                        const u32 all_ranks)
{
        const u32 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                         RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        const u32 addr_offset =
                         (group * rwcfg->mem_virtual_groups_per_read_dqs) << 2;
        const u32 rank_end = all_ranks ?
                                rwcfg->mem_number_of_ranks :
                                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        const u32 shift_ratio = rwcfg->mem_dq_per_read_dqs /
                                rwcfg->mem_virtual_groups_per_read_dqs;
        const u32 correct_mask_vg = param->read_correct_mask_vg;

        u32 tmp_bit_chk, base_rw_mgr, bit_chk;
        int vg, r;
        int ret = 0;

        bit_chk = param->read_correct_mask;

        for (r = rank_bgn; r < rank_end; r++) {
                /* Set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                /* Load up constant bursts of read commands */
                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
                writel(rwcfg->guaranteed_read,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
                writel(rwcfg->guaranteed_read_cont,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                tmp_bit_chk = 0;
                for (vg = rwcfg->mem_virtual_groups_per_read_dqs - 1;
                     vg >= 0; vg--) {
                        /* Reset the FIFOs to get pointers to known state. */
                        writel(0, &phy_mgr_cmd->fifo_reset);
                        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                  RW_MGR_RESET_READ_DATAPATH_OFFSET);
                        writel(rwcfg->guaranteed_read,
                               addr + addr_offset + (vg << 2));

                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
                        tmp_bit_chk <<= shift_ratio;
                        tmp_bit_chk |= correct_mask_vg & ~base_rw_mgr;
                }

                bit_chk &= tmp_bit_chk;
        }

        writel(rwcfg->clear_dqs_enable, addr + (group << 2));

        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);

        if (bit_chk != param->read_correct_mask)
                ret = -EIO;

        debug_cond(DLEVEL >= 1,
                   "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %i\n",
                   __func__, __LINE__, group, bit_chk,
                   param->read_correct_mask, ret);

        return ret;
}

/**
 * rw_mgr_mem_calibrate_read_load_patterns() - Load up the patterns for read test
 * @rank_bgn:   Rank number
 * @all_ranks:  Test all ranks
 *
 * Load up the patterns we are going to use during a read test.
 */
static void rw_mgr_mem_calibrate_read_load_patterns(const u32 rank_bgn,
                                                    const int all_ranks)
{
        const u32 rank_end = all_ranks ?
                        rwcfg->mem_number_of_ranks :
                        (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        u32 r;

        debug("%s:%d\n", __func__, __LINE__);

        for (r = rank_bgn; r < rank_end; r++) {
                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                /* Load up constant bursts */
                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);

                writel(rwcfg->guaranteed_write_wait0,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);

                writel(rwcfg->guaranteed_write_wait1,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);

                writel(rwcfg->guaranteed_write_wait2,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add2);

                writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);

                writel(rwcfg->guaranteed_write_wait3,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add3);

                writel(rwcfg->guaranteed_write, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                                RW_MGR_RUN_SINGLE_GROUP_OFFSET);
        }

        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
}

/**
 * rw_mgr_mem_calibrate_read_test() - Perform READ test on single rank
 * @rank_bgn:           Rank number
 * @group:              Read/Write group
 * @num_tries:          Number of retries of the test
 * @all_correct:        All bits must be correct in the mask
 * @bit_chk:            Resulting bit mask after the test
 * @all_groups:         Test all R/W groups
 * @all_ranks:          Test all ranks
 *
 * Try a read and see if it returns correct data back. The test has dummy
 * reads inserted into the mix, used to align DQS enable, and has more
 * thorough checks than the regular read test.
 */
1366static int
1367rw_mgr_mem_calibrate_read_test(const u32 rank_bgn, const u32 group,
1368                               const u32 num_tries, const u32 all_correct,
1369                               u32 *bit_chk,
1370                               const u32 all_groups, const u32 all_ranks)
1371{
1372        const u32 rank_end = all_ranks ? rwcfg->mem_number_of_ranks :
1373                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1374        const u32 quick_read_mode =
1375                ((STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS) &&
1376                 misccfg->enable_super_quick_calibration);
1377        u32 correct_mask_vg = param->read_correct_mask_vg;
1378        u32 tmp_bit_chk;
1379        u32 base_rw_mgr;
1380        u32 addr;
1381
1382        int r, vg, ret;
1383
1384        *bit_chk = param->read_correct_mask;
1385
1386        for (r = rank_bgn; r < rank_end; r++) {
1387                /* set rank */
1388                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1389
1390                writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);
1391
1392                writel(rwcfg->read_b2b_wait1,
1393                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1394
1395                writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
1396                writel(rwcfg->read_b2b_wait2,
1397                       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1398
1399                if (quick_read_mode)
1400                        /* Need at least two (1+1) reads to capture failures. */
1401                        writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
1402                else if (all_groups)
1403                        writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
1404                else
1405                        writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);
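                    /*
                     * load_cntr0 appears to set the number of back-to-back
                     * reads issued: 0x1 (two reads total, per the comment
                     * above) in quick mode, 0x06 when testing all groups,
                     * and 0x32 (50) reads otherwise.
                     */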
1406
1407                writel(rwcfg->read_b2b,
1408                       &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1409                if (all_groups)
1410                        writel(rwcfg->mem_if_read_dqs_width *
1411                               rwcfg->mem_virtual_groups_per_read_dqs - 1,
1412                               &sdr_rw_load_mgr_regs->load_cntr3);
1413                else
1414                        writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);
1415
1416                writel(rwcfg->read_b2b,
1417                       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1418
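                    /*
                     * Accumulate per-bit results for this rank. The RW
                     * manager status word (base_rw_mgr, read below) flags
                     * failing bits, so its complement, masked with
                     * correct_mask_vg, yields the bits that passed for
                     * each virtual group.
                     */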
1419                tmp_bit_chk = 0;
1420                for (vg = rwcfg->mem_virtual_groups_per_read_dqs - 1; vg >= 0;
1421                     vg--) {
1422                        /* Reset the FIFOs to get pointers to known state. */
1423                        writel(0, &phy_mgr_cmd->fifo_reset);
1424                        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1425                                  RW_MGR_RESET_READ_DATAPATH_OFFSET);
1426
1427                        if (all_groups) {
1428                                addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1429                                       RW_MGR_RUN_ALL_GROUPS_OFFSET;
1430                        } else {
1431                                addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1432                                       RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1433                        }
1434
1435                        writel(rwcfg->read_b2b, addr +
1436                               ((group *
1437                                 rwcfg->mem_virtual_groups_per_read_dqs +
1438                                 vg) << 2));
1439
1440                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1441                        tmp_bit_chk <<= rwcfg->mem_dq_per_read_dqs /
1442                                        rwcfg->mem_virtual_groups_per_read_dqs;
1443                        tmp_bit_chk |= correct_mask_vg & ~(base_rw_mgr);
1444                }
1445
1446                *bit_chk &= tmp_bit_chk;
1447        }
1448
1449        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1450        writel(rwcfg->clear_dqs_enable, addr + (group << 2));
1451
1452        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1453
1454        if (all_correct) {
1455                ret = (*bit_chk == param->read_correct_mask);
1456                debug_cond(DLEVEL >= 2,
1457                           "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %i\n",
1458                           __func__, __LINE__, group, all_groups, *bit_chk,
1459                           param->read_correct_mask, ret);
1460        } else  {
1461                ret = (*bit_chk != 0x00);
1462                debug_cond(DLEVEL >= 2,
1463                           "%s:%d read_test(%u,ONE,%u) => (%u != %u) => %i\n",
1464                           __func__, __LINE__, group, all_groups, *bit_chk,
1465                           0, ret);
1466        }
1467
1468        return ret;
1469}
1470
1471/**
1472 * rw_mgr_mem_calibrate_read_test_all_ranks() - Perform READ test on all ranks
1473 * @grp:                Read/Write group
1474 * @num_tries:          Number of retries of the test
1475 * @all_correct:        All bits must be correct in the mask
1476 * @all_groups:         Test all R/W groups
1477 *
1478 * Perform a READ test across all memory ranks.
1479 */
1480static int
1481rw_mgr_mem_calibrate_read_test_all_ranks(const u32 grp, const u32 num_tries,
1482                                         const u32 all_correct,
1483                                         const u32 all_groups)
1484{
1485        u32 bit_chk;
1486        return rw_mgr_mem_calibrate_read_test(0, grp, num_tries, all_correct,
1487                                              &bit_chk, all_groups, 1);
1488}
1489
1490/**
1491 * rw_mgr_incr_vfifo() - Increase VFIFO value
1492 * @grp:        Read/Write group
1493 *
1494 * Increase VFIFO value.
1495 */
1496static void rw_mgr_incr_vfifo(const u32 grp)
1497{
1498        writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
1499}
1500
1501/**
1502 * rw_mgr_decr_vfifo() - Decrease VFIFO value
1503 * @grp:        Read/Write group
1504 *
1505 * Decrease VFIFO value.
1506 */
1507static void rw_mgr_decr_vfifo(const u32 grp)
1508{
1509        u32 i;
1510
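            /*
             * There is no direct decrement; incrementing the counter
             * (FIFO size - 1) times effectively decrements it by one,
             * assuming the counter wraps at read_valid_fifo_size.
             */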
1511        for (i = 0; i < misccfg->read_valid_fifo_size - 1; i++)
1512                rw_mgr_incr_vfifo(grp);
1513}
1514
1515/**
1516 * find_vfifo_failing_read() - Push VFIFO to get a failing read
1517 * @grp:        Read/Write group
1518 *
1519 * Push VFIFO until a failing read happens.
1520 */
1521static int find_vfifo_failing_read(const u32 grp)
1522{
1523        u32 v, ret, fail_cnt = 0;
1524
1525        for (v = 0; v < misccfg->read_valid_fifo_size; v++) {
1526                debug_cond(DLEVEL >= 2, "%s:%d: vfifo %u\n",
1527                           __func__, __LINE__, v);
1528                ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1529                                                PASS_ONE_BIT, 0);
1530                if (!ret) {
1531                        fail_cnt++;
1532
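                            /*
                             * Trust the failing position only after a
                             * second failing read is observed.
                             */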
1533                        if (fail_cnt == 2)
1534                                return v;
1535                }
1536
1537                /* Fiddle with FIFO. */
1538                rw_mgr_incr_vfifo(grp);
1539        }
1540
1541        /* No failing read found! Something must have gone wrong. */
1542        debug_cond(DLEVEL >= 2, "%s:%d: vfifo failed\n", __func__, __LINE__);
1543        return 0;
1544}
1545
1546/**
1547 * sdr_find_phase_delay() - Find DQS enable phase or delay
1548 * @working:    If 1, look for working phase/delay, if 0, look for non-working
1549 * @delay:      If 1, look for delay, if 0, look for phase
1550 * @grp:        Read/Write group
1551 * @work:       Working window position
1552 * @work_inc:   Working window increment
1553 * @pd:         DQS Phase/Delay Iterator
1554 *
1555 * Find working or non-working DQS enable phase setting.
1556 */
1557static int sdr_find_phase_delay(int working, int delay, const u32 grp,
1558                                u32 *work, const u32 work_inc, u32 *pd)
1559{
1560        const u32 max = delay ? iocfg->dqs_en_delay_max :
1561                                iocfg->dqs_en_phase_max;
1562        u32 ret;
1563
1564        for (; *pd <= max; (*pd)++) {
1565                if (delay)
1566                        scc_mgr_set_dqs_en_delay_all_ranks(grp, *pd);
1567                else
1568                        scc_mgr_set_dqs_en_phase_all_ranks(grp, *pd);
1569
1570                ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1571                                        PASS_ONE_BIT, 0);
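                    /*
                     * When searching for the non-working setting, invert
                     * the test result so a failing read counts as the
                     * match below.
                     */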
1572                if (!working)
1573                        ret = !ret;
1574
1575                if (ret)
1576                        return 0;
1577
1578                if (work)
1579                        *work += work_inc;
1580        }
1581
1582        return -EINVAL;
1583}

1584/**
1585 * sdr_find_phase() - Find DQS enable phase
1586 * @working:    If 1, look for working phase, if 0, look for non-working phase
1587 * @grp:        Read/Write group
1588 * @work:       Working window position
1589 * @i:          Iterator
1590 * @p:          DQS Phase Iterator
1591 *
1592 * Find working or non-working DQS enable phase setting.
1593 */
1594static int sdr_find_phase(int working, const u32 grp, u32 *work,
1595                          u32 *i, u32 *p)
1596{
1597        const u32 end = misccfg->read_valid_fifo_size + (working ? 0 : 1);
1598        int ret;
1599
1600        for (; *i < end; (*i)++) {
1601                if (working)
1602                        *p = 0;
1603
1604                ret = sdr_find_phase_delay(working, 0, grp, work,
1605                                           iocfg->delay_per_opa_tap, p);
1606                if (!ret)
1607                        return 0;
1608
1609                if (*p > iocfg->dqs_en_phase_max) {
1610                        /* Fiddle with FIFO. */
1611                        rw_mgr_incr_vfifo(grp);
1612                        if (!working)
1613                                *p = 0;
1614                }
1615        }
1616
1617        return -EINVAL;
1618}
1619
1620/**
1621 * sdr_working_phase() - Find working DQS enable phase
1622 * @grp:        Read/Write group
1623 * @work_bgn:   Working window start position
1624 * @d:          dtaps output value
1625 * @p:          DQS Phase Iterator
1626 * @i:          Iterator
1627 *
1628 * Find working DQS enable phase setting.
1629 */
1630static int sdr_working_phase(const u32 grp, u32 *work_bgn, u32 *d,
1631                             u32 *p, u32 *i)
1632{
1633        const u32 dtaps_per_ptap = iocfg->delay_per_opa_tap /
1634                                   iocfg->delay_per_dqs_en_dchain_tap;
1635        int ret;
1636
1637        *work_bgn = 0;
1638
1639        for (*d = 0; *d <= dtaps_per_ptap; (*d)++) {
1640                *i = 0;
1641                scc_mgr_set_dqs_en_delay_all_ranks(grp, *d);
1642                ret = sdr_find_phase(1, grp, work_bgn, i, p);
1643                if (!ret)
1644                        return 0;
1645                *work_bgn += iocfg->delay_per_dqs_en_dchain_tap;
1646        }
1647
1648        /* Cannot find working solution */
1649        debug_cond(DLEVEL >= 2, "%s:%d find_dqs_en_phase: no vfifo/ptap/dtap\n",
1650                   __func__, __LINE__);
1651        return -EINVAL;
1652}
1653
1654/**
1655 * sdr_backup_phase() - Find DQS enable backup phase
1656 * @grp:        Read/Write group
1657 * @work_bgn:   Working window start position
1658 * @p:          DQS Phase Iterator
1659 *
1660 * Find DQS enable backup phase setting.
1661 */
1662static void sdr_backup_phase(const u32 grp, u32 *work_bgn, u32 *p)
1663{
1664        u32 tmp_delay, d;
1665        int ret;
1666
1667        /* Special case code for backing up a phase */
1668        if (*p == 0) {
1669                *p = iocfg->dqs_en_phase_max;
1670                rw_mgr_decr_vfifo(grp);
1671        } else {
1672                (*p)--;
1673        }
1674        tmp_delay = *work_bgn - iocfg->delay_per_opa_tap;
1675        scc_mgr_set_dqs_en_phase_all_ranks(grp, *p);
1676
1677        for (d = 0; d <= iocfg->dqs_en_delay_max && tmp_delay < *work_bgn;
1678             d++) {
1679                scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1680
1681                ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1682                                        PASS_ONE_BIT, 0);
1683                if (ret) {
1684                        *work_bgn = tmp_delay;
1685                        break;
1686                }
1687
1688                tmp_delay += iocfg->delay_per_dqs_en_dchain_tap;
1689        }
1690
1691        /* Restore VFIFO to old state before we decremented it (if needed). */
1692        (*p)++;
1693        if (*p > iocfg->dqs_en_phase_max) {
1694                *p = 0;
1695                rw_mgr_incr_vfifo(grp);
1696        }
1697
1698        scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1699}
1700
1701/**
1702 * sdr_nonworking_phase() - Find non-working DQS enable phase
1703 * @grp:        Read/Write group
1704 * @work_end:   Working window end position
1705 * @p:          DQS Phase Iterator
1706 * @i:          Iterator
1707 *
1708 * Find non-working DQS enable phase setting.
1709 */
1710static int sdr_nonworking_phase(const u32 grp, u32 *work_end, u32 *p, u32 *i)
1711{
1712        int ret;
1713
1714        (*p)++;
1715        *work_end += iocfg->delay_per_opa_tap;
1716        if (*p > iocfg->dqs_en_phase_max) {
1717                /* Fiddle with FIFO. */
1718                *p = 0;
1719                rw_mgr_incr_vfifo(grp);
1720        }
1721
1722        ret = sdr_find_phase(0, grp, work_end, i, p);
1723        if (ret) {
1724                /* Cannot see edge of failing read. */
1725                debug_cond(DLEVEL >= 2, "%s:%d: end: failed\n",
1726                           __func__, __LINE__);
1727        }
1728
1729        return ret;
1730}
1731
1732/**
1733 * sdr_find_window_center() - Find center of the working DQS window.
1734 * @grp:        Read/Write group
1735 * @work_bgn:   First working settings
1736 * @work_end:   Last working settings
1737 *
1738 * Find center of the working DQS enable window.
1739 */
1740static int sdr_find_window_center(const u32 grp, const u32 work_bgn,
1741                                  const u32 work_end)
1742{
1743        u32 work_mid;
1744        int tmp_delay = 0;
1745        int i, p, d;
1746
1747        work_mid = (work_bgn + work_end) / 2;
1748
1749        debug_cond(DLEVEL >= 2, "work_bgn=%d work_end=%d work_mid=%d\n",
1750                   work_bgn, work_end, work_mid);
1751        /* Get the middle delay to be less than a VFIFO delay */
1752        tmp_delay = (iocfg->dqs_en_phase_max + 1) * iocfg->delay_per_opa_tap;
1753
1754        debug_cond(DLEVEL >= 2, "vfifo ptap delay %d\n", tmp_delay);
1755        work_mid %= tmp_delay;
1756        debug_cond(DLEVEL >= 2, "new work_mid %d\n", work_mid);
1757
1758        tmp_delay = rounddown(work_mid, iocfg->delay_per_opa_tap);
1759        if (tmp_delay > iocfg->dqs_en_phase_max * iocfg->delay_per_opa_tap)
1760                tmp_delay = iocfg->dqs_en_phase_max * iocfg->delay_per_opa_tap;
1761        p = tmp_delay / iocfg->delay_per_opa_tap;
1762
1763        debug_cond(DLEVEL >= 2, "new p %d, tmp_delay=%d\n", p, tmp_delay);
1764
1765        d = DIV_ROUND_UP(work_mid - tmp_delay,
1766                         iocfg->delay_per_dqs_en_dchain_tap);
1767        if (d > iocfg->dqs_en_delay_max)
1768                d = iocfg->dqs_en_delay_max;
1769        tmp_delay += d * iocfg->delay_per_dqs_en_dchain_tap;
1770
1771        debug_cond(DLEVEL >= 2, "new d %d, tmp_delay=%d\n", d, tmp_delay);
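            /*
             * Hypothetical example: with delay_per_opa_tap = 416 ps and
             * delay_per_dqs_en_dchain_tap = 25 ps, work_mid = 1000 ps
             * gives p = 832 / 416 = 2 and d = DIV_ROUND_UP(168, 25) = 7.
             */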
1772
1773        scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1774        scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1775
1776        /*
1777         * Push VFIFO until we can successfully calibrate. We can do this
1778         * because the largest possible margin is 1 VFIFO cycle.
1779         */
1780        for (i = 0; i < misccfg->read_valid_fifo_size; i++) {
1781                debug_cond(DLEVEL >= 2, "find_dqs_en_phase: center\n");
1782                if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1783                                                             PASS_ONE_BIT,
1784                                                             0)) {
1785                        debug_cond(DLEVEL >= 2,
1786                                   "%s:%d center: found: ptap=%u dtap=%u\n",
1787                                   __func__, __LINE__, p, d);
1788                        return 0;
1789                }
1790
1791                /* Fiddle with FIFO. */
1792                rw_mgr_incr_vfifo(grp);
1793        }
1794
1795        debug_cond(DLEVEL >= 2, "%s:%d center: failed.\n",
1796                   __func__, __LINE__);
1797        return -EINVAL;
1798}
1799
1800/**
1801 * rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase() - Find a good DQS enable to use
1802 * @grp:        Read/Write Group
1803 *
1804 * Find a good DQS enable to use.
1805 */
1806static int rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(const u32 grp)
1807{
1808        u32 d, p, i;
1809        u32 dtaps_per_ptap;
1810        u32 work_bgn, work_end;
1811        u32 found_passing_read, found_failing_read = 0, initial_failing_dtap;
1812        int ret;
1813
1814        debug("%s:%d %u\n", __func__, __LINE__, grp);
1815
1816        reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
1817
1818        scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1819        scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);
1820
1821        /* Step 0: Determine number of delay taps for each phase tap. */
1822        dtaps_per_ptap = iocfg->delay_per_opa_tap /
1823                         iocfg->delay_per_dqs_en_dchain_tap;
1824
1825        /* Step 1: First push vfifo until we get a failing read. */
1826        find_vfifo_failing_read(grp);
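            /*
             * The return value is not needed here; the call simply
             * leaves the VFIFO at a failing position (or emits a debug
             * message if none was found).
             */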
1827
1828        /* Step 2: Find first working phase, increment in ptaps. */
1829        work_bgn = 0;
1830        ret = sdr_working_phase(grp, &work_bgn, &d, &p, &i);
1831        if (ret)
1832                return ret;
1833
1834        work_end = work_bgn;
1835
1836        /*
1837         * If d is 0 then the working window covers a phase tap and we can
1838         * follow the old procedure. Otherwise, we've found the beginning
1839         * and we need to increment the dtaps until we find the end.
1840         */
1841        if (d == 0) {
1842                /*
1843                 * Step 3a: If we have room, back off by one and
1844                 *          increment in dtaps.
1845                 */
1846                sdr_backup_phase(grp, &work_bgn, &p);
1847
1848                /*
1849                 * Step 4a: go forward from working phase to non working
1850                 * phase, increment in ptaps.
1851                 */
1852                ret = sdr_nonworking_phase(grp, &work_end, &p, &i);
1853                if (ret)
1854                        return ret;
1855
1856                /* Step 5a: Back off one from last, increment in dtaps. */
1857
1858                /* Special case code for backing up a phase */
1859                if (p == 0) {
1860                        p = iocfg->dqs_en_phase_max;
1861                        rw_mgr_decr_vfifo(grp);
1862                } else {
1863                        p = p - 1;
1864                }
1865
1866                work_end -= iocfg->delay_per_opa_tap;
1867                scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1868
1869                d = 0;
1870
1871                debug_cond(DLEVEL >= 2, "%s:%d p: ptap=%u\n",
1872                           __func__, __LINE__, p);
1873        }
1874
1875        /* The dtap increment to find the failing edge is done here. */
1876        sdr_find_phase_delay(0, 1, grp, &work_end,
1877                             iocfg->delay_per_dqs_en_dchain_tap, &d);
1878
1879        /* Go back to working dtap */
1880        if (d != 0)
1881                work_end -= iocfg->delay_per_dqs_en_dchain_tap;
1882
1883        debug_cond(DLEVEL >= 2,
1884                   "%s:%d p/d: ptap=%u dtap=%u end=%u\n",
1885                   __func__, __LINE__, p, d - 1, work_end);
1886
1887        if (work_end < work_bgn) {
1888                /* nil range */
1889                debug_cond(DLEVEL >= 2, "%s:%d end-2: failed\n",
1890                           __func__, __LINE__);
1891                return -EINVAL;
1892        }
1893
1894        debug_cond(DLEVEL >= 2, "%s:%d found range [%u,%u]\n",
1895                   __func__, __LINE__, work_bgn, work_end);
1896
1897        /*
1898         * We need to calculate the number of dtaps that equal a ptap.
1899         * To do that we'll back up a ptap and re-find the edge of the
1900         * window using dtaps
1901         */
1902        debug_cond(DLEVEL >= 2, "%s:%d calculate dtaps_per_ptap for tracking\n",
1903                   __func__, __LINE__);
1904
1905        /* Special case code for backing up a phase */
1906        if (p == 0) {
1907                p = iocfg->dqs_en_phase_max;
1908                rw_mgr_decr_vfifo(grp);
1909                debug_cond(DLEVEL >= 2, "%s:%d backedup cycle/phase: p=%u\n",
1910                           __func__, __LINE__, p);
1911        } else {
1912                p = p - 1;
1913                debug_cond(DLEVEL >= 2, "%s:%d backedup phase only: p=%u\n",
1914                           __func__, __LINE__, p);
1915        }
1916
1917        scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1918
1919        /*
1920         * Increase dtap until we first see a passing read (in case the
1921         * window is smaller than a ptap), and then a failing read to
1922         * mark the edge of the window again.
1923         */
1924
1925        /* Find a passing read. */
1926        debug_cond(DLEVEL >= 2, "%s:%d find passing read\n",
1927                   __func__, __LINE__);
1928
1929        initial_failing_dtap = d;
1930
1931        found_passing_read = !sdr_find_phase_delay(1, 1, grp, NULL, 0, &d);
1932        if (found_passing_read) {
1933                /* Find a failing read. */
1934                debug_cond(DLEVEL >= 2, "%s:%d find failing read\n",
1935                           __func__, __LINE__);
1936                d++;
1937                found_failing_read = !sdr_find_phase_delay(0, 1, grp, NULL, 0,
1938                                                           &d);
1939        } else {
1940                debug_cond(DLEVEL >= 1,
1941                           "%s:%d failed to calculate dtaps per ptap. Fall back on static value\n",
1942                           __func__, __LINE__);
1943        }
1944
1945        /*
1946         * The dynamically calculated dtaps_per_ptap is only valid if we
1947         * found a passing/failing read. If we didn't, it means d hit the max
1948         * (iocfg->dqs_en_delay_max). In that case, dtaps_per_ptap retains
1949         * its statically calculated value.
1950         */
1951        if (found_passing_read && found_failing_read)
1952                dtaps_per_ptap = d - initial_failing_dtap;
1953
1954        writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
1955        debug_cond(DLEVEL >= 2, "%s:%d dtaps_per_ptap=%u - %u = %u\n",
1956                   __func__, __LINE__, d, initial_failing_dtap, dtaps_per_ptap);
1957
1958        /* Step 6: Find the centre of the window. */
1959        ret = sdr_find_window_center(grp, work_bgn, work_end);
1960
1961        return ret;
1962}
1963
1964/**
1965 * search_stop_check() - Check if the detected edge is valid
1966 * @write:              Perform read (Stage 2) or write (Stage 3) calibration
1967 * @d:                  DQS delay
1968 * @rank_bgn:           Rank number
1969 * @write_group:        Write Group
1970 * @read_group:         Read Group
1971 * @bit_chk:            Resulting bit mask after the test
1972 * @sticky_bit_chk:     Resulting sticky bit mask after the test
1973 * @use_read_test:      Perform read test
1974 *
1975 * Test if the found edge is valid.
1976 */
1977static u32 search_stop_check(const int write, const int d, const int rank_bgn,
1978                             const u32 write_group, const u32 read_group,
1979                             u32 *bit_chk, u32 *sticky_bit_chk,
1980                             const u32 use_read_test)
1981{
1982        const u32 ratio = rwcfg->mem_if_read_dqs_width /
1983                          rwcfg->mem_if_write_dqs_width;
1984        const u32 correct_mask = write ? param->write_correct_mask :
1985                                         param->read_correct_mask;
1986        const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs :
1987                                    rwcfg->mem_dq_per_read_dqs;
1988        u32 ret;
1989        /*
1990         * Stop searching when the read test doesn't pass AND when
1991         * we've seen a passing read on every bit.
1992         */
1993        if (write) {                    /* WRITE-ONLY */
1994                ret = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1995                                                         0, PASS_ONE_BIT,
1996                                                         bit_chk, 0);
1997        } else if (use_read_test) {     /* READ-ONLY */
1998                ret = !rw_mgr_mem_calibrate_read_test(rank_bgn, read_group,
1999                                                        NUM_READ_PB_TESTS,
2000                                                        PASS_ONE_BIT, bit_chk,
2001                                                        0, 0);
2002        } else {                        /* READ-ONLY */
2003                rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0,
2004                                                PASS_ONE_BIT, bit_chk, 0);
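                    /*
                     * The write test covers the whole write group; shift
                     * out the bits of the other read groups sharing it so
                     * only the read group under test remains (there are
                     * 'ratio' read groups per write group).
                     */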
2005                *bit_chk = *bit_chk >> (per_dqs *
2006                        (read_group - (write_group * ratio)));
2007                ret = (*bit_chk == 0);
2008        }
2009        *sticky_bit_chk |= *bit_chk;
2010        ret = ret && (*sticky_bit_chk == correct_mask);
2011        debug_cond(DLEVEL >= 2,
2012                   "%s:%d center(left): dtap=%u => %u == %u && %u\n",
2013                   __func__, __LINE__, d,
2014                   *sticky_bit_chk, correct_mask, ret);
2015        return ret;
2016}
2017
2018/**
2019 * search_left_edge() - Find left edge of DQ/DQS working phase
2020 * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2021 * @rank_bgn:           Rank number
2022 * @write_group:        Write Group
2023 * @read_group:         Read Group
2024 * @test_bgn:           Rank number to begin the test
2025 * @sticky_bit_chk:     Resulting sticky bit mask after the test
2026 * @left_edge:          Left edge of the DQ/DQS phase
2027 * @right_edge:         Right edge of the DQ/DQS phase
2028 * @use_read_test:      Perform read test
2029 *
2030 * Find left edge of DQ/DQS working phase.
2031 */
2032static void search_left_edge(const int write, const int rank_bgn,
2033        const u32 write_group, const u32 read_group, const u32 test_bgn,
2034        u32 *sticky_bit_chk,
2035        int *left_edge, int *right_edge, const u32 use_read_test)
2036{
2037        const u32 delay_max = write ? iocfg->io_out1_delay_max :
2038                                      iocfg->io_in_delay_max;
2039        const u32 dqs_max = write ? iocfg->io_out1_delay_max :
2040                                    iocfg->dqs_in_delay_max;
2041        const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs :
2042                                    rwcfg->mem_dq_per_read_dqs;
2043        u32 stop, bit_chk;
2044        int i, d;
2045
2046        for (d = 0; d <= dqs_max; d++) {
2047                if (write)
2048                        scc_mgr_apply_group_dq_out1_delay(d);
2049                else
2050                        scc_mgr_apply_group_dq_in_delay(test_bgn, d);
2051
2052                writel(0, &sdr_scc_mgr->update);
2053
2054                stop = search_stop_check(write, d, rank_bgn, write_group,
2055                                         read_group, &bit_chk, sticky_bit_chk,
2056                                         use_read_test);
2057                if (stop == 1)
2058                        break;
2059
2060                /* stop != 1 */
2061                for (i = 0; i < per_dqs; i++) {
2062                        if (bit_chk & 1) {
2063                                /*
2064                                 * Remember a passing test as
2065                                 * the left_edge.
2066                                 */
2067                                left_edge[i] = d;
2068                        } else {
2069                                /*
2070                                 * If a left edge has not been seen
2071                                 * yet, then a future passing test
2072                                 * will mark this edge as the right
2073                                 * edge.
2074                                 */
2075                                if (left_edge[i] == delay_max + 1)
2076                                        right_edge[i] = -(d + 1);
2077                        }
2078                        bit_chk >>= 1;
2079                }
2080        }
2081
2082        /* Reset DQ delay chains to 0 */
2083        if (write)
2084                scc_mgr_apply_group_dq_out1_delay(0);
2085        else
2086                scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
2087
2088        *sticky_bit_chk = 0;
2089        for (i = per_dqs - 1; i >= 0; i--) {
2090                debug_cond(DLEVEL >= 2,
2091                           "%s:%d vfifo_center: left_edge[%u]: %d right_edge[%u]: %d\n",
2092                           __func__, __LINE__, i, left_edge[i],
2093                           i, right_edge[i]);
2094
2095                /*
2096                 * Check for cases where we haven't found the left edge,
2097                 * which makes our assignment of the right edge invalid.
2098                 * Reset it to the illegal value.
2099                 */
2100                if ((left_edge[i] == delay_max + 1) &&
2101                    (right_edge[i] != delay_max + 1)) {
2102                        right_edge[i] = delay_max + 1;
2103                        debug_cond(DLEVEL >= 2,
2104                                   "%s:%d vfifo_center: reset right_edge[%u]: %d\n",
2105                                   __func__, __LINE__, i, right_edge[i]);
2106                }
2107
2108                /*
2109                 * Reset sticky bit
2110                 * READ: except for bits where we have seen both
2111                 *       the left and right edge.
2112                 * WRITE: except for bits where we have seen the
2113                 *        left edge.
2114                 */
2115                *sticky_bit_chk <<= 1;
2116                if (write) {
2117                        if (left_edge[i] != delay_max + 1)
2118                                *sticky_bit_chk |= 1;
2119                } else {
2120                        if ((left_edge[i] != delay_max + 1) &&
2121                            (right_edge[i] != delay_max + 1))
2122                                *sticky_bit_chk |= 1;
2123                }
2124        }
2125}
2126
2127/**
2128 * search_right_edge() - Find right edge of DQ/DQS working phase
2129 * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2130 * @rank_bgn:           Rank number
2131 * @write_group:        Write Group
2132 * @read_group:         Read Group
2133 * @start_dqs:          DQS start phase
2134 * @start_dqs_en:       DQS enable start phase
2135 * @sticky_bit_chk:     Resulting sticky bit mask after the test
2136 * @left_edge:          Left edge of the DQ/DQS phase
2137 * @right_edge:         Right edge of the DQ/DQS phase
2138 * @use_read_test:      Perform read test
2139 *
2140 * Find right edge of DQ/DQS working phase.
2141 */
2142static int search_right_edge(const int write, const int rank_bgn,
2143        const u32 write_group, const u32 read_group,
2144        const int start_dqs, const int start_dqs_en,
2145        u32 *sticky_bit_chk,
2146        int *left_edge, int *right_edge, const u32 use_read_test)
2147{
2148        const u32 delay_max = write ? iocfg->io_out1_delay_max :
2149                                      iocfg->io_in_delay_max;
2150        const u32 dqs_max = write ? iocfg->io_out1_delay_max :
2151                                    iocfg->dqs_in_delay_max;
2152        const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs :
2153                                    rwcfg->mem_dq_per_read_dqs;
2154        u32 stop, bit_chk;
2155        int i, d;
2156
2157        for (d = 0; d <= dqs_max - start_dqs; d++) {
2158                if (write) {    /* WRITE-ONLY */
2159                        scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2160                                                                d + start_dqs);
2161                } else {        /* READ-ONLY */
2162                        scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
2163                        if (iocfg->shift_dqs_en_when_shift_dqs) {
2164                                u32 delay = d + start_dqs_en;
2165                                if (delay > iocfg->dqs_en_delay_max)
2166                                        delay = iocfg->dqs_en_delay_max;
2167                                scc_mgr_set_dqs_en_delay(read_group, delay);
2168                        }
2169                        scc_mgr_load_dqs(read_group);
2170                }
2171
2172                writel(0, &sdr_scc_mgr->update);
2173
2174                stop = search_stop_check(write, d, rank_bgn, write_group,
2175                                         read_group, &bit_chk, sticky_bit_chk,
2176                                         use_read_test);
2177                if (stop == 1) {
2178                        if (write && (d == 0)) {        /* WRITE-ONLY */
2179                                for (i = 0; i < rwcfg->mem_dq_per_write_dqs;
2180                                     i++) {
2181                                        /*
2182                                         * d = 0 failed, but it passed when
2183                                         * testing the left edge, so it must be
2184                                         * marginal, set it to -1
2185                                         */
2186                                        if (right_edge[i] == delay_max + 1 &&
2187                                            left_edge[i] != delay_max + 1)
2188                                                right_edge[i] = -1;
2189                                }
2190                        }
2191                        break;
2192                }
2193
2194                /* stop != 1 */
2195                for (i = 0; i < per_dqs; i++) {
2196                        if (bit_chk & 1) {
2197                                /*
2198                                 * Remember a passing test as
2199                                 * the right_edge.
2200                                 */
2201                                right_edge[i] = d;
2202                        } else {
2203                                if (d != 0) {
2204                                        /*
2205                                         * If a right edge has not
2206                                         * been seen yet, then a future
2207                                         * passing test will mark this
2208                                         * edge as the left edge.
2209                                         */
2210                                        if (right_edge[i] == delay_max + 1)
2211                                                left_edge[i] = -(d + 1);
2212                                } else {
2213                                        /*
2214                                         * d = 0 failed, but it passed
2215                                         * when testing the left edge,
2216                                         * so it must be marginal, set
2217                                         * it to -1
2218                                         */
2219                                        if (right_edge[i] == delay_max + 1 &&
2220                                            left_edge[i] != delay_max + 1)
2221                                                right_edge[i] = -1;
2222                                        /*
2223                                         * If a right edge has not been
2224                                         * seen yet, then a future
2225                                         * passing test will mark this
2226                                         * edge as the left edge.
2227                                         */
2228                                        else if (right_edge[i] == delay_max + 1)
2229                                                left_edge[i] = -(d + 1);
2230                                }
2231                        }
2232
2233                        debug_cond(DLEVEL >= 2, "%s:%d center[r,d=%u]: ",
2234                                   __func__, __LINE__, d);
2235                        debug_cond(DLEVEL >= 2,
2236                                   "bit_chk_test=%i left_edge[%u]: %d ",
2237                                   bit_chk & 1, i, left_edge[i]);
2238                        debug_cond(DLEVEL >= 2, "right_edge[%u]: %d\n", i,
2239                                   right_edge[i]);
2240                        bit_chk >>= 1;
2241                }
2242        }
2243
2244        /* Check that all bits have a window */
2245        for (i = 0; i < per_dqs; i++) {
2246                debug_cond(DLEVEL >= 2,
2247                   "%s:%d write_center: left_edge[%u]: %d right_edge[%u]: %d\n",
2248                           __func__, __LINE__, i, left_edge[i],
2249                           i, right_edge[i]);
2250                if ((left_edge[i] == dqs_max + 1) ||
2251                    (right_edge[i] == dqs_max + 1))
2252                        return i + 1;   /* FIXME: If we fail, retval > 0 */
2253        }
2254
2255        return 0;
2256}
2257
2258/**
2259 * get_window_mid_index() - Find the best middle setting of DQ/DQS phase
2260 * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2261 * @left_edge:          Left edge of the DQ/DQS phase
2262 * @right_edge:         Right edge of the DQ/DQS phase
2263 * @mid_min:            Best DQ/DQS phase middle setting
2264 *
2265 * Find index and value of the middle of the DQ/DQS working phase.
2266 */
2267static int get_window_mid_index(const int write, int *left_edge,
2268                                int *right_edge, int *mid_min)
2269{
2270        const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs :
2271                                    rwcfg->mem_dq_per_read_dqs;
2272        int i, mid, min_index;
2273
2274        /* Find middle of window for each DQ bit */
2275        *mid_min = left_edge[0] - right_edge[0];
2276        min_index = 0;
2277        for (i = 1; i < per_dqs; i++) {
2278                mid = left_edge[i] - right_edge[i];
2279                if (mid < *mid_min) {
2280                        *mid_min = mid;
2281                        min_index = i;
2282                }
2283        }
2284
2285        /*
2286         * -mid_min/2 represents the amount that we need to move DQS.
2287         * If mid_min is odd and positive we'll need to add one to make
2288         * sure the rounding in further calculations is correct (always
2289         * bias to the right), so just add 1 for all positive values.
2290         */
2291        if (*mid_min > 0)
2292                (*mid_min)++;
2293        *mid_min = *mid_min / 2;
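            /*
             * E.g. mid_min = 5 becomes 6 / 2 = 3 with the bias above,
             * while mid_min = -5 becomes -5 / 2 = -2, since C division
             * truncates toward zero.
             */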
2294
2295        debug_cond(DLEVEL >= 1, "%s:%d vfifo_center: *mid_min=%d (index=%u)\n",
2296                   __func__, __LINE__, *mid_min, min_index);
2297        return min_index;
2298}
2299
2300/**
2301 * center_dq_windows() - Center the DQ/DQS windows
2302 * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2303 * @left_edge:          Left edge of the DQ/DQS phase
2304 * @right_edge:         Right edge of the DQ/DQS phase
2305 * @mid_min:            Adjusted DQ/DQS phase middle setting
2306 * @orig_mid_min:       Original DQ/DQS phase middle setting
2307 * @min_index:          DQ/DQS phase middle setting index
2308 * @test_bgn:           Rank number to begin the test
2309 * @dq_margin:          Amount of shift for the DQ
2310 * @dqs_margin:         Amount of shift for the DQS
2311 *
2312 * Align the DQ/DQS windows in each group.
2313 */
2314static void center_dq_windows(const int write, int *left_edge, int *right_edge,
2315                              const int mid_min, const int orig_mid_min,
2316                              const int min_index, const int test_bgn,
2317                              int *dq_margin, int *dqs_margin)
2318{
2319        const s32 delay_max = write ? iocfg->io_out1_delay_max :
2320                                      iocfg->io_in_delay_max;
2321        const s32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs :
2322                                    rwcfg->mem_dq_per_read_dqs;
2323        const s32 delay_off = write ? SCC_MGR_IO_OUT1_DELAY_OFFSET :
2324                                      SCC_MGR_IO_IN_DELAY_OFFSET;
2325        const s32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | delay_off;
2326
2327        s32 temp_dq_io_delay1;
2328        int shift_dq, i, p;
2329
2330        /* Initialize data for export structures */
2331        *dqs_margin = delay_max + 1;
2332        *dq_margin  = delay_max + 1;
2333
2334        /* add delay to bring centre of all DQ windows to the same "level" */
2335        for (i = 0, p = test_bgn; i < per_dqs; i++, p++) {
2336                /* Use values before divide by 2 to reduce round off error */
2337                shift_dq = (left_edge[i] - right_edge[i] -
2338                        (left_edge[min_index] - right_edge[min_index])) / 2 +
2339                        (orig_mid_min - mid_min);
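                    /*
                     * (left_edge - right_edge) / 2 is the offset of a
                     * bit's window centre from the current setting; the
                     * shift aligns each bit's centre with that of the
                     * reference bit at min_index, plus the global DQS
                     * correction (orig_mid_min - mid_min).
                     */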
2340
2341                debug_cond(DLEVEL >= 2,
2342                           "vfifo_center: before: shift_dq[%u]=%d\n",
2343                           i, shift_dq);
2344
2345                temp_dq_io_delay1 = readl(addr + (i << 2));
2346
2347                if (shift_dq + temp_dq_io_delay1 > delay_max)
2348                        shift_dq = delay_max - temp_dq_io_delay1;
2349                else if (shift_dq + temp_dq_io_delay1 < 0)
2350                        shift_dq = -temp_dq_io_delay1;
2351
2352                debug_cond(DLEVEL >= 2,
2353                           "vfifo_center: after: shift_dq[%u]=%d\n",
2354                           i, shift_dq);
2355
2356                if (write)
2357                        scc_mgr_set_dq_out1_delay(i,
2358                                                  temp_dq_io_delay1 + shift_dq);
2359                else
2360                        scc_mgr_set_dq_in_delay(p,
2361                                                temp_dq_io_delay1 + shift_dq);
2362
2363                scc_mgr_load_dq(p);
2364
2365                debug_cond(DLEVEL >= 2,
2366                           "vfifo_center: margin[%u]=[%d,%d]\n", i,
2367                           left_edge[i] - shift_dq + (-mid_min),
2368                           right_edge[i] + shift_dq - (-mid_min));
2369
2370                /* To determine values for export structures */
2371                if (left_edge[i] - shift_dq + (-mid_min) < *dq_margin)
2372                        *dq_margin = left_edge[i] - shift_dq + (-mid_min);
2373
2374                if (right_edge[i] + shift_dq - (-mid_min) < *dqs_margin)
2375                        *dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2376        }
2377}
2378
2379/**
2380 * rw_mgr_mem_calibrate_vfifo_center() - Per-bit deskew DQ and centering
2381 * @rank_bgn:           Rank number
2382 * @rw_group:           Read/Write Group
2383 * @test_bgn:           Rank at which the test begins
2384 * @use_read_test:      Perform a read test
2385 * @update_fom:         Update FOM
2386 *
2387 * Per-bit deskew DQ and centering.
2388 */
2389static int rw_mgr_mem_calibrate_vfifo_center(const u32 rank_bgn,
2390                        const u32 rw_group, const u32 test_bgn,
2391                        const int use_read_test, const int update_fom)
2392{
2393        const u32 addr =
2394                SDR_PHYGRP_SCCGRP_ADDRESS + SCC_MGR_DQS_IN_DELAY_OFFSET +
2395                (rw_group << 2);
2396        /*
2397         * Store these as signed since there are comparisons with
2398         * signed numbers.
2399         */
2400        u32 sticky_bit_chk;
2401        int32_t left_edge[rwcfg->mem_dq_per_read_dqs];
2402        int32_t right_edge[rwcfg->mem_dq_per_read_dqs];
2403        int32_t orig_mid_min, mid_min;
2404        int32_t new_dqs, start_dqs, start_dqs_en = 0, final_dqs_en;
2405        int32_t dq_margin, dqs_margin;
2406        int i, min_index;
2407        int ret;
2408
2409        debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2410
2411        start_dqs = readl(addr);
2412        if (iocfg->shift_dqs_en_when_shift_dqs)
2413                start_dqs_en = readl(addr - iocfg->dqs_en_delay_offset);
2414
2415        /* Set the left and right edge of each bit to an illegal value, */
2416        /* using (iocfg->io_in_delay_max + 1) as that illegal value. */
2417        sticky_bit_chk = 0;
2418        for (i = 0; i < rwcfg->mem_dq_per_read_dqs; i++) {
2419                left_edge[i]  = iocfg->io_in_delay_max + 1;
2420                right_edge[i] = iocfg->io_in_delay_max + 1;
2421        }
2422
2423        /* Search for the left edge of the window for each bit */
2424        search_left_edge(0, rank_bgn, rw_group, rw_group, test_bgn,
2425                         &sticky_bit_chk,
2426                         left_edge, right_edge, use_read_test);
2427
2429        /* Search for the right edge of the window for each bit */
2430        ret = search_right_edge(0, rank_bgn, rw_group, rw_group,
2431                                start_dqs, start_dqs_en,
2432                                &sticky_bit_chk,
2433                                left_edge, right_edge, use_read_test);
2434        if (ret) {
2435                /*
2436                 * Restore delay chain settings before letting the loop
2437                 * in rw_mgr_mem_calibrate_vfifo to retry different
2438                 * dqs/ck relationships.
2439                 */
2440                scc_mgr_set_dqs_bus_in_delay(rw_group, start_dqs);
2441                if (iocfg->shift_dqs_en_when_shift_dqs)
2442                        scc_mgr_set_dqs_en_delay(rw_group, start_dqs_en);
2443
2444                scc_mgr_load_dqs(rw_group);
2445                writel(0, &sdr_scc_mgr->update);
2446
2447                debug_cond(DLEVEL >= 1,
2448                   "%s:%d vfifo_center: failed to find edge [%u]: %d %d\n",
2449                           __func__, __LINE__, i, left_edge[i], right_edge[i]);
2450                if (use_read_test) {
2451                        set_failing_group_stage(rw_group *
2452                                rwcfg->mem_dq_per_read_dqs + i,
2453                                CAL_STAGE_VFIFO,
2454                                CAL_SUBSTAGE_VFIFO_CENTER);
2455                } else {
2456                        set_failing_group_stage(rw_group *
2457                                rwcfg->mem_dq_per_read_dqs + i,
2458                                CAL_STAGE_VFIFO_AFTER_WRITES,
2459                                CAL_SUBSTAGE_VFIFO_CENTER);
2460                }
2461                return -EIO;
2462        }
2463
2464        min_index = get_window_mid_index(0, left_edge, right_edge, &mid_min);
2465
2466        /* Determine the amount we can change DQS (which is -mid_min) */
2467        orig_mid_min = mid_min;
2468        new_dqs = start_dqs - mid_min;
2469        if (new_dqs > iocfg->dqs_in_delay_max)
2470                new_dqs = iocfg->dqs_in_delay_max;
2471        else if (new_dqs < 0)
2472                new_dqs = 0;
2473
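            /*
             * Recompute mid_min from the clamped new_dqs so the DQ
             * window shifts below match the DQS movement actually
             * applied.
             */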
2474        mid_min = start_dqs - new_dqs;
2475        debug_cond(DLEVEL >= 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
2476                   mid_min, new_dqs);
2477
2478        if (iocfg->shift_dqs_en_when_shift_dqs) {
2479                if (start_dqs_en - mid_min > iocfg->dqs_en_delay_max)
2480                        mid_min += start_dqs_en - mid_min -
2481                                   iocfg->dqs_en_delay_max;
2482                else if (start_dqs_en - mid_min < 0)
2483                        mid_min += start_dqs_en - mid_min;
2484        }
2485        new_dqs = start_dqs - mid_min;
2486
2487        debug_cond(DLEVEL >= 1,
2488                   "vfifo_center: start_dqs=%d start_dqs_en=%d new_dqs=%d mid_min=%d\n",
2489                   start_dqs,
2490                   iocfg->shift_dqs_en_when_shift_dqs ? start_dqs_en : -1,
2491                   new_dqs, mid_min);
2492
2493        /* Add delay to bring centre of all DQ windows to the same "level". */
2494        center_dq_windows(0, left_edge, right_edge, mid_min, orig_mid_min,
2495                          min_index, test_bgn, &dq_margin, &dqs_margin);
2496
2497        /* Move DQS-en */
2498        if (iocfg->shift_dqs_en_when_shift_dqs) {
2499                final_dqs_en = start_dqs_en - mid_min;
2500                scc_mgr_set_dqs_en_delay(rw_group, final_dqs_en);
2501                scc_mgr_load_dqs(rw_group);
2502        }
2503
2504        /* Move DQS */
2505        scc_mgr_set_dqs_bus_in_delay(rw_group, new_dqs);
2506        scc_mgr_load_dqs(rw_group);
2507        debug_cond(DLEVEL >= 2,
2508                   "%s:%d vfifo_center: dq_margin=%d dqs_margin=%d\n",
2509                   __func__, __LINE__, dq_margin, dqs_margin);
2510
2511        /*
2512         * Do not remove this line as it makes sure all of our decisions
2513         * have been applied. Apply the update bit.
2514         */
2515        writel(0, &sdr_scc_mgr->update);
2516
2517        if ((dq_margin < 0) || (dqs_margin < 0))
2518                return -EINVAL;
2519
2520        return 0;
2521}
2522
2523/**
2524 * rw_mgr_mem_calibrate_guaranteed_write() - Perform guaranteed write into the device
2525 * @rw_group:   Read/Write Group
2526 * @phase:      DQ/DQS phase
2527 *
2528 * Because initially no communication can be reliably performed with the memory
2529 * device, the sequencer uses a guaranteed write mechanism to write data into
2530 * the memory device.
2531 */
2532static int rw_mgr_mem_calibrate_guaranteed_write(const u32 rw_group,
2533                                                 const u32 phase)
2534{
2535        int ret;
2536
2537        /* Set a particular DQ/DQS phase. */
2538        scc_mgr_set_dqdqs_output_phase_all_ranks(rw_group, phase);
2539
2540        debug_cond(DLEVEL >= 1, "%s:%d guaranteed write: g=%u p=%u\n",
2541                   __func__, __LINE__, rw_group, phase);
2542
2543        /*
2544         * Altera EMI_RM 2015.05.04 :: Figure 1-25
2545         * Load up the patterns used by read calibration using the
2546         * current DQDQS phase.
2547         */
2548        rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2549
2550        if (gbl->phy_debug_mode_flags & PHY_DEBUG_DISABLE_GUARANTEED_READ)
2551                return 0;
2552
2553        /*
2554         * Altera EMI_RM 2015.05.04 :: Figure 1-26
2555         * Back-to-Back reads of the patterns used for calibration.
2556         */
2557        ret = rw_mgr_mem_calibrate_read_test_patterns(0, rw_group, 1);
2558        if (ret)
2559                debug_cond(DLEVEL >= 1,
2560                           "%s:%d Guaranteed read test failed: g=%u p=%u\n",
2561                           __func__, __LINE__, rw_group, phase);
2562        return ret;
2563}
2564
2565/**
2566 * rw_mgr_mem_calibrate_dqs_enable_calibration() - DQS Enable Calibration
2567 * @rw_group:   Read/Write Group
2568 * @test_bgn:   Rank at which the test begins
2569 *
2570 * DQS enable calibration ensures reliable capture of the DQ signal without
2571 * glitches on the DQS line.
2572 */
2573static int rw_mgr_mem_calibrate_dqs_enable_calibration(const u32 rw_group,
2574                                                       const u32 test_bgn)
2575{
2576        /*
2577         * Altera EMI_RM 2015.05.04 :: Figure 1-27
2578         * DQS and DQS Enable Signal Relationships.
2579         */
2580
2581        /* We start at zero, so we have one less DQ to divide among. */
2582        const u32 delay_step = iocfg->io_in_delay_max /
2583                               (rwcfg->mem_dq_per_read_dqs - 1);
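            /*
             * The per-bit DQ input delays below are spread evenly from 0
             * to io_in_delay_max across the group; since the DQ path is
             * shorter than DQS, this presumably lets a single VFIFO
             * search sample a range of DQ/DQS skews.
             */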
2584        int ret;
2585        u32 i, p, d, r;
2586
2587        debug("%s:%d (%u,%u)\n", __func__, __LINE__, rw_group, test_bgn);
2588
2589        /* Try different dq_in_delays since the DQ path is shorter than DQS. */
2590        for (r = 0; r < rwcfg->mem_number_of_ranks;
2591             r += NUM_RANKS_PER_SHADOW_REG) {
2592                for (i = 0, p = test_bgn, d = 0;
2593                     i < rwcfg->mem_dq_per_read_dqs;
2594                     i++, p++, d += delay_step) {
2595                        debug_cond(DLEVEL >= 1,
2596                                   "%s:%d: g=%u r=%u i=%u p=%u d=%u\n",
2597                                   __func__, __LINE__, rw_group, r, i, p, d);
2598
2599                        scc_mgr_set_dq_in_delay(p, d);
2600                        scc_mgr_load_dq(p);
2601                }
2602
2603                writel(0, &sdr_scc_mgr->update);
2604        }
2605
2606        /*
2607         * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
2608         * dq_in_delay values
2609         */
2610        ret = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(rw_group);
2611
2612        debug_cond(DLEVEL >= 1,
2613                   "%s:%d: g=%u found=%u; Resetting delay chain to zero\n",
2614                   __func__, __LINE__, rw_group, !ret);
2615
2616        for (r = 0; r < rwcfg->mem_number_of_ranks;
2617             r += NUM_RANKS_PER_SHADOW_REG) {
2618                scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
2619                writel(0, &sdr_scc_mgr->update);
2620        }
2621
2622        return ret;
2623}
2624
2625/**
2626 * rw_mgr_mem_calibrate_dq_dqs_centering() - Centering DQ/DQS
2627 * @rw_group:           Read/Write Group
2628 * @test_bgn:           Rank at which the test begins
2629 * @use_read_test:      Perform a read test
2630 * @update_fom:         Update FOM
2631 *
2632 * The centering DQ/DQS stage attempts to align DQ and DQS signals on reads
2633 * within a group.
2634 */
2635static int
2636rw_mgr_mem_calibrate_dq_dqs_centering(const u32 rw_group, const u32 test_bgn,
2637                                      const int use_read_test,
2638                                      const int update_fom)
2639
2640{
2641        int ret, grp_calibrated;
2642        u32 rank_bgn, sr;
2643
2644        /*
2645         * Altera EMI_RM 2015.05.04 :: Figure 1-28
2646         * Read per-bit deskew can be done on a per shadow register basis.
2647         */
2648        grp_calibrated = 1;
2649        for (rank_bgn = 0, sr = 0;
2650             rank_bgn < rwcfg->mem_number_of_ranks;
2651             rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
2652                ret = rw_mgr_mem_calibrate_vfifo_center(rank_bgn, rw_group,
2653                                                        test_bgn,
2654                                                        use_read_test,
2655                                                        update_fom);
2656                if (!ret)
2657                        continue;
2658
2659                grp_calibrated = 0;
2660        }
2661
2662        if (!grp_calibrated)
2663                return -EIO;
2664
2665        return 0;
2666}
2667
2668/**
2669 * rw_mgr_mem_calibrate_vfifo() - Calibrate the read valid prediction FIFO
2670 * @rw_group:           Read/Write Group
2671 * @test_bgn:           Rank at which the test begins
2672 *
2673 * Stage 1: Calibrate the read valid prediction FIFO.
2674 *
2675 * This function implements UniPHY calibration Stage 1, as explained in
2676 * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2677 *
2678 * - Read valid prediction consists of finding:
2679 *   - DQS enable phase and DQS enable delay (DQS Enable Calibration)
2680 *   - DQS input phase and DQS input delay (DQ/DQS Centering)
2681 * - We also do a per-bit deskew on the DQ lines.
2682 */
2683static int rw_mgr_mem_calibrate_vfifo(const u32 rw_group, const u32 test_bgn)
2684{
2685        u32 p, d;
2686        u32 dtaps_per_ptap;
2687        u32 failed_substage;
2688
2689        int ret;
2690
2691        debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2692
2693        /* Update info for sims */
2694        reg_file_set_group(rw_group);
2695        reg_file_set_stage(CAL_STAGE_VFIFO);
2696        reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
2697
2698        failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
2699
2700        /* Determine the number of delay taps for each phase tap. */
2701        dtaps_per_ptap = DIV_ROUND_UP(iocfg->delay_per_opa_tap,
2702                                      iocfg->delay_per_dqs_en_dchain_tap) - 1;
2703
2704        for (d = 0; d <= dtaps_per_ptap; d += 2) {
2705                /*
2706                 * In RLDRAMX we may be changing the delay of pins in
2707                 * the same write rw_group but outside of the current read
2708                 * rw_group, but that's OK because we haven't calibrated
2709                 * the output side yet.
2710                 */
2711                if (d > 0) {
2712                        scc_mgr_apply_group_all_out_delay_add_all_ranks(
2713                                                                rw_group, d);
2714                }
2715
2716                for (p = 0; p <= iocfg->dqdqs_out_phase_max; p++) {
2717                        /* 1) Guaranteed Write */
2718                        ret = rw_mgr_mem_calibrate_guaranteed_write(rw_group, p);
2719                        if (ret)
2720                                break;
2721
2722                        /* 2) DQS Enable Calibration */
2723                        ret = rw_mgr_mem_calibrate_dqs_enable_calibration(rw_group,
2724                                                                          test_bgn);
2725                        if (ret) {
2726                                failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
2727                                continue;
2728                        }
2729
2730                        /* 3) Centering DQ/DQS */
2731                        /*
2732                         * If doing read after write calibration, do not update
2733                         * FOM now. Do it then.
2734                         */
2735                        ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group,
2736                                                                test_bgn, 1, 0);
2737                        if (ret) {
2738                                failed_substage = CAL_SUBSTAGE_VFIFO_CENTER;
2739                                continue;
2740                        }
2741
2742                        /* All done. */
2743                        goto cal_done_ok;
2744                }
2745        }
2746
2747        /* Calibration Stage 1 failed. */
2748        set_failing_group_stage(rw_group, CAL_STAGE_VFIFO, failed_substage);
2749        return 0;
2750
2751        /* Calibration Stage 1 completed OK. */
2752cal_done_ok:
2753        /*
2754         * Reset the delay chains back to zero if they have moved
2755         * more than one step (d advances in steps of 2, so anything
2756         * above 2 means the output delays were shifted).
2757         */
2758        if (d > 2)
2759                scc_mgr_zero_group(rw_group, 1);
2760
2761        return 1;
2762}
2763
2764/**
2765 * rw_mgr_mem_calibrate_vfifo_end() - DQ/DQS Centering.
2766 * @rw_group:           Read/Write Group
2767 * @test_bgn:           Rank at which the test begins
2768 *
2769 * Stage 3: DQ/DQS Centering.
2770 *
2771 * This function implements UniPHY calibration Stage 3, as explained in
2772 * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
2773 */
2774static int rw_mgr_mem_calibrate_vfifo_end(const u32 rw_group,
2775                                          const u32 test_bgn)
2776{
2777        int ret;
2778
2779        debug("%s:%d %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2780
2781        /* Update info for sims. */
2782        reg_file_set_group(rw_group);
2783        reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
2784        reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
2785
2786        ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group, test_bgn, 0, 1);
2787        if (ret)
2788                set_failing_group_stage(rw_group,
2789                                        CAL_STAGE_VFIFO_AFTER_WRITES,
2790                                        CAL_SUBSTAGE_VFIFO_CENTER);
2791        return ret;
2792}
2793
2794/**
2795 * rw_mgr_mem_calibrate_lfifo() - Minimize latency
2796 *
2797 * Stage 4: Minimize latency.
2798 *
2799 * This function implements UniPHY calibration Stage 4, as explained in
2800 * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
2801 * Calibrate LFIFO to find smallest read latency.
2802 */
2803static u32 rw_mgr_mem_calibrate_lfifo(void)
2804{
2805        int found_one = 0;
2806
2807        debug("%s:%d\n", __func__, __LINE__);
2808
2809        /* Update info for sims. */
2810        reg_file_set_stage(CAL_STAGE_LFIFO);
2811        reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
2812
2813        /* Load up the patterns used by read calibration for all ranks */
2814        rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2815
2816        do {
2817                writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2818                debug_cond(DLEVEL >= 2, "%s:%d lfifo: read_lat=%u\n",
2819                           __func__, __LINE__, gbl->curr_read_lat);
2820
2821                if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, NUM_READ_TESTS,
2822                                                              PASS_ALL_BITS, 1))
2823                        break;
2824
2825                found_one = 1;
2826                /*
2827                 * Reduce read latency and see if things are
2828                 * working correctly.
2829                 */
2830                gbl->curr_read_lat--;
2831        } while (gbl->curr_read_lat > 0);
2832
2833        /* Reset the fifos to get pointers to known state. */
2834        writel(0, &phy_mgr_cmd->fifo_reset);
2835
2836        if (found_one) {
2837                /* Add a fudge factor to the read latency that was determined */
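                /*
                 * The loop above exits with curr_read_lat at the first
                 * failing latency (or at 0 if reads kept passing), so the
                 * +2 below lands one AFI clock above the smallest passing
                 * latency in the usual failing-exit case.
                 */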
2838                gbl->curr_read_lat += 2;
2839                writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2840                debug_cond(DLEVEL >= 2,
2841                           "%s:%d lfifo: success: using read_lat=%u\n",
2842                           __func__, __LINE__, gbl->curr_read_lat);
2843        } else {
2844                set_failing_group_stage(0xff, CAL_STAGE_LFIFO,
2845                                        CAL_SUBSTAGE_READ_LATENCY);
2846
2847                debug_cond(DLEVEL >= 2,
2848                           "%s:%d lfifo: failed at initial read_lat=%u\n",
2849                           __func__, __LINE__, gbl->curr_read_lat);
2850        }
2851
2852        return found_one;
2853}
2854
2855/**
2856 * search_window() - Search for the window (or part of it) with a DM/DQS shift
2857 * @search_dm:          If 1, search for the DM shift, if 0, search for DQS shift
2858 * @rank_bgn:           Rank number
2859 * @write_group:        Write Group
2860 * @bgn_curr:           Current window begin
2861 * @end_curr:           Current window end
2862 * @bgn_best:           Current best window begin
2863 * @end_best:           Current best window end
2864 * @win_best:           Size of the best window
2865 * @new_dqs:            New DQS value (only applicable if search_dm = 0).
2866 *
2867 * Search for the window (or part of it) with a DM/DQS shift.
2868 */
2869static void search_window(const int search_dm,
2870                          const u32 rank_bgn, const u32 write_group,
2871                          int *bgn_curr, int *end_curr, int *bgn_best,
2872                          int *end_best, int *win_best, int new_dqs)
2873{
2874        u32 bit_chk;
2875        const int max = iocfg->io_out1_delay_max - new_dqs;
2876        int d, di;
2877
2878        /* Search for the window (or part of it) with a DM/DQS shift. */
2879        for (di = max; di >= 0; di -= DELTA_D) {
2880                if (search_dm) {
2881                        d = di;
2882                        scc_mgr_apply_group_dm_out1_delay(d);
2883                } else {
2884                        /* For DQS, we go from 0...max */
2885                        d = max - di;
2886                        /*
2887                         * Note: this only shifts DQS, so we may be limiting
2888                         *       ourselves to the width of DQ unnecessarily.
2889                         */
2890                        scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2891                                                                d + new_dqs);
2892                }
2893
2894                writel(0, &sdr_scc_mgr->update);
2895
2896                if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
2897                                                    PASS_ALL_BITS, &bit_chk,
2898                                                    0)) {
2899                        /* Set current end of the window. */
2900                        *end_curr = search_dm ? -d : d;
2901
2902                        /*
2903                         * If a starting edge of our window has not been seen,
2904                         * this is our current start of the window.
2905                         */
2906                        if (*bgn_curr == iocfg->io_out1_delay_max + 1)
2907                                *bgn_curr = search_dm ? -d : d;
2908
2909                        /*
2910                         * If the current window is bigger than the best seen
2911                         * so far, record it as the best window.
2912                         */
2913                        if ((*end_curr - *bgn_curr + 1) > *win_best) {
2914                                *win_best = *end_curr - *bgn_curr + 1;
2915                                *bgn_best = *bgn_curr;
2916                                *end_best = *end_curr;
2917                        }
2918                } else {
2919                        /* We just saw a failing test. Reset temp edge. */
2920                        *bgn_curr = iocfg->io_out1_delay_max + 1;
2921                        *end_curr = iocfg->io_out1_delay_max + 1;
2922
2923                        /* Early exit is only applicable to DQS. */
2924                        if (search_dm)
2925                                continue;
2926
2927                        /*
2928                         * Early exit optimization: if the remaining delay
2929                         * chain space is less than already seen largest
2930                         * window we can exit.
2931                         */
2932                        if (*win_best - 1 > iocfg->io_out1_delay_max - new_dqs - d)
2933                                break;
2934                }
2935        }
2936}
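
/*
 * Usage note: search_window() is called twice from
 * rw_mgr_mem_calibrate_writes_center() below, first with search_dm = 1
 * to sweep the DM out1 delay, then with search_dm = 0 to continue the
 * sweep by shifting DQS upward from new_dqs.
 */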
2937
2938/**
2939 * rw_mgr_mem_calibrate_writes_center() - Center all windows
2940 * @rank_bgn:           Rank number
2941 * @write_group:        Write group
2942 * @test_bgn:           Rank at which the test begins
2943 *
2944 * Center all windows. Do per-bit-deskew to possibly increase size of
2945 * certain windows.
2946 */
2947static int
2948rw_mgr_mem_calibrate_writes_center(const u32 rank_bgn, const u32 write_group,
2949                                   const u32 test_bgn)
2950{
2951        int i;
2952        u32 sticky_bit_chk;
2953        u32 min_index;
2954        int left_edge[rwcfg->mem_dq_per_write_dqs];
2955        int right_edge[rwcfg->mem_dq_per_write_dqs];
2956        int mid;
2957        int mid_min, orig_mid_min;
2958        int new_dqs, start_dqs;
2959        int dq_margin, dqs_margin, dm_margin;
2960        int bgn_curr = iocfg->io_out1_delay_max + 1;
2961        int end_curr = iocfg->io_out1_delay_max + 1;
2962        int bgn_best = iocfg->io_out1_delay_max + 1;
2963        int end_best = iocfg->io_out1_delay_max + 1;
2964        int win_best = 0;
2965
2966        int ret;
2967
2968        debug("%s:%d %u %u\n", __func__, __LINE__, write_group, test_bgn);
2969
2970        dm_margin = 0;
2971
2972        start_dqs = readl((SDR_PHYGRP_SCCGRP_ADDRESS |
2973                          SCC_MGR_IO_OUT1_DELAY_OFFSET) +
2974                          (rwcfg->mem_dq_per_write_dqs << 2));
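        /*
         * Note: the DQS I/O OUT1 delay register is assumed to sit right
         * after the per-DQ OUT1 delay registers, hence the register
         * index mem_dq_per_write_dqs; << 2 converts it to a byte offset.
         */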
2975
2976        /* Per-bit deskew. */
2977
2978        /*
2979         * Set the left and right edge of each bit to an illegal value.
2980         * Use (iocfg->io_out1_delay_max + 1) as an illegal value.
2981         */
2982        sticky_bit_chk = 0;
2983        for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) {
2984                left_edge[i]  = iocfg->io_out1_delay_max + 1;
2985                right_edge[i] = iocfg->io_out1_delay_max + 1;
2986        }
2987
2988        /* Search for the left edge of the window for each bit. */
2989        search_left_edge(1, rank_bgn, write_group, 0, test_bgn,
2990                         &sticky_bit_chk,
2991                         left_edge, right_edge, 0);
2992
2993        /* Search for the right edge of the window for each bit. */
2994        ret = search_right_edge(1, rank_bgn, write_group, 0,
2995                                start_dqs, 0,
2996                                &sticky_bit_chk,
2997                                left_edge, right_edge, 0);
2998        if (ret) {
2999                set_failing_group_stage(test_bgn + ret - 1, CAL_STAGE_WRITES,
3000                                        CAL_SUBSTAGE_WRITES_CENTER);
3001                return -EINVAL;
3002        }
3003
3004        min_index = get_window_mid_index(1, left_edge, right_edge, &mid_min);
3005
3006        /* Determine the amount we can change DQS (which is -mid_min). */
3007        orig_mid_min = mid_min;
3008        new_dqs = start_dqs;
3009        mid_min = 0;
3010        debug_cond(DLEVEL >= 1,
3011                   "%s:%d write_center: start_dqs=%d new_dqs=%d mid_min=%d\n",
3012                   __func__, __LINE__, start_dqs, new_dqs, mid_min);
3013
3014        /* Add delay to bring centre of all DQ windows to the same "level". */
3015        center_dq_windows(1, left_edge, right_edge, mid_min, orig_mid_min,
3016                          min_index, 0, &dq_margin, &dqs_margin);
3017
3018        /* Move DQS */
3019        scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3020        writel(0, &sdr_scc_mgr->update);
3021
3022        /* Centre DM */
3023        debug_cond(DLEVEL >= 2, "%s:%d write_center: DM\n", __func__, __LINE__);
3024
3025        /*
3026         * Set the left and right edge of each bit to an illegal value.
3027         * Use (iocfg->io_out1_delay_max + 1) as an illegal value.
3028         */
3029        left_edge[0]  = iocfg->io_out1_delay_max + 1;
3030        right_edge[0] = iocfg->io_out1_delay_max + 1;
3031
3032        /* Search for the window (or part of it) with a DM shift. */
3033        search_window(1, rank_bgn, write_group, &bgn_curr, &end_curr,
3034                      &bgn_best, &end_best, &win_best, 0);
3035
3036        /* Reset DM delay chains to 0. */
3037        scc_mgr_apply_group_dm_out1_delay(0);
3038
3039        /*
3040         * Check to see if the current window nudges up against 0 delay.
3041         * If so, we need to continue the search by shifting DQS;
3042         * otherwise the DQS search begins as a new search.
3043         */
3044        if (end_curr != 0) {
3045                bgn_curr = iocfg->io_out1_delay_max + 1;
3046                end_curr = iocfg->io_out1_delay_max + 1;
3047        }
3048
3049        /* Search for the window (or part of it) with DQS shifts. */
3050        search_window(0, rank_bgn, write_group, &bgn_curr, &end_curr,
3051                      &bgn_best, &end_best, &win_best, new_dqs);
3052
3053        /* Assign left and right edge for cal and reporting. */
3054        left_edge[0] = -1 * bgn_best;
3055        right_edge[0] = end_best;
3056
3057        debug_cond(DLEVEL >= 2, "%s:%d dm_calib: left=%d right=%d\n",
3058                   __func__, __LINE__, left_edge[0], right_edge[0]);
3059
3060        /* Move DQS (back to orig). */
3061        scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3062
3063        /* Move DM */
3064
3065        /* Find middle of window for the DM bit. */
3066        mid = (left_edge[0] - right_edge[0]) / 2;
3067
3068        /* Only move right, since we are not moving DQS/DQ. */
3069        if (mid < 0)
3070                mid = 0;
3071
3072        /* dm_margin should fail if we never find a window. */
3073        if (win_best == 0)
3074                dm_margin = -1;
3075        else
3076                dm_margin = left_edge[0] - mid;
3077
3078        scc_mgr_apply_group_dm_out1_delay(mid);
3079        writel(0, &sdr_scc_mgr->update);
3080
3081        debug_cond(DLEVEL >= 2,
3082                   "%s:%d dm_calib: left=%d right=%d mid=%d dm_margin=%d\n",
3083                   __func__, __LINE__, left_edge[0], right_edge[0],
3084                   mid, dm_margin);
3085        /* Export values. */
3086        gbl->fom_out += dq_margin + dqs_margin;
3087
3088        debug_cond(DLEVEL >= 2,
3089                   "%s:%d write_center: dq_margin=%d dqs_margin=%d dm_margin=%d\n",
3090                   __func__, __LINE__, dq_margin, dqs_margin, dm_margin);
3091
3092        /*
3093         * Do not remove this line as it makes sure all of our
3094         * decisions have been applied.
3095         */
3096        writel(0, &sdr_scc_mgr->update);
3097
3098        if ((dq_margin < 0) || (dqs_margin < 0) || (dm_margin < 0))
3099                return -EINVAL;
3100
3101        return 0;
3102}
3103
3104/**
3105 * rw_mgr_mem_calibrate_writes() - Write Calibration Part One
3106 * @rank_bgn:           Rank number
3107 * @group:              Read/Write Group
3108 * @test_bgn:           Rank at which the test begins
3109 *
3110 * Stage 2: Write Calibration Part One.
3111 *
3112 * This function implements UniPHY calibration Stage 2, as explained in
3113 * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
3114 */
3115static int rw_mgr_mem_calibrate_writes(const u32 rank_bgn, const u32 group,
3116                                       const u32 test_bgn)
3117{
3118        int ret;
3119
3120        /* Update info for sims */
3121        debug("%s:%d %u %u\n", __func__, __LINE__, group, test_bgn);
3122
3123        reg_file_set_group(group);
3124        reg_file_set_stage(CAL_STAGE_WRITES);
3125        reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
3126
3127        ret = rw_mgr_mem_calibrate_writes_center(rank_bgn, group, test_bgn);
3128        if (ret)
3129                set_failing_group_stage(group, CAL_STAGE_WRITES,
3130                                        CAL_SUBSTAGE_WRITES_CENTER);
3131
3132        return ret;
3133}
3134
3135/**
3136 * mem_precharge_and_activate() - Precharge all banks and activate
3137 *
3138 * Precharge all banks and activate row 0 in bank "000..." and bank "111...".
3139 */
3140static void mem_precharge_and_activate(void)
3141{
3142        int r;
3143
3144        for (r = 0; r < rwcfg->mem_number_of_ranks; r++) {
3145                /* Set rank. */
3146                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
3147
3148                /* Precharge all banks. */
3149                writel(rwcfg->precharge_all, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3150                                             RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3151
3152                writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
3153                writel(rwcfg->activate_0_and_1_wait1,
3154                       &sdr_rw_load_jump_mgr_regs->load_jump_add0);
3155
3156                writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
3157                writel(rwcfg->activate_0_and_1_wait2,
3158                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
3159
3160                /* Activate rows. */
3161                writel(rwcfg->activate_0_and_1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3162                                                RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3163        }
3164}
3165
3166/**
3167 * mem_init_latency() - Configure memory RLAT and WLAT settings
3168 *
3169 * Configure memory RLAT and WLAT parameters.
3170 */
3171static void mem_init_latency(void)
3172{
3173        /*
3174         * For AV/CV, LFIFO is hardened and always runs at full rate
3175         * so max latency in AFI clocks, used here, is correspondingly
3176         * smaller.
3177         */
3178        const u32 max_latency = (1 << misccfg->max_latency_count_width) - 1;
3179        u32 rlat, wlat;
3180
3181        debug("%s:%d\n", __func__, __LINE__);
3182
3183        /*
3184         * Read in write latency.
3185         * WL for Hard PHY does not include additive latency.
3186         */
3187        wlat = readl(&data_mgr->t_wl_add);
3188        wlat += readl(&data_mgr->mem_t_add);
3189
3190        gbl->rw_wl_nop_cycles = wlat - 1;
3191
3192        /* Read in read latency. */
3193        rlat = readl(&data_mgr->t_rl_add);
3194
3195        /* Set a pretty high read latency initially. */
3196        gbl->curr_read_lat = rlat + 16;
3197        if (gbl->curr_read_lat > max_latency)
3198                gbl->curr_read_lat = max_latency;
3199
3200        writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3201
3202        /* Advertise write latency. */
3203        writel(wlat, &phy_mgr_cfg->afi_wlat);
3204}
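
/*
 * Worked example for mem_init_latency() (illustrative values only):
 * with t_wl_add = 5 and mem_t_add = 2, wlat = 7 and rw_wl_nop_cycles = 6;
 * with t_rl_add = 7, the initial curr_read_lat is 7 + 16 = 23, clamped
 * to max_latency if the latency counter cannot hold it.
 */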
3205
3206/**
3207 * mem_skip_calibrate() - Set VFIFO and LFIFO to instant-on settings
3208 *
3209 * Set VFIFO and LFIFO to instant-on settings in skip calibration mode.
3210 */
3211static void mem_skip_calibrate(void)
3212{
3213        u32 vfifo_offset;
3214        u32 i, j, r;
3215
3216        debug("%s:%d\n", __func__, __LINE__);
3217        /* Need to update every shadow register set used by the interface */
3218        for (r = 0; r < rwcfg->mem_number_of_ranks;
3219             r += NUM_RANKS_PER_SHADOW_REG) {
3220                /*
3221                 * Set output phase alignment settings appropriate for
3222                 * skip calibration.
3223                 */
3224                for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
3225                        scc_mgr_set_dqs_en_phase(i, 0);
3226                        if (iocfg->dll_chain_length == 6)
3227                                scc_mgr_set_dqdqs_output_phase(i, 6);
3228                        else
3229                                scc_mgr_set_dqdqs_output_phase(i, 7);
3230                        /*
3231                         * Case:33398
3232                         *
3233                         * Write data arrives to the I/O two cycles before write
3234                         * latency is reached (720 deg).
3235                         *   -> due to bit-slip in a/c bus
3236                         *   -> to allow board skew where dqs is longer than ck
3237                         *      -> how often can this happen!?
3238                         *      -> can claim back some ptaps for high freq
3239                         *       support if we can relax this, but i digress...
3240                         *
3241                         * The write_clk leads mem_ck by 90 deg
3242                         * The minimum ptap of the OPA is 180 deg
3243                         * Each ptap has (360 / iocfg->dll_chain_length) deg of delay
3244                         * The write_clk is always delayed by 2 ptaps
3245                         *
3246                         * Hence, to make DQS aligned to CK, we need to delay
3247                         * DQS by (in degrees):
3248                         *
3249                         *    720 - 90 - 180 - 2 * (360 / iocfg->dll_chain_length)
3250                         *
3251                         * Dividing the above by (360 / iocfg->dll_chain_length)
3252                         * gives us the number of ptaps, which simplifies to:
3253                         *
3254                         *    (1.25 * iocfg->dll_chain_length - 2)
3255                         */
3256                        scc_mgr_set_dqdqs_output_phase(i,
3257                                       ((125 * iocfg->dll_chain_length) / 100) - 2);
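                        /*
                         * Integer-math check (illustrative): for
                         * dll_chain_length = 8, (125 * 8) / 100 - 2 =
                         * 10 - 2 = 8 ptaps, matching 1.25 * 8 - 2.
                         */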
3258                }
3259                writel(0xff, &sdr_scc_mgr->dqs_ena);
3260                writel(0xff, &sdr_scc_mgr->dqs_io_ena);
3261
3262                for (i = 0; i < rwcfg->mem_if_write_dqs_width; i++) {
3263                        writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3264                                  SCC_MGR_GROUP_COUNTER_OFFSET);
3265                }
3266                writel(0xff, &sdr_scc_mgr->dq_ena);
3267                writel(0xff, &sdr_scc_mgr->dm_ena);
3268                writel(0, &sdr_scc_mgr->update);
3269        }
3270
3271        /* Compensate for simulation model behaviour */
3272        for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
3273                scc_mgr_set_dqs_bus_in_delay(i, 10);
3274                scc_mgr_load_dqs(i);
3275        }
3276        writel(0, &sdr_scc_mgr->update);
3277
3278        /*
3279         * Arria V has hard FIFOs that can only be initialized by
3280         * incrementing them in the sequencer.
3281         */
3282        vfifo_offset = misccfg->calib_vfifo_offset;
3283        for (j = 0; j < vfifo_offset; j++)
3284                writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
3285        writel(0, &phy_mgr_cmd->fifo_reset);
3286
3287        /*
3288         * For Arria V and Cyclone V with hard LFIFO, we get the skip-cal
3289         * setting from a generation-time constant.
3290         */
3291        gbl->curr_read_lat = misccfg->calib_lfifo_offset;
3292        writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3293}
3294
3295/**
3296 * mem_calibrate() - Memory calibration entry point.
3297 *
3298 * Perform memory calibration.
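 *
 * Unless calibration is skipped, the stages run in order per write group:
 * Stage 1 VFIFO (per read group), Stage 2 write centering (per shadow
 * register set), Stage 3 VFIFO after writes (per read group), and then
 * Stage 4 LFIFO once for the whole interface.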
3299 */
3300static u32 mem_calibrate(void)
3301{
3302        u32 i;
3303        u32 rank_bgn, sr;
3304        u32 write_group, write_test_bgn;
3305        u32 read_group, read_test_bgn;
3306        u32 run_groups, current_run;
3307        u32 failing_groups = 0;
3308        u32 group_failed = 0;
3309
3310        const u32 rwdqs_ratio = rwcfg->mem_if_read_dqs_width /
3311                                rwcfg->mem_if_write_dqs_width;
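        /*
         * Example (illustrative widths): a read DQS width of 8 and a
         * write DQS width of 4 give rwdqs_ratio = 2, so read groups 0..1
         * are calibrated together with write group 0, 2..3 with write
         * group 1, and so on.
         */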
3312
3313        debug("%s:%d\n", __func__, __LINE__);
3314
3315        /* Initialize the data settings */
3316        gbl->error_substage = CAL_SUBSTAGE_NIL;
3317        gbl->error_stage = CAL_STAGE_NIL;
3318        gbl->error_group = 0xff;
3319        gbl->fom_in = 0;
3320        gbl->fom_out = 0;
3321
3322        /* Initialize WLAT and RLAT. */
3323        mem_init_latency();
3324
3325        /* Initialize bit slips. */
3326        mem_precharge_and_activate();
3327
3328        for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
3329                writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3330                          SCC_MGR_GROUP_COUNTER_OFFSET);
3331                /* Only needed once to set all groups, pins, DQ, DQS, DM. */
3332                if (i == 0)
3333                        scc_mgr_set_hhp_extras();
3334
3335                scc_set_bypass_mode(i);
3336        }
3337
3338        /* Calibration is skipped. */
3339        if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
3340                /*
3341                 * Set VFIFO and LFIFO to instant-on settings in skip
3342                 * calibration mode.
3343                 */
3344                mem_skip_calibrate();
3345
3346                /*
3347                 * Do not remove this line as it makes sure all of our
3348                 * decisions have been applied.
3349                 */
3350                writel(0, &sdr_scc_mgr->update);
3351                return 1;
3352        }
3353
3354        /* Calibration is not skipped. */
3355        for (i = 0; i < NUM_CALIB_REPEAT; i++) {
3356                /*
3357                 * Zero all delay chain/phase settings for all
3358                 * groups and all shadow register sets.
3359                 */
3360                scc_mgr_zero_all();
3361
3362                run_groups = ~0;
3363
3364                for (write_group = 0, write_test_bgn = 0; write_group
3365                        < rwcfg->mem_if_write_dqs_width; write_group++,
3366                        write_test_bgn += rwcfg->mem_dq_per_write_dqs) {
3367                        /* Initialize the group failure */
3368                        group_failed = 0;
3369
3370                        current_run = run_groups & ((1 <<
3371                                RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1);
3372                        run_groups = run_groups >>
3373                                RW_MGR_NUM_DQS_PER_WRITE_GROUP;
3374
3375                        if (current_run == 0)
3376                                continue;
3377
3378                        writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
3379                                            SCC_MGR_GROUP_COUNTER_OFFSET);
3380                        scc_mgr_zero_group(write_group, 0);
3381
3382                        for (read_group = write_group * rwdqs_ratio,
3383                             read_test_bgn = 0;
3384                             read_group < (write_group + 1) * rwdqs_ratio;
3385                             read_group++,
3386                             read_test_bgn += rwcfg->mem_dq_per_read_dqs) {
3387                                if (STATIC_CALIB_STEPS & CALIB_SKIP_VFIFO)
3388                                        continue;
3389
3390                                /* Calibrate the VFIFO */
3391                                if (rw_mgr_mem_calibrate_vfifo(read_group,
3392                                                               read_test_bgn))
3393                                        continue;
3394
3395                                if (!(gbl->phy_debug_mode_flags &
3396                                      PHY_DEBUG_SWEEP_ALL_GROUPS))
3397                                        return 0;
3398
3399                                /* The group failed, we're done. */
3400                                goto grp_failed;
3401                        }
3402
3403                        /* Calibrate the output side */
3404                        for (rank_bgn = 0, sr = 0;
3405                             rank_bgn < rwcfg->mem_number_of_ranks;
3406                             rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
3407                                if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
3408                                        continue;
3409
3410                                /* Not needed in quick mode! */
3411                                if (STATIC_CALIB_STEPS &
3412                                    CALIB_SKIP_DELAY_SWEEPS)
3413                                        continue;
3414
3415                                /* Calibrate WRITEs */
3416                                if (!rw_mgr_mem_calibrate_writes(rank_bgn,
3417                                                                 write_group,
3418                                                                 write_test_bgn))
3419                                        continue;
3420
3421                                group_failed = 1;
3422                                if (!(gbl->phy_debug_mode_flags &
3423                                      PHY_DEBUG_SWEEP_ALL_GROUPS))
3424                                        return 0;
3425                        }
3426
3427                        /* Some group failed, we're done. */
3428                        if (group_failed)
3429                                goto grp_failed;
3430
3431                        for (read_group = write_group * rwdqs_ratio,
3432                             read_test_bgn = 0;
3433                             read_group < (write_group + 1) * rwdqs_ratio;
3434                             read_group++,
3435                             read_test_bgn += rwcfg->mem_dq_per_read_dqs) {
3436                                if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
3437                                        continue;
3438
3439                                if (!rw_mgr_mem_calibrate_vfifo_end(read_group,
3440                                                                    read_test_bgn))
3441                                        continue;
3442
3443                                if (!(gbl->phy_debug_mode_flags &
3444                                      PHY_DEBUG_SWEEP_ALL_GROUPS))
3445                                        return 0;
3446
3447                                /* The group failed, we're done. */
3448                                goto grp_failed;
3449                        }
3450
3451                        /* No group failed, continue as usual. */
3452                        continue;
3453
3454grp_failed:             /* A group failed, increment the counter. */
3455                        failing_groups++;
3456                }
3457
3458                /*
3459                 * If there are any failing groups then report
3460                 * the failure.
3461                 */
3462                if (failing_groups != 0)
3463                        return 0;
3464
3465                if (STATIC_CALIB_STEPS & CALIB_SKIP_LFIFO)
3466                        continue;
3467
3468                /* Calibrate the LFIFO */
3469                if (!rw_mgr_mem_calibrate_lfifo())
3470                        return 0;
3471        }
3472
3473        /*
3474         * Do not remove this line as it makes sure all of our decisions
3475         * have been applied.
3476         */
3477        writel(0, &sdr_scc_mgr->update);
3478        return 1;
3479}
3480
3481/**
3482 * run_mem_calibrate() - Perform memory calibration
3483 *
3484 * This function triggers the entire memory calibration procedure.
3485 */
3486static int run_mem_calibrate(void)
3487{
3488        int pass;
3489        u32 ctrl_cfg;
3490
3491        debug("%s:%d\n", __func__, __LINE__);
3492
3493        /* Reset pass/fail status shown on afi_cal_success/fail */
3494        writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status);
3495
3496        /* Stop tracking manager. */
3497        ctrl_cfg = readl(&sdr_ctrl->ctrl_cfg);
3498        writel(ctrl_cfg & ~SDR_CTRLGRP_CTRLCFG_DQSTRKEN_MASK,
3499               &sdr_ctrl->ctrl_cfg);
3500
3501        phy_mgr_initialize();
3502        rw_mgr_mem_initialize();
3503
3504        /* Perform the actual memory calibration. */
3505        pass = mem_calibrate();
3506
3507        mem_precharge_and_activate();
3508        writel(0, &phy_mgr_cmd->fifo_reset);
3509
3510        /* Handoff. */
3511        rw_mgr_mem_handoff();
3512        /*
3513         * In Hard PHY this is a 2-bit control:
3514         * 0: AFI Mux Select
3515         * 1: DDIO Mux Select
3516         */
3517        writel(0x2, &phy_mgr_cfg->mux_sel);
3518
3519        /* Start tracking manager. */
3520        writel(ctrl_cfg, &sdr_ctrl->ctrl_cfg);
3521
3522        return pass;
3523}
3524
3525/**
3526 * debug_mem_calibrate() - Report result of memory calibration
3527 * @pass:       Value indicating whether calibration passed or failed
3528 *
3529 * This function reports the results of the memory calibration
3530 * and writes debug information into the register file.
3531 */
3532static void debug_mem_calibrate(int pass)
3533{
3534        u32 debug_info;
3535
3536        if (pass) {
3537                printf("%s: CALIBRATION PASSED\n", __FILE__);
3538
3539                gbl->fom_in /= 2;
3540                gbl->fom_out /= 2;
3541
3542                if (gbl->fom_in > 0xff)
3543                        gbl->fom_in = 0xff;
3544
3545                if (gbl->fom_out > 0xff)
3546                        gbl->fom_out = 0xff;
3547
3548                /* Update the FOM in the register file */
3549                debug_info = gbl->fom_in;
3550                debug_info |= gbl->fom_out << 8;
3551                writel(debug_info, &sdr_reg_file->fom);
3552
3553                writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3554                writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status);
3555        } else {
3556                printf("%s: CALIBRATION FAILED\n", __FILE__);
3557
3558                /* Update the failing group/stage in the register file. */
3559                debug_info = gbl->error_stage;
3560                debug_info |= gbl->error_substage << 8;
3561                debug_info |= gbl->error_group << 16;
3562
3563                writel(debug_info, &sdr_reg_file->failing_stage);
3564                writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3565                writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status);
3571        }
3572
3573        printf("%s: Calibration complete\n", __FILE__);
3574}
3575
3576/**
3577 * hc_initialize_rom_data() - Initialize ROM data
3578 *
3579 * Initialize ROM data.
3580 */
3581static void hc_initialize_rom_data(void)
3582{
3583        unsigned int nelem = 0;
3584        const u32 *rom_init;
3585        u32 i, addr;
3586
3587        socfpga_get_seq_inst_init(&rom_init, &nelem);
3588        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET;
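        /* Each ROM entry is one 32-bit word; (i << 2) is the byte offset. */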
3589        for (i = 0; i < nelem; i++)
3590                writel(rom_init[i], addr + (i << 2));
3591
3592        socfpga_get_seq_ac_init(&rom_init, &nelem);
3593        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET;
3594        for (i = 0; i < nelem; i++)
3595                writel(rom_init[i], addr + (i << 2));
3596}
3597
3598/**
3599 * initialize_reg_file() - Initialize SDR register file
3600 *
3601 * Initialize SDR register file.
3602 */
3603static void initialize_reg_file(void)
3604{
3605        /* Initialize the register file with the correct data */
3606        writel(misccfg->reg_file_init_seq_signature, &sdr_reg_file->signature);
3607        writel(0, &sdr_reg_file->debug_data_addr);
3608        writel(0, &sdr_reg_file->cur_stage);
3609        writel(0, &sdr_reg_file->fom);
3610        writel(0, &sdr_reg_file->failing_stage);
3611        writel(0, &sdr_reg_file->debug1);
3612        writel(0, &sdr_reg_file->debug2);
3613}
3614
3615/**
3616 * initialize_hps_phy() - Initialize HPS PHY
3617 *
3618 * Initialize HPS PHY.
3619 */
3620static void initialize_hps_phy(void)
3621{
3622        u32 reg;
3623        /*
3624         * Tracking also gets configured here because it's in the
3625         * same register.
3626         */
3627        u32 trk_sample_count = 7500;
3628        /*
3629         * Format is the number of outer loops in the 16 MSB, sample
3630         * count in the 16 LSB.
3631         */
3632        u32 trk_long_idle_sample_count = (10 << 16) | 100;
3633
3634        reg = 0;
3635        reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2);
3636        reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1);
3637        reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1);
3638        reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1);
3639        reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0);
3640        reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1);
3641        /*
3642         * This field selects the intrinsic latency to RDATA_EN/FULL path.
3643         * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles.
3644         */
3645        reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0);
3646        reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(
3647                trk_sample_count);
3648        writel(reg, &sdr_ctrl->phy_ctrl0);
3649
3650        reg = 0;
3651        reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(
3652                trk_sample_count >>
3653                SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH);
3654        reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(
3655                trk_long_idle_sample_count);
3656        writel(reg, &sdr_ctrl->phy_ctrl1);
3657
3658        reg = 0;
3659        reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(
3660                trk_long_idle_sample_count >>
3661                SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH);
3662        writel(reg, &sdr_ctrl->phy_ctrl2);
3663}
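
/*
 * Sketch of the sample-count packing above: trk_sample_count = 7500
 * fits entirely in the 20-bit SAMPLECOUNT_19_0 field of phy_ctrl0, so
 * the SAMPLECOUNT_31_20 field in phy_ctrl1 receives 7500 >> 20 = 0;
 * the long-idle count is split the same way across phy_ctrl1/phy_ctrl2.
 */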
3664
3665/**
3666 * initialize_tracking() - Initialize tracking
3667 *
3668 * Initialize the register file with usable initial data.
3669 */
3670static void initialize_tracking(void)
3671{
3672        /*
3673         * Initialize the register file with the correct data.
3674         * Compute usable version of value in case we skip full
3675         * computation later.
3676         */
3677        writel(DIV_ROUND_UP(iocfg->delay_per_opa_tap,
3678                            iocfg->delay_per_dchain_tap) - 1,
3679               &sdr_reg_file->dtaps_per_ptap);
3680
3681        /* trk_sample_count */
3682        writel(7500, &sdr_reg_file->trk_sample_count);
3683
3684        /* longidle outer loop [15:0] */
3685        writel((10 << 16) | (100 << 0), &sdr_reg_file->trk_longidle);
3686
3687        /*
3688         * longidle sample count [31:24]
3689         * trfc, worst case of 933 MHz 4Gb [23:16]
3690         * trcd, worst case [15:8]
3691         * vfifo wait [7:0]
3692         */
3693        writel((243 << 24) | (14 << 16) | (10 << 8) | (4 << 0),
3694               &sdr_reg_file->delays);
3695
3696        /* mux delay */
3697        writel((rwcfg->idle << 24) | (rwcfg->activate_1 << 16) |
3698               (rwcfg->sgle_read << 8) | (rwcfg->precharge_all << 0),
3699               &sdr_reg_file->trk_rw_mgr_addr);
3700
3701        writel(rwcfg->mem_if_read_dqs_width,
3702               &sdr_reg_file->trk_read_dqs_width);
3703
3704        /* refresh_all [31:24], trefi in the low bits */
3705        writel((rwcfg->refresh_all << 24) | (1000 << 0),
3706               &sdr_reg_file->trk_rfsh);
3707}
3708
3709int sdram_calibration_full(void)
3710{
3711        struct param_type my_param;
3712        struct gbl_type my_gbl;
3713        u32 pass;
3714
3715        memset(&my_param, 0, sizeof(my_param));
3716        memset(&my_gbl, 0, sizeof(my_gbl));
3717
3718        param = &my_param;
3719        gbl = &my_gbl;
3720
3721        rwcfg = socfpga_get_sdram_rwmgr_config();
3722        iocfg = socfpga_get_sdram_io_config();
3723        misccfg = socfpga_get_sdram_misc_config();
3724
3725        /* Enable the calibration report by default. */
3726        gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
3727        /*
3728         * The guaranteed read test is enabled by default; it can be
3729         * disabled with DISABLE_GUARANTEED_READ below.
3730         */
3731#if DISABLE_GUARANTEED_READ
3732        gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
3733#endif
3734        /* Initialize the register file */
3735        initialize_reg_file();
3736
3737        /* Initialize any PHY CSR */
3738        initialize_hps_phy();
3739
3740        scc_mgr_initialize();
3741
3742        initialize_tracking();
3743
3744        printf("%s: Preparing to start memory calibration\n", __FILE__);
3745
3746        debug("%s:%d\n", __func__, __LINE__);
3747        debug_cond(DLEVEL >= 1,
3748                   "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
3749                   rwcfg->mem_number_of_ranks, rwcfg->mem_number_of_cs_per_dimm,
3750                   rwcfg->mem_dq_per_read_dqs, rwcfg->mem_dq_per_write_dqs,
3751                   rwcfg->mem_virtual_groups_per_read_dqs,
3752                   rwcfg->mem_virtual_groups_per_write_dqs);
3753        debug_cond(DLEVEL >= 1,
3754                   "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
3755                   rwcfg->mem_if_read_dqs_width, rwcfg->mem_if_write_dqs_width,
3756                   rwcfg->mem_data_width, rwcfg->mem_data_mask_width,
3757                   iocfg->delay_per_opa_tap, iocfg->delay_per_dchain_tap);
3758        debug_cond(DLEVEL >= 1, "dtap_dqsen_delay=%u, dll=%u ",
3759                   iocfg->delay_per_dqs_en_dchain_tap, iocfg->dll_chain_length);
3760        debug_cond(DLEVEL >= 1,
3761                   "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
3762                   iocfg->dqs_en_phase_max, iocfg->dqdqs_out_phase_max,
3763                   iocfg->dqs_en_delay_max, iocfg->dqs_in_delay_max);
3764        debug_cond(DLEVEL >= 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
3765                   iocfg->io_in_delay_max, iocfg->io_out1_delay_max,
3766                   iocfg->io_out2_delay_max);
3767        debug_cond(DLEVEL >= 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
3768                   iocfg->dqs_in_reserve, iocfg->dqs_out_reserve);
3769
3770        hc_initialize_rom_data();
3771
3772        /* update info for sims */
3773        reg_file_set_stage(CAL_STAGE_NIL);
3774        reg_file_set_group(0);
3775
3776        /*
3777         * Load global needed for those actions that require
3778         * some dynamic calibration support.
3779         */
3780        dyn_calib_steps = STATIC_CALIB_STEPS;
3781        /*
3782         * Load global to allow dynamic selection of delay loop settings
3783         * based on calibration mode.
3784         */
3785        if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS))
3786                skip_delay_mask = 0xff;
3787        else
3788                skip_delay_mask = 0x0;
3789
3790        pass = run_mem_calibrate();
3791        debug_mem_calibrate(pass);
3792        return pass;
3793}
3794