/* linux/drivers/edac/al_mc_edac.c */
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
   4 */
   5#include <linux/bitfield.h>
   6#include <linux/bitops.h>
   7#include <linux/edac.h>
   8#include <linux/of_irq.h>
   9#include <linux/platform_device.h>
  10#include <linux/spinlock.h>
  11#include "edac_module.h"
  12
/* Registers Offset (relative to the MC MMIO base) */
#define AL_MC_ECC_CFG           0x70
#define AL_MC_ECC_CLEAR         0x7c	/* write-to-clear error state/counters */
#define AL_MC_ECC_ERR_COUNT     0x80	/* UE count in high half, CE count in low half */
#define AL_MC_ECC_CE_ADDR0      0x84
#define AL_MC_ECC_CE_ADDR1      0x88
#define AL_MC_ECC_UE_ADDR0      0xa4
#define AL_MC_ECC_UE_ADDR1      0xa8
#define AL_MC_ECC_CE_SYND0      0x8c
#define AL_MC_ECC_CE_SYND1      0x90
#define AL_MC_ECC_CE_SYND2      0x94
#define AL_MC_ECC_UE_SYND0      0xac
#define AL_MC_ECC_UE_SYND1      0xb0
#define AL_MC_ECC_UE_SYND2      0xb4

/* Registers Fields */
/* Set when the hardware scrubber is disabled (see get_scrub_mode()). */
#define AL_MC_ECC_CFG_SCRUB_DISABLED    BIT(4)

#define AL_MC_ECC_CLEAR_UE_COUNT        BIT(3)
#define AL_MC_ECC_CLEAR_CE_COUNT        BIT(2)
#define AL_MC_ECC_CLEAR_UE_ERR          BIT(1)
#define AL_MC_ECC_CLEAR_CE_ERR          BIT(0)

#define AL_MC_ECC_ERR_COUNT_UE          GENMASK(31, 16)
#define AL_MC_ECC_ERR_COUNT_CE          GENMASK(15, 0)

/* Correctable-error address decode: rank/row in ADDR0 ... */
#define AL_MC_ECC_CE_ADDR0_RANK         GENMASK(25, 24)
#define AL_MC_ECC_CE_ADDR0_ROW          GENMASK(17, 0)

/* ... bank group/bank/column in ADDR1. */
#define AL_MC_ECC_CE_ADDR1_BG           GENMASK(25, 24)
#define AL_MC_ECC_CE_ADDR1_BANK         GENMASK(18, 16)
#define AL_MC_ECC_CE_ADDR1_COLUMN       GENMASK(11, 0)

/* Uncorrectable-error address decode (same layout as the CE registers). */
#define AL_MC_ECC_UE_ADDR0_RANK         GENMASK(25, 24)
#define AL_MC_ECC_UE_ADDR0_ROW          GENMASK(17, 0)

#define AL_MC_ECC_UE_ADDR1_BG           GENMASK(25, 24)
#define AL_MC_ECC_UE_ADDR1_BANK         GENMASK(18, 16)
#define AL_MC_ECC_UE_ADDR1_COLUMN       GENMASK(11, 0)

#define DRV_NAME "al_mc_edac"
#define AL_MC_EDAC_MSG_MAX 256
  55
/* Per-controller private state, stored in mci->pvt_info. */
struct al_mc_edac {
	void __iomem *mmio_base;	/* MC register window */
	spinlock_t lock;		/* serializes edac_mc_handle_error() reporting */
	int irq_ce;			/* CE interrupt; <= 0 means CE is polled */
	int irq_ue;			/* UE interrupt; <= 0 means UE is polled */
};
  62
  63static void prepare_msg(char *message, size_t buffer_size,
  64                        enum hw_event_mc_err_type type,
  65                        u8 rank, u32 row, u8 bg, u8 bank, u16 column,
  66                        u32 syn0, u32 syn1, u32 syn2)
  67{
  68        snprintf(message, buffer_size,
  69                 "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x",
  70                 type == HW_EVENT_ERR_UNCORRECTED ? "UE" : "CE",
  71                 rank, row, bg, bank, column, syn0, syn1, syn2);
  72}
  73
  74static int handle_ce(struct mem_ctl_info *mci)
  75{
  76        u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row;
  77        struct al_mc_edac *al_mc = mci->pvt_info;
  78        char msg[AL_MC_EDAC_MSG_MAX];
  79        u16 ce_count, column;
  80        unsigned long flags;
  81        u8 rank, bg, bank;
  82
  83        eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
  84        ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt);
  85        if (!ce_count)
  86                return 0;
  87
  88        ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0);
  89        ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1);
  90        ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0);
  91        ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1);
  92        ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2);
  93
  94        writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR,
  95                       al_mc->mmio_base + AL_MC_ECC_CLEAR);
  96
  97        dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
  98                ecccaddr0, ecccaddr1);
  99
 100        rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0);
 101        row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0);
 102
 103        bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1);
 104        bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1);
 105        column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1);
 106
 107        prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_CORRECTED,
 108                    rank, row, bg, bank, column,
 109                    ecccsyn0, ecccsyn1, ecccsyn2);
 110
 111        spin_lock_irqsave(&al_mc->lock, flags);
 112        edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 113                             ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
 114        spin_unlock_irqrestore(&al_mc->lock, flags);
 115
 116        return ce_count;
 117}
 118
 119static int handle_ue(struct mem_ctl_info *mci)
 120{
 121        u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row;
 122        struct al_mc_edac *al_mc = mci->pvt_info;
 123        char msg[AL_MC_EDAC_MSG_MAX];
 124        u16 ue_count, column;
 125        unsigned long flags;
 126        u8 rank, bg, bank;
 127
 128        eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
 129        ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt);
 130        if (!ue_count)
 131                return 0;
 132
 133        eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0);
 134        eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1);
 135        eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0);
 136        eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1);
 137        eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2);
 138
 139        writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR,
 140                       al_mc->mmio_base + AL_MC_ECC_CLEAR);
 141
 142        dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
 143                eccuaddr0, eccuaddr1);
 144
 145        rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0);
 146        row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0);
 147
 148        bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1);
 149        bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1);
 150        column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1);
 151
 152        prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED,
 153                    rank, row, bg, bank, column,
 154                    eccusyn0, eccusyn1, eccusyn2);
 155
 156        spin_lock_irqsave(&al_mc->lock, flags);
 157        edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
 158                             ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
 159        spin_unlock_irqrestore(&al_mc->lock, flags);
 160
 161        return ue_count;
 162}
 163
 164static void al_mc_edac_check(struct mem_ctl_info *mci)
 165{
 166        struct al_mc_edac *al_mc = mci->pvt_info;
 167
 168        if (al_mc->irq_ue <= 0)
 169                handle_ue(mci);
 170
 171        if (al_mc->irq_ce <= 0)
 172                handle_ce(mci);
 173}
 174
 175static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info)
 176{
 177        struct platform_device *pdev = info;
 178        struct mem_ctl_info *mci = platform_get_drvdata(pdev);
 179
 180        if (handle_ue(mci))
 181                return IRQ_HANDLED;
 182        return IRQ_NONE;
 183}
 184
 185static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info)
 186{
 187        struct platform_device *pdev = info;
 188        struct mem_ctl_info *mci = platform_get_drvdata(pdev);
 189
 190        if (handle_ce(mci))
 191                return IRQ_HANDLED;
 192        return IRQ_NONE;
 193}
 194
 195static enum scrub_type get_scrub_mode(void __iomem *mmio_base)
 196{
 197        u32 ecccfg0;
 198
 199        ecccfg0 = readl(mmio_base + AL_MC_ECC_CFG);
 200
 201        if (FIELD_GET(AL_MC_ECC_CFG_SCRUB_DISABLED, ecccfg0))
 202                return SCRUB_NONE;
 203        else
 204                return SCRUB_HW_SRC;
 205}
 206
/* devm action: free the mem_ctl_info allocated in probe (@data is the mci). */
static void devm_al_mc_edac_free(void *data)
{
	edac_mc_free(data);
}
 211
/* devm action: unregister the mci from the EDAC core (@data is &pdev->dev). */
static void devm_al_mc_edac_del(void *data)
{
	edac_mc_del_mc(data);
}
 216
/*
 * Probe: map the MC registers, allocate and register an EDAC memory
 * controller, and hook up the UE/CE interrupts when the DT provides
 * them (falling back to polling otherwise). Cleanup is ordered via
 * devm actions: the free action is registered before edac_mc_add_mc(),
 * the del action after, so teardown runs del-then-free.
 */
static int al_mc_edac_probe(struct platform_device *pdev)
{
	struct edac_mc_layer layers[1];
	struct mem_ctl_info *mci;
	struct al_mc_edac *al_mc;
	void __iomem *mmio_base;
	struct dimm_info *dimm;
	int ret;

	mmio_base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(mmio_base)) {
		dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
			PTR_ERR(mmio_base));
		return PTR_ERR(mmio_base);
	}

	/* Single chip-select layer: the controller is reported as one unit. */
	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = 1;
	layers[0].is_virt_csrow = false;
	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
			    sizeof(struct al_mc_edac));
	if (!mci)
		return -ENOMEM;

	ret = devm_add_action(&pdev->dev, devm_al_mc_edac_free, mci);
	if (ret) {
		/* Action registration failed: free manually. */
		edac_mc_free(mci);
		return ret;
	}

	platform_set_drvdata(pdev, mci);
	al_mc = mci->pvt_info;

	al_mc->mmio_base = mmio_base;

	al_mc->irq_ue = of_irq_get_byname(pdev->dev.of_node, "ue");
	if (al_mc->irq_ue <= 0)
		dev_dbg(&pdev->dev,
			"no IRQ defined for UE - falling back to polling\n");

	al_mc->irq_ce = of_irq_get_byname(pdev->dev.of_node, "ce");
	if (al_mc->irq_ce <= 0)
		dev_dbg(&pdev->dev,
			"no IRQ defined for CE - falling back to polling\n");

	/*
	 * In case both interrupts (ue/ce) are to be found, use interrupt mode.
	 * In case none of the interrupts is found, use polling mode.
	 * In case only one interrupt is found, use interrupt mode for it but
	 * keep polling mode enabled for the other.
	 */
	if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) {
		edac_op_state = EDAC_OPSTATE_POLL;
		mci->edac_check = al_mc_edac_check;
	} else {
		edac_op_state = EDAC_OPSTATE_INT;
	}

	spin_lock_init(&al_mc->lock);

	mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = DRV_NAME;
	mci->ctl_name = "al_mc";
	mci->pdev = &pdev->dev;
	mci->scrub_mode = get_scrub_mode(mmio_base);

	/* Single DIMM entry; grain = 1 (finest reported error granularity). */
	dimm = *mci->dimms;
	dimm->grain = 1;

	ret = edac_mc_add_mc(mci);
	if (ret < 0) {
		dev_err(&pdev->dev,
			"fail to add memory controller device (%d)\n",
			ret);
		return ret;
	}

	ret = devm_add_action(&pdev->dev, devm_al_mc_edac_del, &pdev->dev);
	if (ret) {
		edac_mc_del_mc(&pdev->dev);
		return ret;
	}

	/* Request IRQs last, after the mci is fully registered. */
	if (al_mc->irq_ue > 0) {
		ret = devm_request_irq(&pdev->dev,
				       al_mc->irq_ue,
				       al_mc_edac_irq_handler_ue,
				       IRQF_SHARED,
				       pdev->name,
				       pdev);
		if (ret != 0) {
			dev_err(&pdev->dev,
				"failed to request UE IRQ %d (%d)\n",
				al_mc->irq_ue, ret);
			return ret;
		}
	}

	if (al_mc->irq_ce > 0) {
		ret = devm_request_irq(&pdev->dev,
				       al_mc->irq_ce,
				       al_mc_edac_irq_handler_ce,
				       IRQF_SHARED,
				       pdev->name,
				       pdev);
		if (ret != 0) {
			dev_err(&pdev->dev,
				"failed to request CE IRQ %d (%d)\n",
				al_mc->irq_ce, ret);
			return ret;
		}
	}

	return 0;
}
 334
/* Device-tree match table. */
static const struct of_device_id al_mc_edac_of_match[] = {
	{ .compatible = "amazon,al-mc-edac", },
	{},
};

MODULE_DEVICE_TABLE(of, al_mc_edac_of_match);

/* No .remove callback: all teardown is handled by devm actions in probe. */
static struct platform_driver al_mc_edac_driver = {
	.probe = al_mc_edac_probe,
	.driver = {
		.name = DRV_NAME,
		.of_match_table = al_mc_edac_of_match,
	},
};

module_platform_driver(al_mc_edac_driver);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Talel Shenhar");
MODULE_DESCRIPTION("Amazon's Annapurna Lab's Memory Controller EDAC Driver");
 355