[PATCH dev-5.2 2/2] i2c: aspeed: add slave inactive timeout support

Tao Ren taoren at fb.com
Fri Sep 6 09:19:16 AEST 2019


On 9/5/19 3:48 PM, Jae Hyun Yoo wrote:
> Hi Tao,
> 
> On 9/5/2019 3:28 PM, Tao Ren wrote:
>> Hi Jae,
>>
>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
>>> In case of multi-master environment, if a peer master incorrectly handles
>>> a bus in the middle of a transaction, I2C hardware hangs in slave state
>>> and it can't escape from the slave state, so this commit adds slave
>>> inactive timeout support to recover the bus in the case.
>>>
>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo at linux.intel.com>
>>
>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state?
>>
>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess the watchdog timed out). Given that the issue happened only once, I didn't have a chance to collect more information (such as why the repetitive interrupt was generated, why the BMC rebooted, etc.). I will share more if I hit the problem again.
> 
> The basic concept of this implementation is to set the driver's slave
> state to ASPEED_I2C_SLAVE_INACTIVE, which avoids calling
> aspeed_i2c_reset() directly from interrupt context. Instead, when a
> master xfer happens after that, it will try bus recovery
> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset()
> if needed.
> 
> If this patch doesn't work in your case, test it again after adding
> one line of code to this driver. See below.

If bus_reset is really needed in this case, then I'd prefer to do it immediately, or in a threaded_irq_handler if it's a bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons are:

1) we don't know when userspace starts next master transfer.
2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because the bus is "idle" (I2CD14[16] == 0).


Thanks,

Tao

> 
>>
>>> ---
>>>   drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++---
>>>   1 file changed, 73 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
>>> index 89317929bee4..92e1a249b393 100644
>>> --- a/drivers/i2c/busses/i2c-aspeed.c
>>> +++ b/drivers/i2c/busses/i2c-aspeed.c
>>> @@ -70,10 +70,14 @@
>>>   #define ASPEED_I2CD_TIME_SCL_HIGH_MASK            GENMASK(19, 16)
>>>   #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT            12
>>>   #define ASPEED_I2CD_TIME_SCL_LOW_MASK            GENMASK(15, 12)
>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT    8
>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK    GENMASK(9, 8)
>>>   #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK        GENMASK(3, 0)
>>>   #define ASPEED_I2CD_TIME_SCL_REG_MAX            GENMASK(3, 0)
>>> +
>>>   /* 0x08 : I2CD Clock and AC Timing Control Register #2 */
>>> -#define ASPEED_NO_TIMEOUT_CTRL                0
>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT        0
>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK            GENMASK(4, 0)
>>>     /* 0x0c : I2CD Interrupt Control Register &
>>>    * 0x10 : I2CD Interrupt Status Register
>>> @@ -81,6 +85,7 @@
>>>    * These share bit definitions, so use the same values for the enable &
>>>    * status bits.
>>>    */
>>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT        BIT(15)
>>>   #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT            BIT(14)
>>>   #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE        BIT(13)
>>>   #define ASPEED_I2CD_INTR_SLAVE_MATCH            BIT(7)
>>> @@ -96,8 +101,11 @@
>>>            ASPEED_I2CD_INTR_SCL_TIMEOUT |                       \
>>>            ASPEED_I2CD_INTR_ABNORMAL |                       \
>>>            ASPEED_I2CD_INTR_ARBIT_LOSS)
>>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS                           \
>>> +        ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT
>>>   #define ASPEED_I2CD_INTR_ALL                               \
>>> -        (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                   \
>>> +        (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT |               \
>>> +         ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                   \
>>>            ASPEED_I2CD_INTR_BUS_RECOVER_DONE |                   \
>>>            ASPEED_I2CD_INTR_SCL_TIMEOUT |                       \
>>>            ASPEED_I2CD_INTR_ABNORMAL |                       \
>>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus {
>>>                                  u32 divisor);
>>>       unsigned long            parent_clk_frequency;
>>>       u32                bus_frequency;
>>> +    u32                hw_timeout_ms;
>>>       /* Transaction state. */
>>>       enum aspeed_i2c_master_state    master_state;
>>>       struct i2c_msg            *msgs;
>>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>>>   }
>>>     #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>> +static int aspeed_i2c_check_slave_error(u32 irq_status)
>>> +{
>>> +    if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT)
>>> +        return -EIO;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>   {
>>>       u32 command, irq_handled = 0;
>>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>       if (!slave)
>>>           return 0;
>>>   +    if (aspeed_i2c_check_slave_error(irq_status)) {
>>> +        dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n",
>>> +            irq_status);
>>> +        irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS);
>>> +        bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE;
> 
>                 aspeed_i2c_reset(bus);
> 
> I didn't add it in this patch because I wanted to avoid calling this
> reset function from interrupt context, but give it a try.
> 
> Thanks,
> Jae

I believe this will solve my problem, but let me test it and I will share the results with you later.

>>> +        return irq_handled;
>>> +    }
>>> +
>>>       command = readl(bus->base + ASPEED_I2C_CMD_REG);
>>>         /* Slave was requested, restart state machine. */
>>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
>>>       }
>>>   }
>>>   -static int aspeed_i2c_is_irq_error(u32 irq_status)
>>> +static int aspeed_i2c_check_master_error(u32 irq_status)
>>>   {
>>>       if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
>>>           return -EAGAIN;
>>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>        * should clear the command queue effectively taking us back to the
>>>        * INACTIVE state.
>>>        */
>>> -    ret = aspeed_i2c_is_irq_error(irq_status);
>>> +    ret = aspeed_i2c_check_master_error(irq_status);
>>>       if (ret) {
>>> -        dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
>>> +        dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n",
>>>               irq_status);
>>>           irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
>>>           if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor)
>>>   /* precondition: bus.lock has been acquired. */
>>>   static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>   {
>>> +    u32 timeout_base_divisor, timeout_tick_us, timeout_cycles;
>>>       u32 divisor, clk_reg_val;
>>>         divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency);
>>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>               ASPEED_I2CD_TIME_THDSTA_MASK |
>>>               ASPEED_I2CD_TIME_TACST_MASK);
>>>       clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor);
>>> +
>>> +    if (bus->hw_timeout_ms) {
>>> +        u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >>
>>> +                 ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT;
>>> +        u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >>
>>> +                ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT;
>>> +
>>> +        timeout_base_divisor = 0;
>>> +
>>> +        do {
>>> +            timeout_tick_us = 1000 * (16384 <<
>>> +                          (timeout_base_divisor << 1)) /
>>> +                      (bus->parent_clk_frequency / 1000);
>>> +
>>> +            if (timeout_base_divisor == div_max ||
>>> +                timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >=
>>> +                bus->hw_timeout_ms * 1000)
>>> +                break;
>>> +        } while (timeout_base_divisor++ < div_max);
>>> +
>>> +        if (timeout_tick_us) {
>>> +            timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000,
>>> +                              timeout_tick_us);
>>> +            if (timeout_cycles == 0)
>>> +                timeout_cycles = 1;
>>> +            else if (timeout_cycles > cycles_max)
>>> +                timeout_cycles = cycles_max;
>>> +        } else {
>>> +            timeout_cycles = 0;
>>> +        }
>>> +    } else {
>>> +        timeout_base_divisor = 0;
>>> +        timeout_cycles = 0;
>>> +    }
>>> +
>>> +    clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK,
>>> +                  timeout_base_divisor);
>>> +
>>>       writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
>>> -    writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>> +    writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>>         return 0;
>>>   }
>>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>>>           }
>>>       }
>>>   +    device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms",
>>> +                 &bus->hw_timeout_ms);
>>> +
>>>       /* Initialize the I2C adapter */
>>>       spin_lock_init(&bus->lock);
>>>       init_completion(&bus->cmd_complete);
>>>


More information about the openbmc mailing list