[PATCH dev-5.2 2/2] i2c: aspeed: add slave inactive timeout support
Tao Ren
taoren at fb.com
Fri Sep 6 11:16:55 AEST 2019
Hi Jae,
On 9/5/19 4:35 PM, Jae Hyun Yoo wrote:
> On 9/5/2019 4:19 PM, Tao Ren wrote:
>> On 9/5/19 3:48 PM, Jae Hyun Yoo wrote:
>>> Hi Tao,
>>>
>>> On 9/5/2019 3:28 PM, Tao Ren wrote:
>>>> Hi Jae,
>>>>
>>>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
>>>>> In case of multi-master environment, if a peer master incorrectly handles
>>>>> a bus in the middle of a transaction, I2C hardware hangs in slave state
>>>>> and it can't escape from the slave state, so this commit adds slave
>>>>> inactive timeout support to recover the bus in the case.
>>>>>
>>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo at linux.intel.com>
>>>>
>>>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state?
>>>>
>>>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess the watchdog timed out). Given that the issue happened only once, I didn't have a chance to collect more information (such as why the repetitive interrupt was generated, why the BMC rebooted, etc.). I will share more if I hit the problem again.
>>>
>>> The basic concept of this implementation is to set the slave state of
>>> the driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling
>>> aspeed_i2c_reset() directly from interrupt context. Instead, when a
>>> master xfer happens after that, it will try bus recovery
>>> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset()
>>> if needed.
>>>
>>> If this patch doesn't work in your case, test it again after adding
>>> one line code into this driver. See below.
>>
>> If bus_reset is really needed in this case, then I'd prefer to do it immediately, or in a threaded_irq_handler if it's a bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons are:
>>
>> 1) we don't know when userspace starts next master transfer.
>> 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because the bus is "idle" (I2CD14[16] == 0).
>>
>
> Oh, so to support that case as well, we probably need to add a flag
> indicating that recovery is needed, so that when a master xfer comes it
> can forcibly recover and reset the bus even if the bus is idle. Can you
> please test that by making the code changes? Unfortunately, I can't
> reproduce the case on my system.
Not sure if I understand it correctly, but given we already reset the bus in the interrupt handler, the extra flag should not be needed?
>>>
>>>>
>>>>> ---
>>>>> drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++---
>>>>> 1 file changed, 73 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
>>>>> index 89317929bee4..92e1a249b393 100644
>>>>> --- a/drivers/i2c/busses/i2c-aspeed.c
>>>>> +++ b/drivers/i2c/busses/i2c-aspeed.c
>>>>> @@ -70,10 +70,14 @@
>>>>> #define ASPEED_I2CD_TIME_SCL_HIGH_MASK GENMASK(19, 16)
>>>>> #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT 12
>>>>> #define ASPEED_I2CD_TIME_SCL_LOW_MASK GENMASK(15, 12)
>>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT 8
>>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK GENMASK(9, 8)
>>>>> #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK GENMASK(3, 0)
>>>>> #define ASPEED_I2CD_TIME_SCL_REG_MAX GENMASK(3, 0)
>>>>> +
>>>>> /* 0x08 : I2CD Clock and AC Timing Control Register #2 */
>>>>> -#define ASPEED_NO_TIMEOUT_CTRL 0
>>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT 0
>>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK GENMASK(4, 0)
>>>>> /* 0x0c : I2CD Interrupt Control Register &
>>>>> * 0x10 : I2CD Interrupt Status Register
>>>>> @@ -81,6 +85,7 @@
>>>>> * These share bit definitions, so use the same values for the enable &
>>>>> * status bits.
>>>>> */
>>>>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT BIT(15)
>>>>> #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT BIT(14)
>>>>> #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE BIT(13)
>>>>> #define ASPEED_I2CD_INTR_SLAVE_MATCH BIT(7)
>>>>> @@ -96,8 +101,11 @@
>>>>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \
>>>>> ASPEED_I2CD_INTR_ABNORMAL | \
>>>>> ASPEED_I2CD_INTR_ARBIT_LOSS)
>>>>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS \
>>>>> + ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT
>>>>> #define ASPEED_I2CD_INTR_ALL \
>>>>> - (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \
>>>>> + (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT | \
>>>>> + ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \
>>>>> ASPEED_I2CD_INTR_BUS_RECOVER_DONE | \
>>>>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \
>>>>> ASPEED_I2CD_INTR_ABNORMAL | \
>>>>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus {
>>>>> u32 divisor);
>>>>> unsigned long parent_clk_frequency;
>>>>> u32 bus_frequency;
>>>>> + u32 hw_timeout_ms;
>>>>> /* Transaction state. */
>>>>> enum aspeed_i2c_master_state master_state;
>>>>> struct i2c_msg *msgs;
>>>>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>>>>> }
>>>>> #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>>>> +static int aspeed_i2c_check_slave_error(u32 irq_status)
>>>>> +{
>>>>> + if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT)
>>>>> + return -EIO;
>>>>> +
>>>>> + return 0;
>>>>> +}
>>>>> +
>>>>> static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>> {
>>>>> u32 command, irq_handled = 0;
>>>>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>> if (!slave)
>>>>> return 0;
>>>>> + if (aspeed_i2c_check_slave_error(irq_status)) {
>>>>> + dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n",
>>>>> + irq_status);
>>>>> + irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS);
>>>>> + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE;
>>>
>>> aspeed_i2c_reset(bus);
>>>
>>> I didn't add it in this patch because I wanted to avoid calling this
>>> reset function from interrupt context, but give it a try.
>>>
>>> Thanks,
>>> Jae
>>
>> I believe this will solve my problem, but let me test it and I will share the results with you later.
>
> Please share this result too. It would be useful to complete making this
> patch.
>
> Thanks,
> Jae
Adding aspeed_i2c_reset(bus) does fix my problem. Thank you Jae.
Cheers,
Tao
>>>>> + return irq_handled;
>>>>> + }
>>>>> +
>>>>> command = readl(bus->base + ASPEED_I2C_CMD_REG);
>>>>> /* Slave was requested, restart state machine. */
>>>>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
>>>>> }
>>>>> }
>>>>> -static int aspeed_i2c_is_irq_error(u32 irq_status)
>>>>> +static int aspeed_i2c_check_master_error(u32 irq_status)
>>>>> {
>>>>> if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
>>>>> return -EAGAIN;
>>>>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>> * should clear the command queue effectively taking us back to the
>>>>> * INACTIVE state.
>>>>> */
>>>>> - ret = aspeed_i2c_is_irq_error(irq_status);
>>>>> + ret = aspeed_i2c_check_master_error(irq_status);
>>>>> if (ret) {
>>>>> - dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
>>>>> + dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n",
>>>>> irq_status);
>>>>> irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
>>>>> if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>>>>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor)
>>>>> /* precondition: bus.lock has been acquired. */
>>>>> static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>>> {
>>>>> + u32 timeout_base_divisor, timeout_tick_us, timeout_cycles;
>>>>> u32 divisor, clk_reg_val;
>>>>> divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency);
>>>>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>>> ASPEED_I2CD_TIME_THDSTA_MASK |
>>>>> ASPEED_I2CD_TIME_TACST_MASK);
>>>>> clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor);
>>>>> +
>>>>> + if (bus->hw_timeout_ms) {
>>>>> + u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >>
>>>>> + ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT;
>>>>> + u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >>
>>>>> + ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT;
>>>>> +
>>>>> + timeout_base_divisor = 0;
>>>>> +
>>>>> + do {
>>>>> + timeout_tick_us = 1000 * (16384 <<
>>>>> + (timeout_base_divisor << 1)) /
>>>>> + (bus->parent_clk_frequency / 1000);
>>>>> +
>>>>> + if (timeout_base_divisor == div_max ||
>>>>> + timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >=
>>>>> + bus->hw_timeout_ms * 1000)
>>>>> + break;
>>>>> + } while (timeout_base_divisor++ < div_max);
>>>>> +
>>>>> + if (timeout_tick_us) {
>>>>> + timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000,
>>>>> + timeout_tick_us);
>>>>> + if (timeout_cycles == 0)
>>>>> + timeout_cycles = 1;
>>>>> + else if (timeout_cycles > cycles_max)
>>>>> + timeout_cycles = cycles_max;
>>>>> + } else {
>>>>> + timeout_cycles = 0;
>>>>> + }
>>>>> + } else {
>>>>> + timeout_base_divisor = 0;
>>>>> + timeout_cycles = 0;
>>>>> + }
>>>>> +
>>>>> + clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK,
>>>>> + timeout_base_divisor);
>>>>> +
>>>>> writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
>>>>> - writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>>>> + writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>>>> return 0;
>>>>> }
>>>>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>>>>> }
>>>>> }
>>>>> + device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms",
>>>>> + &bus->hw_timeout_ms);
>>>>> +
>>>>> /* Initialize the I2C adapter */
>>>>> spin_lock_init(&bus->lock);
>>>>> init_completion(&bus->cmd_complete);
>>>>>
More information about the openbmc
mailing list