[PATCH dev-5.2 2/2] i2c: aspeed: add slave inactive timeout support
Tao Ren
taoren at fb.com
Fri Sep 6 09:19:16 AEST 2019
On 9/5/19 3:48 PM, Jae Hyun Yoo wrote:
> Hi Tao,
>
> On 9/5/2019 3:28 PM, Tao Ren wrote:
>> Hi Jae,
>>
>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
>>> In case of multi-master environment, if a peer master incorrectly handles
>>> a bus in the middle of a transaction, I2C hardware hangs in slave state
>>> and it can't escape from the slave state, so this commit adds slave
>>> inactive timeout support to recover the bus in the case.
>>>
>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo at linux.intel.com>
>>
>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state?
>>
>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC rebooted (I guess the watchdog timed out). Given that the issue happened only once, I didn't have a chance to collect more information (such as why the repetitive interrupt was generated, why the BMC rebooted, etc.). I will share more if I hit the problem again.
>
> The basic concept of this implementation is setting the slave state of
> the driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling
> aspeed_i2c_reset() directly from interrupt context. Instead, when a
> master xfer happens after that, it will try bus recovery
> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset()
> if needed.
>
> If this patch doesn't work in your case, test it again after adding
> one line of code to this driver. See below.
If bus_reset is really needed in this case, then I'd prefer to do it immediately, or in a threaded_irq_handler if it's a bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons are:
1) we don't know when userspace starts next master transfer.
2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0).
Thanks,
Tao
>
>>
>>> ---
>>> drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++---
>>> 1 file changed, 73 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
>>> index 89317929bee4..92e1a249b393 100644
>>> --- a/drivers/i2c/busses/i2c-aspeed.c
>>> +++ b/drivers/i2c/busses/i2c-aspeed.c
>>> @@ -70,10 +70,14 @@
>>> #define ASPEED_I2CD_TIME_SCL_HIGH_MASK GENMASK(19, 16)
>>> #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT 12
>>> #define ASPEED_I2CD_TIME_SCL_LOW_MASK GENMASK(15, 12)
>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT 8
>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK GENMASK(9, 8)
>>> #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK GENMASK(3, 0)
>>> #define ASPEED_I2CD_TIME_SCL_REG_MAX GENMASK(3, 0)
>>> +
>>> /* 0x08 : I2CD Clock and AC Timing Control Register #2 */
>>> -#define ASPEED_NO_TIMEOUT_CTRL 0
>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT 0
>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK GENMASK(4, 0)
>>> /* 0x0c : I2CD Interrupt Control Register &
>>> * 0x10 : I2CD Interrupt Status Register
>>> @@ -81,6 +85,7 @@
>>> * These share bit definitions, so use the same values for the enable &
>>> * status bits.
>>> */
>>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT BIT(15)
>>> #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT BIT(14)
>>> #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE BIT(13)
>>> #define ASPEED_I2CD_INTR_SLAVE_MATCH BIT(7)
>>> @@ -96,8 +101,11 @@
>>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \
>>> ASPEED_I2CD_INTR_ABNORMAL | \
>>> ASPEED_I2CD_INTR_ARBIT_LOSS)
>>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS \
>>> + ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT
>>> #define ASPEED_I2CD_INTR_ALL \
>>> - (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \
>>> + (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT | \
>>> + ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \
>>> ASPEED_I2CD_INTR_BUS_RECOVER_DONE | \
>>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \
>>> ASPEED_I2CD_INTR_ABNORMAL | \
>>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus {
>>> u32 divisor);
>>> unsigned long parent_clk_frequency;
>>> u32 bus_frequency;
>>> + u32 hw_timeout_ms;
>>> /* Transaction state. */
>>> enum aspeed_i2c_master_state master_state;
>>> struct i2c_msg *msgs;
>>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>>> }
>>> #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>> +static int aspeed_i2c_check_slave_error(u32 irq_status)
>>> +{
>>> + if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT)
>>> + return -EIO;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>> {
>>> u32 command, irq_handled = 0;
>>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>> if (!slave)
>>> return 0;
>>> + if (aspeed_i2c_check_slave_error(irq_status)) {
>>> + dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n",
>>> + irq_status);
>>> + irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS);
>>> + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE;
>
> aspeed_i2c_reset(bus);
>
> I didn't add it in this patch because I wanted to avoid calling this
> reset function from interrupt context, but give it a try.
>
> Thanks,
> Jae
I believe this will solve my problem, but let me test it and I will share the results with you later.
>>> + return irq_handled;
>>> + }
>>> +
>>> command = readl(bus->base + ASPEED_I2C_CMD_REG);
>>> /* Slave was requested, restart state machine. */
>>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
>>> }
>>> }
>>> -static int aspeed_i2c_is_irq_error(u32 irq_status)
>>> +static int aspeed_i2c_check_master_error(u32 irq_status)
>>> {
>>> if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
>>> return -EAGAIN;
>>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>> * should clear the command queue effectively taking us back to the
>>> * INACTIVE state.
>>> */
>>> - ret = aspeed_i2c_is_irq_error(irq_status);
>>> + ret = aspeed_i2c_check_master_error(irq_status);
>>> if (ret) {
>>> - dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
>>> + dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n",
>>> irq_status);
>>> irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
>>> if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor)
>>> /* precondition: bus.lock has been acquired. */
>>> static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>> {
>>> + u32 timeout_base_divisor, timeout_tick_us, timeout_cycles;
>>> u32 divisor, clk_reg_val;
>>> divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency);
>>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>> ASPEED_I2CD_TIME_THDSTA_MASK |
>>> ASPEED_I2CD_TIME_TACST_MASK);
>>> clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor);
>>> +
>>> + if (bus->hw_timeout_ms) {
>>> + u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >>
>>> + ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT;
>>> + u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >>
>>> + ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT;
>>> +
>>> + timeout_base_divisor = 0;
>>> +
>>> + do {
>>> + timeout_tick_us = 1000 * (16384 <<
>>> + (timeout_base_divisor << 1)) /
>>> + (bus->parent_clk_frequency / 1000);
>>> +
>>> + if (timeout_base_divisor == div_max ||
>>> + timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >=
>>> + bus->hw_timeout_ms * 1000)
>>> + break;
>>> + } while (timeout_base_divisor++ < div_max);
>>> +
>>> + if (timeout_tick_us) {
>>> + timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000,
>>> + timeout_tick_us);
>>> + if (timeout_cycles == 0)
>>> + timeout_cycles = 1;
>>> + else if (timeout_cycles > cycles_max)
>>> + timeout_cycles = cycles_max;
>>> + } else {
>>> + timeout_cycles = 0;
>>> + }
>>> + } else {
>>> + timeout_base_divisor = 0;
>>> + timeout_cycles = 0;
>>> + }
>>> +
>>> + clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK,
>>> + timeout_base_divisor);
>>> +
>>> writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
>>> - writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>> + writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>> return 0;
>>> }
>>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>>> }
>>> }
>>> + device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms",
>>> + &bus->hw_timeout_ms);
>>> +
>>> /* Initialize the I2C adapter */
>>> spin_lock_init(&bus->lock);
>>> init_completion(&bus->cmd_complete);
>>>
More information about the openbmc
mailing list