[PATCH v4 08/11] spi: aspeed: Calibrate read timings

Chin-Ting Kuo chin-ting_kuo at aspeedtech.com
Thu Mar 31 13:46:51 AEDT 2022


Hi Cédric,

> -----Original Message-----
> From: Cédric Le Goater <clg at kaod.org>
> Sent: Wednesday, March 30, 2022 8:15 PM
> To: Chin-Ting Kuo <chin-ting_kuo at aspeedtech.com>; linux-spi at vger.kernel.org;
> linux-mtd at lists.infradead.org
> Subject: Re: [PATCH v4 08/11] spi: aspeed: Calibrate read timings
> 
> On 3/30/22 13:53, Chin-Ting Kuo wrote:
> > Hi Cédric Le,
> >
> >> -----Original Message-----
> >> From: Cédric Le Goater <clg at kaod.org>
> >> Sent: Friday, March 25, 2022 6:09 PM
> >> To: linux-spi at vger.kernel.org; linux-mtd at lists.infradead.org
> >> Subject: [PATCH v4 08/11] spi: aspeed: Calibrate read timings
> >>
> >> To accommodate the different response time of SPI transfers on
> >> different boards and different SPI NOR devices, the Aspeed
> >> controllers provide a set of Read Timing Compensation registers to
> >> tune the timing delays depending on the frequency being used. The
> >> AST2600 SoC has one of these registers per device. On the AST2500 and
> >> AST2400 SoCs, the timing register is shared by all devices, which makes
> >> it problematic to get good results for more than one device.
> >>
> >> The algorithm first reads a golden buffer at low speed and then
> >> performs reads with different clocks and delay cycle settings to find
> >> a breaking point. This selects a default good frequency for the CEx
> >> control register.
> >> The current settings are a bit optimistic as we pick the first delay
> >> giving good results. A safer approach would be to determine an
> >> interval and choose the middle value.
> >>
> >> Calibration is performed when the direct mapping for reads is created.
> >> Since the underlying spi-nor object needs to be initialized to create
> >> the spi_mem operation for direct mapping, we should be fine. Having a
> >> specific API would clarify the requirements though.
> >>
> >> Cc: Pratyush Yadav <p.yadav at ti.com>
> >> Reviewed-by: Joel Stanley <joel at jms.id.au>
> >> Tested-by: Joel Stanley <joel at jms.id.au>
> >> Tested-by: Tao Ren <rentao.bupt at gmail.com>
> >> Signed-off-by: Cédric Le Goater <clg at kaod.org>
> >> ---
> >>   drivers/spi/spi-aspeed-smc.c | 281
> >> +++++++++++++++++++++++++++++++++++
> >>   1 file changed, 281 insertions(+)
> >>
> >> diff --git a/drivers/spi/spi-aspeed-smc.c
> >> b/drivers/spi/spi-aspeed-smc.c index
> >> 7f306da7c44e..660451667a39 100644
> >> --- a/drivers/spi/spi-aspeed-smc.c
> >> +++ b/drivers/spi/spi-aspeed-smc.c
> >> @@ -33,6 +33,8 @@
> >>   #define   CTRL_IO_ADDRESS_4B		BIT(13)	/* AST2400 SPI only
> */
> >>   #define   CTRL_IO_DUMMY_SET(dummy)					\
> >>   	(((((dummy) >> 2) & 0x1) << 14) | (((dummy) & 0x3) << 6))
> >> +#define   CTRL_FREQ_SEL_SHIFT		8
> >> +#define   CTRL_FREQ_SEL_MASK		GENMASK(11,
> >> CTRL_FREQ_SEL_SHIFT)
> >>   #define   CTRL_CE_STOP_ACTIVE		BIT(2)
> >>   #define   CTRL_IO_MODE_CMD_MASK		GENMASK(1, 0)
> >>   #define   CTRL_IO_MODE_NORMAL		0x0
> >> @@ -45,6 +47,9 @@
> >>   /* CEx Address Decoding Range Register */
> >>   #define CE0_SEGMENT_ADDR_REG		0x30
> >>
> >> +/* CEx Read timing compensation register */
> >> +#define CE0_TIMING_COMPENSATION_REG	0x94
> >> +
> >>   enum aspeed_spi_ctl_reg_value {
> >>   	ASPEED_SPI_BASE,
> >>   	ASPEED_SPI_READ,
> >> @@ -70,10 +75,15 @@ struct aspeed_spi_data {
> >>   	bool	hastype;
> >>   	u32	mode_bits;
> >>   	u32	we0;
> >> +	u32	timing;
> >> +	u32	hclk_mask;
> >> +	u32	hdiv_max;
> >>
> >>   	u32 (*segment_start)(struct aspeed_spi *aspi, u32 reg);
> >>   	u32 (*segment_end)(struct aspeed_spi *aspi, u32 reg);
> >>   	u32 (*segment_reg)(struct aspeed_spi *aspi, u32 start, u32 end);
> >> +	int (*calibrate)(struct aspeed_spi_chip *chip, u32 hdiv,
> >> +			 const u8 *golden_buf, u8 *test_buf);
> >>   };
> >>
> >>   #define ASPEED_SPI_MAX_NUM_CS	5
> >> @@ -517,6 +527,8 @@ static int aspeed_spi_chip_adjust_window(struct
> >> aspeed_spi_chip *chip,
> >>   	return 0;
> >>   }
> >>
> >> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip);
> >> +
> >>   static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc
> *desc)  {
> >>   	struct aspeed_spi *aspi =
> >> spi_controller_get_devdata(desc->mem->spi->master);
> >> @@ -565,6 +577,8 @@ static int aspeed_spi_dirmap_create(struct
> >> spi_mem_dirmap_desc *desc)
> >>   	chip->ctl_val[ASPEED_SPI_READ] = ctl_val;
> >>   	writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
> >>
> >> +	ret = aspeed_spi_do_calibration(chip);
> >> +
> >>   	dev_info(aspi->dev, "CE%d read buswidth:%d [0x%08x]\n",
> >>   		 chip->cs, op->data.buswidth,
> chip->ctl_val[ASPEED_SPI_READ]);
> >>
> >> @@ -812,6 +826,249 @@ static u32
> >> aspeed_spi_segment_ast2600_reg(struct
> >> aspeed_spi *aspi,
> >>   		((end - 1) & AST2600_SEG_ADDR_MASK);
> >>   }
> >>
> >> +/*
> >> + * Read timing compensation sequences  */
> >> +
> >> +#define CALIBRATE_BUF_SIZE SZ_16K
> >> +
> >> +static bool aspeed_spi_check_reads(struct aspeed_spi_chip *chip,
> >> +				   const u8 *golden_buf, u8 *test_buf) {
> >> +	int i;
> >> +
> >> +	for (i = 0; i < 10; i++) {
> >> +		memcpy_fromio(test_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
> >> +		if (memcmp(test_buf, golden_buf, CALIBRATE_BUF_SIZE) != 0) { #if
> >> +defined(VERBOSE_DEBUG)
> >> +			print_hex_dump_bytes(DEVICE_NAME "  fail: ",
> >> DUMP_PREFIX_NONE,
> >> +					     test_buf, 0x100);
> >> +#endif
> >> +			return false;
> >> +		}
> >> +	}
> >> +	return true;
> >> +}
> >> +
> >> +#define FREAD_TPASS(i)	(((i) / 2) | (((i) & 1) ? 0 : 8))
> >> +
> >> +/*
> >> + * The timing register is shared by all devices. Only update for CE0.
> >> + */
> >> +static int aspeed_spi_calibrate(struct aspeed_spi_chip *chip, u32 hdiv,
> >> +				const u8 *golden_buf, u8 *test_buf) {
> >> +	struct aspeed_spi *aspi = chip->aspi;
> >> +	const struct aspeed_spi_data *data = aspi->data;
> >> +	int i;
> >> +	int good_pass = -1, pass_count = 0;
> >> +	u32 shift = (hdiv - 1) << 2;
> >> +	u32 mask = ~(0xfu << shift);
> >> +	u32 fread_timing_val = 0;
> >> +
> >> +	/* Try HCLK delay 0..5, each one with/without delay and look for a
> >> +	 * good pair.
> >> +	 */
> >> +	for (i = 0; i < 12; i++) {
> >> +		bool pass;
> >> +
> >> +		if (chip->cs == 0) {
> >> +			fread_timing_val &= mask;
> >> +			fread_timing_val |= FREAD_TPASS(i) << shift;
> >> +			writel(fread_timing_val, aspi->regs + data->timing);
> >> +		}
> >> +		pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
> >> +		dev_dbg(aspi->dev,
> >> +			"  * [%08x] %d HCLK delay, %dns DI delay : %s",
> >> +			fread_timing_val, i / 2, (i & 1) ? 0 : 4,
> >> +			pass ? "PASS" : "FAIL");
> >> +		if (pass) {
> >> +			pass_count++;
> >> +			if (pass_count == 3) {
> >> +				good_pass = i - 1;
> >> +				break;
> >> +			}
> >> +		} else {
> >> +			pass_count = 0;
> >> +		}
> >> +	}
> >> +
> >> +	/* No good setting for this frequency */
> >> +	if (good_pass < 0)
> >> +		return -1;
> >> +
> >> +	/* We have at least one pass of margin, let's use first pass */
> >> +	if (chip->cs == 0) {
> >> +		fread_timing_val &= mask;
> >> +		fread_timing_val |= FREAD_TPASS(good_pass) << shift;
> >> +		writel(fread_timing_val, aspi->regs + data->timing);
> >> +	}
> >> +	dev_dbg(aspi->dev, " * -> good is pass %d [0x%08x]",
> >> +		good_pass, fread_timing_val);
> >> +	return 0;
> >> +}
> >> +
> >> +static bool aspeed_spi_check_calib_data(const u8 *test_buf, u32 size) {
> >> +	const u32 *tb32 = (const u32 *)test_buf;
> >> +	u32 i, cnt = 0;
> >> +
> >> +	/* We check if we have enough words that are neither all 0
> >> +	 * nor all 1's so the calibration can be considered valid.
> >> +	 *
> >> +	 * I use an arbitrary threshold for now of 64
> >> +	 */
> >> +	size >>= 2;
> >> +	for (i = 0; i < size; i++) {
> >> +		if (tb32[i] != 0 && tb32[i] != 0xffffffff)
> >> +			cnt++;
> >> +	}
> >> +	return cnt >= 64;
> >> +}
> >> +
> >> +static const u32 aspeed_spi_hclk_divs[] = {
> >> +	0xf, /* HCLK */
> >> +	0x7, /* HCLK/2 */
> >> +	0xe, /* HCLK/3 */
> >> +	0x6, /* HCLK/4 */
> >> +	0xd, /* HCLK/5 */
> >> +};
> >> +
> >> +#define ASPEED_SPI_HCLK_DIV(i) \
> >> +	(aspeed_spi_hclk_divs[(i) - 1] << CTRL_FREQ_SEL_SHIFT)
> >> +
> >> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip) {
> >> +	struct aspeed_spi *aspi = chip->aspi;
> >> +	const struct aspeed_spi_data *data = aspi->data;
> >> +	u32 ahb_freq = aspi->clk_freq;
> >> +	u32 max_freq = chip->clk_freq;
> >> +	u32 ctl_val;
> >> +	u8 *golden_buf = NULL;
> >> +	u8 *test_buf = NULL;
> >> +	int i, rc, best_div = -1;
> >> +
> >> +	dev_dbg(aspi->dev, "calculate timing compensation - AHB freq: %d MHz",
> >> +		ahb_freq / 1000000);
> >> +
> >> +	/*
> >> +	 * use the related low frequency to get check calibration data
> >> +	 * and get golden data.
> >> +	 */
> >> +	ctl_val = chip->ctl_val[ASPEED_SPI_READ] & data->hclk_mask;
> >> +	writel(ctl_val, chip->ctl);
> >> +
> >> +	test_buf = kzalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL);
> >> +	if (!test_buf)
> >> +		return -ENOMEM;
> >> +
> >> +	golden_buf = test_buf + CALIBRATE_BUF_SIZE;
> >> +
> >> +	memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
> >> +	if (!aspeed_spi_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) {
> >> +		dev_info(aspi->dev, "Calibration area too uniform, using low speed");
> >> +		goto no_calib;
> >> +	}
> >> +
> >> +#if defined(VERBOSE_DEBUG)
> >> +	print_hex_dump_bytes(DEVICE_NAME "  good: ", DUMP_PREFIX_NONE,
> >> +			     golden_buf, 0x100);
> >> +#endif
> >> +
> >> +	/* Now we iterate the HCLK dividers until we find our breaking point */
> >> +	for (i = ARRAY_SIZE(aspeed_spi_hclk_divs); i > data->hdiv_max - 1; i--) {
> >> +		u32 tv, freq;
> >> +
> >> +		freq = ahb_freq / i;
> >> +		if (freq > max_freq)
> >> +			continue;
> >> +
> >> +		/* Set the timing */
> >> +		tv = chip->ctl_val[ASPEED_SPI_READ] | ASPEED_SPI_HCLK_DIV(i);
> >> +		writel(tv, chip->ctl);
> >> +		dev_dbg(aspi->dev, "Trying HCLK/%d [%08x] ...", i, tv);
> >> +		rc = data->calibrate(chip, i, golden_buf, test_buf);
> >> +		if (rc == 0)
> >> +			best_div = i;
> >> +	}
> >> +
> >> +	/* Nothing found ? */
> >> +	if (best_div < 0) {
> >> +		dev_warn(aspi->dev, "No good frequency, using dumb slow");
> >> +	} else {
> >> +		dev_dbg(aspi->dev, "Found good read timings at HCLK/%d",
> >> best_div);
> >> +
> >> +		/* Record the freq */
> >> +		for (i = 0; i < ASPEED_SPI_MAX; i++)
> >> +			chip->ctl_val[i] = (chip->ctl_val[i] & data->hclk_mask) |
> >> +				ASPEED_SPI_HCLK_DIV(best_div);
> >> +	}
> >> +
> >> +no_calib:
> >
> > - Maybe, if the calibration process is not executed, the frequency setting
> >   calculated from max_frequency in the device tree could be written to
> >   FMC10 instead of always using the dumb slow one, 12.5MHz.
> 
> Indeed.
> 
> >   For example, apart from the uniform-content case, the calibration process
> >   will be skipped when the SPI clock frequency in the device tree is lower
> >   than 40MHz.
> > - The function aspeed_2600_spi_clk_basic_setting in [2] can be added to
> >   support lower SPI clock frequencies, e.g., 4MHz.
> >   For AST2600, the SPI clock frequency can be calculated by
> >   HCLK/(FMC10[27:24] + FMC10[11:8]).
> 
> Could you please send patches on top of this series ? Here are the branches :
> 

Of course. How should I provide the patches to you? By private mail, or by sending a PR?
Besides, I may add a new callback function for this part due to the differences between AST2500 and AST2600.

Thanks.

Chin-Ting

>    https://github.com/legoater/linux/commits/openbmc-5.15
>    https://github.com/legoater/linux/commits/aspeed         (mainline)
> 
> I can include them when I resend a v5.
> 
> Thanks,
> 
> C.
> 
> 
> >
> >> +	writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
> >> +	kfree(test_buf);
> >> +	return 0;
> >> +}
> >> +
> >> +#define TIMING_DELAY_DI		BIT(3)
> >> +#define TIMING_DELAY_HCYCLE_MAX	5
> >> +#define TIMING_REG_AST2600(chip)				\
> >> +	((chip)->aspi->regs + (chip)->aspi->data->timing +	\
> >> +	 (chip)->cs * 4)
> >> +
> >> +static int aspeed_spi_ast2600_calibrate(struct aspeed_spi_chip
> >> +*chip, u32
> >> hdiv,
> >> +					const u8 *golden_buf, u8 *test_buf) {
> >> +	struct aspeed_spi *aspi = chip->aspi;
> >> +	int hcycle;
> >> +	u32 shift = (hdiv - 2) << 3;
> >> +	u32 mask = ~(0xfu << shift);
> >> +	u32 fread_timing_val = 0;
> >> +
> >> +	for (hcycle = 0; hcycle <= TIMING_DELAY_HCYCLE_MAX; hcycle++) {
> >> +		int delay_ns;
> >> +		bool pass = false;
> >> +
> >> +		fread_timing_val &= mask;
> >> +		fread_timing_val |= hcycle << shift;
> >> +
> >> +		/* no DI input delay first  */
> >> +		writel(fread_timing_val, TIMING_REG_AST2600(chip));
> >> +		pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
> >> +		dev_dbg(aspi->dev,
> >> +			"  * [%08x] %d HCLK delay, DI delay none : %s",
> >> +			fread_timing_val, hcycle, pass ? "PASS" : "FAIL");
> >> +		if (pass)
> >> +			return 0;
> >> +
> >> +		/* Add DI input delays  */
> >> +		fread_timing_val &= mask;
> >> +		fread_timing_val |= (TIMING_DELAY_DI | hcycle) << shift;
> >> +
> >> +		for (delay_ns = 0; delay_ns < 0x10; delay_ns++) {
> >> +			fread_timing_val &= ~(0xf << (4 + shift));
> >> +			fread_timing_val |= delay_ns << (4 + shift);
> >> +
> >> +			writel(fread_timing_val, TIMING_REG_AST2600(chip));
> >> +			pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
> >> +			dev_dbg(aspi->dev,
> >> +				"  * [%08x] %d HCLK delay, DI delay %d.%dns : %s",
> >> +				fread_timing_val, hcycle, (delay_ns + 1) / 2,
> >> +				(delay_ns + 1) & 1 ? 5 : 5, pass ? "PASS" : "FAIL");
> >> +			/*
> >> +			 * TODO: This is optimistic. We should look
> >> +			 * for a working interval and save the middle
> >> +			 * value in the read timing register.
> >> +			 */
> >> +			if (pass)
> >> +				return 0;
> >> +		}
> >> +	}
> >> +
> >> +	/* No good setting for this frequency */
> >> +	return -1;
> >> +}
> >> +
> >>   /*
> >>    * Platform definitions
> >>    */
> >> @@ -820,6 +1077,10 @@ static const struct aspeed_spi_data
> >> ast2400_fmc_data = {
> >>   	.hastype       = true,
> >>   	.we0	       = 16,
> >>   	.ctl0	       = CE0_CTRL_REG,
> >> +	.timing	       = CE0_TIMING_COMPENSATION_REG,
> >> +	.hclk_mask     = 0xfffff0ff,
> >> +	.hdiv_max      = 1,
> >> +	.calibrate     = aspeed_spi_calibrate,
> >>   	.segment_start = aspeed_spi_segment_start,
> >>   	.segment_end   = aspeed_spi_segment_end,
> >>   	.segment_reg   = aspeed_spi_segment_reg,
> >> @@ -830,6 +1091,10 @@ static const struct aspeed_spi_data
> >> ast2400_spi_data = {
> >>   	.hastype       = false,
> >>   	.we0	       = 0,
> >>   	.ctl0	       = 0x04,
> >> +	.timing	       = 0x14,
> >> +	.hclk_mask     = 0xfffff0ff,
> >> +	.hdiv_max      = 1,
> >> +	.calibrate     = aspeed_spi_calibrate,
> >>   	/* No segment registers */
> >>   };
> >>
> >> @@ -838,6 +1103,10 @@ static const struct aspeed_spi_data
> >> ast2500_fmc_data = {
> >>   	.hastype       = true,
> >>   	.we0	       = 16,
> >>   	.ctl0	       = CE0_CTRL_REG,
> >> +	.timing	       = CE0_TIMING_COMPENSATION_REG,
> >> +	.hclk_mask     = 0xfffff0ff,
> >> +	.hdiv_max      = 1,
> >> +	.calibrate     = aspeed_spi_calibrate,
> >>   	.segment_start = aspeed_spi_segment_start,
> >>   	.segment_end   = aspeed_spi_segment_end,
> >>   	.segment_reg   = aspeed_spi_segment_reg,
> >> @@ -848,6 +1117,10 @@ static const struct aspeed_spi_data
> >> ast2500_spi_data = {
> >>   	.hastype       = false,
> >>   	.we0	       = 16,
> >>   	.ctl0	       = CE0_CTRL_REG,
> >> +	.timing	       = CE0_TIMING_COMPENSATION_REG,
> >> +	.hclk_mask     = 0xfffff0ff,
> >> +	.hdiv_max      = 1,
> >> +	.calibrate     = aspeed_spi_calibrate,
> >>   	.segment_start = aspeed_spi_segment_start,
> >>   	.segment_end   = aspeed_spi_segment_end,
> >>   	.segment_reg   = aspeed_spi_segment_reg,
> >> @@ -859,6 +1132,10 @@ static const struct aspeed_spi_data
> >> ast2600_fmc_data = {
> >>   	.mode_bits     = SPI_RX_QUAD | SPI_RX_QUAD,
> >>   	.we0	       = 16,
> >>   	.ctl0	       = CE0_CTRL_REG,
> >> +	.timing	       = CE0_TIMING_COMPENSATION_REG,
> >> +	.hclk_mask     = 0xf0fff0ff,
> >> +	.hdiv_max      = 2,
> >> +	.calibrate     = aspeed_spi_ast2600_calibrate,
> >>   	.segment_start = aspeed_spi_segment_ast2600_start,
> >>   	.segment_end   = aspeed_spi_segment_ast2600_end,
> >>   	.segment_reg   = aspeed_spi_segment_ast2600_reg,
> >> @@ -870,6 +1147,10 @@ static const struct aspeed_spi_data
> >> ast2600_spi_data = {
> >>   	.mode_bits     = SPI_RX_QUAD | SPI_RX_QUAD,
> >>   	.we0	       = 16,
> >>   	.ctl0	       = CE0_CTRL_REG,
> >> +	.timing	       = CE0_TIMING_COMPENSATION_REG,
> >> +	.hclk_mask     = 0xf0fff0ff,
> >> +	.hdiv_max      = 2,
> >> +	.calibrate     = aspeed_spi_ast2600_calibrate,
> >>   	.segment_start = aspeed_spi_segment_ast2600_start,
> >>   	.segment_end   = aspeed_spi_segment_ast2600_end,
> >>   	.segment_reg   = aspeed_spi_segment_ast2600_reg,
> >> --
> >> 2.34.1
> >
> >
> > Best Wishes,
> > Chin-Ting



More information about the Linux-aspeed mailing list