[PATCH v3] ARM: l2x0: Add OF based initialization

Rob Herring robherring2 at gmail.com
Wed Jul 6 05:08:21 EST 2011


Grant,

On 07/04/2011 10:55 PM, Grant Likely wrote:
> On Mon, Jul 04, 2011 at 03:15:56PM -0500, Rob Herring wrote:
>> From: Rob Herring <rob.herring at calxeda.com>
>>
>> This adds probing for ARM L2x0 cache controllers via device tree. Support
>> includes the L210, L220, and PL310 controllers. The binding allows setting
>> up cache RAM latencies and filter addresses (PL310 only).
>>
>> Signed-off-by: Rob Herring <rob.herring at calxeda.com>
>> ---
>> I've tested this version and fixed some issues from the one I sent to the
>> CSR platform thread.
>>
>> Changes in v3:
>> - Allow platforms to set aux ctrl reg with aux_value and aux_mask.
>> - Add RAM latency and filter address bindings based on CSR's platform needs.
>>
>>  Documentation/devicetree/bindings/arm/l2cc.txt |   40 ++++++++
>>  arch/arm/include/asm/hardware/cache-l2x0.h     |   17 ++++
>>  arch/arm/mm/cache-l2x0.c                       |  120 ++++++++++++++++++++++++
>>  3 files changed, 177 insertions(+), 0 deletions(-)
>>  create mode 100644 Documentation/devicetree/bindings/arm/l2cc.txt
>>
>> diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt
>> new file mode 100644
>> index 0000000..79e66fb
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/arm/l2cc.txt
>> @@ -0,0 +1,40 @@
>> +* ARM L2 Cache Controller
>> +
>> +ARM cores often have a separate level 2 cache controller. There are various
>> +implementations of the L2 cache controller with compatible programming models.
>> +The ARM L2 cache representation in the device tree should be done as under:-
> 
> Damaged sentence?
> 
>> +
>> +Required properties:
>> +
>> +- compatible : should be one of
>> +	"arm,pl310-cache"
>> +	"arm,l220-cache"
>> +	"arm,l210-cache"
>> +- cache-unified : Specifies the cache is a unified cache.
>> +- cache-level : Should be set to 2 for a level 2 cache.
>> +- reg : Physical base address and size of cache controller's memory mapped
>> +  registers.
>> +
>> +Optional properties:
>> +
>> +- data-latency : Cycles of latency for Data RAM accesses. Specifies 3 cells of
>> +  read, write and setup latencies. Controllers without setup latency control
>> +  should use 0.
>> +- tag-latency : Cycles of latency for Tag RAM accesses. Specifies 3 cells of
>> +  read, write and setup latencies. Controllers without setup latency control
>> +  should use 0.
>> +- dirty-latency : Cycles of latency for reads of Dirty RAMs. This is a single
>> +  cell.
>> +- filter-ranges : <start end> Address range the  
> 
> Incomplete sentence?
> 
> Typically address ranges in the DT are <start size> pairs.  Does the
> filter-ranges property deviate from this?
> 
> Personally, I'd suggest prefixing these custom properties with "arm,"
> to avoid any potential namespace conflict.
> 
>> +
>> +Example:
>> +
>> +L2: l2-cache {
>> +        compatible = "arm,pl310-cache", "cache";
> 
> Drop "cache".  It isn't useful.
> 
>> +        reg = <0xfff12000 0x1000>;
>> +        data-latency = <1 1 1>;
>> +        tag-latency = <2 2 2>;
>> +        cache-unified;
>> +        cache-level = <2>;
>> +};
>> +
>> diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
>> index 16bd480..8fe149f 100644
>> --- a/arch/arm/include/asm/hardware/cache-l2x0.h
>> +++ b/arch/arm/include/asm/hardware/cache-l2x0.h
>> @@ -47,6 +47,8 @@
>>  #define L2X0_CLEAN_INV_WAY		0x7FC
>>  #define L2X0_LOCKDOWN_WAY_D		0x900
>>  #define L2X0_LOCKDOWN_WAY_I		0x904
>> +#define L2X0_ADDR_FILTER_START		0xC00
>> +#define L2X0_ADDR_FILTER_END		0xC04
>>  #define L2X0_TEST_OPERATION		0xF00
>>  #define L2X0_LINE_DATA			0xF10
>>  #define L2X0_LINE_TAG			0xF30
>> @@ -62,6 +64,14 @@
>>  #define L2X0_CACHE_ID_PART_L310		(3 << 6)
>>  
>>  #define L2X0_AUX_CTRL_MASK			0xc0000fff
>> +#define L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT	0
>> +#define L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK	0x7
>> +#define L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT	3
>> +#define L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK	(0x7 << 3)
>> +#define L2X0_AUX_CTRL_TAG_LATENCY_SHIFT		6
>> +#define L2X0_AUX_CTRL_TAG_LATENCY_MASK		(0x7 << 6)
>> +#define L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT	9
>> +#define L2X0_AUX_CTRL_DIRTY_LATENCY_MASK	(0x7 << 9)
>>  #define L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT	16
>>  #define L2X0_AUX_CTRL_WAY_SIZE_SHIFT		17
>>  #define L2X0_AUX_CTRL_WAY_SIZE_MASK		(0x3 << 17)
>> @@ -72,8 +82,15 @@
>>  #define L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT	29
>>  #define L2X0_AUX_CTRL_EARLY_BRESP_SHIFT		30
>>  
>> +#define L2X0_LATENCY_CTRL_SETUP_SHIFT	0
>> +#define L2X0_LATENCY_CTRL_RD_SHIFT	4
>> +#define L2X0_LATENCY_CTRL_WR_SHIFT	8
>> +
>> +#define L2X0_ADDR_FILTER_EN		1
>> +
>>  #ifndef __ASSEMBLY__
>>  extern void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask);
>> +extern int l2x0_of_init(__u32 aux_val, __u32 aux_mask);
>>  #endif
>>  
>>  #endif
>> diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
>> index ef59099..649be84 100644
>> --- a/arch/arm/mm/cache-l2x0.c
>> +++ b/arch/arm/mm/cache-l2x0.c
>> @@ -16,9 +16,12 @@
>>   * along with this program; if not, write to the Free Software
>>   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>>   */
>> +#include <linux/err.h>
>>  #include <linux/init.h>
>>  #include <linux/spinlock.h>
>>  #include <linux/io.h>
>> +#include <linux/of.h>
>> +#include <linux/of_address.h>
>>  
>>  #include <asm/cacheflush.h>
>>  #include <asm/hardware/cache-l2x0.h>
>> @@ -344,3 +347,120 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
>>  	printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n",
>>  			ways, cache_id, aux, l2x0_size);
>>  }
>> +
>> +#ifdef CONFIG_OF
>> +static const struct of_device_id l2x0_ids[] __initconst = {
>> +	{ .compatible = "arm,pl310-cache" },
>> +	{ .compatible = "arm,l220-cache" },
>> +	{ .compatible = "arm,l210-cache" },
>> +	{}
>> +};
>> +
>> +static void __init l2x0_of_set_address_filter(const struct device_node *np)
>> +{
>> +	u32 start, end;
>> +	const u32 *prop;
>> +	int len;
>> +	int is_pl310 = of_device_is_compatible(np, "arm,pl310-cache");
>> +
>> +	if (!is_pl310 || (readl_relaxed(l2x0_base + L2X0_CTRL) & 1))
>> +		return;
>> +
>> +	prop = of_get_property(np, "filter-ranges", &len);
>> +	if (!prop || (len != (2 * sizeof(prop))))
>> +		return;
>> +
>> +	start = be32_to_cpup(prop++) | L2X0_ADDR_FILTER_EN;
>> +	end = be32_to_cpup(prop++);
>> +	writel_relaxed(end, l2x0_base + L2X0_ADDR_FILTER_END);
>> +	writel_relaxed(start, l2x0_base + L2X0_ADDR_FILTER_START);
>> +}
>> +
>> +static void __init l2x0_of_set_ram_timings(const struct device_node *np,
>> +					  __u32 *aux_val, __u32 *aux_mask)
>> +{
>> +	u32 data_rd = 0, data_wr = 0, data_setup = 0;
>> +	u32 tag_rd = 0, tag_wr = 0, tag_setup = 0;
>> +	u32 dirty = 0;
>> +	const u32 *prop;
> 
> const __be32 *prop;
> 
>> +	int len;
>> +	int is_pl310 = of_device_is_compatible(np, "arm,pl310-cache");
>> +
>> +	if (readl_relaxed(l2x0_base + L2X0_CTRL) & 1)
>> +		return;
>> +
>> +	prop = of_get_property(np, "data-latency", &len);
>> +	if (prop && (len == (3 * sizeof(prop)))) {
>> +		data_rd = be32_to_cpup(prop++);
>> +		data_wr = be32_to_cpup(prop++);
>> +		data_setup = be32_to_cpup(prop);
>> +	}
> 
> I wonder if it would be useful to have an of_property_read_u32array() helper?
> 
>> +
>> +	prop = of_get_property(np, "tag-latency", &len);
>> +	if (prop && (len == (3 * sizeof(prop)))) {
>> +		tag_rd = be32_to_cpup(prop++);
>> +		tag_wr = be32_to_cpup(prop++);
>> +		tag_setup = be32_to_cpup(prop);
>> +	}
>> +
>> +	prop = of_get_property(np, "dirty-latency", &len);
>> +	if (prop && (len == sizeof(prop)))
>> +		dirty = be32_to_cpup(prop);
> 
> of_property_read_u32()
> 
>> +
>> +	if (is_pl310 && tag_wr && tag_rd && tag_setup)
>> +		writel_relaxed(
>> +			(--tag_wr << L2X0_LATENCY_CTRL_WR_SHIFT) |
>> +			(--tag_rd << L2X0_LATENCY_CTRL_RD_SHIFT) |
>> +			(--tag_setup << L2X0_LATENCY_CTRL_SETUP_SHIFT),
> 
> tag_wr, tag_rd and tag_setup are only used once, so the self decrement
> is confusing.  I'd rather see simply '(tag_wr - 1) << ...'
> 
> 
>> +			l2x0_base + L2X0_TAG_LATENCY_CTRL);
>> +
>> +	if (is_pl310 && data_wr && data_rd && data_setup)
>> +		writel_relaxed(
>> +			(--data_wr << L2X0_LATENCY_CTRL_WR_SHIFT) |
>> +			(--data_rd << L2X0_LATENCY_CTRL_RD_SHIFT) |
>> +			(--data_setup << L2X0_LATENCY_CTRL_SETUP_SHIFT),
>> +			l2x0_base + L2X0_TAG_LATENCY_CTRL);
> 
> Hmmm, there are 2 sets of if() blocks here.  One for is_pl310, and one
> for !is_pl310.  Instead of testing is_pl310 over and over, it would
> make more sense to me to do:
> 
> 	if (is_pl310) {
> 		if (tag_wr && tag_rd && tag_setup)
> 			...
> 		if (data_wr && data_rd && data_setup)
> 			...
> 	} else {
> 		if (tag_rd)
> 			...
> 		if (data_rd)
> 			...
> 		...
> 	}

It was really just to avoid another level of indentation.

>> +
>> +	if (!is_pl310 && tag_rd) {
>> +		*aux_val &= ~L2X0_AUX_CTRL_TAG_LATENCY_MASK;
>> +		*aux_val |= --tag_rd << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
>> +		*aux_mask &= ~L2X0_AUX_CTRL_TAG_LATENCY_MASK;
>> +	}
>> +
>> +	if (!is_pl310 && data_rd) {
>> +		*aux_val &= ~L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK;
>> +		*aux_val |= --data_rd << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT;
>> +		*aux_mask &= ~L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK;
>> +	}
>> +
>> +	if (!is_pl310 && data_wr) {
>> +		*aux_val &= ~L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
>> +		*aux_val |= --data_wr << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT;
>> +		*aux_mask &= ~L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
>> +	}
>> +
>> +	if (!is_pl310 && dirty) {
>> +		*aux_val &= ~L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
>> +		*aux_val |= --dirty << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
>> +		*aux_mask &= ~L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
>> +	}
> 
> Something about this just feels suboptimal.  It's essentially the
> exact same block of code 4 times with different values, masks and
> shifts.  It may be best the way it is, but I do wonder if it could be
> made to look nicer.

How about something like this:

> 	if (is_pl310) {
> 		if (tag[0] && tag[1] && tag[2])
> 			writel_relaxed(
> 				((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
> 				((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
> 				((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
> 				l2x0_base + L2X0_TAG_LATENCY_CTRL);
> 
> 		if (data[0] && data[1] && data[2])
> 			writel_relaxed(
> 				((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
> 				((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
> 				((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
> 				l2x0_base + L2X0_DATA_LATENCY_CTRL);
> 
> 		return;
> 	}
> 
> 	if (tag[0]) {
> 		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
> 		val |= (tag[0] - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
> 	}
> 
> 	if (data[0] && data[1]) {
> 		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
> 			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
> 		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
> 		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
> 	}
> 
> 	if (dirty) {
> 		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
> 		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
> 	}
> 
> 	*aux_val &= ~mask;
> 	*aux_val |= val;
> 	*aux_mask &= ~mask;
> }

Rob


More information about the devicetree-discuss mailing list