[PATCHv2 pci-next 1/2] PCI/AER: correctable error message as KERN_INFO

Sathyanarayanan Kuppuswamy sathyanarayanan.kuppuswamy at linux.intel.com
Sat Mar 18 05:50:22 AEDT 2023



On 3/17/23 10:51 AM, Grant Grundler wrote:
> Since correctable errors have been corrected (and counted), the dmesg output
> should not be reported as a warning, but rather as "informational".
> 
> Otherwise, when a certain well-known vendor's PCIe parts are used in a USB4
> docking station, the dmesg buffer can be spammed with correctable errors, at
> 717 bytes per instance and potentially many MB per day.

Why don't you investigate why you are getting so many correctable errors?
Isn't solving the problem preferable to hiding the logs?

> 
> Given the "WARN" priority, these messages have already confused the typical
> user who stumbles across them, support staff (triaging feedback reports),
> and more than a few Linux kernel devs. Changing to INFO will hide these
> messages from most audiences.
> 
> Signed-off-by: Grant Grundler <grundler at chromium.org>
> ---
>  drivers/pci/pcie/aer.c | 29 +++++++++++++++++++----------
>  1 file changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index f6c24ded134c..cb6b96233967 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -687,23 +687,29 @@ static void __aer_print_error(struct pci_dev *dev,
>  {
>  	const char **strings;
>  	unsigned long status = info->status & ~info->mask;
> -	const char *level, *errmsg;
>  	int i;
>  
>  	if (info->severity == AER_CORRECTABLE) {
>  		strings = aer_correctable_error_string;
> -		level = KERN_WARNING;
> +		pci_info(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
> +			info->status, info->mask);
>  	} else {
>  		strings = aer_uncorrectable_error_string;
> -		level = KERN_ERR;
> +		pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
> +			info->status, info->mask);
>  	}
>  
>  	for_each_set_bit(i, &status, 32) {
> -		errmsg = strings[i];
> +		const char *errmsg = strings[i];
> +
>  		if (!errmsg)
>  			errmsg = "Unknown Error Bit";
>  
> -		pci_printk(level, dev, "   [%2d] %-22s%s\n", i, errmsg,
> +		if (info->severity == AER_CORRECTABLE)
> +			pci_info(dev, "   [%2d] %-22s%s\n", i, errmsg,
> +				info->first_error == i ? " (First)" : "");
> +		else
> +			pci_err(dev, "   [%2d] %-22s%s\n", i, errmsg,
>  				info->first_error == i ? " (First)" : "");
>  	}
>  	pci_dev_aer_stats_incr(dev, info);
> @@ -724,7 +730,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>  	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
>  	agent = AER_GET_AGENT(info->severity, info->status);
>  
> -	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
> +	level = (info->severity == AER_CORRECTABLE) ? KERN_INFO : KERN_ERR;
>  
>  	pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
>  		   aer_error_severity_string[info->severity],
> @@ -797,14 +803,17 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity,
>  	info.mask = mask;
>  	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
>  
> -	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
>  	__aer_print_error(dev, &info);
> -	pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
> -		aer_error_layer[layer], aer_agent_string[agent]);
>  
> -	if (aer_severity != AER_CORRECTABLE)
> +	if (aer_severity == AER_CORRECTABLE) {
> +		pci_info(dev, "aer_layer=%s, aer_agent=%s\n",
> +			aer_error_layer[layer], aer_agent_string[agent]);
> +	} else {
> +		pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
> +			aer_error_layer[layer], aer_agent_string[agent]);
>  		pci_err(dev, "aer_uncor_severity: 0x%08x\n",
>  			aer->uncor_severity);
> +	}
>  
>  	if (tlp_header_valid)
>  		__print_tlp_header(dev, &aer->header_log);

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer


More information about the Linuxppc-dev mailing list