Web lists-archives.org

Re: [PATCH 1/2] x86 boot: add E820_RESVD_KERN




On Mon, Jun 30, 2008 at 2:15 AM, Yinghai Lu <yhlu.kernel@xxxxxxxxx> wrote:
> On Mon, Jun 30, 2008 at 12:51 AM, Huang, Ying <ying.huang@xxxxxxxxx> wrote:
>> On Mon, 2008-06-30 at 00:34 -0700, Yinghai Lu wrote:
>>> On Mon, Jun 30, 2008 at 12:03 AM, Huang, Ying <ying.huang@xxxxxxxxx> wrote:
>>> > On Fri, 2008-06-27 at 15:05 -0700, Yinghai Lu wrote:
>>> >> On Thu, Jun 26, 2008 at 7:48 PM, Huang, Ying <ying.huang@xxxxxxxxx> wrote:
>>> >> > On Thu, 2008-06-26 at 19:22 -0700, Yinghai Lu wrote:
>>> >> >> On Thu, Jun 26, 2008 at 2:47 AM, Yinghai Lu <yhlu.kernel@xxxxxxxxx> wrote:
>>> >> >> > On Thu, Jun 26, 2008 at 12:48 AM, Huang, Ying <ying.huang@xxxxxxxxx> wrote:
>>> >> >> >> On Thu, 2008-06-26 at 00:25 -0700, Yinghai Lu wrote:
>>> >> >> >> [...]
>>> >> >> >>> >                if (pfn >= limit_pfn)
>>> >> >> >>> > @@ -977,7 +978,7 @@ u64 __init early_reserve_e820(u64 startt
>>> >> >> >>> >                return 0;
>>> >> >> >>> >
>>> >> >> >>> >        addr = round_down(start + size - sizet, align);
>>> >> >> >>> > -       e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
>>> >> >> >>> > +       e820_update_range(addr, sizet, E820_RAM, E820_RESVD_KERN);
>>> >> >> >>>
>>> >> >> >>> this line is not needed.
>>> >> >> >>
>>> >> >> >> Why? Memory reserved by early_rserved_e820 should not be saved during
>>> >> >> >> hibernation? shoudl not be saved by kdump?
>>> >> >> >>
>>> >> >
>>> >> > Can you tell me why this line is not needed?
>>> >> >
>>> >> > [...]
>>> >> >> some like the attach patch...
>>> >> >>
>>> >> >> you still can merge parse_setup_data parse_e820_ext
>>> >> >> also entries in parse_e820_ext is not initialized..., __copy_e820_map
>>> >> >> will do nothing.
>>> >> >
>>> >> > OK. Because some E820 entries are available after parse_setup_data(),
>>> >> > it is better to call reserve_setup_data() after calling
>>> >> > parse_setup_data() if update_e820_range() is used instead of
>>> >> > reserve_early().
>>> >>
>>> >> please modify it and test on your platforms then submit to Ingo..
>>> >
>>> > It seems that there is an issue:
>>> >
>>> > - If parse_setup_data() is called before reserve_setup_data(), and there
>>> > is a conflict between memory area used by setup_data and other memory
>>> > area, it is possible that the contents of setup_data is changed. So that
>>> > system may panic before reporting memory area conflict. And it seems
>>> > that memory area conflict is not checked by e820_update_range().
>>>
>>> what is "other memory area"? returned from find_e820_area? no one use that yet.
>>
>> I mean memory area reserved with reserved_early() or e820_update_range()
>> before reserve_setup_data() is called.
>
> before parse_setup_data, reserve_early is called for
> 1. kernel text/data/bss + initial pgt
> 2. ramdisk
> 3. ebda
> e820_update_range is not called.
> at this time early_res have RAM reserved by kernel.
>
> then setup_memory_map is called, so e820 have some ranges...directly
> from e820 table
>
> next need to call parse_setup_data
> it will add some entries in e820
>
> then reserve_setup_data is called, it will use e820_update_range to
> reserve setup_data itself directly in e820
>
> ...
>
>>
>> And because there is no conflict check in e820_update_range(), what to
>> deal with potential conflict between setup_data and other memory area
>> regardless which one is reserved earlier?
>
> find_e820_area will make sure it only find ram from e820 and it is not
> conflict with early_res
>
>
>>
>>
>> Another issue related:
>>
>> Because some memmap entries are available via extended E820 memmap
>> (SETUP_E820_EXT), it is not strictly safe to use e820_update_range()
>> between setup_memory_map() and parse_setup_data(). It may be better to
>> parse extended E820 memmap right after setup_memory_map().
>
> will look at the code again..

Ying,

please check attach the v2 patch...
remove one left over reserve_setup_data for 32 bit

YH
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ba5ac88..e03b89a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -120,6 +120,7 @@ void __init e820_print_map(char *who)
 		       (e820.map[i].addr + e820.map[i].size));
 		switch (e820.map[i].type) {
 		case E820_RAM:
+		case E820_RESERVED_KERN:
 			printk(KERN_CONT "(usable)\n");
 			break;
 		case E820_RESERVED:
@@ -611,7 +612,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
 			register_nosave_region(pfn, PFN_UP(ei->addr));
 
 		pfn = PFN_DOWN(ei->addr + ei->size);
-		if (ei->type != E820_RAM)
+		if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
 			register_nosave_region(PFN_UP(ei->addr), pfn);
 
 		if (pfn >= limit_pfn)
@@ -1207,6 +1208,7 @@ void __init e820_reserve_resources(void)
 	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
 	for (i = 0; i < e820.nr_map; i++) {
 		switch (e820.map[i].type) {
+		case E820_RESERVED_KERN:
 		case E820_RAM:	res->name = "System RAM"; break;
 		case E820_ACPI:	res->name = "ACPI Tables"; break;
 		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index a6816be..3e66bd3 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -53,21 +53,3 @@ void __init reserve_ebda_region(void)
 	/* reserve all memory between lowmem and the 1MB mark */
 	reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
 }
-
-void __init reserve_setup_data(void)
-{
-	struct setup_data *data;
-	u64 pa_data;
-	char buf[32];
-
-	if (boot_params.hdr.version < 0x0209)
-		return;
-	pa_data = boot_params.hdr.setup_data;
-	while (pa_data) {
-		data = early_ioremap(pa_data, sizeof(*data));
-		sprintf(buf, "setup data %x", data->type);
-		reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
-		pa_data = data->next;
-		early_iounmap(data, sizeof(*data));
-	}
-}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index c970929..2ff9bc6 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -123,7 +123,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
 #endif
 
 	reserve_ebda_region();
-	reserve_setup_data();
 
 	/*
 	 * At this point everything still needed from the boot loader
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index fb318ed..cf03401 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -389,14 +389,33 @@ static void __init parse_setup_data(void)
 		default:
 			break;
 		}
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
-		free_early(pa_data, pa_data+sizeof(*data)+data->len);
-#endif
 		pa_data = data->next;
 		early_iounmap(data, PAGE_SIZE);
 	}
 }
 
+static void __init reserve_setup_data(void)
+{
+	struct setup_data *data;
+	u64 pa_data;
+	char buf[32];
+
+	if (boot_params.hdr.version < 0x0209)
+		return;
+	pa_data = boot_params.hdr.setup_data;
+	while (pa_data) {
+		data = early_ioremap(pa_data, sizeof(*data));
+		sprintf(buf, "setup data %x", data->type);
+		e820_update_range(pa_data, sizeof(*data)+data->len,
+			 E820_RAM, E820_RESERVED_KERN);
+		pa_data = data->next;
+		early_iounmap(data, sizeof(*data));
+	}
+	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+	printk(KERN_INFO "extended physical RAM map:\n");
+	e820_print_map("extended");
+}
+
 /*
  * --------- Crashkernel reservation ------------------------------
  */
@@ -524,7 +543,6 @@ void __init setup_arch(char **cmdline_p)
 	pre_setup_arch_hook();
 	early_cpu_init();
 	early_ioremap_init();
-	reserve_setup_data();
 #else
 	printk(KERN_INFO "Command line: %s\n", boot_command_line);
 #endif
@@ -566,6 +584,9 @@ void __init setup_arch(char **cmdline_p)
 	ARCH_SETUP
 
 	setup_memory_map();
+	parse_setup_data();
+	reserve_setup_data();
+
 	copy_edd();
 
 	if (!boot_params.hdr.root_flags)
@@ -592,8 +613,6 @@ void __init setup_arch(char **cmdline_p)
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
-	parse_setup_data();
-
 	parse_early_param();
 
 	if (acpi_mps_check()) {
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index 6eeba3b..ae22bdf 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -108,6 +108,4 @@ struct boot_params {
 	__u8  _pad9[276];				/* 0xeec */
 } __attribute__((packed));
 
-void reserve_setup_data(void);
-
 #endif /* _ASM_BOOTPARAM_H */
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index f622685..45e904f 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -44,6 +44,9 @@
 #define E820_ACPI	3
 #define E820_NVS	4
 
+/* reserved RAM used by kernel itself */
+#define E820_RESERVED_KERN        128
+
 #ifndef __ASSEMBLY__
 struct e820entry {
 	__u64 addr;	/* start of memory segment */