From 5f2809e69c7128f86316048221cf45146f69a4a0 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:05 -0700 Subject: bootmem: clean up alloc_bootmem_core alloc_bootmem_core has become quite nasty to read over time. This is a clean rewrite that keeps the semantics. bdata->last_pos has been dropped. bdata->last_success has been renamed to hint_idx and it is now an index relative to the node's range. Since further block searching might start at this index, it is now set to the end of a succeeded allocation rather than its beginning. bdata->last_offset has been renamed to last_end_off to be more clear that it represents the ending address of the last allocation relative to the node. [y-goto@jp.fujitsu.com: fix new alloc_bootmem_core()] Signed-off-by: Johannes Weiner Signed-off-by: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 212 +++++++++++++++++++++-------------------------------------- 1 file changed, 76 insertions(+), 136 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 300d126ec53..94ea612decc 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -242,8 +242,9 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, * considered reserved. */ - if (addr >= bdata->node_boot_start && addr < bdata->last_success) - bdata->last_success = addr; + if (addr >= bdata->node_boot_start && + PFN_DOWN(addr - bdata->node_boot_start) < bdata->hint_idx) + bdata->hint_idx = PFN_DOWN(addr - bdata->node_boot_start); /* * Round up to index to the range. @@ -431,36 +432,16 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size, } #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -/* - * We 'merge' subsequent allocations to save space. We might 'lose' - * some fraction of a page if allocations cannot be satisfied due to - * size constraints on boxes where there is physical RAM space - * fragmentation - in these cases (mostly large memory boxes) this - * is not a problem. - * - * On low memory boxes we get it right in 100% of the cases. - * - * alignment has to be a power of 2 value. - * - * NOTE: This function is _not_ reentrant. - */ -static void * __init -alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, - unsigned long align, unsigned long goal, unsigned long limit) +static void * __init alloc_bootmem_core(struct bootmem_data *bdata, + unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) { - unsigned long areasize, preferred; - unsigned long i, start = 0, incr, eidx, end_pfn; - void *ret; - unsigned long node_boot_start; - void *node_bootmem_map; - - if (!size) { - printk("alloc_bootmem_core(): zero-sized request\n"); - BUG(); - } - BUG_ON(align & (align-1)); + unsigned long min, max, start, sidx, midx, step; + + BUG_ON(!size); + BUG_ON(align & (align - 1)); + BUG_ON(limit && goal + size > limit); - /* on nodes without memory - bootmem_map is NULL */ if (!bdata->node_bootmem_map) return NULL; @@ -468,126 +449,85 @@ alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, align, goal, limit); - /* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */ - node_boot_start = bdata->node_boot_start; - node_bootmem_map = bdata->node_bootmem_map; - if (align) { - node_boot_start = ALIGN(bdata->node_boot_start, align); - if (node_boot_start > bdata->node_boot_start) - node_bootmem_map = (unsigned long *)bdata->node_bootmem_map + - PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG; - } + min = PFN_DOWN(bdata->node_boot_start); + max = bdata->node_low_pfn; - if (limit && node_boot_start >= limit) + goal >>= PAGE_SHIFT; + limit >>= PAGE_SHIFT; + + if (limit && max > limit) + max = limit; + if (max <= min) return NULL; - end_pfn = bdata->node_low_pfn; - limit = PFN_DOWN(limit); - if (limit && end_pfn > limit) - end_pfn = limit; + step = max(align >> PAGE_SHIFT, 1UL); - eidx = end_pfn - PFN_DOWN(node_boot_start); + if (goal && min < goal && goal < max) + start = ALIGN(goal, step); + else + start = ALIGN(min, step); - /* - * We try to allocate bootmem pages above 'goal' - * first, then we try to allocate lower pages. - */ - preferred = 0; - if (goal && PFN_DOWN(goal) < end_pfn) { - if (goal > node_boot_start) - preferred = goal - node_boot_start; - - if (bdata->last_success > node_boot_start && - bdata->last_success - node_boot_start >= preferred) - if (!limit || (limit && limit > bdata->last_success)) - preferred = bdata->last_success - node_boot_start; - } + sidx = start - PFN_DOWN(bdata->node_boot_start); + midx = max - PFN_DOWN(bdata->node_boot_start); - preferred = PFN_DOWN(ALIGN(preferred, align)); - areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; - incr = align >> PAGE_SHIFT ? : 1; + if (bdata->hint_idx > sidx) { + /* Make sure we retry on failure */ + goal = 1; + sidx = ALIGN(bdata->hint_idx, step); + } -restart_scan: - for (i = preferred; i < eidx;) { - unsigned long j; + while (1) { + int merge; + void *region; + unsigned long eidx, i, start_off, end_off; +find_block: + sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx); + sidx = ALIGN(sidx, step); + eidx = sidx + PFN_UP(size); - i = find_next_zero_bit(node_bootmem_map, eidx, i); - i = ALIGN(i, incr); - if (i >= eidx) + if (sidx >= midx || eidx > midx) break; - if (test_bit(i, node_bootmem_map)) { - i += incr; - continue; - } - for (j = i + 1; j < i + areasize; ++j) { - if (j >= eidx) - goto fail_block; - if (test_bit(j, node_bootmem_map)) - goto fail_block; - } - start = i; - goto found; - fail_block: - i = ALIGN(j, incr); - if (i == j) - i += incr; - } - - if (preferred > 0) { - preferred = 0; - goto restart_scan; - } - return NULL; -found: - bdata->last_success = PFN_PHYS(start) + node_boot_start; - BUG_ON(start >= eidx); + for (i = sidx; i < eidx; i++) + if (test_bit(i, bdata->node_bootmem_map)) { + sidx = ALIGN(i, step); + if (sidx == i) + sidx += step; + goto find_block; + } - /* - * Is the next page of the previous allocation-end the start - * of this allocation's buffer? If yes then we can 'merge' - * the previous partial page with this allocation. - */ - if (align < PAGE_SIZE && - bdata->last_offset && bdata->last_pos+1 == start) { - unsigned long offset, remaining_size; - offset = ALIGN(bdata->last_offset, align); - BUG_ON(offset > PAGE_SIZE); - remaining_size = PAGE_SIZE - offset; - if (size < remaining_size) { - areasize = 0; - /* last_pos unchanged */ - bdata->last_offset = offset + size; - ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + - offset + node_boot_start); - } else { - remaining_size = size - remaining_size; - areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; - ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + - offset + node_boot_start); - bdata->last_pos = start + areasize - 1; - bdata->last_offset = remaining_size; - } - bdata->last_offset &= ~PAGE_MASK; - } else { - bdata->last_pos = start + areasize - 1; - bdata->last_offset = size & ~PAGE_MASK; - ret = phys_to_virt(start * PAGE_SIZE + node_boot_start); + if (bdata->last_end_off && + PFN_DOWN(bdata->last_end_off) + 1 == sidx) + start_off = ALIGN(bdata->last_end_off, align); + else + start_off = PFN_PHYS(sidx); + + merge = PFN_DOWN(start_off) < sidx; + end_off = start_off + size; + + bdata->last_end_off = end_off; + bdata->hint_idx = PFN_UP(end_off); + + /* + * Reserve the area now: + */ + for (i = PFN_DOWN(start_off) + merge; + i < PFN_UP(end_off); i++) + if (test_and_set_bit(i, bdata->node_bootmem_map)) + BUG(); + + region = phys_to_virt(bdata->node_boot_start + start_off); + memset(region, 0, size); + return region; } - bdebug("nid=%td start=%lx end=%lx\n", - bdata - bootmem_node_data, - start + PFN_DOWN(bdata->node_boot_start), - start + areasize + PFN_DOWN(bdata->node_boot_start)); + if (goal) { + goal = 0; + sidx = 0; + goto find_block; + } - /* - * Reserve the area now: - */ - for (i = start; i < start + areasize; i++) - if (unlikely(test_and_set_bit(i, node_bootmem_map))) - BUG(); - memset(ret, 0, size); - return ret; + return NULL; } /** -- cgit v1.2.3