From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Subject: kernel/fork: Move task stack account to do_exit().
Date: Thu, 18 Nov 2021 15:34:50 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.16/older/patches-5.16-rc2-rt4.tar.xz

There is no need to perform the stack accounting of the outgoing task
in its final schedule() invocation which happens with disabled
preemption. The task is leaving, the resources will be freed and the
accounting can happen in do_exit() before the actual schedule()
invocation which frees the stack memory.

Move the accounting of the stack memory from release_task_stack() to
exit_task_stack_account(), which can then be invoked from do_exit().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lore.kernel.org/r/20211118143452.136421-7-bigeasy@linutronix.de
---
 include/linux/sched/task_stack.h |    2 ++
 kernel/exit.c                    |    1 +
 kernel/fork.c                    |   35 +++++++++++++++++++++++------------
 3 files changed, 26 insertions(+), 12 deletions(-)

--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -79,6 +79,8 @@ static inline void *try_get_task_stack(s
 static inline void put_task_stack(struct task_struct *tsk) {}
 #endif
 
+void exit_task_stack_account(struct task_struct *tsk);
+
 #define task_stack_end_corrupted(task) \
 	(*(end_of_stack(task)) != STACK_END_MAGIC)
 
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -871,6 +871,7 @@ void __noreturn do_exit(long code)
 		put_page(tsk->task_frag.page);
 
 	validate_creds_for_do_exit(tsk);
+	exit_task_stack_account(tsk);
 
 	check_stack_usage();
 	preempt_disable();
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -210,9 +210,8 @@ static int free_vm_stack_cache(unsigned
 	return 0;
 }
 
-static int memcg_charge_kernel_stack(struct task_struct *tsk)
+static int memcg_charge_kernel_stack(struct vm_struct *vm)
 {
-	struct vm_struct *vm = task_stack_vm_area(tsk);
 	int i;
 	int ret;
 
@@ -238,6 +237,7 @@ static int memcg_charge_kernel_stack(str
 
 static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
+	struct vm_struct *vm;
 	void *stack;
 	int i;
 
@@ -255,7 +255,7 @@ static int alloc_thread_stack_node(struc
 		/* Clear stale pointers from reused stack. */
 		memset(s->addr, 0, THREAD_SIZE);
 
-		if (memcg_charge_kernel_stack(tsk)) {
+		if (memcg_charge_kernel_stack(s)) {
 			vfree(s->addr);
 			return -ENOMEM;
 		}
@@ -278,7 +278,8 @@ static int alloc_thread_stack_node(struc
 	if (!stack)
 		return -ENOMEM;
 
-	if (memcg_charge_kernel_stack(tsk)) {
+	vm = find_vm_area(stack);
+	if (memcg_charge_kernel_stack(vm)) {
 		vfree(stack);
 		return -ENOMEM;
 	}
@@ -287,19 +288,15 @@ static int alloc_thread_stack_node(struc
 	 * free_thread_stack() can be called in interrupt context,
 	 * so cache the vm_struct.
 	 */
-	tsk->stack_vm_area = find_vm_area(stack);
+	tsk->stack_vm_area = vm;
 	tsk->stack = stack;
 	return 0;
 }
 
 static void free_thread_stack(struct task_struct *tsk)
 {
-	struct vm_struct *vm = task_stack_vm_area(tsk);
 	int i;
 
-	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
-		memcg_kmem_uncharge_page(vm->pages[i], 0);
-
 	for (i = 0; i < NR_CACHED_STACKS; i++) {
 		if (this_cpu_cmpxchg(cached_stacks[i], NULL,
 				     tsk->stack_vm_area) != NULL)
@@ -450,12 +447,25 @@ static void account_kernel_stack(struct
 	}
 }
 
+void exit_task_stack_account(struct task_struct *tsk)
+{
+	account_kernel_stack(tsk, -1);
+
+	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+		struct vm_struct *vm;
+		int i;
+
+		vm = task_stack_vm_area(tsk);
+		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+			memcg_kmem_uncharge_page(vm->pages[i], 0);
+	}
+}
+
 static void release_task_stack(struct task_struct *tsk)
 {
 	if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
 		return;  /* Better to leak the stack than to free prematurely */
 
-	account_kernel_stack(tsk, -1);
 	free_thread_stack(tsk);
 }
 
@@ -916,6 +926,7 @@ static struct task_struct *dup_task_stru
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	refcount_set(&tsk->stack_refcount, 1);
 #endif
+	account_kernel_stack(tsk, 1);
 
 	err = scs_prepare(tsk, node);
 	if (err)
@@ -959,8 +970,6 @@ static struct task_struct *dup_task_stru
 	tsk->wake_q.next = NULL;
 	tsk->pf_io_worker = NULL;
 
-	account_kernel_stack(tsk, 1);
-
 	kcov_task_init(tsk);
 	kmap_local_fork(tsk);
 
@@ -979,6 +988,7 @@ static struct task_struct *dup_task_stru
 	return tsk;
 
 free_stack:
+	exit_task_stack_account(tsk);
 	free_thread_stack(tsk);
 free_tsk:
 	free_task_struct(tsk);
@@ -2475,6 +2485,7 @@ static __latent_entropy struct task_stru
 	exit_creds(p);
 bad_fork_free:
 	WRITE_ONCE(p->__state, TASK_DEAD);
+	exit_task_stack_account(p);
 	put_task_stack(p);
 	delayed_free_task(p);
 fork_out: