/* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define LOG_TAG "lowmemorykiller" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef LMKD_LOG_STATS #include "statslog.h" #endif /* * Define LMKD_TRACE_KILLS to record lmkd kills in kernel traces * to profile and correlate with OOM kills */ #ifdef LMKD_TRACE_KILLS #define ATRACE_TAG ATRACE_TAG_ALWAYS #include #define TRACE_KILL_START(pid) ATRACE_INT(__FUNCTION__, pid); #define TRACE_KILL_END() ATRACE_INT(__FUNCTION__, 0); #else /* LMKD_TRACE_KILLS */ #define TRACE_KILL_START(pid) ((void)(pid)) #define TRACE_KILL_END() ((void)0) #endif /* LMKD_TRACE_KILLS */ #ifndef __unused #define __unused __attribute__((__unused__)) #endif #define MEMCG_SYSFS_PATH "/dev/memcg/" #define MEMCG_MEMORY_USAGE "/dev/memcg/memory.usage_in_bytes" #define MEMCG_MEMORYSW_USAGE "/dev/memcg/memory.memsw.usage_in_bytes" #define ZONEINFO_PATH "/proc/zoneinfo" #define MEMINFO_PATH "/proc/meminfo" #define LINE_MAX 128 /* gid containing AID_SYSTEM required */ #define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree" #define INKERNEL_ADJ_PATH "/sys/module/lowmemorykiller/parameters/adj" #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) #define EIGHT_MEGA (1 << 23) /* Defined as ProcessList.SYSTEM_ADJ in ProcessList.java */ #define SYSTEM_ADJ (-900) /* default to old in-kernel interface if no memory pressure events */ static bool use_inkernel_interface = true; static bool has_inkernel_module; /* memory pressure levels */ enum vmpressure_level { VMPRESS_LEVEL_LOW = 0, VMPRESS_LEVEL_MEDIUM, VMPRESS_LEVEL_CRITICAL, VMPRESS_LEVEL_COUNT }; static const char *level_name[] = { "low", "medium", "critical" }; struct { int64_t min_nr_free_pages; /* recorded but not used yet */ int64_t max_nr_free_pages; } low_pressure_mem = { -1, -1 }; static int level_oomadj[VMPRESS_LEVEL_COUNT]; static int mpevfd[VMPRESS_LEVEL_COUNT] = { -1, -1, -1 }; static bool debug_process_killing; static bool enable_pressure_upgrade; static int64_t upgrade_pressure; static int64_t downgrade_pressure; static bool low_ram_device; static bool kill_heaviest_task; static unsigned long kill_timeout_ms; static bool use_minfree_levels; /* data required to handle events */ struct event_handler_info { int data; void (*handler)(int data, uint32_t events); }; /* data required to handle socket events */ struct sock_event_handler_info { int sock; struct event_handler_info handler_info; }; /* max supported number of data connections */ #define MAX_DATA_CONN 2 /* socket event handler data */ static struct sock_event_handler_info ctrl_sock; static struct sock_event_handler_info data_sock[MAX_DATA_CONN]; /* vmpressure event handler data */ static struct event_handler_info vmpressure_hinfo[VMPRESS_LEVEL_COUNT]; /* 3 memory pressure levels, 1 ctrl listen socket, 2 ctrl data socket */ #define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT) static int epollfd; static int maxevents; /* OOM score values used by both kernel and framework */ #define OOM_SCORE_ADJ_MIN (-1000) #define OOM_SCORE_ADJ_MAX 1000 static int lowmem_adj[MAX_TARGETS]; static int lowmem_minfree[MAX_TARGETS]; static int lowmem_targets_size; /* Fields to parse in /proc/zoneinfo */ enum zoneinfo_field { ZI_NR_FREE_PAGES = 0, ZI_NR_FILE_PAGES, ZI_NR_SHMEM, ZI_NR_UNEVICTABLE, ZI_WORKINGSET_REFAULT, ZI_HIGH, ZI_FIELD_COUNT }; static const char* const zoneinfo_field_names[ZI_FIELD_COUNT] = { "nr_free_pages", "nr_file_pages", "nr_shmem", "nr_unevictable", "workingset_refault", "high", }; union zoneinfo { struct { int64_t nr_free_pages; int64_t nr_file_pages; int64_t nr_shmem; int64_t nr_unevictable; int64_t workingset_refault; int64_t high; /* fields below are calculated rather than read from the file */ int64_t totalreserve_pages; } field; int64_t arr[ZI_FIELD_COUNT]; }; /* Fields to parse in /proc/meminfo */ enum meminfo_field { MI_NR_FREE_PAGES = 0, MI_CACHED, MI_SWAP_CACHED, MI_BUFFERS, MI_SHMEM, MI_UNEVICTABLE, MI_FREE_SWAP, MI_DIRTY, MI_FIELD_COUNT }; static const char* const meminfo_field_names[MI_FIELD_COUNT] = { "MemFree:", "Cached:", "SwapCached:", "Buffers:", "Shmem:", "Unevictable:", "SwapFree:", "Dirty:", }; union meminfo { struct { int64_t nr_free_pages; int64_t cached; int64_t swap_cached; int64_t buffers; int64_t shmem; int64_t unevictable; int64_t free_swap; int64_t dirty; /* fields below are calculated rather than read from the file */ int64_t nr_file_pages; } field; int64_t arr[MI_FIELD_COUNT]; }; enum field_match_result { NO_MATCH, PARSE_FAIL, PARSE_SUCCESS }; struct adjslot_list { struct adjslot_list *next; struct adjslot_list *prev; }; struct proc { struct adjslot_list asl; int pid; uid_t uid; int oomadj; struct proc *pidhash_next; }; struct reread_data { const char* const filename; int fd; }; #ifdef LMKD_LOG_STATS static bool enable_stats_log; static android_log_context log_ctx; #endif #define PIDHASH_SZ 1024 static struct proc *pidhash[PIDHASH_SZ]; #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1)) #define ADJTOSLOT(adj) ((adj) + -OOM_SCORE_ADJ_MIN) static struct adjslot_list procadjslot_list[ADJTOSLOT(OOM_SCORE_ADJ_MAX) + 1]; /* PAGE_SIZE / 1024 */ static long page_k; static bool parse_int64(const char* str, int64_t* ret) { char* endptr; long long val = strtoll(str, &endptr, 10); if (str == endptr || val > INT64_MAX) { return false; } *ret = (int64_t)val; return true; } static enum field_match_result match_field(const char* cp, const char* ap, const char* const field_names[], int field_count, int64_t* field, int *field_idx) { int64_t val; int i; for (i = 0; i < field_count; i++) { if (!strcmp(cp, field_names[i])) { *field_idx = i; return parse_int64(ap, field) ? PARSE_SUCCESS : PARSE_FAIL; } } return NO_MATCH; } /* * Read file content from the beginning up to max_len bytes or EOF * whichever happens first. */ static ssize_t read_all(int fd, char *buf, size_t max_len) { ssize_t ret = 0; off_t offset = 0; while (max_len > 0) { ssize_t r = TEMP_FAILURE_RETRY(pread(fd, buf, max_len, offset)); if (r == 0) { break; } if (r == -1) { return -1; } ret += r; buf += r; offset += r; max_len -= r; } return ret; } /* * Read a new or already opened file from the beginning. * If the file has not been opened yet data->fd should be set to -1. * To be used with files which are read often and possibly during high * memory pressure to minimize file opening which by itself requires kernel * memory allocation and might result in a stall on memory stressed system. */ static int reread_file(struct reread_data *data, char *buf, size_t buf_size) { ssize_t size; if (data->fd == -1) { data->fd = open(data->filename, O_RDONLY | O_CLOEXEC); if (data->fd == -1) { ALOGE("%s open: %s", data->filename, strerror(errno)); return -1; } } size = read_all(data->fd, buf, buf_size - 1); if (size < 0) { ALOGE("%s read: %s", data->filename, strerror(errno)); close(data->fd); data->fd = -1; return -1; } ALOG_ASSERT((size_t)size < buf_size - 1, data->filename " too large"); buf[size] = 0; return 0; } static struct proc *pid_lookup(int pid) { struct proc *procp; for (procp = pidhash[pid_hashfn(pid)]; procp && procp->pid != pid; procp = procp->pidhash_next) ; return procp; } static void adjslot_insert(struct adjslot_list *head, struct adjslot_list *new) { struct adjslot_list *next = head->next; new->prev = head; new->next = next; next->prev = new; head->next = new; } static void adjslot_remove(struct adjslot_list *old) { struct adjslot_list *prev = old->prev; struct adjslot_list *next = old->next; next->prev = prev; prev->next = next; } static struct adjslot_list *adjslot_tail(struct adjslot_list *head) { struct adjslot_list *asl = head->prev; return asl == head ? NULL : asl; } static void proc_slot(struct proc *procp) { int adjslot = ADJTOSLOT(procp->oomadj); adjslot_insert(&procadjslot_list[adjslot], &procp->asl); } static void proc_unslot(struct proc *procp) { adjslot_remove(&procp->asl); } static void proc_insert(struct proc *procp) { int hval = pid_hashfn(procp->pid); procp->pidhash_next = pidhash[hval]; pidhash[hval] = procp; proc_slot(procp); } static int pid_remove(int pid) { int hval = pid_hashfn(pid); struct proc *procp; struct proc *prevp; for (procp = pidhash[hval], prevp = NULL; procp && procp->pid != pid; procp = procp->pidhash_next) prevp = procp; if (!procp) return -1; if (!prevp) pidhash[hval] = procp->pidhash_next; else prevp->pidhash_next = procp->pidhash_next; proc_unslot(procp); free(procp); return 0; } /* * Write a string to a file. * Returns false if the file does not exist. */ static bool writefilestring(const char *path, const char *s, bool err_if_missing) { int fd = open(path, O_WRONLY | O_CLOEXEC); ssize_t len = strlen(s); ssize_t ret; if (fd < 0) { if (err_if_missing) { ALOGE("Error opening %s; errno=%d", path, errno); } return false; } ret = TEMP_FAILURE_RETRY(write(fd, s, len)); if (ret < 0) { ALOGE("Error writing %s; errno=%d", path, errno); } else if (ret < len) { ALOGE("Short write on %s; length=%zd", path, ret); } close(fd); return true; } static void cmd_procprio(LMKD_CTRL_PACKET packet) { struct proc *procp; char path[80]; char val[20]; int soft_limit_mult; struct lmk_procprio params; bool is_system_server; struct passwd *pwdrec; lmkd_pack_get_procprio(packet, ¶ms); if (params.oomadj < OOM_SCORE_ADJ_MIN || params.oomadj > OOM_SCORE_ADJ_MAX) { ALOGE("Invalid PROCPRIO oomadj argument %d", params.oomadj); return; } /* gid containing AID_READPROC required */ /* CAP_SYS_RESOURCE required */ /* CAP_DAC_OVERRIDE required */ snprintf(path, sizeof(path), "/proc/%d/oom_score_adj", params.pid); snprintf(val, sizeof(val), "%d", params.oomadj); if (!writefilestring(path, val, false)) { ALOGW("Failed to open %s; errno=%d: process %d might have been killed", path, errno, params.pid); /* If this file does not exist the process is dead. */ return; } if (use_inkernel_interface) { return; } if (params.oomadj >= 900) { soft_limit_mult = 0; } else if (params.oomadj >= 800) { soft_limit_mult = 0; } else if (params.oomadj >= 700) { soft_limit_mult = 0; } else if (params.oomadj >= 600) { // Launcher should be perceptible, don't kill it. params.oomadj = 200; soft_limit_mult = 1; } else if (params.oomadj >= 500) { soft_limit_mult = 0; } else if (params.oomadj >= 400) { soft_limit_mult = 0; } else if (params.oomadj >= 300) { soft_limit_mult = 1; } else if (params.oomadj >= 200) { soft_limit_mult = 2; } else if (params.oomadj >= 100) { soft_limit_mult = 10; } else if (params.oomadj >= 0) { soft_limit_mult = 20; } else { // Persistent processes will have a large // soft limit 512MB. soft_limit_mult = 64; } snprintf(path, sizeof(path), MEMCG_SYSFS_PATH "apps/uid_%d/pid_%d/memory.soft_limit_in_bytes", params.uid, params.pid); snprintf(val, sizeof(val), "%d", soft_limit_mult * EIGHT_MEGA); /* * system_server process has no memcg under /dev/memcg/apps but should be * registered with lmkd. This is the best way so far to identify it. */ is_system_server = (params.oomadj == SYSTEM_ADJ && (pwdrec = getpwnam("system")) != NULL && params.uid == pwdrec->pw_uid); writefilestring(path, val, !is_system_server); procp = pid_lookup(params.pid); if (!procp) { procp = malloc(sizeof(struct proc)); if (!procp) { // Oh, the irony. May need to rebuild our state. return; } procp->pid = params.pid; procp->uid = params.uid; procp->oomadj = params.oomadj; proc_insert(procp); } else { proc_unslot(procp); procp->oomadj = params.oomadj; proc_slot(procp); } } static void cmd_procremove(LMKD_CTRL_PACKET packet) { struct lmk_procremove params; if (use_inkernel_interface) { return; } lmkd_pack_get_procremove(packet, ¶ms); pid_remove(params.pid); } static void cmd_target(int ntargets, LMKD_CTRL_PACKET packet) { int i; struct lmk_target target; if (ntargets > (int)ARRAY_SIZE(lowmem_adj)) return; for (i = 0; i < ntargets; i++) { lmkd_pack_get_target(packet, i, &target); lowmem_minfree[i] = target.minfree; lowmem_adj[i] = target.oom_adj_score; } lowmem_targets_size = ntargets; if (has_inkernel_module) { char minfreestr[128]; char killpriostr[128]; minfreestr[0] = '\0'; killpriostr[0] = '\0'; for (i = 0; i < lowmem_targets_size; i++) { char val[40]; if (i) { strlcat(minfreestr, ",", sizeof(minfreestr)); strlcat(killpriostr, ",", sizeof(killpriostr)); } snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_minfree[i] : 0); strlcat(minfreestr, val, sizeof(minfreestr)); snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_adj[i] : 0); strlcat(killpriostr, val, sizeof(killpriostr)); } writefilestring(INKERNEL_MINFREE_PATH, minfreestr, true); writefilestring(INKERNEL_ADJ_PATH, killpriostr, true); } } static void ctrl_data_close(int dsock_idx) { struct epoll_event epev; ALOGI("closing lmkd data connection"); if (epoll_ctl(epollfd, EPOLL_CTL_DEL, data_sock[dsock_idx].sock, &epev) == -1) { // Log a warning and keep going ALOGW("epoll_ctl for data connection socket failed; errno=%d", errno); } maxevents--; close(data_sock[dsock_idx].sock); data_sock[dsock_idx].sock = -1; } static int ctrl_data_read(int dsock_idx, char *buf, size_t bufsz) { int ret = 0; ret = TEMP_FAILURE_RETRY(read(data_sock[dsock_idx].sock, buf, bufsz)); if (ret == -1) { ALOGE("control data socket read failed; errno=%d", errno); } else if (ret == 0) { ALOGE("Got EOF on control data socket"); ret = -1; } return ret; } static void ctrl_command_handler(int dsock_idx) { LMKD_CTRL_PACKET packet; int len; enum lmk_cmd cmd; int nargs; int targets; len = ctrl_data_read(dsock_idx, (char *)packet, CTRL_PACKET_MAX_SIZE); if (len <= 0) return; if (len < (int)sizeof(int)) { ALOGE("Wrong control socket read length len=%d", len); return; } cmd = lmkd_pack_get_cmd(packet); nargs = len / sizeof(int) - 1; if (nargs < 0) goto wronglen; switch(cmd) { case LMK_TARGET: targets = nargs / 2; if (nargs & 0x1 || targets > (int)ARRAY_SIZE(lowmem_adj)) goto wronglen; cmd_target(targets, packet); break; case LMK_PROCPRIO: if (nargs != 3) goto wronglen; cmd_procprio(packet); break; case LMK_PROCREMOVE: if (nargs != 1) goto wronglen; cmd_procremove(packet); break; default: ALOGE("Received unknown command code %d", cmd); return; } return; wronglen: ALOGE("Wrong control socket read length cmd=%d len=%d", cmd, len); } static void ctrl_data_handler(int data, uint32_t events) { if (events & EPOLLIN) { ctrl_command_handler(data); } } static int get_free_dsock() { for (int i = 0; i < MAX_DATA_CONN; i++) { if (data_sock[i].sock < 0) { return i; } } return -1; } static void ctrl_connect_handler(int data __unused, uint32_t events __unused) { struct epoll_event epev; int free_dscock_idx = get_free_dsock(); if (free_dscock_idx < 0) { /* * Number of data connections exceeded max supported. This should not * happen but if it does we drop all existing connections and accept * the new one. This prevents inactive connections from monopolizing * data socket and if we drop ActivityManager connection it will * immediately reconnect. */ for (int i = 0; i < MAX_DATA_CONN; i++) { ctrl_data_close(i); } free_dscock_idx = 0; } data_sock[free_dscock_idx].sock = accept(ctrl_sock.sock, NULL, NULL); if (data_sock[free_dscock_idx].sock < 0) { ALOGE("lmkd control socket accept failed; errno=%d", errno); return; } ALOGI("lmkd data connection established"); /* use data to store data connection idx */ data_sock[free_dscock_idx].handler_info.data = free_dscock_idx; data_sock[free_dscock_idx].handler_info.handler = ctrl_data_handler; epev.events = EPOLLIN; epev.data.ptr = (void *)&(data_sock[free_dscock_idx].handler_info); if (epoll_ctl(epollfd, EPOLL_CTL_ADD, data_sock[free_dscock_idx].sock, &epev) == -1) { ALOGE("epoll_ctl for data connection socket failed; errno=%d", errno); ctrl_data_close(free_dscock_idx); return; } maxevents++; } #ifdef LMKD_LOG_STATS static void memory_stat_parse_line(char *line, struct memory_stat *mem_st) { char key[LINE_MAX]; int64_t value; sscanf(line,"%s %" SCNd64 "", key, &value); if (strcmp(key, "total_") < 0) { return; } if (!strcmp(key, "total_pgfault")) mem_st->pgfault = value; else if (!strcmp(key, "total_pgmajfault")) mem_st->pgmajfault = value; else if (!strcmp(key, "total_rss")) mem_st->rss_in_bytes = value; else if (!strcmp(key, "total_cache")) mem_st->cache_in_bytes = value; else if (!strcmp(key, "total_swap")) mem_st->swap_in_bytes = value; } static int memory_stat_parse(struct memory_stat *mem_st, int pid, uid_t uid) { FILE *fp; char buf[PATH_MAX]; snprintf(buf, sizeof(buf), MEMCG_PROCESS_MEMORY_STAT_PATH, uid, pid); fp = fopen(buf, "r"); if (fp == NULL) { ALOGE("%s open failed: %s", buf, strerror(errno)); return -1; } while (fgets(buf, PAGE_SIZE, fp) != NULL ) { memory_stat_parse_line(buf, mem_st); } fclose(fp); return 0; } #endif /* /prop/zoneinfo parsing routines */ static int64_t zoneinfo_parse_protection(char *cp) { int64_t max = 0; long long zoneval; char *save_ptr; for (cp = strtok_r(cp, "(), ", &save_ptr); cp; cp = strtok_r(NULL, "), ", &save_ptr)) { zoneval = strtoll(cp, &cp, 0); if (zoneval > max) { max = (zoneval > INT64_MAX) ? INT64_MAX : zoneval; } } return max; } static bool zoneinfo_parse_line(char *line, union zoneinfo *zi) { char *cp = line; char *ap; char *save_ptr; int64_t val; int field_idx; cp = strtok_r(line, " ", &save_ptr); if (!cp) { return true; } if (!strcmp(cp, "protection:")) { ap = strtok_r(NULL, ")", &save_ptr); } else { ap = strtok_r(NULL, " ", &save_ptr); } if (!ap) { return true; } switch (match_field(cp, ap, zoneinfo_field_names, ZI_FIELD_COUNT, &val, &field_idx)) { case (PARSE_SUCCESS): zi->arr[field_idx] += val; break; case (NO_MATCH): if (!strcmp(cp, "protection:")) { zi->field.totalreserve_pages += zoneinfo_parse_protection(ap); } break; case (PARSE_FAIL): default: return false; } return true; } static int zoneinfo_parse(union zoneinfo *zi) { static struct reread_data file_data = { .filename = ZONEINFO_PATH, .fd = -1, }; char buf[PAGE_SIZE]; char *save_ptr; char *line; memset(zi, 0, sizeof(union zoneinfo)); if (reread_file(&file_data, buf, sizeof(buf)) < 0) { return -1; } for (line = strtok_r(buf, "\n", &save_ptr); line; line = strtok_r(NULL, "\n", &save_ptr)) { if (!zoneinfo_parse_line(line, zi)) { ALOGE("%s parse error", file_data.filename); return -1; } } zi->field.totalreserve_pages += zi->field.high; return 0; } /* /prop/meminfo parsing routines */ static bool meminfo_parse_line(char *line, union meminfo *mi) { char *cp = line; char *ap; char *save_ptr; int64_t val; int field_idx; enum field_match_result match_res; cp = strtok_r(line, " ", &save_ptr); if (!cp) { return false; } ap = strtok_r(NULL, " ", &save_ptr); if (!ap) { return false; } match_res = match_field(cp, ap, meminfo_field_names, MI_FIELD_COUNT, &val, &field_idx); if (match_res == PARSE_SUCCESS) { mi->arr[field_idx] = val / page_k; } return (match_res != PARSE_FAIL); } static int meminfo_parse(union meminfo *mi) { static struct reread_data file_data = { .filename = MEMINFO_PATH, .fd = -1, }; char buf[PAGE_SIZE]; char *save_ptr; char *line; memset(mi, 0, sizeof(union meminfo)); if (reread_file(&file_data, buf, sizeof(buf)) < 0) { return -1; } for (line = strtok_r(buf, "\n", &save_ptr); line; line = strtok_r(NULL, "\n", &save_ptr)) { if (!meminfo_parse_line(line, mi)) { ALOGE("%s parse error", file_data.filename); return -1; } } mi->field.nr_file_pages = mi->field.cached + mi->field.swap_cached + mi->field.buffers; return 0; } static int proc_get_size(int pid) { char path[PATH_MAX]; char line[LINE_MAX]; int fd; int rss = 0; int total; ssize_t ret; /* gid containing AID_READPROC required */ snprintf(path, PATH_MAX, "/proc/%d/statm", pid); fd = open(path, O_RDONLY | O_CLOEXEC); if (fd == -1) return -1; ret = read_all(fd, line, sizeof(line) - 1); if (ret < 0) { close(fd); return -1; } sscanf(line, "%d %d ", &total, &rss); close(fd); return rss; } static char *proc_get_name(int pid) { char path[PATH_MAX]; static char line[LINE_MAX]; int fd; char *cp; ssize_t ret; /* gid containing AID_READPROC required */ snprintf(path, PATH_MAX, "/proc/%d/cmdline", pid); fd = open(path, O_RDONLY | O_CLOEXEC); if (fd == -1) return NULL; ret = read_all(fd, line, sizeof(line) - 1); close(fd); if (ret < 0) { return NULL; } cp = strchr(line, ' '); if (cp) *cp = '\0'; return line; } static struct proc *proc_adj_lru(int oomadj) { return (struct proc *)adjslot_tail(&procadjslot_list[ADJTOSLOT(oomadj)]); } static struct proc *proc_get_heaviest(int oomadj) { struct adjslot_list *head = &procadjslot_list[ADJTOSLOT(oomadj)]; struct adjslot_list *curr = head->next; struct proc *maxprocp = NULL; int maxsize = 0; while (curr != head) { int pid = ((struct proc *)curr)->pid; int tasksize = proc_get_size(pid); if (tasksize <= 0) { struct adjslot_list *next = curr->next; pid_remove(pid); curr = next; } else { if (tasksize > maxsize) { maxsize = tasksize; maxprocp = (struct proc *)curr; } curr = curr->next; } } return maxprocp; } /* Kill one process specified by procp. Returns the size of the process killed */ static int kill_one_process(struct proc* procp, int min_score_adj, enum vmpressure_level level) { int pid = procp->pid; uid_t uid = procp->uid; char *taskname; int tasksize; int r; #ifdef LMKD_LOG_STATS struct memory_stat mem_st = {}; int memory_stat_parse_result = -1; #endif taskname = proc_get_name(pid); if (!taskname) { pid_remove(pid); return -1; } tasksize = proc_get_size(pid); if (tasksize <= 0) { pid_remove(pid); return -1; } #ifdef LMKD_LOG_STATS if (enable_stats_log) { memory_stat_parse_result = memory_stat_parse(&mem_st, pid, uid); } #endif TRACE_KILL_START(pid); /* CAP_KILL required */ r = kill(pid, SIGKILL); ALOGI( "Killing '%s' (%d), uid %d, adj %d\n" " to free %ldkB because system is under %s memory pressure (min_oom_adj=%d)\n", taskname, pid, uid, procp->oomadj, tasksize * page_k, level_name[level], min_score_adj); pid_remove(pid); TRACE_KILL_END(); if (r) { ALOGE("kill(%d): errno=%d", pid, errno); return -1; } else { #ifdef LMKD_LOG_STATS if (memory_stat_parse_result == 0) { stats_write_lmk_kill_occurred(log_ctx, LMK_KILL_OCCURRED, uid, taskname, procp->oomadj, mem_st.pgfault, mem_st.pgmajfault, mem_st.rss_in_bytes, mem_st.cache_in_bytes, mem_st.swap_in_bytes); } #endif return tasksize; } return tasksize; } /* * Find processes to kill to free required number of pages. * If pages_to_free is set to 0 only one process will be killed. * Returns the size of the killed processes. */ static int find_and_kill_processes(enum vmpressure_level level, int min_score_adj, int pages_to_free) { int i; int killed_size; int pages_freed = 0; #ifdef LMKD_LOG_STATS if (enable_stats_log) { stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED, LMK_STATE_CHANGE_START); } #endif for (i = OOM_SCORE_ADJ_MAX; i >= min_score_adj; i--) { struct proc *procp; while (true) { procp = kill_heaviest_task ? proc_get_heaviest(i) : proc_adj_lru(i); if (!procp) break; killed_size = kill_one_process(procp, min_score_adj, level); if (killed_size >= 0) { pages_freed += killed_size; if (pages_freed >= pages_to_free) { #ifdef LMKD_LOG_STATS if (enable_stats_log) { stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED, LMK_STATE_CHANGE_STOP); } #endif return pages_freed; } } } } #ifdef LMKD_LOG_STATS if (enable_stats_log) { stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED, LMK_STATE_CHANGE_STOP); } #endif return pages_freed; } static int64_t get_memory_usage(struct reread_data *file_data) { int ret; int64_t mem_usage; char buf[32]; if (reread_file(file_data, buf, sizeof(buf)) < 0) { return -1; } if (!parse_int64(buf, &mem_usage)) { ALOGE("%s parse error", file_data->filename); return -1; } if (mem_usage == 0) { ALOGE("No memory!"); return -1; } return mem_usage; } void record_low_pressure_levels(union meminfo *mi) { if (low_pressure_mem.min_nr_free_pages == -1 || low_pressure_mem.min_nr_free_pages > mi->field.nr_free_pages) { if (debug_process_killing) { ALOGI("Low pressure min memory update from %" PRId64 " to %" PRId64, low_pressure_mem.min_nr_free_pages, mi->field.nr_free_pages); } low_pressure_mem.min_nr_free_pages = mi->field.nr_free_pages; } /* * Free memory at low vmpressure events occasionally gets spikes, * possibly a stale low vmpressure event with memory already * freed up (no memory pressure should have been reported). * Ignore large jumps in max_nr_free_pages that would mess up our stats. */ if (low_pressure_mem.max_nr_free_pages == -1 || (low_pressure_mem.max_nr_free_pages < mi->field.nr_free_pages && mi->field.nr_free_pages - low_pressure_mem.max_nr_free_pages < low_pressure_mem.max_nr_free_pages * 0.1)) { if (debug_process_killing) { ALOGI("Low pressure max memory update from %" PRId64 " to %" PRId64, low_pressure_mem.max_nr_free_pages, mi->field.nr_free_pages); } low_pressure_mem.max_nr_free_pages = mi->field.nr_free_pages; } } enum vmpressure_level upgrade_level(enum vmpressure_level level) { return (enum vmpressure_level)((level < VMPRESS_LEVEL_CRITICAL) ? level + 1 : level); } enum vmpressure_level downgrade_level(enum vmpressure_level level) { return (enum vmpressure_level)((level > VMPRESS_LEVEL_LOW) ? level - 1 : level); } static inline unsigned long get_time_diff_ms(struct timeval *from, struct timeval *to) { return (to->tv_sec - from->tv_sec) * 1000 + (to->tv_usec - from->tv_usec) / 1000; } static void mp_event_common(int data, uint32_t events __unused) { int ret; unsigned long long evcount; int64_t mem_usage, memsw_usage; int64_t mem_pressure; enum vmpressure_level lvl; union meminfo mi; union zoneinfo zi; static struct timeval last_report_tm; static unsigned long skip_count = 0; enum vmpressure_level level = (enum vmpressure_level)data; long other_free = 0, other_file = 0; int min_score_adj; int pages_to_free = 0; int minfree = 0; static struct reread_data mem_usage_file_data = { .filename = MEMCG_MEMORY_USAGE, .fd = -1, }; static struct reread_data memsw_usage_file_data = { .filename = MEMCG_MEMORYSW_USAGE, .fd = -1, }; /* * Check all event counters from low to critical * and upgrade to the highest priority one. By reading * eventfd we also reset the event counters. */ for (lvl = VMPRESS_LEVEL_LOW; lvl < VMPRESS_LEVEL_COUNT; lvl++) { if (mpevfd[lvl] != -1 && TEMP_FAILURE_RETRY(read(mpevfd[lvl], &evcount, sizeof(evcount))) > 0 && evcount > 0 && lvl > level) { level = lvl; } } if (kill_timeout_ms) { struct timeval curr_tm; gettimeofday(&curr_tm, NULL); if (get_time_diff_ms(&last_report_tm, &curr_tm) < kill_timeout_ms) { skip_count++; return; } } if (skip_count > 0) { ALOGI("%lu memory pressure events were skipped after a kill!", skip_count); skip_count = 0; } if (meminfo_parse(&mi) < 0 || zoneinfo_parse(&zi) < 0) { ALOGE("Failed to get free memory!"); return; } if (use_minfree_levels) { int i; other_free = mi.field.nr_free_pages - zi.field.totalreserve_pages; if (mi.field.nr_file_pages > (mi.field.shmem + mi.field.unevictable + mi.field.swap_cached)) { other_file = (mi.field.nr_file_pages - mi.field.shmem - mi.field.unevictable - mi.field.swap_cached); } else { other_file = 0; } min_score_adj = OOM_SCORE_ADJ_MAX + 1; for (i = 0; i < lowmem_targets_size; i++) { minfree = lowmem_minfree[i]; if (other_free < minfree && other_file < minfree) { min_score_adj = lowmem_adj[i]; break; } } if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) return; /* Free up enough pages to push over the highest minfree level */ pages_to_free = lowmem_minfree[lowmem_targets_size - 1] - ((other_free < other_file) ? other_free : other_file); goto do_kill; } if (level == VMPRESS_LEVEL_LOW) { record_low_pressure_levels(&mi); } if (level_oomadj[level] > OOM_SCORE_ADJ_MAX) { /* Do not monitor this pressure level */ return; } if ((mem_usage = get_memory_usage(&mem_usage_file_data)) < 0) { goto do_kill; } if ((memsw_usage = get_memory_usage(&memsw_usage_file_data)) < 0) { goto do_kill; } // Calculate percent for swappinness. mem_pressure = (mem_usage * 100) / memsw_usage; if (enable_pressure_upgrade && level != VMPRESS_LEVEL_CRITICAL) { // We are swapping too much. if (mem_pressure < upgrade_pressure) { level = upgrade_level(level); if (debug_process_killing) { ALOGI("Event upgraded to %s", level_name[level]); } } } // If the pressure is larger than downgrade_pressure lmk will not // kill any process, since enough memory is available. if (mem_pressure > downgrade_pressure) { if (debug_process_killing) { ALOGI("Ignore %s memory pressure", level_name[level]); } return; } else if (level == VMPRESS_LEVEL_CRITICAL && mem_pressure > upgrade_pressure) { if (debug_process_killing) { ALOGI("Downgrade critical memory pressure"); } // Downgrade event, since enough memory available. level = downgrade_level(level); } do_kill: if (low_ram_device) { /* For Go devices kill only one task */ if (find_and_kill_processes(level, level_oomadj[level], 0) == 0) { if (debug_process_killing) { ALOGI("Nothing to kill"); } } } else { int pages_freed; if (!use_minfree_levels) { /* If pressure level is less than critical and enough free swap then ignore */ if (level < VMPRESS_LEVEL_CRITICAL && mi.field.free_swap > low_pressure_mem.max_nr_free_pages) { if (debug_process_killing) { ALOGI("Ignoring pressure since %" PRId64 " swap pages are available ", mi.field.free_swap); } return; } /* Free up enough memory to downgrate the memory pressure to low level */ if (mi.field.nr_free_pages < low_pressure_mem.max_nr_free_pages) { pages_to_free = low_pressure_mem.max_nr_free_pages - mi.field.nr_free_pages; } else { if (debug_process_killing) { ALOGI("Ignoring pressure since more memory is " "available (%" PRId64 ") than watermark (%" PRId64 ")", mi.field.nr_free_pages, low_pressure_mem.max_nr_free_pages); } return; } min_score_adj = level_oomadj[level]; } pages_freed = find_and_kill_processes(level, min_score_adj, pages_to_free); if (use_minfree_levels) { ALOGI("Killing because cache %ldkB is below " "limit %ldkB for oom_adj %d\n" " Free memory is %ldkB %s reserved", other_file * page_k, minfree * page_k, min_score_adj, other_free * page_k, other_free >= 0 ? "above" : "below"); } if (pages_freed < pages_to_free) { ALOGI("Unable to free enough memory (pages to free=%d, pages freed=%d)", pages_to_free, pages_freed); } else { ALOGI("Reclaimed enough memory (pages to free=%d, pages freed=%d)", pages_to_free, pages_freed); gettimeofday(&last_report_tm, NULL); } } } static bool init_mp_common(enum vmpressure_level level) { int mpfd; int evfd; int evctlfd; char buf[256]; struct epoll_event epev; int ret; int level_idx = (int)level; const char *levelstr = level_name[level_idx]; /* gid containing AID_SYSTEM required */ mpfd = open(MEMCG_SYSFS_PATH "memory.pressure_level", O_RDONLY | O_CLOEXEC); if (mpfd < 0) { ALOGI("No kernel memory.pressure_level support (errno=%d)", errno); goto err_open_mpfd; } evctlfd = open(MEMCG_SYSFS_PATH "cgroup.event_control", O_WRONLY | O_CLOEXEC); if (evctlfd < 0) { ALOGI("No kernel memory cgroup event control (errno=%d)", errno); goto err_open_evctlfd; } evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); if (evfd < 0) { ALOGE("eventfd failed for level %s; errno=%d", levelstr, errno); goto err_eventfd; } ret = snprintf(buf, sizeof(buf), "%d %d %s", evfd, mpfd, levelstr); if (ret >= (ssize_t)sizeof(buf)) { ALOGE("cgroup.event_control line overflow for level %s", levelstr); goto err; } ret = TEMP_FAILURE_RETRY(write(evctlfd, buf, strlen(buf) + 1)); if (ret == -1) { ALOGE("cgroup.event_control write failed for level %s; errno=%d", levelstr, errno); goto err; } epev.events = EPOLLIN; /* use data to store event level */ vmpressure_hinfo[level_idx].data = level_idx; vmpressure_hinfo[level_idx].handler = mp_event_common; epev.data.ptr = (void *)&vmpressure_hinfo[level_idx]; ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, evfd, &epev); if (ret == -1) { ALOGE("epoll_ctl for level %s failed; errno=%d", levelstr, errno); goto err; } maxevents++; mpevfd[level] = evfd; close(evctlfd); return true; err: close(evfd); err_eventfd: close(evctlfd); err_open_evctlfd: close(mpfd); err_open_mpfd: return false; } static int init(void) { struct epoll_event epev; int i; int ret; page_k = sysconf(_SC_PAGESIZE); if (page_k == -1) page_k = PAGE_SIZE; page_k /= 1024; epollfd = epoll_create(MAX_EPOLL_EVENTS); if (epollfd == -1) { ALOGE("epoll_create failed (errno=%d)", errno); return -1; } // mark data connections as not connected for (int i = 0; i < MAX_DATA_CONN; i++) { data_sock[i].sock = -1; } ctrl_sock.sock = android_get_control_socket("lmkd"); if (ctrl_sock.sock < 0) { ALOGE("get lmkd control socket failed"); return -1; } ret = listen(ctrl_sock.sock, MAX_DATA_CONN); if (ret < 0) { ALOGE("lmkd control socket listen failed (errno=%d)", errno); return -1; } epev.events = EPOLLIN; ctrl_sock.handler_info.handler = ctrl_connect_handler; epev.data.ptr = (void *)&(ctrl_sock.handler_info); if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_sock.sock, &epev) == -1) { ALOGE("epoll_ctl for lmkd control socket failed (errno=%d)", errno); return -1; } maxevents++; has_inkernel_module = !access(INKERNEL_MINFREE_PATH, W_OK); use_inkernel_interface = has_inkernel_module; if (use_inkernel_interface) { ALOGI("Using in-kernel low memory killer interface"); } else { if (!init_mp_common(VMPRESS_LEVEL_LOW) || !init_mp_common(VMPRESS_LEVEL_MEDIUM) || !init_mp_common(VMPRESS_LEVEL_CRITICAL)) { ALOGE("Kernel does not support memory pressure events or in-kernel low memory killer"); return -1; } } for (i = 0; i <= ADJTOSLOT(OOM_SCORE_ADJ_MAX); i++) { procadjslot_list[i].next = &procadjslot_list[i]; procadjslot_list[i].prev = &procadjslot_list[i]; } return 0; } static void mainloop(void) { struct event_handler_info* handler_info; struct epoll_event *evt; while (1) { struct epoll_event events[maxevents]; int nevents; int i; nevents = epoll_wait(epollfd, events, maxevents, -1); if (nevents == -1) { if (errno == EINTR) continue; ALOGE("epoll_wait failed (errno=%d)", errno); continue; } /* * First pass to see if any data socket connections were dropped. * Dropped connection should be handled before any other events * to deallocate data connection and correctly handle cases when * connection gets dropped and reestablished in the same epoll cycle. * In such cases it's essential to handle connection closures first. */ for (i = 0, evt = &events[0]; i < nevents; ++i, evt++) { if ((evt->events & EPOLLHUP) && evt->data.ptr) { ALOGI("lmkd data connection dropped"); handler_info = (struct event_handler_info*)evt->data.ptr; ctrl_data_close(handler_info->data); } } /* Second pass to handle all other events */ for (i = 0, evt = &events[0]; i < nevents; ++i, evt++) { if (evt->events & EPOLLERR) ALOGD("EPOLLERR on event #%d", i); if (evt->events & EPOLLHUP) { /* This case was handled in the first pass */ continue; } if (evt->data.ptr) { handler_info = (struct event_handler_info*)evt->data.ptr; handler_info->handler(handler_info->data, evt->events); } } } } int main(int argc __unused, char **argv __unused) { struct sched_param param = { .sched_priority = 1, }; /* By default disable low level vmpressure events */ level_oomadj[VMPRESS_LEVEL_LOW] = property_get_int32("ro.lmk.low", OOM_SCORE_ADJ_MAX + 1); level_oomadj[VMPRESS_LEVEL_MEDIUM] = property_get_int32("ro.lmk.medium", 800); level_oomadj[VMPRESS_LEVEL_CRITICAL] = property_get_int32("ro.lmk.critical", 0); debug_process_killing = property_get_bool("ro.lmk.debug", false); /* By default disable upgrade/downgrade logic */ enable_pressure_upgrade = property_get_bool("ro.lmk.critical_upgrade", false); upgrade_pressure = (int64_t)property_get_int32("ro.lmk.upgrade_pressure", 100); downgrade_pressure = (int64_t)property_get_int32("ro.lmk.downgrade_pressure", 100); kill_heaviest_task = property_get_bool("ro.lmk.kill_heaviest_task", false); low_ram_device = property_get_bool("ro.config.low_ram", false); kill_timeout_ms = (unsigned long)property_get_int32("ro.lmk.kill_timeout_ms", 0); use_minfree_levels = property_get_bool("ro.lmk.use_minfree_levels", false); #ifdef LMKD_LOG_STATS statslog_init(&log_ctx, &enable_stats_log); #endif if (!init()) { if (!use_inkernel_interface) { /* * MCL_ONFAULT pins pages as they fault instead of loading * everything immediately all at once. (Which would be bad, * because as of this writing, we have a lot of mapped pages we * never use.) Old kernels will see MCL_ONFAULT and fail with * EINVAL; we ignore this failure. * * N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT * pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault * in pages. */ /* CAP_IPC_LOCK required */ if (mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) { ALOGW("mlockall failed %s", strerror(errno)); } /* CAP_NICE required */ if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { ALOGW("set SCHED_FIFO failed %s", strerror(errno)); } } mainloop(); } #ifdef LMKD_LOG_STATS statslog_destroy(&log_ctx); #endif ALOGI("exiting"); return 0; }