diff options
author | Josh Gao <jmgao@google.com> | 2016-10-18 18:17:52 -0700 |
---|---|---|
committer | Josh Gao <jmgao@google.com> | 2017-01-17 13:57:57 -0800 |
commit | cbe70cb0a8cb0171f3802273050e851a47b090ed (patch) | |
tree | 87a2805f11d9fa5283fe169d9b155f5b08a94f85 /debuggerd/client | |
parent | 4c5c9db17b55bfcb2d62eec158440d0e794e30bb (diff) | |
download | core-cbe70cb0a8cb0171f3802273050e851a47b090ed.tar.gz core-cbe70cb0a8cb0171f3802273050e851a47b090ed.tar.bz2 core-cbe70cb0a8cb0171f3802273050e851a47b090ed.zip |
debuggerd: advance our amazing bet.
Remove debuggerd in favor of a helper process that gets execed by
crashing processes.
Bug: http://b/30705528
Test: debuggerd_test
Change-Id: I9906c69473989cbf7fe5ea6cccf9a9c563d75906
Diffstat (limited to 'debuggerd/client')
-rw-r--r-- | debuggerd/client/debuggerd_client.cpp | 456 |
1 files changed, 133 insertions, 323 deletions
diff --git a/debuggerd/client/debuggerd_client.cpp b/debuggerd/client/debuggerd_client.cpp index c67d747fc..81d70df1c 100644 --- a/debuggerd/client/debuggerd_client.cpp +++ b/debuggerd/client/debuggerd_client.cpp @@ -1,379 +1,189 @@ /* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. + * Copyright 2016, The Android Open Source Project * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -#include "debuggerd/client.h" +#include <debuggerd/client.h> -#include <errno.h> -#include <inttypes.h> -#include <pthread.h> -#include <sched.h> +#include <fcntl.h> #include <signal.h> -#include <stddef.h> -#include <stdio.h> #include <stdlib.h> -#include <string.h> -#include <sys/mman.h> -#include <sys/prctl.h> -#include <sys/socket.h> -#include <sys/syscall.h> -#include <sys/un.h> -#include <sys/wait.h> +#include <sys/stat.h> +#include <sys/types.h> #include <unistd.h> -#include "private/libc_logging.h" - -#if defined(TARGET_IS_64_BIT) && !defined(__LP64__) -#define SOCKET_NAME "android:debuggerd32" -#else -#define SOCKET_NAME "android:debuggerd" -#endif - -// see man(2) prctl, specifically the section about PR_GET_NAME -#define MAX_TASK_NAME_LEN (16) - -static debuggerd_callbacks_t g_callbacks; +#include <chrono> -// Don't use __libc_fatal because it exits via abort, which might put us back into a signal handler. -#define fatal(...) \ - do { \ - __libc_format_log(ANDROID_LOG_FATAL, "libc", __VA_ARGS__); \ - _exit(1); \ - } while (0) +#include <android-base/logging.h> +#include <android-base/stringprintf.h> +#include <android-base/unique_fd.h> +#include <cutils/sockets.h> +#include <debuggerd/handler.h> +#include <debuggerd/protocol.h> +#include <debuggerd/util.h> -static int socket_abstract_client(const char* name, int type) { - sockaddr_un addr; +using android::base::unique_fd; - // Test with length +1 for the *initial* '\0'. - size_t namelen = strlen(name); - if ((namelen + 1) > sizeof(addr.sun_path)) { - errno = EINVAL; - return -1; +static bool send_signal(pid_t pid, bool backtrace) { + sigval val; + val.sival_int = backtrace; + if (sigqueue(pid, DEBUGGER_SIGNAL, val) != 0) { + PLOG(ERROR) << "libdebuggerd_client: failed to send signal to pid " << pid; + return false; } + return true; +} - // This is used for abstract socket namespace, we need - // an initial '\0' at the start of the Unix socket path. - // - // Note: The path in this case is *not* supposed to be - // '\0'-terminated. ("man 7 unix" for the gory details.) - memset(&addr, 0, sizeof(addr)); - addr.sun_family = AF_LOCAL; - addr.sun_path[0] = 0; - memcpy(addr.sun_path + 1, name, namelen); - - socklen_t alen = namelen + offsetof(sockaddr_un, sun_path) + 1; - - int s = socket(AF_LOCAL, type, 0); - if (s == -1) { - return -1; - } +static bool check_dumpable(pid_t pid) { + // /proc/<pid> is owned by the effective UID of the process. + // Ownership of most of the other files in /proc/<pid> varies based on PR_SET_DUMPABLE. + // If PR_GET_DUMPABLE would return 0, they're owned by root, instead. + std::string proc_pid_path = android::base::StringPrintf("/proc/%d/", pid); + std::string proc_pid_status_path = proc_pid_path + "/status"; - int rc = TEMP_FAILURE_RETRY(connect(s, reinterpret_cast<sockaddr*>(&addr), alen)); - if (rc == -1) { - close(s); - return -1; + unique_fd proc_pid_fd(open(proc_pid_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)); + if (proc_pid_fd == -1) { + return false; } - - return s; -} - -/* - * Writes a summary of the signal to the log file. We do this so that, if - * for some reason we're not able to contact debuggerd, there is still some - * indication of the failure in the log. - * - * We could be here as a result of native heap corruption, or while a - * mutex is being held, so we don't want to use any libc functions that - * could allocate memory or hold a lock. - */ -static void log_signal_summary(int signum, const siginfo_t* info) { - const char* signal_name = "???"; - bool has_address = false; - switch (signum) { - case SIGABRT: - signal_name = "SIGABRT"; - break; - case SIGBUS: - signal_name = "SIGBUS"; - has_address = true; - break; - case SIGFPE: - signal_name = "SIGFPE"; - has_address = true; - break; - case SIGILL: - signal_name = "SIGILL"; - has_address = true; - break; - case SIGSEGV: - signal_name = "SIGSEGV"; - has_address = true; - break; -#if defined(SIGSTKFLT) - case SIGSTKFLT: - signal_name = "SIGSTKFLT"; - break; -#endif - case SIGSYS: - signal_name = "SIGSYS"; - break; - case SIGTRAP: - signal_name = "SIGTRAP"; - break; + unique_fd proc_pid_status_fd(openat(proc_pid_fd, "status", O_RDONLY | O_CLOEXEC)); + if (proc_pid_status_fd == -1) { + return false; } - char thread_name[MAX_TASK_NAME_LEN + 1]; // one more for termination - if (prctl(PR_GET_NAME, reinterpret_cast<unsigned long>(thread_name), 0, 0, 0) != 0) { - strcpy(thread_name, "<name unknown>"); - } else { - // short names are null terminated by prctl, but the man page - // implies that 16 byte names are not. - thread_name[MAX_TASK_NAME_LEN] = 0; + struct stat proc_pid_st; + struct stat proc_pid_status_st; + if (fstat(proc_pid_fd.get(), &proc_pid_st) != 0 || + fstat(proc_pid_status_fd.get(), &proc_pid_status_st) != 0) { + return false; } - // "info" will be null if the siginfo_t information was not available. - // Many signals don't have an address or a code. - char code_desc[32]; // ", code -6" - char addr_desc[32]; // ", fault addr 0x1234" - addr_desc[0] = code_desc[0] = 0; - if (info != nullptr) { - // For a rethrown signal, this si_code will be right and the one debuggerd shows will - // always be SI_TKILL. - __libc_format_buffer(code_desc, sizeof(code_desc), ", code %d", info->si_code); - if (has_address) { - __libc_format_buffer(addr_desc, sizeof(addr_desc), ", fault addr %p", info->si_addr); - } + // We can't figure out if a process is dumpable if its effective UID is root, but that's fine + // because being root bypasses the PR_SET_DUMPABLE check for ptrace. + if (proc_pid_st.st_uid == 0) { + return true; } - __libc_format_log(ANDROID_LOG_FATAL, "libc", "Fatal signal %d (%s)%s%s in tid %d (%s)", signum, - signal_name, code_desc, addr_desc, gettid(), thread_name); -} -/* - * Returns true if the handler for signal "signum" has SA_SIGINFO set. - */ -static bool have_siginfo(int signum) { - struct sigaction old_action, new_action; - - memset(&new_action, 0, sizeof(new_action)); - new_action.sa_handler = SIG_DFL; - new_action.sa_flags = SA_RESTART; - sigemptyset(&new_action.sa_mask); - - if (sigaction(signum, &new_action, &old_action) < 0) { - __libc_format_log(ANDROID_LOG_WARN, "libc", "Failed testing for SA_SIGINFO: %s", - strerror(errno)); + if (proc_pid_status_st.st_uid == 0) { return false; } - bool result = (old_action.sa_flags & SA_SIGINFO) != 0; - if (sigaction(signum, &old_action, nullptr) == -1) { - __libc_format_log(ANDROID_LOG_WARN, "libc", "Restore failed in test for SA_SIGINFO: %s", - strerror(errno)); - } - return result; + return true; } -static void send_debuggerd_packet(pid_t crashing_tid, pid_t pseudothread_tid) { - // Mutex to prevent multiple crashing threads from trying to talk - // to debuggerd at the same time. - static pthread_mutex_t crash_mutex = PTHREAD_MUTEX_INITIALIZER; - int ret = pthread_mutex_trylock(&crash_mutex); - if (ret != 0) { - if (ret == EBUSY) { - __libc_format_log(ANDROID_LOG_INFO, "libc", - "Another thread contacted debuggerd first; not contacting debuggerd."); - // This will never complete since the lock is never released. - pthread_mutex_lock(&crash_mutex); - } else { - __libc_format_log(ANDROID_LOG_INFO, "libc", "pthread_mutex_trylock failed: %s", strerror(ret)); +bool debuggerd_trigger_dump(pid_t pid, unique_fd output_fd, DebuggerdDumpType dump_type, + int timeout_ms) { + LOG(INFO) << "libdebuggerd_client: started dumping process " << pid; + unique_fd sockfd; + const auto end = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeout_ms); + auto set_timeout = [timeout_ms, &sockfd, &end]() { + if (timeout_ms <= 0) { + return true; } - return; - } - - int s = socket_abstract_client(SOCKET_NAME, SOCK_STREAM | SOCK_CLOEXEC); - if (s == -1) { - __libc_format_log(ANDROID_LOG_FATAL, "libc", "Unable to open connection to debuggerd: %s", - strerror(errno)); - return; - } - // debuggerd knows our pid from the credentials on the - // local socket but we need to tell it the tid of the crashing thread. - // debuggerd will be paranoid and verify that we sent a tid - // that's actually in our process. - debugger_msg_t msg; - msg.action = DEBUGGER_ACTION_CRASH; - msg.tid = crashing_tid; - msg.ignore_tid = pseudothread_tid; - msg.abort_msg_address = 0; - - if (g_callbacks.get_abort_message) { - msg.abort_msg_address = reinterpret_cast<uintptr_t>(g_callbacks.get_abort_message()); - } - - ret = TEMP_FAILURE_RETRY(write(s, &msg, sizeof(msg))); - if (ret == sizeof(msg)) { - char debuggerd_ack; - ret = TEMP_FAILURE_RETRY(read(s, &debuggerd_ack, 1)); - if (g_callbacks.post_dump) { - g_callbacks.post_dump(); + auto now = std::chrono::steady_clock::now(); + if (now > end) { + return false; } - } else { - // read or write failed -- broken connection? - __libc_format_log(ANDROID_LOG_FATAL, "libc", "Failed while talking to debuggerd: %s", - strerror(errno)); - } - close(s); -} + auto time_left = std::chrono::duration_cast<std::chrono::microseconds>(end - now); + auto seconds = std::chrono::duration_cast<std::chrono::seconds>(time_left); + auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(time_left - seconds); + struct timeval timeout = { + .tv_sec = static_cast<long>(seconds.count()), + .tv_usec = static_cast<long>(microseconds.count()), + }; -struct debugger_thread_info { - pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - pid_t crashing_tid; - pid_t pseudothread_tid; - int signal_number; - siginfo_t* info; -}; + if (setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) != 0) { + return false; + } + if (setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) != 0) { + return false; + } -// Logging and contacting debuggerd requires free file descriptors, which we might not have. -// Work around this by spawning a "thread" that shares its parent's address space, but not its file -// descriptor table, so that we can close random file descriptors without affecting the original -// process. Note that this doesn't go through pthread_create, so TLS is shared with the spawning -// process. -static void* pseudothread_stack; -static int debuggerd_dispatch_pseudothread(void* arg) { - debugger_thread_info* thread_info = static_cast<debugger_thread_info*>(arg); + return true; + }; - for (int i = 3; i < 1024; ++i) { - close(i); + if (!check_dumpable(pid)) { + dprintf(output_fd.get(), "target pid %d is not dumpable\n", pid); + return true; } - log_signal_summary(thread_info->signal_number, thread_info->info); - send_debuggerd_packet(thread_info->crashing_tid, thread_info->pseudothread_tid); - pthread_mutex_unlock(&thread_info->mutex); - return 0; -} - -/* - * Catches fatal signals so we can ask debuggerd to ptrace us before - * we crash. - */ -static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void*) { - // It's possible somebody cleared the SA_SIGINFO flag, which would mean - // our "info" arg holds an undefined value. - if (!have_siginfo(signal_number)) { - info = nullptr; + sockfd.reset(socket(AF_LOCAL, SOCK_SEQPACKET, 0)); + if (sockfd == -1) { + PLOG(ERROR) << "libdebugger_client: failed to create socket"; + return false; } - debugger_thread_info thread_info = { - .crashing_tid = gettid(), - .signal_number = signal_number, - .info = info - }; - - pthread_mutex_lock(&thread_info.mutex); - pid_t child_pid = clone(debuggerd_dispatch_pseudothread, pseudothread_stack, - CLONE_THREAD | CLONE_SIGHAND | CLONE_VM | CLONE_CHILD_SETTID, - &thread_info, nullptr, nullptr, &thread_info.pseudothread_tid); - - if (child_pid == -1) { - fatal("failed to spawn debuggerd dispatch thread: %s", strerror(errno)); + if (!set_timeout()) { + PLOG(ERROR) << "libdebugger_client: failed to set timeout"; + return false; } - // Wait for the child to finish and unlock the mutex. - // This relies on bionic behavior that isn't guaranteed by the standard. - pthread_mutex_lock(&thread_info.mutex); - - - // We need to return from the signal handler so that debuggerd can dump the - // thread that crashed, but returning here does not guarantee that the signal - // will be thrown again, even for SIGSEGV and friends, since the signal could - // have been sent manually. Resend the signal with rt_tgsigqueueinfo(2) to - // preserve the SA_SIGINFO contents. - signal(signal_number, SIG_DFL); + if (socket_local_client_connect(sockfd.get(), kTombstonedInterceptSocketName, + ANDROID_SOCKET_NAMESPACE_RESERVED, SOCK_SEQPACKET) == -1) { + PLOG(ERROR) << "libdebuggerd_client: failed to connect to tombstoned"; + return false; + } - struct siginfo si; - if (!info) { - memset(&si, 0, sizeof(si)); - si.si_code = SI_USER; - si.si_pid = getpid(); - si.si_uid = getuid(); - info = &si; - } else if (info->si_code >= 0 || info->si_code == SI_TKILL) { - // rt_tgsigqueueinfo(2)'s documentation appears to be incorrect on kernels - // that contain commit 66dd34a (3.9+). The manpage claims to only allow - // negative si_code values that are not SI_TKILL, but 66dd34a changed the - // check to allow all si_code values in calls coming from inside the house. + InterceptRequest req = {.pid = pid }; + if (!set_timeout()) { + PLOG(ERROR) << "libdebugger_client: failed to set timeout"; } - int rc = syscall(SYS_rt_tgsigqueueinfo, getpid(), gettid(), signal_number, info); - if (rc != 0) { - fatal("failed to resend signal during crash: %s", strerror(errno)); + if (send_fd(sockfd.get(), &req, sizeof(req), std::move(output_fd)) != sizeof(req)) { + PLOG(ERROR) << "libdebuggerd_client: failed to send output fd to tombstoned"; + return false; } -} -void debuggerd_init(debuggerd_callbacks_t* callbacks) { - if (callbacks) { - g_callbacks = *callbacks; + bool backtrace = dump_type == kDebuggerdBacktrace; + send_signal(pid, backtrace); + + if (!set_timeout()) { + PLOG(ERROR) << "libdebugger_client: failed to set timeout"; } - void* thread_stack_allocation = - mmap(nullptr, PAGE_SIZE * 3, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (thread_stack_allocation == MAP_FAILED) { - fatal("failed to allocate debuggerd thread stack"); + InterceptResponse response; + ssize_t rc = TEMP_FAILURE_RETRY(recv(sockfd.get(), &response, sizeof(response), MSG_TRUNC)); + if (rc == 0) { + LOG(ERROR) << "libdebuggerd_client: failed to read response from tombstoned: timeout reached?"; + return false; + } else if (rc != sizeof(response)) { + LOG(ERROR) + << "libdebuggerd_client: received packet of unexpected length from tombstoned: expected " + << sizeof(response) << ", received " << rc; + return false; } - char* stack = static_cast<char*>(thread_stack_allocation) + PAGE_SIZE; - if (mprotect(stack, PAGE_SIZE, PROT_READ | PROT_WRITE) != 0) { - fatal("failed to mprotect debuggerd thread stack"); + if (response.success != 1) { + response.error_message[sizeof(response.error_message) - 1] = '\0'; + LOG(ERROR) << "libdebuggerd_client: tombstoned reported failure: " << response.error_message; } - // Stack grows negatively, set it to the last byte in the page... - stack = (stack + PAGE_SIZE - 1); - // and align it. - stack -= 15; - pseudothread_stack = stack; + LOG(INFO) << "libdebuggerd_client: done dumping process " << pid; - struct sigaction action; - memset(&action, 0, sizeof(action)); - sigemptyset(&action.sa_mask); - action.sa_sigaction = debuggerd_signal_handler; - action.sa_flags = SA_RESTART | SA_SIGINFO; + return true; +} - // Use the alternate signal stack if available so we can catch stack overflows. - action.sa_flags |= SA_ONSTACK; +int dump_backtrace_to_file(pid_t tid, int fd) { + return dump_backtrace_to_file_timeout(tid, fd, 0); +} - sigaction(SIGABRT, &action, nullptr); - sigaction(SIGBUS, &action, nullptr); - sigaction(SIGFPE, &action, nullptr); - sigaction(SIGILL, &action, nullptr); - sigaction(SIGSEGV, &action, nullptr); -#if defined(SIGSTKFLT) - sigaction(SIGSTKFLT, &action, nullptr); -#endif - sigaction(SIGTRAP, &action, nullptr); +int dump_backtrace_to_file_timeout(pid_t tid, int fd, int timeout_secs) { + android::base::unique_fd copy(dup(fd)); + if (copy == -1) { + return -1; + } + int timeout_ms = timeout_secs > 0 ? timeout_secs * 1000 : 0; + return debuggerd_trigger_dump(tid, std::move(copy), kDebuggerdBacktrace, timeout_ms) ? 0 : -1; } |