diff options
Diffstat (limited to 'bootstat/bootstat.cpp')
| -rw-r--r-- | bootstat/bootstat.cpp | 373 |
1 files changed, 371 insertions, 2 deletions
diff --git a/bootstat/bootstat.cpp b/bootstat/bootstat.cpp index bd611f0b9..a0a9307a9 100644 --- a/bootstat/bootstat.cpp +++ b/bootstat/bootstat.cpp @@ -20,6 +20,7 @@ #include <getopt.h> #include <unistd.h> +#include <sys/klog.h> #include <chrono> #include <cmath> @@ -32,11 +33,14 @@ #include <vector> #include <android-base/chrono_utils.h> +#include <android-base/file.h> #include <android-base/logging.h> #include <android-base/parseint.h> #include <android-base/strings.h> #include <android/log.h> +#include <cutils/android_reboot.h> #include <cutils/properties.h> +#include <log/logcat.h> #include <metricslogger/metrics_logger.h> #include "boot_event_record_store.h" @@ -118,6 +122,14 @@ std::string GetProperty(const char* key) { return std::string(&temp[0], len); } +void SetProperty(const char* key, const std::string& val) { + property_set(key, val.c_str()); +} + +void SetProperty(const char* key, const char* val) { + property_set(key, val); +} + constexpr int32_t kUnknownBootReason = 1; // A mapping from boot reason string, as read from the ro.boot.bootreason @@ -175,7 +187,23 @@ const std::map<std::string, int32_t> kBootReasonMap = { {"reboot,cold", 48}, {"reboot,recovery", 49}, {"thermal_shutdown", 50}, - {"s3_wakeup", 51} + {"s3_wakeup", 51}, + {"kernel_panic,sysrq", 52}, + {"kernel_panic,NULL", 53}, + {"kernel_panic,BUG", 54}, + {"bootloader", 55}, + {"cold", 56}, + {"hard", 57}, + {"warm", 58}, + {"recovery", 59}, + {"thermal-shutdown", 60}, + {"shutdown,thermal", 61}, + {"shutdown,battery", 62}, + {"reboot,ota", 63}, + {"reboot,factory_reset", 64}, + {"reboot,", 65}, + {"reboot,shell", 66}, + {"reboot,adb", 67}, }; // Converts a string value representing the reason the system booted to an @@ -191,6 +219,323 @@ int32_t BootReasonStrToEnum(const std::string& boot_reason) { return kUnknownBootReason; } +// Canonical list of supported primary reboot reasons. 
// The order of the entries is significant: the kernel reasons come first,
// then the remaining strong reasons, then the blunt ones. The predicates
// below classify a reason by where in this list scanning stops ("recovery"
// ends the kernel group, "cold" ends the strong group).
const std::vector<std::string> knownReasons = {
    // kernel
    "watchdog",
    "kernel_panic",
    // strong
    "recovery",    // Should not happen from ro.boot.bootreason
    "bootloader",  // Should not happen from ro.boot.bootreason
    // blunt
    "cold",
    "hard",
    "warm",
    "shutdown",  // Can not happen from ro.boot.bootreason
    "reboot",    // Default catch-all for anything unknown
};

// Returns true if |r| begins with |prefix| and the prefix is terminated by
// the end of the string (nul) or by a comma (,).
static bool hasReasonPrefix(const std::string& r, const std::string& prefix) {
  if (r.compare(0, prefix.length(), prefix) != 0) return false;
  return (r.length() == prefix.length()) || (r[prefix.length()] == ',');
}

// Returns true if |r| is prefixed by one of the knownReasons entries that
// precede |stop| in the list. A null |stop| searches the whole list.
static bool matchesKnownReasonBefore(const std::string& r, const char* stop) {
  for (const auto& s : knownReasons) {
    if ((stop != nullptr) && (s == stop)) break;
    if (hasReasonPrefix(r, s)) return true;
  }
  return false;
}

// Returns true if the supplied reason prefix is considered detailed enough,
// i.e. it is one of the entries listed before "cold".
bool isStrongRebootReason(const std::string& r) {
  return matchesKnownReasonBefore(r, "cold");
}

// Returns true if the supplied reason prefix is associated with the kernel,
// i.e. it is one of the entries listed before "recovery".
bool isKernelRebootReason(const std::string& r) {
  return matchesKnownReasonBefore(r, "recovery");
}

// Returns true if the supplied reason prefix is considered known, i.e. it is
// any entry of knownReasons.
bool isKnownRebootReason(const std::string& r) {
  return matchesKnownReasonBefore(r, nullptr);
}

// If the reboot reason should be improved, report true if it is too blunt.
+bool isBluntRebootReason(const std::string& r) { + if (isStrongRebootReason(r)) return false; + + if (!isKnownRebootReason(r)) return true; // Can not support unknown as detail + + size_t pos = 0; + while ((pos = r.find(',', pos)) != std::string::npos) { + ++pos; + std::string next(r.substr(pos)); + if (next.length() == 0) break; + if (next[0] == ',') continue; + if (!isKnownRebootReason(next)) return false; // Unknown subreason is good. + if (isStrongRebootReason(next)) return false; // eg: reboot,reboot + } + return true; +} + +// std::transform Helper callback functions: +// Converts a string value representing the reason the system booted to a +// string complying with Android system standard reason. +char tounderline(char c) { return ::isblank(c) ? '_' : c; } +char toprintable(char c) { return ::isprint(c) ? c : '?'; } + +const char system_reboot_reason_property[] = "sys.boot.reason"; +const char last_reboot_reason_property[] = LAST_REBOOT_REASON_PROPERTY; +const char bootloader_reboot_reason_property[] = "ro.boot.bootreason"; + +// Scrub, Sanitize, Standardize and Enhance the boot reason string supplied. +std::string BootReasonStrToReason(const std::string& boot_reason) { + std::string ret(GetProperty(system_reboot_reason_property)); + std::string reason(boot_reason); + // If sys.boot.reason == ro.boot.bootreason, let's re-evaluate + if (reason == ret) ret = ""; + + // Cleanup boot_reason regarding acceptable character set + std::transform(reason.begin(), reason.end(), reason.begin(), ::tolower); + std::transform(reason.begin(), reason.end(), reason.begin(), tounderline); + std::transform(reason.begin(), reason.end(), reason.begin(), toprintable); + + // Is the current system boot reason sys.boot.reason valid? + if (!isKnownRebootReason(ret)) ret = ""; + + if (ret == "") { + // Is the bootloader boot reason ro.boot.bootreason known? 
+ std::vector<std::string> words(android::base::Split(reason, ",_-")); + for (auto &s : knownReasons) { + std::string blunt; + for (auto &r : words) { + if (r == s) { + if (isBluntRebootReason(s)) { + blunt = s; + } else { + ret = s; + break; + } + } + } + if (ret == "") ret = blunt; + if (ret != "") break; + } + } + + if (ret == "") { + // A series of checks to take some officially unsupported reasons + // reported by the bootloader and find some logical and canonical + // sense. In an ideal world, we would require those bootloaders + // to behave and follow our standards. + static const std::vector<std::pair<const std::string, const std::string>> aliasReasons = { + {"watchdog", "wdog"}, + {"cold,powerkey", "powerkey"}, + {"kernel_panic", "panic"}, + {"shutdown,thermal", "thermal"}, + {"warm,s3_wakeup", "s3_wakeup"}, + {"hard,hw_reset", "hw_reset"}, + {"bootloader", ""}, + }; + + // Either the primary or alias is found _somewhere_ in the reason string. + for (auto &s : aliasReasons) { + if (reason.find(s.first) != std::string::npos) { + ret = s.first; + break; + } + if (s.second.size() && (reason.find(s.second) != std::string::npos)) { + ret = s.first; + break; + } + } + } + + // If watchdog is the reason, see if there is a security angle? + if (ret == "watchdog") { + if (reason.find("sec") != std::string::npos) { + ret += ",security"; + } + } + + // Check the other reason resources if the reason is still blunt. + if (isBluntRebootReason(ret)) { + // Check to see if last klog has some refinement hints. + std::string content; + if (!android::base::ReadFileToString("/sys/fs/pstore/console-ramoops-0", + &content)) { + android::base::ReadFileToString("/sys/fs/pstore/console-ramoops", + &content); + } + + // The toybox reboot command used directly (unlikely)? But also + // catches init's response to the Android's more controlled reboot command. + if (content.rfind("reboot: Power down") != std::string::npos) { + ret = "shutdown"; // Still too blunt, but more accurate. 
+ // ToDo: init should record the shutdown reason to kernel messages ala: + // init: shutdown system with command 'last_reboot_reason' + // so that if pstore has persistence we can get some details + // that could be missing in last_reboot_reason_property. + } + + static const char cmd[] = "reboot: Restarting system with command '"; + size_t pos = content.rfind(cmd); + if (pos != std::string::npos) { + pos += strlen(cmd); + std::string subReason(content.substr(pos)); + pos = subReason.find('\''); + if (pos != std::string::npos) subReason.erase(pos); + if (subReason != "") { // Will not land "reboot" as that is too blunt. + if (isKernelRebootReason(subReason)) { + ret = "reboot," + subReason; // User space can't talk kernel reasons. + } else { + ret = subReason; + } + } + } + + // Check for kernel panics, but allowed to override reboot command. + if (content.rfind("sysrq: SysRq : Trigger a crash") != std::string::npos) { + // Can not happen, except on userdebug, during testing/debugging. + ret = "kernel_panic,sysrq"; + } else if (content.rfind( + "Unable to handle kernel NULL pointer dereference at virtual address") + != std::string::npos) { + ret = "kernel_panic,NULL"; + } else if (content.rfind("Kernel BUG at ") != std::string::npos) { + ret = "kernel_panic,BUG"; + } else if ((content.rfind("Power held for ") != std::string::npos) || + (content.rfind("charger: [") != std::string::npos)) { + ret = "cold"; + } + + // The following battery test should migrate to a default system health HAL + + // Let us not worry if the reboot command was issued, for the cases of + // reboot -p, reboot <no reason>, reboot cold, reboot warm and reboot hard. + // Same for bootloader and ro.boot.bootreasons of this set, but a dead + // battery could conceivably lead to these, so worthy of override. + if (isBluntRebootReason(ret)) { + // Heuristic to determine if shutdown possibly because of a dead battery? + // Really a hail-mary pass to find it in last klog content ... 
+ static const int battery_dead_threshold = 2; // percent + static const char battery[] = "healthd: battery l="; + pos = content.rfind(battery); // last one + if (pos != std::string::npos) { + int level = atoi(content.substr(pos + strlen(battery)).c_str()); + LOG(INFO) << "Battery level at shutdown " << level << "%"; + if (level <= battery_dead_threshold) { + ret = "shutdown,battery"; + } + } else { // Most likely + // Content buffer no longer will have console data. Beware if more + // checks added below, that depend on parsing console content. + content = ""; + + LOG(DEBUG) << "Can not find last low battery in last console messages"; + android_logcat_context ctx = create_android_logcat(); + FILE *fp = android_logcat_popen(&ctx, "logcat -b kernel -v brief -d"); + if (fp != nullptr) { + android::base::ReadFdToString(fileno(fp), &content); + } + android_logcat_pclose(&ctx, fp); + android_logcat_destroy(&ctx); + static const char logcat_battery[] = "W/healthd ( 0): battery l="; + const char* match = logcat_battery; + + if (content == "") { + // Service logd.klog not running, go to smaller buffer in the kernel. + int rc = klogctl(KLOG_SIZE_BUFFER, nullptr, 0); + if (rc > 0) { + ssize_t len = rc + 1024; // 1K Margin should it grow between calls. + std::unique_ptr<char[]> buf(new char[len]); + rc = klogctl(KLOG_READ_ALL, buf.get(), len); + if (rc < len) { + len = rc + 1; + } + buf[--len] = '\0'; + content = buf.get(); + } + match = battery; + } + + pos = content.find(match); // The first one it finds. + if (pos != std::string::npos) { + pos += strlen(match); + int level = atoi(content.substr(pos).c_str()); + LOG(INFO) << "Battery level at startup " << level << "%"; + if (level <= battery_dead_threshold) { + ret = "shutdown,battery"; + } + } else { + LOG(DEBUG) << "Can not find first battery level in dmesg or logcat"; + } + } + } + + // Is there a controlled shutdown hint in last_reboot_reason_property? 
+ if (isBluntRebootReason(ret)) { + // Content buffer no longer will have console data. Beware if more + // checks added below, that depend on parsing console content. + content = GetProperty(last_reboot_reason_property); + + // String is either "reboot,<reason>" or "shutdown,<reason>". + // We will set if default reasons, only override with detail if thermal. + if (!isBluntRebootReason(content)) { + // Ok, we want it, let's squash it if secondReason is known. + pos = content.find(','); + if (pos != std::string::npos) { + ++pos; + std::string secondReason(content.substr(pos)); + ret = isKnownRebootReason(secondReason) ? secondReason : content; + } else { + ret = content; + } + } + } + + // Other System Health HAL reasons? + + // ToDo: /proc/sys/kernel/boot_reason needs a HAL interface to + // possibly offer hardware-specific clues from the PMIC. + } + + // If unknown left over from above, make it "reboot,<boot_reason>" + if (ret == "") { + ret = "reboot"; + if (android::base::StartsWith(reason, "reboot")) { + reason = reason.substr(strlen("reboot")); + while (reason[0] == ',') { + reason = reason.substr(1); + } + } + if (reason != "") { + ret += ","; + ret += reason; + } + } + + LOG(INFO) << "Canonical boot reason: " << ret; + if (isKernelRebootReason(ret) && (GetProperty(last_reboot_reason_property) != "")) { + // Rewrite as it must be old news, kernel reasons trump user space. + SetProperty(last_reboot_reason_property, ret); + } + return ret; +} + // Returns the appropriate metric key prefix for the boot_complete metric such // that boot metrics after a system update are labeled as ota_boot_complete; // otherwise, they are labeled as boot_complete. 
This method encapsulates the @@ -212,9 +557,19 @@ std::string CalculateBootCompletePrefix() { if (!boot_event_store.GetBootEvent(kBuildDateKey, &record)) { boot_complete_prefix = "factory_reset_" + boot_complete_prefix; boot_event_store.AddBootEventWithValue(kBuildDateKey, build_date); + LOG(INFO) << "Canonical boot reason: " << "reboot,factory_reset"; + SetProperty(system_reboot_reason_property, "reboot,factory_reset"); + if (GetProperty(bootloader_reboot_reason_property) == "") { + SetProperty(bootloader_reboot_reason_property, "reboot,factory_reset"); + } } else if (build_date != record.second) { boot_complete_prefix = "ota_" + boot_complete_prefix; boot_event_store.AddBootEventWithValue(kBuildDateKey, build_date); + LOG(INFO) << "Canonical boot reason: " << "reboot,ota"; + SetProperty(system_reboot_reason_property, "reboot,ota"); + if (GetProperty(bootloader_reboot_reason_property) == "") { + SetProperty(bootloader_reboot_reason_property, "reboot,ota"); + } } return boot_complete_prefix; @@ -358,9 +713,23 @@ void RecordBootComplete() { // Records the boot_reason metric by querying the ro.boot.bootreason system // property. void RecordBootReason() { - int32_t boot_reason = BootReasonStrToEnum(GetProperty("ro.boot.bootreason")); + const std::string reason(GetProperty(bootloader_reboot_reason_property)); + + // Log the raw bootloader_boot_reason property value. + int32_t boot_reason = BootReasonStrToEnum(reason); BootEventRecordStore boot_event_store; boot_event_store.AddBootEventWithValue("boot_reason", boot_reason); + + // Log the scrubbed system_boot_reason. 
+ const std::string system_reason(BootReasonStrToReason(reason)); + int32_t system_boot_reason = BootReasonStrToEnum(system_reason); + boot_event_store.AddBootEventWithValue("system_boot_reason", system_boot_reason); + + // Record the scrubbed system_boot_reason to the property + SetProperty(system_reboot_reason_property, system_reason); + if (reason == "") { + SetProperty(bootloader_reboot_reason_property, system_reason); + } } // Records two metrics related to the user resetting a device: the time at |
