Source code for crash.types.slab

#!/usr/bin/python3
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

from typing import TypeVar, Union, Tuple, Iterable, Dict, Optional, Set
from typing import ValuesView

import sys
import traceback

from crash.util import container_of, find_member_variant
from crash.util.symbols import Types, TypeCallbacks, SymbolCallbacks
from crash.types.percpu import get_percpu_var
from crash.types.list import list_for_each, list_for_each_entry, ListError
from crash.types.page import page_from_gdb_obj, page_from_addr, Page
from crash.types.node import for_each_nid
from crash.types.cpu import for_each_online_cpu
from crash.types.node import numa_node_id

import gdb

# Labels stored under the "ac_type" key of an array-cache entry; they name
# the kind of array cache (per-cpu, per-node shared, alien) an object is in.
AC_PERCPU = "percpu"
AC_SHARED = "shared"
AC_ALIEN = "alien"

# Description of the array cache that holds an object.  Entries are built
# with the keys "ac_type", "nid_src" and "nid_tgt".
ArrayCacheEntry = Dict[str, Union[int, str]]

# Identifiers of the three per-node slab lists; they are the keys of the two
# name tables below.
slab_partial = 0
slab_full = 1
slab_free = 2

# slab_list_name: short names used in messages.
# slab_list_fullname: names of the list-head members in the per-node structure.
slab_list_name = {0: "partial", 1: "full", 2: "free"}
slab_list_fullname = {0: "slabs_partial", 1: "slabs_full", 2: "slabs_free"}

# Index value (0xffffffff) that ends the chain of free-object indexes walked
# for 'struct slab' based slabs.
BUFCTL_END = ~0 & 0xffffffff

def col_error(msg: str) -> str:
    """Return *msg* wrapped in the terminal escape codes used for errors.

    The text is preceded by the SGR sequence ``1;31;40`` and followed by
    ``0;37;40``; one space is inserted before and appended after the
    message.
    """
    template = "\033[1;31;40m {}\033[0;37;40m "
    return template.format(msg)
def col_bold(msg: str) -> str:
    """Return *msg* wrapped in the terminal escape codes used for emphasis.

    The text is preceded by the SGR sequence ``1;37;40`` and followed by
    ``0;37;40``; one space is inserted before and appended after the
    message.
    """
    template = "\033[1;37;40m {}\033[0;37;40m "
    return template.format(msg)
# Kernel type lookups requested for this module; setup_slab_caches() reads
# the result as ``types.kmem_cache_type``.
types = Types(['kmem_cache', 'struct kmem_cache'])

# Type variables bound to the Slab and KmemCache classes.
SlabType = TypeVar('SlabType', bound='Slab')
KmemCacheType = TypeVar('KmemCacheType', bound='KmemCache')
class Slab:
    """One slab of a kmem cache, with helpers to inspect and verify it.

    The slab descriptor is either a 'struct slab' or the 'struct page'
    itself.  Which one is in use is recorded in class attributes by the
    ``check_*_type`` callbacks, so it applies to every instance.
    """

    # Name of the list_head member that links slab descriptors on the
    # per-node slab lists ('list' in struct slab, 'lru' in struct page).
    slab_list_head: str = 'list'
    # True when the slab descriptor is 'struct page'.
    page_slab: bool = False
    # gdb type of the slab descriptor.
    real_slab_type: gdb.Type
    # gdb type of one entry of the free-object index array.
    bufctl_type: gdb.Type

    @classmethod
    def check_page_type(cls, gdbtype: gdb.Type) -> None:
        """Switch to page-based slab descriptors unless already switched.

        Args:
            gdbtype: The type to use as slab descriptor (registered for
                'struct page').
        """
        if cls.page_slab is False:
            cls.page_slab = True
            cls.real_slab_type = gdbtype
            cls.slab_list_head = 'lru'

    @classmethod
    def check_slab_type(cls, gdbtype: gdb.Type) -> None:
        """Select 'struct slab' based descriptors.

        Unlike check_page_type() this is unconditional, so it overrides a
        previous switch to page-based descriptors.

        Args:
            gdbtype: The type to use as slab descriptor (registered for
                'struct slab').
        """
        cls.page_slab = False
        cls.real_slab_type = gdbtype
        cls.slab_list_head = 'list'

    @classmethod
    def check_bufctl_type(cls, gdbtype: gdb.Type) -> None:
        """Record the type of a free-object index entry.

        Args:
            gdbtype: The index type (registered for 'kmem_bufctl_t' and
                'freelist_idx_t').
        """
        cls.bufctl_type = gdbtype

    @classmethod
    def from_addr(cls, slab_addr: int,
                  kmem_cache: Union[int, 'KmemCache']) -> 'Slab':
        """Build a Slab from the address of its descriptor.

        Args:
            slab_addr: Address of the slab descriptor.
            kmem_cache: The owning cache, or the address of its
                'struct kmem_cache' (looked up with kmem_cache_from_addr).

        Returns:
            The Slab wrapping the descriptor at ``slab_addr``.
        """
        if not isinstance(kmem_cache, KmemCache):
            kmem_cache = kmem_cache_from_addr(kmem_cache)
        slab_ptr = gdb.Value(slab_addr).cast(cls.real_slab_type.pointer())
        return cls(slab_ptr.dereference(), kmem_cache)

    @classmethod
    def from_page(cls, page: Page) -> 'Slab':
        """Build the Slab that a slab page belongs to.

        Args:
            page: A page whose slab cache pointer identifies the cache.

        Returns:
            The Slab for ``page``.

        Raises:
            KmemCacheNotFound: The page's cache pointer does not match any
                known cache (raised by kmem_cache_from_addr).
        """
        # kmem_cache_from_addr() raises rather than returning None, so no
        # separate None check is needed here.
        kmem_cache = kmem_cache_from_addr(int(page.get_slab_cache()))
        if cls.page_slab:
            return cls(page.gdb_obj, kmem_cache)
        slab_addr = int(page.get_slab_page())
        return cls.from_addr(slab_addr, kmem_cache)

    @classmethod
    def from_list_head(cls, list_head: gdb.Value,
                       kmem_cache: 'KmemCache') -> 'Slab':
        """Build a Slab from its linkage on one of the slab lists.

        Args:
            list_head: The list_head embedded in the slab descriptor.
            kmem_cache: The cache owning the list.

        Returns:
            The Slab whose descriptor contains ``list_head``.
        """
        gdb_obj = container_of(list_head, cls.real_slab_type,
                               cls.slab_list_head)
        return cls(gdb_obj, kmem_cache)

    def __init__(self, gdb_obj: gdb.Value, kmem_cache: 'KmemCache',
                 error: bool = False) -> None:
        """Wrap a slab descriptor.

        Args:
            gdb_obj: The slab descriptor (struct slab or struct page).
            kmem_cache: The cache owning the slab.
            error: When True the descriptor is not read at all and the
                object only carries the error state.
        """
        self.error = error
        self.gdb_obj = gdb_obj
        self.kmem_cache = kmem_cache
        # Addresses of free objects; filled lazily by __populate_free().
        self.free: Set[int] = set()
        self.misplaced_list: Optional[str] = None
        self.misplaced_error: Optional[str] = None

        if error:
            return

        if self.page_slab:
            self.inuse = int(gdb_obj["active"])
            self.page = page_from_gdb_obj(gdb_obj)
        else:
            self.inuse = int(gdb_obj["inuse"])
        self.s_mem = int(gdb_obj["s_mem"])

    def __add_free_obj_by_idx(self, idx: int) -> bool:
        """Record object number ``idx`` as free.

        Returns:
            False (after reporting an error) when the index is out of range
            or the object is already recorded as free, True otherwise.
        """
        objs_per_slab = self.kmem_cache.objs_per_slab
        bufsize = self.kmem_cache.buffer_size

        if idx >= objs_per_slab:
            self.__error(": free object index %d overflows %d" %
                         (idx, objs_per_slab))
            return False

        obj_addr = self.s_mem + idx * bufsize
        if obj_addr in self.free:
            self.__error(": object %x duplicated on freelist" % obj_addr)
            return False

        self.free.add(obj_addr)
        return True

    def __populate_free(self) -> None:
        """Fill ``self.free`` from the slab's free list, once it is non-empty.

        A slab without free objects leaves the set empty, so the (then
        short) walk is repeated on the next call.
        """
        if self.free:
            return

        if self.page_slab:
            # Entries [inuse, objs_per_slab) of the index array are the
            # free objects.
            freelist = self.gdb_obj["freelist"].cast(
                self.bufctl_type.pointer())
            for i in range(self.inuse, self.kmem_cache.objs_per_slab):
                self.__add_free_obj_by_idx(int(freelist[i]))
            # NOTE: a former "freelist full of zeroes" heuristic was
            # permanently disabled as unreliable and has been dropped.
            return

        # The index array starts right behind the struct slab; follow the
        # chain of indexes until the end marker.
        bufctl = self.gdb_obj.address[1].cast(self.bufctl_type).address
        idx = int(self.gdb_obj["free"])
        while idx != BUFCTL_END:
            if not self.__add_free_obj_by_idx(idx):
                self.__error(": bufctl cycle detected")
                break
            idx = int(bufctl[idx])

    def find_obj(self, addr: int) -> Union[int, None]:
        """Return the start address of the object containing ``addr``.

        Returns:
            The object's start address, or None when ``addr`` lies outside
            the slab's object area.
        """
        bufsize = self.kmem_cache.buffer_size
        objs_per_slab = self.kmem_cache.objs_per_slab

        offset = int(addr) - self.s_mem
        if offset < 0:
            return None

        idx = offset // bufsize
        if idx >= objs_per_slab:
            return None

        return self.s_mem + idx * bufsize

    def contains_obj(self, addr: int
                    ) -> Tuple[bool, int, Optional[ArrayCacheEntry]]:
        """Tell whether ``addr`` lies in an allocated object of this slab.

        Returns:
            ``(allocated, obj_addr, array_cache)``: ``allocated`` is True
            only when the object is neither on the free list nor in an
            array cache; ``obj_addr`` is the object's start address (0 when
            ``addr`` is outside the slab); ``array_cache`` describes the
            array cache holding the object, if any.
        """
        obj_addr = self.find_obj(addr)
        # Compare against None explicitly so that an object at address 0 is
        # not mistaken for "not found".
        if obj_addr is None:
            return (False, 0, None)

        self.__populate_free()
        if obj_addr in self.free:
            return (False, int(obj_addr), None)

        ac = self.kmem_cache.get_array_caches()
        if obj_addr in ac:
            return (False, int(obj_addr), ac[obj_addr])

        return (True, int(obj_addr), None)

    def __error(self, msg: str, misplaced: bool = False) -> None:
        """Mark the slab as bad and report ``msg``.

        Args:
            msg: Text appended to the "cache ... slab ..." prefix.
            misplaced: When True the message is stored in
                ``misplaced_error`` instead of being printed.
        """
        msg = col_error("cache %s slab %x%s" %
                        (self.kmem_cache.name, int(self.gdb_obj.address),
                         msg))
        self.error = True
        if misplaced:
            self.misplaced_error = msg
        else:
            print(msg)

    def __free_error(self, list_name: str) -> None:
        """Record that the slab sits on a list not matching its usage."""
        self.misplaced_list = list_name
        self.__error(": is on list %s, but has %d of %d objects allocated" %
                     (list_name, self.inuse, self.kmem_cache.objs_per_slab),
                     misplaced=True)

    def get_objects(self) -> Iterable[int]:
        """Yield the start address of every object slot in the slab."""
        bufsize = self.kmem_cache.buffer_size
        obj = self.s_mem
        for _ in range(self.kmem_cache.objs_per_slab):
            yield obj
            obj += bufsize

    def get_allocated_objects(self) -> Iterable[int]:
        """Yield the start address of every allocated object in the slab."""
        for obj in self.get_objects():
            if self.contains_obj(obj)[0]:
                yield obj

    def check(self, slabtype: int, nid: int) -> int:
        """Verify the slab's consistency and report every problem found.

        Args:
            slabtype: Which list the slab was found on (``slab_partial``,
                ``slab_full`` or ``slab_free``).
            nid: NUMA node the list belongs to.

        Returns:
            The number of free objects counted on the slab.
        """
        self.__populate_free()
        num_free = len(self.free)
        max_free = self.kmem_cache.objs_per_slab
        self_addr = int(self.gdb_obj.address)

        # With off-slab management the struct slab is itself an object of
        # another cache; verify that allocation.
        if self.kmem_cache.off_slab and not Slab.page_slab:
            struct_slab_slab = slab_from_obj_addr(self_addr)
            if not struct_slab_slab:
                self.__error(
                    ": OFF_SLAB struct slab is not a slab object itself")
            else:
                struct_slab_cache = struct_slab_slab.kmem_cache.name
                if not self.kmem_cache.off_slab_cache:
                    # First slab seen: remember the cache, which has to be
                    # one of the two accepted ones.
                    if struct_slab_cache not in ("size-64", "size-128"):
                        self.__error(
                            ": OFF_SLAB struct slab is in a wrong cache %s" %
                            struct_slab_cache)
                    else:
                        self.kmem_cache.off_slab_cache = struct_slab_cache
                elif struct_slab_cache != self.kmem_cache.off_slab_cache:
                    self.__error(
                        ": OFF_SLAB struct slab is in a wrong cache %s" %
                        struct_slab_cache)

                struct_slab_obj = struct_slab_slab.contains_obj(self_addr)
                if not struct_slab_obj[0]:
                    self.__error(": OFF_SLAB struct slab is not allocated")
                    print(struct_slab_obj)
                elif struct_slab_obj[1] != self_addr:
                    self.__error(": OFF_SLAB struct slab at wrong offset {}"
                                 .format(self_addr - struct_slab_obj[1]))

        if self.inuse + num_free != max_free:
            self.__error(": inuse=%d free=%d adds up to %d (should be %d)" %
                         (self.inuse, num_free,
                          self.inuse + num_free, max_free))

        # The number of free objects must match the list the slab is on.
        if slabtype == slab_free:
            if num_free != max_free:
                self.__free_error("slab_free")
        elif slabtype == slab_partial:
            if num_free in (0, max_free):
                self.__free_error("slab_partial")
        elif slabtype == slab_full:
            if num_free > 0:
                self.__free_error("slab_full")

        if self.page_slab:
            slab_nid = self.page.get_nid()
            if nid != slab_nid:
                self.__error(": slab is on nid %d instead of %d" %
                             (slab_nid, nid))
                print("free objects %d" % num_free)

        ac = self.kmem_cache.get_array_caches()
        last_page_addr = 0
        for obj in self.get_objects():
            if obj in self.free and obj in ac:
                self.__error(
                    ": obj %x is marked as free but in array cache:" % obj)
                print(ac[obj])
            try:
                page = page_from_addr(obj).compound_head()
            except gdb.NotAvailableError:
                self.__error(": failed to get page for object %x" % obj)
                continue

            # Consecutive objects usually share a page; check each page
            # only once.
            page_addr = int(page.gdb_obj.address)
            if page_addr == last_page_addr:
                continue
            last_page_addr = page_addr

            if page.get_nid() != nid:
                self.__error(": obj %x is on nid %d instead of %d" %
                             (obj, page.get_nid(), nid))
            if not page.is_slab():
                self.__error(": obj %x is not on PageSlab page" % obj)
            kmem_cache_addr = int(page.get_slab_cache())
            if kmem_cache_addr != int(self.kmem_cache.gdb_obj.address):
                self.__error(": obj %x is on page where pointer to kmem_cache points to %x instead of %x" %
                             (obj, kmem_cache_addr,
                              int(self.kmem_cache.gdb_obj.address)))

            if self.page_slab:
                continue

            slab_addr = int(page.get_slab_page())
            # Compare integers on both sides, like every other address
            # comparison in this method.
            if slab_addr != self_addr:
                self.__error(": obj %x is on page where pointer to slab wrongly points to %x" %
                             (obj, slab_addr))
        return num_free
class KmemCache:
    """One kmem cache of the SLAB allocator and its consistency checks.

    The member names that differ between kernel versions are resolved by
    check_kmem_cache_type() and stored in class attributes.
    """

    # Member holding the object size ('buffer_size' or 'size').
    buffer_size_name: Optional[str] = None
    # Member holding the per-node structures ('nodelists' or 'node').
    nodelists_name: Optional[str] = None
    # Member holding the per-cpu array caches ('cpu_cache' or 'array').
    percpu_name: Optional[str] = None
    # True when the per-cpu member is 'cpu_cache'; it is then read with
    # get_percpu_var() instead of being indexed by cpu.
    percpu_cache: Optional[bool] = None
    # list_head member linking all caches ('next' or 'list').
    head_name: str = "list"
    # True once the 'struct alien_cache' type callback has fired.
    alien_cache_type_exists: bool = False

    @classmethod
    def check_kmem_cache_type(cls, gdbtype: gdb.Type) -> None:
        """Resolve the version-dependent member names of struct kmem_cache.

        Args:
            gdbtype: The 'struct kmem_cache' type.
        """
        # presumably find_member_variant() returns whichever candidate name
        # exists in gdbtype -- confirm against crash.util
        cls.buffer_size_name = find_member_variant(gdbtype,
                                                   ['buffer_size', 'size'])
        cls.nodelists_name = find_member_variant(gdbtype,
                                                 ['nodelists', 'node'])
        cls.percpu_name = find_member_variant(gdbtype,
                                              ['cpu_cache', 'array'])
        cls.percpu_cache = bool(cls.percpu_name == 'cpu_cache')
        cls.head_name = find_member_variant(gdbtype, ['next', 'list'])

    @classmethod
    # pylint: disable=unused-argument
    def setup_alien_cache_type(cls, gdbtype: gdb.Type) -> None:
        """Note that 'struct alien_cache' exists.

        Alien array caches are then read from the 'ac' member of each
        alien cache entry.
        """
        cls.alien_cache_type_exists = True

    def __init__(self, name: str, gdb_obj: gdb.Value) -> None:
        """Wrap a struct kmem_cache.

        Args:
            name: Name of the cache.
            gdb_obj: The struct kmem_cache value.
        """
        self.name = name
        self.gdb_obj = gdb_obj

        self.objs_per_slab = int(gdb_obj["num"])
        self.buffer_size = int(gdb_obj[KmemCache.buffer_size_name])

        # Bit 0x80000000 of the flags marks off-slab slab management
        # (presumably CFLGS_OFF_SLAB -- confirm against the kernel source).
        self.off_slab = bool(int(gdb_obj["flags"]) & 0x80000000)
        # Name of the cache holding this cache's off-slab struct slabs,
        # learned by Slab.check().  Always defined, so reading it never
        # raises AttributeError on caches without off-slab management.
        self.off_slab_cache: Optional[str] = None

        # Object address -> array cache description; filled lazily.
        self.array_caches: Dict[int, Dict] = {}

    def __get_nodelist(self, node: int) -> gdb.Value:
        """Return the pointer to the per-node structure of node ``node``."""
        return self.gdb_obj[KmemCache.nodelists_name][node]

    def __get_nodelists(self) -> Iterable[Tuple[int, gdb.Value]]:
        """Yield ``(nid, per-node structure)`` for nodes with a non-NULL one."""
        for nid in for_each_nid():
            node = self.__get_nodelist(nid)
            if int(node) == 0:
                continue
            yield (nid, node.dereference())

    @staticmethod
    def all_find_obj(addr: int
                    ) -> Optional[Tuple[bool, int, Optional[ArrayCacheEntry]]]:
        """Look ``addr`` up in whichever slab it belongs to.

        Returns:
            None when ``addr`` is not on a slab page, otherwise the result
            of Slab.contains_obj() for the slab found.
        """
        slab = slab_from_obj_addr(addr)
        if not slab:
            return None
        return slab.contains_obj(addr)

    def __fill_array_cache(self, acache: gdb.Value, ac_type: str,
                           nid_src: int, nid_tgt: int) -> None:
        """Record every object held in one array cache.

        Args:
            acache: The array cache structure.
            ac_type: One of AC_PERCPU, AC_SHARED, AC_ALIEN.
            nid_src: Node owning the array cache (-1 for per-cpu caches).
            nid_tgt: Node the objects should be on; for AC_PERCPU this is
                the cpu number, which is translated to its node for the
                check but stored unchanged in the entry.
        """
        avail = int(acache["avail"])

        # TODO check avail > limit
        if avail == 0:
            return

        cache_dict = {"ac_type": ac_type, "nid_src": nid_src,
                      "nid_tgt": nid_tgt}

        if ac_type == AC_PERCPU:
            nid_tgt = numa_node_id(nid_tgt)

        for i in range(avail):
            ptr = int(acache["entry"][i])
            if ptr in self.array_caches:
                print(col_error("WARNING: array cache duplicity detected!"))
            else:
                self.array_caches[ptr] = cache_dict

            page = page_from_addr(ptr)
            obj_nid = page.get_nid()

            if obj_nid != nid_tgt:
                print(col_error("Object {:#x} in cache {} is on wrong nid {} instead of {}"
                                .format(ptr, cache_dict, obj_nid, nid_tgt)))

    def __fill_alien_caches(self, node: gdb.Value, nid_src: int) -> None:
        """Record the objects held in the alien caches of one node."""
        alien_cache = node["alien"]

        # TODO check that this only happens for single-node systems?
        if int(alien_cache) == 0:
            return

        for nid in for_each_nid():
            array = alien_cache[nid].dereference()

            # TODO: limit should prevent this?
            if array.address == 0:
                continue

            if self.alien_cache_type_exists:
                array = array["ac"]

            # A node cannot have alien cache on the same node, but some
            # kernels (xen) seem to have a non-null pointer there anyway
            if nid_src == nid:
                continue

            self.__fill_array_cache(array, AC_ALIEN, nid_src, nid)

    def __fill_percpu_caches(self) -> None:
        """Record the objects held in the per-cpu array caches."""
        cpu_cache = self.gdb_obj[KmemCache.percpu_name]

        for cpu in for_each_online_cpu():
            if KmemCache.percpu_cache:
                array = get_percpu_var(cpu_cache, cpu)
            else:
                array = cpu_cache[cpu].dereference()

            self.__fill_array_cache(array, AC_PERCPU, -1, cpu)

    def __fill_all_array_caches(self) -> None:
        """Rebuild ``array_caches`` from per-cpu, shared and alien caches."""
        self.array_caches = {}

        self.__fill_percpu_caches()

        # TODO check and report collisions
        for (nid, node) in self.__get_nodelists():
            shared_cache = node["shared"]
            if int(shared_cache) != 0:
                self.__fill_array_cache(shared_cache.dereference(),
                                        AC_SHARED, nid, nid)

            self.__fill_alien_caches(node, nid)

    def get_array_caches(self) -> Dict[int, ArrayCacheEntry]:
        """Return the mapping of cached object address to array cache entry.

        The mapping is built on first use.  While it is empty, every call
        rebuilds it.
        """
        if not self.array_caches:
            self.__fill_all_array_caches()

        return self.array_caches

    def __get_allocated_objects(self, node: gdb.Value,
                                slabtype: int) -> Iterable[int]:
        """Yield allocated objects of all slabs on one list of ``node``."""
        for slab in self.get_slabs_of_type(node, slabtype):
            for obj in slab.get_allocated_objects():
                yield obj

    def get_allocated_objects(self) -> Iterable[int]:
        """Yield the address of every allocated object in the cache.

        Only the partial and full lists are walked; slabs on the free list
        are not visited.
        """
        for (_nid, node) in self.__get_nodelists():
            for obj in self.__get_allocated_objects(node, slab_partial):
                yield obj
            for obj in self.__get_allocated_objects(node, slab_full):
                yield obj

    def get_slabs_of_type(self, node: gdb.Value, slabtype: int,
                          reverse: bool = False,
                          exact_cycles: bool = False) -> Iterable[Slab]:
        """Yield the slabs on one of the slab lists of ``node``.

        Args:
            node: The per-node structure.
            slabtype: ``slab_partial``, ``slab_full`` or ``slab_free``.
            reverse: Passed on to list_for_each().
            exact_cycles: Passed on to list_for_each().

        Entries that turn out to be the head of one of the other two lists
        are reported and skipped, as are entries whose slab cannot be read.
        """
        # Addresses of the other two list heads, to detect a walk that
        # strayed onto a different list.
        wrong_list_nodes = {
            int(node[slab_list_fullname[stype]].address): stype
            for stype in range(3) if stype != slabtype
        }

        slab_list = node[slab_list_fullname[slabtype]]
        for list_head in list_for_each(slab_list, reverse=reverse,
                                       exact_cycles=exact_cycles):
            try:
                head_addr = int(list_head)
                if head_addr in wrong_list_nodes:
                    wrong_type = wrong_list_nodes[head_addr]
                    print(col_error("Encountered head of {} slab list while traversing {} slab list, skipping"
                                    .format(slab_list_name[wrong_type],
                                            slab_list_name[slabtype])))
                    continue

                slab = Slab.from_list_head(list_head, self)
            except gdb.NotAvailableError:
                traceback.print_exc()
                print("failed to initialize slab object from list_head {:#x}: {}"
                      .format(int(list_head), sys.exc_info()[0]))
                continue
            yield slab

    def __check_slab(self, slab: Slab, slabtype: int, nid: int,
                     errors: Dict) -> int:
        """Check one slab and update the running report state.

        Runs of good slabs and runs of misplaced slabs are summarised
        rather than printed one by one; ``errors`` carries the counters and
        first/last markers of the current run.

        Returns:
            The number of free objects on the slab (0 when the slab was
            already marked bad and therefore not checked).
        """
        addr = int(slab.gdb_obj.address)
        free = 0

        if slab.error is False:
            free = slab.check(slabtype, nid)

        # A slab that is not misplaced ends a run of misplaced slabs.
        if slab.misplaced_error is None and errors['num_misplaced'] > 0:
            print(col_error("{} slab objects were misplaced, printing the last:"
                            .format(errors['num_misplaced'])))
            print(errors['last_misplaced'])
            errors['num_misplaced'] = 0
            errors['last_misplaced'] = None

        if slab.error is False:
            errors['num_ok'] += 1
            errors['last_ok'] = addr
            if not errors['first_ok']:
                errors['first_ok'] = addr
        else:
            # A bad slab ends a run of good slabs.
            if errors['num_ok'] > 0:
                print("{} slab objects were ok between {:#x} and {:#x}"
                      .format(errors['num_ok'], errors['first_ok'],
                              errors['last_ok']))
                errors['num_ok'] = 0
                errors['first_ok'] = None
                errors['last_ok'] = None

            if slab.misplaced_error is not None:
                # Only the first of a run is printed immediately.
                if errors['num_misplaced'] == 0:
                    print(slab.misplaced_error)
                errors['num_misplaced'] += 1
                errors['last_misplaced'] = slab.misplaced_error

        return free

    def ___check_slabs(self, node: gdb.Value, slabtype: int, nid: int,
                       reverse: bool = False) -> Tuple[bool, int, int]:
        """Check every slab on one list, in one direction.

        Returns:
            ``(check_ok, slabs, free)``: whether the list could be walked
            to its end, the number of slabs visited and the number of free
            objects counted.
        """
        slabs = 0
        free = 0
        check_ok = True

        errors = {'first_ok': None, 'last_ok': None, 'num_ok': 0,
                  'first_misplaced': None, 'last_misplaced': None,
                  'num_misplaced': 0}

        try:
            for slab in self.get_slabs_of_type(node, slabtype, reverse,
                                               exact_cycles=True):
                try:
                    free += self.__check_slab(slab, slabtype, nid, errors)
                except gdb.NotAvailableError as e:
                    print(col_error("Exception when checking slab {:#x}:{}"
                                    .format(int(slab.gdb_obj.address), e)))
                    traceback.print_exc()
                slabs += 1

        except (gdb.NotAvailableError, ListError) as e:
            print(col_error("Unrecoverable error when traversing {} slab list: {}"
                            .format(slab_list_name[slabtype], e)))
            check_ok = False

        # Flush whatever run was still open when the walk ended.
        if errors['num_ok']:
            print("{} slab objects were ok between {:#x} and {:#x}"
                  .format(errors['num_ok'], errors['first_ok'],
                          errors['last_ok']))

        if errors['num_misplaced']:
            print(col_error("{} slab objects were misplaced, printing the last:"
                            .format(errors['num_misplaced'])))
            print(errors['last_misplaced'])

        return (check_ok, slabs, free)

    def __check_slabs(self, node: gdb.Value, slabtype: int, nid: int) -> int:
        """Check one slab list, retrying backwards if the walk breaks.

        Returns:
            The number of free objects counted, summed over both walks when
            a reverse retry was needed.
        """
        slab_list = node[slab_list_fullname[slabtype]]

        print("checking {} slab list {:#x}".format(slab_list_name[slabtype],
                                                   int(slab_list.address)))

        (check_ok, _slabs, free) = self.___check_slabs(node, slabtype, nid)

        if not check_ok:
            print("Retrying the slab list in reverse order")
            (_ok, _slabs_rev, free_rev) = \
                self.___check_slabs(node, slabtype, nid, reverse=True)
            free += free_rev

        return free

    def check_array_caches(self) -> None:
        """Verify that every pointer held in an array cache is plausible.

        Reports pointers that are not on a slab, belong to another cache,
        are on the slab's free list, or do not point at an object start.
        """
        acs = self.get_array_caches()
        for ac_ptr in acs:
            ac_obj_slab = slab_from_obj_addr(ac_ptr)
            if not ac_obj_slab:
                print("cached pointer {:#x} in {} not found in slab"
                      .format(ac_ptr, acs[ac_ptr]))
            elif ac_obj_slab.kmem_cache.name != self.name:
                print("cached pointer {:#x} in {} belongs to wrong kmem cache {}"
                      .format(ac_ptr, acs[ac_ptr],
                              ac_obj_slab.kmem_cache.name))
            else:
                ac_obj_obj = ac_obj_slab.contains_obj(ac_ptr)
                if ac_obj_obj[0] is False and ac_obj_obj[2] is None:
                    print("cached pointer {:#x} in {} is not allocated: {}".format(
                        ac_ptr, acs[ac_ptr], ac_obj_obj))
                elif ac_obj_obj[1] != ac_ptr:
                    print("cached pointer {:#x} in {} has wrong offset: ({}, {:#x}, {})"
                          .format(ac_ptr, acs[ac_ptr], ac_obj_obj[0],
                                  ac_obj_obj[1], ac_obj_obj[2]))

    def check_all(self) -> None:
        """Run every consistency check on this cache and print the findings.

        For each node the list lock, the three slab lists and the declared
        free-object count are checked; the array caches are checked last.
        """
        for (nid, node) in self.__get_nodelists():
            try:
                # This is version and architecture specific
                lock = int(node["list_lock"]["rlock"]["raw_lock"]["slock"])
                if lock != 0:
                    print(col_error("unexpected lock value in kmem_list3 {:#x}: {:#x}"
                                    .format(int(node.address), lock)))
            except gdb.error:
                print("Can't check lock state -- locking implementation unknown.")
            free_declared = int(node["free_objects"])
            free_counted = self.__check_slabs(node, slab_partial, nid)
            free_counted += self.__check_slabs(node, slab_full, nid)
            free_counted += self.__check_slabs(node, slab_free, nid)
            if free_declared != free_counted:
                print(col_error("free objects mismatch on node %d: declared=%d counted=%d" %
                                (nid, free_declared, free_counted)))
        self.check_array_caches()
class KmemCacheNotFound(RuntimeError):
    """The specified kmem_cache could not be found.

    Raised by kmem_cache_from_addr() and kmem_cache_from_name() when the
    requested address or name is not among the known caches.
    """
# Every known kmem cache, indexed by cache name and by the address of its
# struct kmem_cache.  Both are filled by setup_slab_caches().
kmem_caches: Dict[str, KmemCache] = {}
kmem_caches_by_addr: Dict[int, KmemCache] = {}
def setup_slab_caches(slab_caches: gdb.Symbol) -> None:
    """Register every kmem cache linked on the kernel's cache list.

    Each struct kmem_cache on the list is wrapped in a KmemCache and stored
    in both ``kmem_caches`` (by name) and ``kmem_caches_by_addr`` (by
    address).

    Args:
        slab_caches: Symbol of the list head linking all caches.
    """
    head = slab_caches.value()
    entries = list_for_each_entry(head, types.kmem_cache_type,
                                  KmemCache.head_name)
    for entry in entries:
        cache_name = entry["name"].string()
        wrapper = KmemCache(cache_name, entry)

        kmem_caches[cache_name] = wrapper
        kmem_caches_by_addr[int(entry.address)] = wrapper
def kmem_cache_from_addr(addr: int) -> KmemCache:
    """Return the known kmem cache whose struct kmem_cache is at ``addr``.

    Args:
        addr: Address of a struct kmem_cache.

    Returns:
        The matching KmemCache.

    Raises:
        KmemCacheNotFound: No known cache lives at ``addr``.
    """
    try:
        return kmem_caches_by_addr[addr]
    except KeyError:
        # The KeyError is an implementation detail of the registry lookup;
        # suppress it so the traceback shows only the meaningful error.
        raise KmemCacheNotFound(f"No kmem cache found for {addr}.") from None
def kmem_cache_from_name(name: str) -> KmemCache:
    """Return the known kmem cache called ``name``.

    Args:
        name: Name of the cache.

    Returns:
        The matching KmemCache.

    Raises:
        KmemCacheNotFound: No known cache has that name.
    """
    try:
        return kmem_caches[name]
    except KeyError:
        # The KeyError is an implementation detail of the registry lookup;
        # suppress it so the traceback shows only the meaningful error.
        raise KmemCacheNotFound(f"No kmem cache found for {name}.") from None
def kmem_cache_get_all() -> ValuesView[KmemCache]:
    """Return a view of every known kmem cache.

    The view is taken from the name-indexed registry.
    """
    every_cache = kmem_caches.values()
    return every_cache
def slab_from_obj_addr(addr: int) -> Union[Slab, None]:
    """Return the slab that the object at ``addr`` belongs to.

    Args:
        addr: Address inside an object.

    Returns:
        The Slab built from the compound head page of ``addr``, or None
        when that page is not a slab page.
    """
    head_page = page_from_addr(addr).compound_head()
    if head_page.is_slab():
        return Slab.from_page(head_page)
    return None
# Pairs of kernel type name and the handler that receives that type.
# NOTE(review): presumably each handler runs once its type can be resolved --
# confirm against crash.util.symbols.
type_cbs = TypeCallbacks([('struct page', Slab.check_page_type),
                          ('struct slab', Slab.check_slab_type),
                          ('kmem_bufctl_t', Slab.check_bufctl_type),
                          ('freelist_idx_t', Slab.check_bufctl_type),
                          ('struct kmem_cache', KmemCache.check_kmem_cache_type),
                          ('struct alien_cache', KmemCache.setup_alien_cache_type)])
# Both symbol names are handled by setup_slab_caches(), which takes the
# symbol's value as the head of the list of all caches.
symbol_cbs = SymbolCallbacks([('slab_caches', setup_slab_caches),
                              ('cache_chain', setup_slab_caches)])