Tuesday, July 5, 2016

Linux kernel perf

root@OpenWrt:/# perf record -f -g -a
  Error: unknown switch `f'

 usage: perf record [<options>] [<command>]
    or: perf record [<options>] -- <command> [<options>]

    -e, --event <event>   event selector. use 'perf list' to list available events
        --filter <filter>
                          event filter
    -p, --pid <pid>       record events on existing process id
    -t, --tid <tid>       record events on existing thread id
    -r, --realtime <n>    collect data with this RT SCHED_FIFO priority
        --no-buffering    collect data without buffering
    -R, --raw-samples     collect raw sample records from all opened counters
    -a, --all-cpus        system-wide collection from all CPUs
    -C, --cpu <cpu>       list of cpus to monitor
    -c, --count <n>       event period to sample
    -o, --output <file>   output file name
    -i, --no-inherit      child tasks do not inherit counters
    -F, --freq <n>        profile at this frequency
    -m, --mmap-pages <pages>
                          number of mmap data pages
        --group           put the counters into a counter group
    -g                    enables call-graph recording
        --call-graph <mode[,dump_size]>
                          setup and enables call-graph (stack chain/backtrace) recording: fp
    -v, --verbose         be more verbose (show counter open errors, etc)
    -q, --quiet           don't print any message
    -s, --stat            per thread counts
    -d, --data            Sample addresses
    -T, --timestamp       Sample timestamps
    -P, --period          Sample period
    -n, --no-samples      don't sample
    -N, --no-buildid-cache
                          do not update the buildid cache
    -B, --no-buildid      do not collect buildids in perf.data
    -G, --cgroup <name>   monitor event in cgroup name only
    -D, --delay <n>       ms to wait before starting measurement after program start
    -u, --uid <user>      user to profile
    -b, --branch-any      sample any taken branches
    -j, --branch-filter <branch filter mask>
                          branch stack filter modes
    -W, --weight          sample by weight (on special events only)
        --transaction     sample transaction flags (special events only)
        --per-thread      use per-thread mmaps

root@OpenWrt:/#


perf

You can also use the standard Linux perf tool:
ps aux | grep chromium to find a particular browser/renderer/gpu process
perf record -g -p <pid> to capture that process (older instructions add -f, but this perf build rejects it with "unknown switch `f'", as shown above)
perf report for the profile output
perf annotate "<fully qualified function name>" for assembly language and (very approximate?) per-instruction cycle counts
By default this saves "perf.data" in the current working directory, which can be renamed.
perf report may be able to run on older data, but perf annotate will be inaccurate if you've since rebuilt the executable.

==========================================================================
  PID  PPID USER     STAT   VSZ %VSZ %CPU COMMAND
  784     2 root     SW       0   0%   3% [kworker/1:3]
   14     2 root     SW       0   0%   1% [ksoftirqd/1]
 
 
perf record -g -a -p 3

perf report -i xxx.data

perf annotate -vvv memcpy > /dev/null

X:\build_dir\target-arm_cortex-a7_uClibc-0.9.33.2_eabi\linux\vmlinux.debug

It also now looks at /sys/kernel/notes, gets the build-id, and looks it up in the build-id cache.

================================== add debugging symbol to modules ==============================
Compile packages with debugging info
sdk/config/Config-build.in
config DEBUG
bool
prompt "Compile packages with debugging info"
default n
help
 Adds -g3 to the CFLAGS
    ->> CONFIG_DEBUG=y
->> CONFIG_NO_STRIP=y
->> CONFIG_USE_SSTRIP=y

*** CONFIG_DEBUG=y
sdk/rules.mk
TARGET_CFLAGS:=$(TARGET_OPTIMIZATION)$(if $(CONFIG_DEBUG), -g3) $(EXTRA_OPTIMIZATION)

backport/mac80211/Makefile
include $(TOPDIR)/rules.mk
***** working *****
MAKE_OPTS:= -C "$(PKG_BUILD_DIR)" \
EXTRA_CFLAGS="-g3 -I$(PKG_BUILD_DIR)/include -I$(STAGING_DIR)/usr/include/drv" \

================================== remove strip command ===========================================
*** CONFIG_NO_STRIP
sdk/rules.mk
# strip an entire directory
ifneq ($(CONFIG_NO_STRIP),)
 RSTRIP:=:
 STRIP:=:
else
 ifneq ($(CONFIG_USE_STRIP),)
STRIP:=$(TARGET_CROSS)strip $(call qstrip,$(CONFIG_STRIP_ARGS))
 else
ifneq ($(CONFIG_USE_SSTRIP),)
 STRIP:=$(STAGING_DIR_HOST)/bin/sstrip
endif
 endif
 RSTRIP:= \
export CROSS="$(TARGET_CROSS)" \
$(if $(CONFIG_KERNEL_KALLSYMS),NO_RENAME=1) \
$(if $(CONFIG_KERNEL_PROFILING),KEEP_SYMBOLS=1); \
NM="$(TARGET_CROSS)nm" \
STRIP="$(STRIP)" \
STRIP_KMOD="$(SCRIPT_DIR)/strip-kmod.sh" \
$(SCRIPT_DIR)/rstrip.sh
endif
***** working *****
sdk/src/linux/Makefile
KBUILD_LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds $(if $(CONFIG_PROFILING),,-s)
-->>
KBUILD_LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds

*** how to define CONFIG_XXX
backport/mac80211/Makefile
export CONFIG_NO_STRIP:=1
export CONFIG_NO_STRIP:=y
export CONFIG_DEBUG:=y
include $(TOPDIR)/rules.mk

================================== compiling with CONFIG options ===========================================
make package/backport/mac80211/{clean,compile} package/index V=s CONFIG_NO_STRIP=y CONFIG_DEBUG=y


================================== driver modules to be used ================================================
build_dir/target_xxxx/linux/backports/net/mac80211/mac80211.ko


================================== vmlinux to be used ================================================
sdk/build_dir/target_xxxx/linux/vmlinux.debug

================================= location to copy driver debug modules =========================================
/lib/modules/3.14.43/kernel/net/wireless/cfg80211.ko
/lib/modules/3.14.43/kernel/net/wireless/mac80211.ko
*** can be a symbolic link

================================= perf command =========================================
perf record -g -a -p 3

perf report -i xxx.data

================================= perf command on x86 =========================================
perf annotate -k vmlinux -i perf.tcp.dl.data --objdump=/home/sdk/build_dir/toolchain-arm_cortex-a7_gcc-4.8-linaro_uClibc-0.9.33.2_eabi/binutils-2.22/binutils/objdump


========================== perf annotate verbose ========================
symbol__annotate: filename=/root/.debug/.build-id/b5/15361e474796af29de9992b76a97cffb39b2a7, sym=__libc_disable_asynccancel, start=0x7fedc39d1d10, end=0x7fedc39d1d68
annotating [0x9d8aa0] /lib/x86_64-linux-gnu/libc-2.19.so : [0x108ec20]     __libc_disable_asynccancel
Executing: objdump  --start-address=0x0000000000108d10 --stop-address=0x0000000000108d69 -d --no-show-raw -S -C /root/.debug/.build-id/b5/15361e474796af29de9992b76a97cffb39b2a7 2>/dev/null|grep -v /root/.debug/.build-id/b5/15361e474796af29de9992b76a97cffb39b2a7|expand


    --symfs=<directory>
        Look for files with symbols relative to this directory.

========================== perf annotate -- location to copy modules ??? ========================
rootfs ./debug/modules/mac80211.ko

No comments:

Post a Comment