aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJiri Olsa <jolsa@kernel.org>2022-01-19 23:57:50 +0900
committerJiri Olsa <jolsa@kernel.org>2022-02-14 14:08:18 +0100
commit00327b2f7c60bd84d8c5e887003987b2e688d159 (patch)
tree9ec95fb12de2638d376fbe879648dfd79e5cacc8
parent0215a780b449bf933254cff8a62cde25d712b8da (diff)
downloadperf-00327b2f7c60bd84d8c5e887003987b2e688d159.tar.gz
bpf: Add support to attach kprobe program with fprobe
Adding new link type BPF_LINK_TYPE_FPROBE that attaches kprobe program through fprobe API. The fprobe API allows to attach probe on multiple functions at once very fast, because it works on top of ftrace. On the other hand this limits the probe point to the function entry or return. The kprobe program gets the same pt_regs input ctx as when it's attached through the perf API. Adding new attach type BPF_TRACE_FPROBE that enables such link for kprobe program. User provides array of addresses or symbols with count to attach the kprobe program to. The new link_create uapi interface looks like: struct { __aligned_u64 syms; __aligned_u64 addrs; __u32 cnt; __u32 flags; } fprobe; The flags field allows single BPF_F_FPROBE_RETURN bit to create return fprobe. Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/trace_events.h5
-rw-r--r--include/uapi/linux/bpf.h13
-rw-r--r--kernel/bpf/syscall.c26
-rw-r--r--kernel/trace/bpf_trace.c208
-rw-r--r--tools/include/uapi/linux/bpf.h13
6 files changed, 261 insertions, 5 deletions
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 48a91c51c015d..e279cea46653f 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -140,3 +140,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
#ifdef CONFIG_PERF_EVENTS
BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf)
#endif
+BPF_LINK_TYPE(BPF_LINK_TYPE_FPROBE, fprobe)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 70c069aef02ca..2122cf3b9fb07 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -736,6 +736,7 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
u64 *probe_offset, u64 *probe_addr);
+int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
@@ -777,6 +778,10 @@ static inline int bpf_get_perf_event_info(const struct perf_event *event,
{
return -EOPNOTSUPP;
}
+static int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ return -EOPNOTSUPP;
+}
#endif
enum {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index afe3d0d7f5f25..0c2aa1f8a0867 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -997,6 +997,7 @@ enum bpf_attach_type {
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
BPF_PERF_EVENT,
+ BPF_TRACE_FPROBE,
__MAX_BPF_ATTACH_TYPE
};
@@ -1011,6 +1012,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_NETNS = 5,
BPF_LINK_TYPE_XDP = 6,
BPF_LINK_TYPE_PERF_EVENT = 7,
+ BPF_LINK_TYPE_FPROBE = 8,
MAX_BPF_LINK_TYPE,
};
@@ -1118,6 +1120,11 @@ enum bpf_link_type {
*/
#define BPF_F_XDP_HAS_FRAGS (1U << 5)
+/* link_create.fprobe.flags used in LINK_CREATE command for
+ * BPF_TRACE_FPROBE attach type to create return probe.
+ */
+#define BPF_F_FPROBE_RETURN (1U << 0)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
@@ -1472,6 +1479,12 @@ union bpf_attr {
*/
__u64 bpf_cookie;
} perf_event;
+ struct {
+ __aligned_u64 syms;
+ __aligned_u64 addrs;
+ __u32 cnt;
+ __u32 flags;
+ } fprobe;
};
} link_create;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35646db3d950f..ea5210028b505 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -32,6 +32,7 @@
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
+#include <linux/trace_events.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -3018,6 +3019,11 @@ out_put_file:
fput(perf_file);
return err;
}
+#else
+static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_PERF_EVENTS */
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
@@ -4251,7 +4257,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
return -EINVAL;
}
-#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
+#define BPF_LINK_CREATE_LAST_FIELD link_create.fprobe.flags
static int link_create(union bpf_attr *attr, bpfptr_t uattr)
{
enum bpf_prog_type ptype;
@@ -4275,7 +4281,6 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
ret = tracing_bpf_link_attach(attr, uattr, prog);
goto out;
case BPF_PROG_TYPE_PERF_EVENT:
- case BPF_PROG_TYPE_KPROBE:
case BPF_PROG_TYPE_TRACEPOINT:
if (attr->link_create.attach_type != BPF_PERF_EVENT) {
ret = -EINVAL;
@@ -4283,6 +4288,14 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
}
ptype = prog->type;
break;
+ case BPF_PROG_TYPE_KPROBE:
+ if (attr->link_create.attach_type != BPF_PERF_EVENT &&
+ attr->link_create.attach_type != BPF_TRACE_FPROBE) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ptype = prog->type;
+ break;
default:
ptype = attach_type_to_prog_type(attr->link_create.attach_type);
if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
@@ -4314,13 +4327,16 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
ret = bpf_xdp_link_attach(attr, prog);
break;
#endif
-#ifdef CONFIG_PERF_EVENTS
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_TRACEPOINT:
- case BPF_PROG_TYPE_KPROBE:
ret = bpf_perf_link_attach(attr, prog);
break;
-#endif
+ case BPF_PROG_TYPE_KPROBE:
+ if (attr->link_create.attach_type == BPF_PERF_EVENT)
+ ret = bpf_perf_link_attach(attr, prog);
+ else
+ ret = bpf_fprobe_link_attach(attr, prog);
+ break;
default:
ret = -EINVAL;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a2024ba32a20d..0c68306350d68 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -17,6 +17,7 @@
#include <linux/error-injection.h>
#include <linux/btf_ids.h>
#include <linux/bpf_lsm.h>
+#include <linux/fprobe.h>
#include <net/bpf_sk_storage.h>
@@ -2181,3 +2182,210 @@ static int __init bpf_event_init(void)
fs_initcall(bpf_event_init);
#endif /* CONFIG_MODULES */
+
+#ifdef CONFIG_FPROBE
+struct bpf_fprobe_link {
+ struct bpf_link link;
+ struct fprobe fp;
+ unsigned long *addrs;
+};
+
+static void bpf_fprobe_link_release(struct bpf_link *link)
+{
+ struct bpf_fprobe_link *fprobe_link;
+
+ fprobe_link = container_of(link, struct bpf_fprobe_link, link);
+ unregister_fprobe(&fprobe_link->fp);
+}
+
+static void bpf_fprobe_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_fprobe_link *fprobe_link;
+
+ fprobe_link = container_of(link, struct bpf_fprobe_link, link);
+ kvfree(fprobe_link->addrs);
+ kfree(fprobe_link);
+}
+
+static const struct bpf_link_ops bpf_fprobe_link_lops = {
+ .release = bpf_fprobe_link_release,
+ .dealloc = bpf_fprobe_link_dealloc,
+};
+
+static int fprobe_link_prog_run(struct bpf_fprobe_link *fprobe_link,
+ struct pt_regs *regs)
+{
+ int err;
+
+ if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
+ err = 0;
+ goto out;
+ }
+
+ rcu_read_lock();
+ migrate_disable();
+ err = bpf_prog_run(fprobe_link->link.prog, regs);
+ migrate_enable();
+ rcu_read_unlock();
+
+ out:
+ __this_cpu_dec(bpf_prog_active);
+ return err;
+}
+
+static void fprobe_link_handler(struct fprobe *fp, unsigned long entry_ip,
+ struct pt_regs *regs)
+{
+ unsigned long saved_ip = instruction_pointer(regs);
+ struct bpf_fprobe_link *fprobe_link;
+
+ /*
+ * Because fprobe's regs->ip is set to the next instruction of
+ * dynamic-ftrace instruction, correct entry ip must be set, so
+ * that the bpf program can access entry address via regs as same
+ * as kprobes.
+ */
+ instruction_pointer_set(regs, entry_ip);
+
+ fprobe_link = container_of(fp, struct bpf_fprobe_link, fp);
+ fprobe_link_prog_run(fprobe_link, regs);
+
+ instruction_pointer_set(regs, saved_ip);
+}
+
+static int fprobe_resolve_syms(const void *usyms, u32 cnt,
+ unsigned long *addrs)
+{
+ unsigned long addr, size;
+ const char **syms;
+ int err = -ENOMEM;
+ unsigned int i;
+ char *func;
+
+ size = cnt * sizeof(*syms);
+ syms = kvzalloc(size, GFP_KERNEL);
+ if (!syms)
+ return -ENOMEM;
+
+ func = kmalloc(KSYM_NAME_LEN, GFP_KERNEL);
+ if (!func)
+ goto error;
+
+ if (copy_from_user(syms, usyms, size)) {
+ err = -EFAULT;
+ goto error;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ err = strncpy_from_user(func, syms[i], KSYM_NAME_LEN);
+ if (err == KSYM_NAME_LEN)
+ err = -E2BIG;
+ if (err < 0)
+ goto error;
+
+ err = -EINVAL;
+ if (func[0] == '\0')
+ goto error;
+ addr = kallsyms_lookup_name(func);
+ if (!addr)
+ goto error;
+ if (!kallsyms_lookup_size_offset(addr, &size, NULL))
+ size = MCOUNT_INSN_SIZE;
+ addr = ftrace_location_range(addr, addr + size - 1);
+ if (!addr)
+ goto error;
+ addrs[i] = addr;
+ }
+
+ err = 0;
+error:
+ kvfree(syms);
+ kfree(func);
+ return err;
+}
+
+int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct bpf_fprobe_link *link = NULL;
+ struct bpf_link_primer link_primer;
+ unsigned long *addrs;
+ u32 flags, cnt, size;
+ void __user *uaddrs;
+ void __user *usyms;
+ int err;
+
+ /* no support for 32bit archs yet */
+ if (sizeof(u64) != sizeof(void *))
+ return -EOPNOTSUPP;
+
+ if (prog->expected_attach_type != BPF_TRACE_FPROBE)
+ return -EINVAL;
+
+ flags = attr->link_create.fprobe.flags;
+ if (flags & ~BPF_F_FPROBE_RETURN)
+ return -EINVAL;
+
+ uaddrs = u64_to_user_ptr(attr->link_create.fprobe.addrs);
+ usyms = u64_to_user_ptr(attr->link_create.fprobe.syms);
+ if (!!uaddrs == !!usyms)
+ return -EINVAL;
+
+ cnt = attr->link_create.fprobe.cnt;
+ if (!cnt)
+ return -EINVAL;
+
+ size = cnt * sizeof(*addrs);
+ addrs = kvmalloc(size, GFP_KERNEL);
+ if (!addrs)
+ return -ENOMEM;
+
+ if (uaddrs) {
+ if (copy_from_user(addrs, uaddrs, size)) {
+ err = -EFAULT;
+ goto error;
+ }
+ } else {
+ err = fprobe_resolve_syms(usyms, cnt, addrs);
+ if (err)
+ goto error;
+ }
+
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ bpf_link_init(&link->link, BPF_LINK_TYPE_FPROBE,
+ &bpf_fprobe_link_lops, prog);
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err)
+ goto error;
+
+ if (flags & BPF_F_FPROBE_RETURN)
+ link->fp.exit_handler = fprobe_link_handler;
+ else
+ link->fp.entry_handler = fprobe_link_handler;
+
+ link->addrs = addrs;
+
+ err = register_fprobe_ips(&link->fp, addrs, cnt);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ return err;
+ }
+
+ return bpf_link_settle(&link_primer);
+
+error:
+ kfree(link);
+ kvfree(addrs);
+ return err;
+}
+#else /* !CONFIG_FPROBE */
+int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ return -EOPNOTSUPP;
+}
+#endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index afe3d0d7f5f25..0c2aa1f8a0867 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -997,6 +997,7 @@ enum bpf_attach_type {
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
BPF_PERF_EVENT,
+ BPF_TRACE_FPROBE,
__MAX_BPF_ATTACH_TYPE
};
@@ -1011,6 +1012,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_NETNS = 5,
BPF_LINK_TYPE_XDP = 6,
BPF_LINK_TYPE_PERF_EVENT = 7,
+ BPF_LINK_TYPE_FPROBE = 8,
MAX_BPF_LINK_TYPE,
};
@@ -1118,6 +1120,11 @@ enum bpf_link_type {
*/
#define BPF_F_XDP_HAS_FRAGS (1U << 5)
+/* link_create.fprobe.flags used in LINK_CREATE command for
+ * BPF_TRACE_FPROBE attach type to create return probe.
+ */
+#define BPF_F_FPROBE_RETURN (1U << 0)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
@@ -1472,6 +1479,12 @@ union bpf_attr {
*/
__u64 bpf_cookie;
} perf_event;
+ struct {
+ __aligned_u64 syms;
+ __aligned_u64 addrs;
+ __u32 cnt;
+ __u32 flags;
+ } fprobe;
};
} link_create;