init eACGM

2025-08-07 10:14:54 +08:00
commit 7a4a0b1b14
51 changed files with 11495 additions and 0 deletions
--- a/demo/sampler_cuda.py
+++ b/demo/sampler_cuda.py
@@ -0,0 +1,114 @@
+import time
+import json
+
+from eacgm.bpf import BccBPF
+from eacgm.sampler import eBPFSampler
+from eacgm.collector import to_perfetto
+
+text = """
+// #include <cuda_runtime.h>
+#include <uapi/linux/ptrace.h>
+
+// Plain triple of unsigned int components (x, y, z).
+// None of the probes below reference this type; cudaLaunchKernelEntry decodes the
+// launch dimensions from raw argument registers instead.
+struct dim3 {
+    unsigned int x;
+    unsigned int y;
+    unsigned int z;
+};
+
+// Entry probe for cudaMalloc: emits "<ts>@start@cudaMalloc@<arg1>@<arg2>".
+// The two call arguments are logged as raw 64-bit register values.
+// NOTE(review): per the CUDA runtime signature cudaMalloc(void **devPtr, size_t size),
+// arg1 is the address of the caller's pointer variable, not the allocation itself.
+int cudaMallocEntry(struct pt_regs *ctx){
+    u64 dev_ptr_arg = PT_REGS_PARM1(ctx);
+    u64 size_arg = PT_REGS_PARM2(ctx);
+    u64 now_ns = bpf_ktime_get_ns();
+    bpf_trace_printk("%ld@start@cudaMalloc@%ld@%ld\\n", now_ns, dev_ptr_arg, size_arg);
+    return 0;
+};
+
+// Return probe for cudaMalloc: emits "<ts>@end@cudaMalloc".
+int cudaMallocExit(struct pt_regs *ctx){
+    u64 now_ns = bpf_ktime_get_ns();
+    bpf_trace_printk("%ld@end@cudaMalloc\\n", now_ns);
+    return 0;
+};
+
+// Entry probe for cudaMemcpy: emits "<ts>@start@cudaMemcpy@<bytes>@<kind>".
+// Per the CUDA runtime signature cudaMemcpy(dst, src, count, kind), the third
+// argument is the byte count and the fourth is the cudaMemcpyKind enum value.
+int cudaMemcpyEntry(struct pt_regs *ctx){
+    u64 byte_length = PT_REGS_PARM3(ctx);
+    u64 memcpy_kind = PT_REGS_PARM4(ctx);
+    u64 ts = bpf_ktime_get_ns();
+    // Three conversions, three values: timestamp, byte count, copy kind.
+    bpf_trace_printk("%ld@start@cudaMemcpy@%ld@%ld\\n", ts, byte_length, memcpy_kind);
+    return 0;
+};
+
+// Return probe for cudaMemcpy: emits "<ts>@end@cudaMemcpy".
+int cudaMemcpyExit(struct pt_regs *ctx){
+    u64 now_ns = bpf_ktime_get_ns();
+    bpf_trace_printk("%ld@end@cudaMemcpy\\n", now_ns);
+    return 0;
+};
+
+// Entry probe for cudaFree: emits "<ts>@start@cudaFree@<ptr>".
+// <ptr> is the first call argument, i.e. the device pointer being released.
+int cudaFreeEntry(struct pt_regs *ctx){
+    u64 malloc_ptr = PT_REGS_PARM1(ctx);
+    u64 ts = bpf_ktime_get_ns();
+    // Timestamp goes first, matching the "<ts>@start@..." layout of the other probes.
+    bpf_trace_printk("%ld@start@cudaFree@%ld\\n", ts, malloc_ptr);
+    return 0;
+};
+
+// Return probe for cudaFree: emits "<ts>@end@cudaFree".
+int cudaFreeExit(struct pt_regs *ctx){
+    u64 now_ns = bpf_ktime_get_ns();
+    bpf_trace_printk("%ld@end@cudaFree\\n", now_ns);
+    return 0;
+};
+
+// Entry probe for cudaLaunchKernel: emits "<ts>@start@cudaLaunchKernel@<threads>",
+// where <threads> is gridDim.x * gridDim.y * gridDim.z * blockDim.x * blockDim.y * blockDim.z.
+//
+// NOTE(review): the decoding assumes the x86-64 System V calling convention, where each
+// by-value 12-byte dim3 occupies two argument registers (x in the low half and y in the
+// high half of the first, z in the low half of the second) -- confirm before using on
+// other architectures.
+int cudaLaunchKernelEntry(struct pt_regs *ctx){
+    u64 ts = bpf_ktime_get_ns();
+    u64 grid_xy = PT_REGS_PARM2(ctx);
+    u64 grid_z_reg = PT_REGS_PARM3(ctx);
+    u64 block_xy = PT_REGS_PARM4(ctx);
+    u64 block_z_reg = PT_REGS_PARM5(ctx);
+    // Every dim3 component is a full unsigned int, so keep all 32 bits of each half;
+    // a 16-bit mask would truncate grids wider than 65535 blocks.
+    u64 g_x = grid_xy & 0xFFFFFFFFULL;
+    u64 g_y = grid_xy >> 32;
+    u64 g_z = grid_z_reg & 0xFFFFFFFFULL;
+    u64 b_x = block_xy & 0xFFFFFFFFULL;
+    u64 b_y = block_xy >> 32;
+    u64 b_z = block_z_reg & 0xFFFFFFFFULL;
+    // Multiply in 64 bits so realistic launch sizes do not wrap at 2^32.
+    u64 total_threads = g_x * g_y * g_z * b_x * b_y * b_z;
+    bpf_trace_printk("%ld@start@cudaLaunchKernel@%lu\\n", ts, total_threads);
+    return 0;
+};
+
+// Return probe for cudaLaunchKernel: emits "<ts>@end@cudaLaunchKernel".
+int cudaLaunchKernelExit(struct pt_regs *ctx){
+    u64 now_ns = bpf_ktime_get_ns();
+    bpf_trace_printk("%ld@end@cudaLaunchKernel\\n", now_ns);
+    return 0;
+};
+"""
+
+bpf = BccBPF("CUDAeBPF", text, ["-w"])
+
+attach_config = [
+    {
+        "name": "CUDASampler",
+        "exe_path": [
+            "/home/txx/data/miniconda3/envs/eACGM/lib/python3.12/site-packages/nvidia/cuda_runtime/lib/libcudart.so.12",
+        ],
+        "exe_sym": [
+            "cudaMalloc",
+            "cudaMemcpy",
+            "cudaFree",
+            "cudaLaunchKernel",
+        ]
+    },
+]
+
+sampler = eBPFSampler(bpf)
+
+sampler.run(attach_config)
+
+states = []
+while True:
+    try:
+        samples = sampler.sample(time_stamp=1)
+        states += samples
+        # for sample in samples:
+            # print(sample)
+        # print("---")
+    except KeyboardInterrupt:
+        break
+
+sampler.close()
+
+collector = to_perfetto(states)
+json.dump(collector, open("cuda.json", "w", encoding="utf-8"), indent=4)