init eACGM

This commit is contained in:
Tokisakix
2025-08-07 10:14:54 +08:00
commit 7a4a0b1b14
51 changed files with 11495 additions and 0 deletions

23
.gitignore vendored Normal file
View File

@@ -0,0 +1,23 @@
*.pyc
*.egg-info
temp.*
python*.json
torch.json
cuda.json
gpu.json
nvml.json
nccl.json
ebpf.json
eacg.json
*.log
.python-version
uv.lock
requirements.txt

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 eACGM
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

67
README.md Normal file
View File

@@ -0,0 +1,67 @@
# eACGM
**eACGM:** An **e**BPF-based **A**utomated **C**omprehensive **G**overnance and **M**onitoring framework for AI/ML systems.
---
:star: **[News] Our work has been accepted by [IEEE/ACM IWQoS 2025 (CCF-B)! ](https://iwqos2025.ieee-iwqos.org/)**
**[Paper(Dropbox)](https://www.dropbox.com/scl/fi/q4vplv95usw4u5h3syx62/IWQoS_2025.pdf?rlkey=gv8h65oupkzrmv6zu1yu7s558&e=1&st=k8sttham&dl=0)**
---
eACGM provides zero-intrusive, low-overhead, full-stack observability for both hardware (GPU, NCCL) and software (CUDA, Python, PyTorch) layers in modern AI/ML workloads.
![Architecture](asset/arch.png)
## Features
- [x] **Event tracing for CUDA Runtime** based on eBPF
- [x] **Event tracing for NCCL GPU communication library** based on eBPF
- [x] **Function call tracing for Python virtual machine** based on eBPF
- [x] **Operator tracing for PyTorch** based on eBPF
- [x] **Process-level GPU information monitoring** based on `libnvml`
- [x] **Global GPU information monitoring** based on `libnvml`
- [x] **Automatic eBPF program generation**
- [x] **Comprehensive analysis** of all traced events and operators
- [x] **Flexible integration** for multi-level tracing (CUDA, NCCL, PyTorch, Python, GPU)
- [x] **Visualization-ready data output** for monitoring platforms
## Visualization
To visualize monitoring data, deploy Grafana and MySQL using Docker. Access the Grafana dashboard at [http://127.0.0.1:3000](http://127.0.0.1:3000).
```bash
cd grafana/
sh ./launch.sh
```
Start the monitoring service with:
```bash
./service.sh
```
Stop the monitoring service with:
```bash
./stop.sh
```
## Case Demonstration
The `demo` folder provides example programs to showcase the capabilities of eACGM:
- `pytorch_example.py`: Multi-node, multi-GPU PyTorch training demo
- `sampler_cuda.py`: Trace CUDA Runtime events using eBPF
- `sampler_nccl.py`: Trace NCCL GPU communication events using eBPF
- `sampler_torch.py`: Trace PyTorch operator events using eBPF
- `sampler_python.py`: Trace Python VM function calls using eBPF
- `sampler_gpu.py`: Monitor global GPU information using `libnvml`
- `sampler_nvml.py`: Monitor process-level GPU information using `libnvml`
- `sampler_eacg.py`: Combined monitoring of all supported sources
- `webui.py`: Automatically visualize captured data in Grafana
## Citation
If you find this project helpful, please consider citing our IWQoS 2025 paper (In press, to appear).

BIN
asset/arch.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 MiB

114
demo/sampler_cuda.py Normal file
View File

@@ -0,0 +1,114 @@
import time
import json

from eacgm.bpf import BccBPF
from eacgm.sampler import eBPFSampler
from eacgm.collector import to_perfetto

# eBPF program tracing entry/exit of the main CUDA Runtime calls.  Every probe
# emits one line "<ts>@<start|end>@<event>[@args...]"; eBPFSampler later splits
# the message on '@' (see BccBPF.trace_ebpf), so the '@' framing is load-bearing.
text = """
// #include <cuda_runtime.h>
#include <uapi/linux/ptrace.h>

struct dim3 {
    unsigned int x, y, z;
};

int cudaMallocEntry(struct pt_regs *ctx){
    u64 malloc_ptr = PT_REGS_PARM1(ctx);
    u64 byte_length = PT_REGS_PARM2(ctx);
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@start@cudaMalloc@%ld@%ld\\n", ts, malloc_ptr, byte_length);
    return 0;
};

int cudaMallocExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@cudaMalloc\\n", ts);
    return 0;
};

int cudaMemcpyEntry(struct pt_regs *ctx){
    u64 byte_length = PT_REGS_PARM3(ctx);
    u64 memcpy_kind = PT_REGS_PARM4(ctx);
    u64 ts = bpf_ktime_get_ns();
    // BUGFIX: the format string has three specifiers, but byte_length was not
    // passed, so the emitted line was malformed.
    bpf_trace_printk("%ld@start@cudaMemcpy@%ld@%ld\\n", ts, byte_length, memcpy_kind);
    return 0;
};

int cudaMemcpyExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@cudaMemcpy\\n", ts);
    return 0;
};

int cudaFreeEntry(struct pt_regs *ctx){
    u64 malloc_ptr = PT_REGS_PARM1(ctx);
    u64 ts = bpf_ktime_get_ns();
    // BUGFIX: the timestamp must be the first argument (the parser reads the
    // leading field as the timestamp); the arguments were swapped.
    bpf_trace_printk("%ld@start@cudaFree@%ld\\n", ts, malloc_ptr);
    return 0;
};

int cudaFreeExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@cudaFree\\n", ts);
    return 0;
};

int cudaLaunchKernelEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    // gridDim/blockDim are packed into u64 registers: x in the low 32 bits,
    // y in the high 32 bits (the >> 32 below relies on exactly that layout).
    // BUGFIX: mask the full low 32 bits (0xFFFFFFFF), not just 16 (0xFFFF).
    u32 g_x = PT_REGS_PARM2(ctx) & 0xFFFFFFFF;
    u32 g_y = PT_REGS_PARM2(ctx) >> 32;
    u32 g_z = PT_REGS_PARM3(ctx) & 0xFFFFFFFF;
    u32 b_x = PT_REGS_PARM4(ctx) & 0xFFFFFFFF;
    u32 b_y = PT_REGS_PARM4(ctx) >> 32;
    u32 b_z = PT_REGS_PARM5(ctx) & 0xFFFFFFFF;
    u32 stream_num = g_x * g_y * g_z * b_x * b_y * b_z;
    bpf_trace_printk("%ld@start@cudaLaunchKernel@%u\\n", ts, stream_num);
    return 0;
};

int cudaLaunchKernelExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@cudaLaunchKernel\\n", ts);
    return 0;
};
"""

bpf = BccBPF("CUDAeBPF", text, ["-w"])

attach_config = [
    {
        "name": "CUDASampler",
        "exe_path": [
            "/home/txx/data/miniconda3/envs/eACGM/lib/python3.12/site-packages/nvidia/cuda_runtime/lib/libcudart.so.12",
        ],
        "exe_sym": [
            "cudaMalloc",
            "cudaMemcpy",
            "cudaFree",
            "cudaLaunchKernel",
        ]
    },
]

sampler = eBPFSampler(bpf)
sampler.run(attach_config)

# Drain trace events once a second until the user interrupts.
states = []
while True:
    try:
        samples = sampler.sample(time_stamp=1)
        states += samples
    except KeyboardInterrupt:
        break
sampler.close()

collector = to_perfetto(states)
# BUGFIX: close the output file deterministically instead of leaking the handle.
with open("cuda.json", "w", encoding="utf-8") as fout:
    json.dump(collector, fout, indent=4)

302
demo/sampler_eacg.py Normal file
View File

@@ -0,0 +1,302 @@
import os
import time
import json

from eacgm.bpf import BccBPF
from eacgm.sampler import eBPFSampler, NVMLSampler, GPUSampler
from eacgm.collector import to_perfetto

# Clear stale result files from previous runs.
for filename in os.listdir("res"):
    os.remove(os.path.join("res", filename))

# Wall-clock epoch (in microseconds) corresponding to the monotonic clock
# origin, so monotonic eBPF timestamps can be aligned with wall-clock samples.
start_time = time.time_ns() - time.clock_gettime_ns(time.CLOCK_MONOTONIC)
start_time /= 1_000

# PyTorch operator name -> mangled libtorch_python symbol to probe.
torch_func_sym = {
    "TorchAdd": "_ZN5torch8autogradL15THPVariable_addEP7_objectS2_S2_",
    "TorchSub": "_ZN5torch8autogradL15THPVariable_subEP7_objectS2_S2_",
    "TorchMul": "_ZN5torch8autogradL15THPVariable_mulEP7_objectS2_S2_",
    "TorchMatmul": "_ZN5torch8autogradL18THPVariable_matmulEP7_objectS2_S2_",
    "TorchDiv": "_ZN5torch8autogradL15THPVariable_divEP7_objectS2_S2_",
    "TorchLinear": "_ZN5torch8autogradL18THPVariable_linearEP7_objectS2_S2_",
    "TorchConv2d": "_ZN5torch8autogradL18THPVariable_conv2dEP7_objectS2_S2_",
    "TorchReLU": "_ZN5torch8autogradL16THPVariable_reluEP7_objectS2_S2_",
    "TorchSigmoid": "_ZN5torch8autogradL19THPVariable_sigmoidEP7_objectS2_S2_",
    "TorchTanh": "_ZN5torch8autogradL16THPVariable_tanhEP7_objectS2_S2_",
    "TorchSoftmax": "_ZN5torch8autogradL19THPVariable_softmaxEP7_objectS2_S2_",
    "TorchMSELoss": "_ZN5torch8autogradL20THPVariable_mse_lossEP7_objectS2_S2_",
    "TorchBCELoss": "_ZN5torch8autogradL32THPVariable_binary_cross_entropyEP7_objectS2_S2_",
    "TorchCrossEntropyLoss": "_ZN5torch8autogradL30THPVariable_cross_entropy_lossEP7_objectS2_S2_",
    "TorchConvTranspose2d": "_ZN5torch8autogradL28THPVariable_conv_transpose2dEP7_objectS2_S2_",
    "TorchMaxUnpool2d": "_ZN5torch8autogradL24THPVariable_max_unpool2dEP7_objectS2_S2_",
    "TorchBatchNorm2d": "_ZN5torch8autogradL22THPVariable_batch_normEP7_objectS2_S2_",
    "TorchAvgPool2d": "_ZN5torch8autogradL22THPVariable_avg_pool2dEP7_objectS2_S2_",
    "TorchMaxPool2d": "_ZN5torch8autogradL22THPVariable_max_pool2dEP7_objectS2_S2_",
    "TorchDropout": "_ZN5torch8autogradL19THPVariable_dropoutEP7_objectS2_S2_",
    "TorchEmbedding": "_ZN5torch8autogradL21THPVariable_embeddingEP7_objectS2_S2_",
    "TorchLSTM": "_ZN5torch8autogradL16THPVariable_lstmEP7_objectS2_S2_",
    "TorchAdaptiveMaxPool2d": "_ZN5torch8autogradL31THPVariable_adaptive_max_pool2dEP7_objectS2_S2_",
    "TorchAdaptiveAvgPool2d": "_ZN5torch8autogradL31THPVariable_adaptive_avg_pool2dEP7_objectS2_S2_",
}

# Entry/exit probe template; <TorchSym>/<TorchFunc> are substituted per operator.
torch_template = """
int <TorchSym>Entry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@start@<TorchFunc>\\n", ts);
    return 0;
};

int <TorchSym>Exit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@<TorchFunc>\\n", ts);
    return 0;
};
"""

# Combined NCCL + Python VM + CUDA Runtime probes.  Every line uses
# "<ts>@<start|end>@<event>[@args...]" framing, which BccBPF.trace_ebpf
# parses by splitting on '@'.
text = """
#include <uapi/linux/ptrace.h>

int ncclAllReduceEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM3(ctx);
    u64 data_type = PT_REGS_PARM4(ctx);
    u64 reduce_op = PT_REGS_PARM5(ctx);
    // NOTE(review): "* 8" assumes 8-byte elements; the real element size
    // depends on data_type -- confirm.
    bpf_trace_printk("%ld@start@ncclAllReduce@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclAllReduceExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclAllReduce\\n", ts);
    return 0;
};

int ncclReduceEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM3(ctx);
    u64 data_type = PT_REGS_PARM4(ctx);
    u64 reduce_op = PT_REGS_PARM5(ctx);
    bpf_trace_printk("%ld@start@ncclReduce@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclReduceExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclReduce\\n", ts);
    return 0;
};

int ncclBroadcastEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM3(ctx);
    u64 data_type = PT_REGS_PARM4(ctx);
    u64 root_id = PT_REGS_PARM5(ctx);
    bpf_trace_printk("%ld@start@ncclBroadcast@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclBroadcastExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclBroadcast\\n", ts);
    return 0;
};

int ncclAllGatherEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM3(ctx);
    u64 data_type = PT_REGS_PARM4(ctx);
    bpf_trace_printk("%ld@start@ncclAllGather@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclAllGatherExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclAllGather\\n", ts);
    return 0;
};

int ncclSendEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM2(ctx);
    u64 data_type = PT_REGS_PARM3(ctx);
    bpf_trace_printk("%ld@start@ncclSend@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclSendExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclSend\\n", ts);
    return 0;
};

int ncclRecvEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM2(ctx);
    u64 data_type = PT_REGS_PARM3(ctx);
    bpf_trace_printk("%ld@start@ncclRecv@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclRecvExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclRecv\\n", ts);
    return 0;
};

int PyObject_CallFunctionEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    // BUGFIX: use '@' separators so the Python-side parser (which splits the
    // message on '@') can read these lines; they previously used spaces.
    bpf_trace_printk("%ld@start@PyObject_CallFunction\\n", ts);
    return 0;
};

int PyObject_CallFunctionExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@PyObject_CallFunction\\n", ts);
    return 0;
};

struct dim3 {
    unsigned int x, y, z;
};

int cudaMallocEntry(struct pt_regs *ctx){
    u64 malloc_ptr = PT_REGS_PARM1(ctx);
    u64 byte_length = PT_REGS_PARM2(ctx);
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@start@cudaMalloc@%ld@%ld\\n", ts, malloc_ptr, byte_length);
    return 0;
};

int cudaMallocExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@cudaMalloc\\n", ts);
    return 0;
};

int cudaMemcpyEntry(struct pt_regs *ctx){
    u64 byte_length = PT_REGS_PARM3(ctx);
    u64 memcpy_kind = PT_REGS_PARM4(ctx);
    u64 ts = bpf_ktime_get_ns();
    // BUGFIX: three format specifiers were given only two arguments; pass
    // byte_length as well.
    bpf_trace_printk("%ld@start@cudaMemcpy@%ld@%ld\\n", ts, byte_length, memcpy_kind);
    return 0;
};

int cudaMemcpyExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@cudaMemcpy\\n", ts);
    return 0;
};

int cudaFreeEntry(struct pt_regs *ctx){
    u64 malloc_ptr = PT_REGS_PARM1(ctx);
    u64 ts = bpf_ktime_get_ns();
    // BUGFIX: timestamp must be the first argument; the arguments were swapped.
    bpf_trace_printk("%ld@start@cudaFree@%ld\\n", ts, malloc_ptr);
    return 0;
};

int cudaFreeExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@cudaFree\\n", ts);
    return 0;
};

int cudaLaunchKernelEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    // gridDim/blockDim are packed into u64 registers: x low 32 bits, y high
    // 32 bits.  BUGFIX: mask the full 32 bits (0xFFFFFFFF), not 16 (0xFFFF).
    u32 g_x = PT_REGS_PARM2(ctx) & 0xFFFFFFFF;
    u32 g_y = PT_REGS_PARM2(ctx) >> 32;
    u32 g_z = PT_REGS_PARM3(ctx) & 0xFFFFFFFF;
    u32 b_x = PT_REGS_PARM4(ctx) & 0xFFFFFFFF;
    u32 b_y = PT_REGS_PARM4(ctx) >> 32;
    u32 b_z = PT_REGS_PARM5(ctx) & 0xFFFFFFFF;
    u32 stream_num = g_x * g_y * g_z * b_x * b_y * b_z;
    bpf_trace_printk("%ld@start@cudaLaunchKernel@%u\\n", ts, stream_num);
    return 0;
};

int cudaLaunchKernelExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@cudaLaunchKernel\\n", ts);
    return 0;
};
"""

# Append one entry/exit probe pair per traced PyTorch operator.
for func, sym in torch_func_sym.items():
    text += torch_template.replace("<TorchSym>", sym).replace("<TorchFunc>", func)

bpf = BccBPF("eACGSampler", text, ["-w"])

attach_config = [
    {
        "name": "CUDASampler",
        "exe_path": [
            "/home/txx/data/miniconda3/envs/eACGM/lib/python3.12/site-packages/nvidia/cuda_runtime/lib/libcudart.so.12",
        ],
        "exe_sym": [
            "cudaLaunchKernel",
        ]
    },
    {
        "name": "NCCLSampler",
        "exe_path": [
            "/home/txx/data/miniconda3/envs/eACGM/lib/python3.12/site-packages/nvidia/nccl/lib/libnccl.so.2",
        ],
        "exe_sym": [
            "ncclAllReduce",
        ]
    },
    {
        "name": "PythonSampler",
        "exe_path": [
            "/home/txx/data/miniconda3/envs/eACGM/bin/python",
        ],
        "exe_sym": [
            # "PyObject_CallFunction",
        ]
    },
    {
        "name": "TorchSampler",
        "exe_path": [
            "/home/txx/data/miniconda3/envs/eACGM/lib/python3.12/site-packages/torch/lib/libtorch_python.so",
        ],
        # FIX: derive the symbol list directly from the mapping.
        "exe_sym": list(torch_func_sym.values()),
    },
]

eacg_sampler = eBPFSampler(bpf)
nvml_sampler = NVMLSampler()
gpu_sampler = GPUSampler()
eacg_sampler.run(attach_config)

# Merge samples from all three sources once a second until interrupted.
states = []
while True:
    try:
        samples = []
        samples += eacg_sampler.sample(time_stamp=1)
        samples += nvml_sampler.sample(time_stamp=1)
        samples += gpu_sampler.sample()
        states += samples
        for sample in samples:
            print(sample)
        print("---")
    except KeyboardInterrupt:
        break

eacg_sampler.close()
nvml_sampler.close()
gpu_sampler.close()

ebpf_collector = to_perfetto(states)
# BUGFIX: close output/input files deterministically instead of leaking handles.
with open("res/ebpf.json", "w", encoding="utf-8") as fout:
    json.dump(ebpf_collector, fout, indent=4)

# Merge per-process Python logs (written into res/ by traced workers)
# into the combined trace.
eacg_collector = ebpf_collector
for python_log in os.listdir("res"):
    if "python" not in python_log:
        continue
    with open(os.path.join("res", python_log), "r", encoding="utf-8") as fin:
        eacg_collector += json.load(fin)
with open("res/eacg.json", "w", encoding="utf-8") as fout:
    json.dump(eacg_collector, fout, indent=4)

35
demo/sampler_gpu.py Normal file
View File

@@ -0,0 +1,35 @@
import time
import json

from eacgm.sampler import GPUSampler

sampler = GPUSampler()
sampler.run()

# Poll device-level GPU stats once a second until the user interrupts,
# flattening each GPUSamplerState into a JSON-friendly dict.
states = []
while True:
    try:
        for sample in sampler.sample():
            states.append({
                "ts": time.time_ns(),
                "gpu": sample.gpu,
                "gpu_utl": sample.sm,
                "totMem": sample.totMem,
                "usedMem": sample.usedMem,
                "encode_utl": sample.enc,
                "decode_utl": sample.dec,
                "temperature": sample.tmp,
                "fan_utl": sample.fan,
                "usedPower": sample.usedPower,
                "totPower": sample.totPower,
            })
        time.sleep(1)
        print("---")
    except KeyboardInterrupt:
        break
sampler.close()

# BUGFIX: close the output file deterministically instead of leaking the handle.
with open("gpu.json", "w", encoding="utf-8") as fout:
    json.dump(states, fout, indent=4)

135
demo/sampler_nccl.py Normal file
View File

@@ -0,0 +1,135 @@
import time
import json

from eacgm.bpf import BccBPF
from eacgm.sampler import eBPFSampler
from eacgm.collector import to_perfetto

# eBPF program tracing the main NCCL collective/point-to-point calls.  Every
# probe emits "<ts>@<start|end>@<event>[@bytes]"; the message is parsed on '@'.
text = """
#include <uapi/linux/ptrace.h>

int ncclAllReduceEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM3(ctx);
    u64 data_type = PT_REGS_PARM4(ctx);
    u64 reduce_op = PT_REGS_PARM5(ctx);
    // NOTE(review): "* 8" assumes 8-byte elements; the true byte size depends
    // on data_type -- confirm.
    bpf_trace_printk("%ld@start@ncclAllReduce@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclAllReduceExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclAllReduce\\n", ts);
    return 0;
};

int ncclReduceEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM3(ctx);
    u64 data_type = PT_REGS_PARM4(ctx);
    u64 reduce_op = PT_REGS_PARM5(ctx);
    bpf_trace_printk("%ld@start@ncclReduce@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclReduceExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclReduce\\n", ts);
    return 0;
};

int ncclBroadcastEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM3(ctx);
    u64 data_type = PT_REGS_PARM4(ctx);
    u64 root_id = PT_REGS_PARM5(ctx);
    bpf_trace_printk("%ld@start@ncclBroadcast@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclBroadcastExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclBroadcast\\n", ts);
    return 0;
};

int ncclAllGatherEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM3(ctx);
    u64 data_type = PT_REGS_PARM4(ctx);
    bpf_trace_printk("%ld@start@ncclAllGather@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclAllGatherExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclAllGather\\n", ts);
    return 0;
};

int ncclSendEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM2(ctx);
    u64 data_type = PT_REGS_PARM3(ctx);
    bpf_trace_printk("%ld@start@ncclSend@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclSendExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclSend\\n", ts);
    return 0;
};

int ncclRecvEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    u64 size_count = PT_REGS_PARM2(ctx);
    u64 data_type = PT_REGS_PARM3(ctx);
    bpf_trace_printk("%ld@start@ncclRecv@%ld\\n", ts, size_count * 8);
    return 0;
};

int ncclRecvExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@ncclRecv\\n", ts);
    return 0;
};
"""

bpf = BccBPF("NCCLeBPF", text, ["-w"])

attach_config = [
    {
        "name": "NCCLSampler",
        "exe_path": [
            "/home/txx/data/miniconda3/envs/eACGM/lib/python3.12/site-packages/nvidia/nccl/lib/libnccl.so.2",
        ],
        "exe_sym": [
            "ncclAllReduce",
            "ncclReduce",
            "ncclBroadcast",
            "ncclAllGather",
            "ncclSend",
            "ncclRecv",
        ]
    },
]

sampler = eBPFSampler(bpf)
sampler.run(attach_config)

# Drain trace events once a second until the user interrupts.
states = []
while True:
    try:
        samples = sampler.sample(time_stamp=1)
        states += samples
    except KeyboardInterrupt:
        break
sampler.close()

collector = to_perfetto(states)
# BUGFIX: close the output file deterministically instead of leaking the handle.
with open("nccl.json", "w", encoding="utf-8") as fout:
    json.dump(collector, fout, indent=4)

30
demo/sampler_nvml.py Normal file
View File

@@ -0,0 +1,30 @@
import time
import json

from eacgm.sampler import NVMLSampler

sampler = NVMLSampler()
sampler.run()

# Poll per-process GPU stats every two seconds until the user interrupts,
# flattening each NVMLSamplerState into a JSON-friendly dict.
states = []
while True:
    try:
        for sample in sampler.sample(time_stamp=1):
            states.append({
                "ts": time.time_ns(),
                "pid": sample.pid,
                "gpu": sample.gpu,
                "gpu_utl": sample.sm,
                "mem": sample.mem,
                "encode_utl": sample.enc,
                "decode_utl": sample.dec,
            })
        time.sleep(2)
        print("---")
    except KeyboardInterrupt:
        break
sampler.close()

# BUGFIX: close the output file deterministically instead of leaking the handle.
with open("nvml.json", "w", encoding="utf-8") as fout:
    json.dump(states, fout, indent=4)

50
demo/sampler_python.py Normal file
View File

@@ -0,0 +1,50 @@
import time
import ctypes

from eacgm.bpf import BccBPF
from eacgm.sampler import eBPFSampler

# eBPF probes for the CPython C API entry point PyObject_CallFunction.
# BUGFIX: the trace lines must use '@' separators ("<ts>@<phase>@<event>"),
# because BccBPF.trace_ebpf splits the message on '@' and int()-parses the
# first field; the previous space-separated lines made parsing fail.
text = """
#include <uapi/linux/ptrace.h>

int PyObject_CallFunctionEntry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@start@PyObject_CallFunction\\n", ts);
    return 0;
};

int PyObject_CallFunctionExit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@PyObject_CallFunction\\n", ts);
    return 0;
};
"""

bpf = BccBPF("PythoneBPF", text, ["-w"])

attach_config = [
    {
        "name": "PythonSampler",
        "exe_path": [
            "/home/txx/data/miniconda3/envs/py312-torch24-cu124/bin/python",
        ],
        "exe_sym": [
            "PyObject_CallFunction",
        ]
    },
]

sampler = eBPFSampler(bpf)
sampler.run(attach_config)

# Print sampled call events once a second until the user interrupts.
while True:
    try:
        samples = sampler.sample(time_stamp=1)
        for sample in samples:
            print(sample)
        print("---")
    except KeyboardInterrupt:
        break
sampler.close()

109
demo/sampler_torch.py Normal file
View File

@@ -0,0 +1,109 @@
import time
import json

from eacgm.bpf import BccBPF
from eacgm.sampler import eBPFSampler
from eacgm.collector import to_perfetto

# PyTorch operator name -> mangled libtorch_python symbol to probe.
func_sym = {
    "TorchAdd": "_ZN5torch8autogradL15THPVariable_addEP7_objectS2_S2_",
    "TorchSub": "_ZN5torch8autogradL15THPVariable_subEP7_objectS2_S2_",
    "TorchMul": "_ZN5torch8autogradL15THPVariable_mulEP7_objectS2_S2_",
    "TorchMatmul": "_ZN5torch8autogradL18THPVariable_matmulEP7_objectS2_S2_",
    "TorchDiv": "_ZN5torch8autogradL15THPVariable_divEP7_objectS2_S2_",
    "TorchLinear": "_ZN5torch8autogradL18THPVariable_linearEP7_objectS2_S2_",
    "TorchConv2d": "_ZN5torch8autogradL18THPVariable_conv2dEP7_objectS2_S2_",
    "TorchReLU": "_ZN5torch8autogradL16THPVariable_reluEP7_objectS2_S2_",
    "TorchSigmoid": "_ZN5torch8autogradL19THPVariable_sigmoidEP7_objectS2_S2_",
    "TorchTanh": "_ZN5torch8autogradL16THPVariable_tanhEP7_objectS2_S2_",
    "TorchSoftmax": "_ZN5torch8autogradL19THPVariable_softmaxEP7_objectS2_S2_",
    "TorchMSELoss": "_ZN5torch8autogradL20THPVariable_mse_lossEP7_objectS2_S2_",
    "TorchBCELoss": "_ZN5torch8autogradL32THPVariable_binary_cross_entropyEP7_objectS2_S2_",
    "TorchCrossEntropyLoss": "_ZN5torch8autogradL30THPVariable_cross_entropy_lossEP7_objectS2_S2_",
    "TorchConvTranspose2d": "_ZN5torch8autogradL28THPVariable_conv_transpose2dEP7_objectS2_S2_",
    "TorchMaxUnpool2d": "_ZN5torch8autogradL24THPVariable_max_unpool2dEP7_objectS2_S2_",
    "TorchBatchNorm2d": "_ZN5torch8autogradL22THPVariable_batch_normEP7_objectS2_S2_",
    "TorchAvgPool2d": "_ZN5torch8autogradL22THPVariable_avg_pool2dEP7_objectS2_S2_",
    "TorchMaxPool2d": "_ZN5torch8autogradL22THPVariable_max_pool2dEP7_objectS2_S2_",
    "TorchDropout": "_ZN5torch8autogradL19THPVariable_dropoutEP7_objectS2_S2_",
    "TorchEmbedding": "_ZN5torch8autogradL21THPVariable_embeddingEP7_objectS2_S2_",
    "TorchLSTM": "_ZN5torch8autogradL16THPVariable_lstmEP7_objectS2_S2_",
    "TorchAdaptiveMaxPool2d": "_ZN5torch8autogradL31THPVariable_adaptive_max_pool2dEP7_objectS2_S2_",
    "TorchAdaptiveAvgPool2d": "_ZN5torch8autogradL31THPVariable_adaptive_avg_pool2dEP7_objectS2_S2_",
}

# Entry/exit probe template; <TorchSym>/<TorchFunc> are substituted per operator.
template = """
int <TorchSym>Entry(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@start@<TorchFunc>\\n", ts);
    return 0;
};

int <TorchSym>Exit(struct pt_regs *ctx){
    u64 ts = bpf_ktime_get_ns();
    bpf_trace_printk("%ld@end@<TorchFunc>\\n", ts);
    return 0;
};
"""

# Generate one probe pair per traced operator.
text = ""
for func, sym in func_sym.items():
    text += template.replace("<TorchSym>", sym).replace("<TorchFunc>", func)

bpf = BccBPF("TorcheBPF", text, ["-w"])

attach_config = [
    {
        "name": "TorchSampler",
        "exe_path": [
            "/home/txx/data/miniconda3/envs/eACGM/lib/python3.12/site-packages/torch/./lib/libtorch_python.so",
        ],
        # FIX: derive the symbol list from func_sym instead of hand-duplicating
        # all 24 mangled names (they could silently drift apart).
        "exe_sym": list(func_sym.values()),
    },
]

sampler = eBPFSampler(bpf)
sampler.run(attach_config)

# Drain trace events once a second until the user interrupts.
states = []
while True:
    try:
        samples = sampler.sample(time_stamp=1)
        states += samples
    except KeyboardInterrupt:
        break
sampler.close()

collector = to_perfetto(states)
# BUGFIX: close the output file deterministically instead of leaking the handle.
with open("torch.json", "w", encoding="utf-8") as fout:
    json.dump(collector, fout, indent=4)

15
demo/webui.py Normal file
View File

@@ -0,0 +1,15 @@
from eacgm.webui import log_reader, database, push_log

# Connection settings for Grafana's MySQL backing store.
ip = "127.0.0.1"
port = 3306
user = "node1"
pwd = "mysql114514"
data_base = "grafana"
table = "CudaEvent"


def _push_transformer_log() -> None:
    """Parse the captured training log and push its rows into the database."""
    log = log_reader("log/transformer.log")
    db = database(ip, port, user, pwd, data_base)
    push_log(db, log)


if __name__ == "__main__":
    _push_transformer_log()

1
eacgm/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Package version of eacgm; bump on release.
__version__ = "0.1.0"

2
eacgm/bpf/__init__.py Normal file
View File

@@ -0,0 +1,2 @@
from .base import BPFState, BaseBPF
from .bccBPF import BccBPF

38
eacgm/bpf/base.py Normal file
View File

@@ -0,0 +1,38 @@
class BPFState:
    """One parsed record read from the kernel trace pipe.

    Attributes:
        task: name of the traced task, or None when no record was read.
        pid: process id of the traced task.
        cpu: CPU the record was emitted on.
        timestamp: nanosecond timestamp parsed from the message.
        message: remaining '@'-separated message fields.
    """
    task:str
    pid:int
    cpu:int
    timestamp:int
    message:str

    def __init__(self) -> None:
        # All fields start as None; is_none() reports this empty state.
        self.task = None
        self.pid = None
        self.cpu = None
        self.timestamp = None
        self.message = None

    def is_none(self) -> bool:
        """Return True while no trace record has been filled in."""
        return self.task is None

    def __repr__(self) -> str:
        # BUGFIX: removed a stray space before the cpu field ("{ self.cpu}").
        return f"BPFState {self.task} {self.pid} {self.cpu} {self.timestamp} {self.message}"
class BaseBPF:
    """Abstract interface for an eBPF backend (see BccBPF for the BCC one)."""

    def __init__(self, name:str) -> None:
        # Human-readable name of the eBPF program.
        self.name = name

    def attach_uprobe(self, exe_path:str, exe_sym:str, bpf_func:str) -> bool:
        """Attach *bpf_func* to the entry of *exe_sym* in binary *exe_path*."""
        # BUGFIX: these stubs raised NotADirectoryError (an OSError about
        # filesystem paths); the conventional marker for an unimplemented
        # abstract method is NotImplementedError.
        raise NotImplementedError

    def attach_uretprobe(self, exe_path:str, exe_sym:str, bpf_func:str) -> bool:
        """Attach *bpf_func* to the return of *exe_sym* in binary *exe_path*."""
        raise NotImplementedError

    def cleanup(self) -> None:
        """Detach all probes and release backend resources."""
        raise NotImplementedError

    def trace_ebpf(self) -> "BPFState":
        """Read and parse one record from the trace pipe."""
        raise NotImplementedError

34
eacgm/bpf/bccBPF.py Normal file
View File

@@ -0,0 +1,34 @@
from bcc import BPF
from typing import List
from .base import BPFState, BaseBPF
class BccBPF(BaseBPF):
    """BaseBPF implementation backed by the BCC toolchain."""

    def __init__(self, name:str, text:str, cflags:List=None) -> None:
        """Compile *text* as an eBPF program with optional compiler flags.

        BUGFIX: the default for ``cflags`` was a mutable ``[]`` shared across
        calls; use a ``None`` sentinel instead (backward compatible).
        """
        super().__init__(name)
        self.bpf = BPF(text=text, cflags=[] if cflags is None else cflags)

    def attach_uprobe(self, exe_path:str, exe_sym:str, bpf_func:str) -> bool:
        """Attach *bpf_func* to the entry of *exe_sym* in *exe_path*."""
        self.bpf.attach_uprobe(exe_path, exe_sym, fn_name=bpf_func)
        return

    def attach_uretprobe(self, exe_path:str, exe_sym:str, bpf_func:str) -> bool:
        """Attach *bpf_func* to the return of *exe_sym* in *exe_path*."""
        self.bpf.attach_uretprobe(exe_path, exe_sym, fn_name=bpf_func)
        return

    def cleanup(self) -> None:
        """Detach probes and free BCC resources."""
        self.bpf.cleanup()
        return

    def trace_ebpf(self, nonblocking:bool) -> BPFState:
        """Read one trace-pipe record and parse it into a BPFState.

        Messages are framed as "<ts>@<field>@<field>..."; an empty read
        (nonblocking with nothing pending) yields a state where is_none()
        is True.
        """
        (task, pid, cpu, _, _, message) = self.bpf.trace_fields(nonblocking)
        state = BPFState()
        if task is not None:
            message = message.decode("utf-8")
            # Split once instead of twice.
            fields = message.split("@")
            state.task = task.decode("utf-8")
            state.pid = int(pid)
            state.cpu = int(cpu)
            state.timestamp = int(fields[0])
            state.message = fields[1:]
        return state

View File

@@ -0,0 +1 @@
from .profetto import to_perfetto

View File

@@ -0,0 +1,21 @@
from typing import List
from eacgm.sampler import eBPFSamplerState
def to_perfetto(states:List[eBPFSamplerState]) -> List:
    """Pair up B/E sampler events into a Perfetto-style event list.

    Only eBPF-sampled states are considered; for each "<event>-<pid>" track,
    a begin ("B") event immediately followed by an end ("E") event is emitted
    as a pair.  Unmatched events are dropped.
    """
    collected = []
    open_events = {}
    for raw in states:
        if not isinstance(raw, eBPFSamplerState):
            continue
        event = raw.collect()
        track = f"{event['name']}-{event['pid']}"
        previous = open_events.get(track)
        if previous is None:
            open_events[track] = event
            continue
        if previous["ph"] == "B" and event["ph"] == "E":
            collected.append(previous)
            collected.append(event)
        open_events[track] = event
    return collected

View File

@@ -0,0 +1,4 @@
from .base import BaseSampler
from .ebpfsampler import eBPFSampler, eBPFSamplerState
from .nvmlsampler import NVMLSampler, NVMLSamplerState
from .gpusampler import GPUSampler, GPUSamplerState

35
eacgm/sampler/base.py Normal file
View File

@@ -0,0 +1,35 @@
class BaseSamplerState:
    """Common fields shared by every sampler's state record."""
    task:str
    pid:int
    cpu:int
    timestamp:int
    message:str

    def __init__(self) -> None:
        # Every field starts empty; concrete samplers fill them in later.
        for field in ("task", "pid", "cpu", "timestamp", "message"):
            setattr(self, field, None)

    def is_none(self) -> bool:
        """Return True while no event has been recorded into this state."""
        return self.task is None

    def __repr__(self) -> str:
        return f"{self.task} {self.pid} {self.cpu} {self.timestamp} {self.message}"
class BaseSampler:
    """Abstract sampler interface: run() to start, sample() to poll, close() to stop."""

    def __init__(self, name:str) -> None:
        # Identifier used by concrete samplers (e.g. "GPUSampler").
        self.name = name

    def run(self) -> None:
        """Start the sampler; must be overridden."""
        raise NotImplementedError

    def sample(self):
        """Collect and return pending samples; must be overridden."""
        raise NotImplementedError

    def close(self) -> None:
        """Release sampler resources; must be overridden."""
        raise NotImplementedError

View File

@@ -0,0 +1,88 @@
import time
from typing import Dict, List
from .base import BaseSamplerState, BaseSampler
from eacgm.bpf import BPFState, BccBPF
class eBPFSamplerState(BaseSamplerState):
    """Sampler state built from a raw eBPF trace record."""

    def __init__(self) -> None:
        super().__init__()

    @staticmethod
    def from_ebpfstate(other:BPFState) -> "eBPFSamplerState":
        """Copy the fields of a raw BPFState into a new sampler state.

        FIX: declared as @staticmethod -- it was a plain function in the class
        body, which only worked because it is always called on the class.
        """
        state = eBPFSamplerState()
        state.task = other.task
        state.pid = other.pid
        state.cpu = other.cpu
        state.timestamp = other.timestamp
        state.message = other.message
        return state

    def collect(self) -> Dict:
        """Convert this state into a Perfetto-style trace-event dict."""
        event = self.message[1]
        # Classify the event source from its name.
        if "cuda" in event:
            cat = "cuda"
        elif "Py" in event:
            cat = "python"
        elif "nccl" in event:
            cat = "nccl"
        elif "Torch" in event:
            cat = "torch"
        else:
            cat = "other"
        # message[0] is the phase marker written by the probes.
        ph = "B" if self.message[0] == "start" else "E"
        res = {
            "name": event,
            "cat": cat,
            "pid": self.pid,
            "tid": self.pid,
            "cpu": self.cpu,
            "ts": self.timestamp / 1_000,  # ns -> us
            "ph": ph,
            "message": self.message[2:],
        }
        return res

    def __repr__(self) -> str:
        return f"eBPFSamplerState {super().__repr__()}"
class eBPFSampler(BaseSampler):
    """Sampler that drains events produced by an attached BccBPF program."""

    def __init__(self, bpf:BccBPF) -> None:
        super().__init__(name="eBPFSampler")
        self.bpf = bpf

    def run(self, attach_config:List) -> None:
        """Attach entry/exit probes for every (path, symbol) pair in the config.

        Probes are expected to be named "<sym>Entry" / "<sym>Exit" in the
        compiled eBPF program.  Attach failures are reported and skipped so
        one missing symbol does not abort the rest.
        """
        # FIX: dropped the unused read of attach_info["name"].
        for attach_info in attach_config:
            for path in attach_info["exe_path"]:
                for sym in attach_info["exe_sym"]:
                    try:
                        self.bpf.attach_uprobe(path, sym, sym + "Entry")
                        self.bpf.attach_uretprobe(path, sym, sym + "Exit")
                    except Exception as err:
                        print(err)

    def sample(self, time_stamp:float) -> List[eBPFSamplerState]:
        """Poll the trace pipe for *time_stamp* seconds; return parsed states."""
        samples = []
        deadline = time.perf_counter() + time_stamp
        polling = True
        while polling:
            # One final read is still performed after the deadline passes.
            if time.perf_counter() > deadline:
                polling = False
            raw = self.bpf.trace_ebpf(True)
            if raw.is_none():
                continue
            samples.append(eBPFSamplerState.from_ebpfstate(raw))
        return samples

    def close(self) -> None:
        """Detach probes and free eBPF resources."""
        self.bpf.cleanup()

View File

@@ -0,0 +1,64 @@
import time
import pynvml
from typing import List
from .base import BaseSampler
class GPUSamplerState:
    """Device-level GPU metrics captured in one NVML query."""

    def __init__(self) -> None:
        # FIX: dropped the superfluous super().__init__() call -- this class
        # has no base class.
        self.gpu:int = None          # device index
        self.name:str = None         # device name
        self.sm:int = None           # GPU (SM) utilization percent
        self.totMem:int = None       # total memory, bytes
        self.usedMem:int = None      # used memory, bytes
        self.enc:int = None          # encoder utilization percent
        self.dec:int = None          # decoder utilization percent
        self.tmp:int = None          # temperature
        self.fan:int = None          # fan speed percent
        self.usedPower:float = None  # current power draw, watts
        self.totPower:float = None   # power management limit, watts

    def __repr__(self) -> str:
        info = f"GPUSamplerState {self.gpu} {self.name} {self.sm} {self.usedMem} {self.totMem} {self.enc} {self.dec} {self.tmp} {self.fan} {self.usedPower} {self.totPower}"
        return info
class GPUSampler(BaseSampler):
    """Device-level GPU monitor built on NVML (pynvml)."""

    def __init__(self) -> None:
        super().__init__(name="GPUSampler")
        pynvml.nvmlInit()
        self.deviceCount:int = pynvml.nvmlDeviceGetCount()
        self.nvDevices:List = [pynvml.nvmlDeviceGetHandleByIndex(idx) for idx in range(self.deviceCount)]

    def run(self) -> None:
        # Nothing to start: NVML handles are acquired in __init__.
        return

    def sample(self) -> List[GPUSamplerState]:
        """Query every GPU once and return one state per responsive device."""
        samples = []
        # FIX: iterate the handles directly instead of indexing by range.
        for gpu_handle in self.nvDevices:
            try:
                sample = GPUSamplerState()
                sample.gpu = pynvml.nvmlDeviceGetIndex(gpu_handle)
                sample.name = pynvml.nvmlDeviceGetName(gpu_handle)
                sample.sm = pynvml.nvmlDeviceGetUtilizationRates(gpu_handle).gpu
                mem_info = pynvml.nvmlDeviceGetMemoryInfo(gpu_handle)
                sample.totMem = mem_info.total
                sample.usedMem = mem_info.used
                sample.enc = pynvml.nvmlDeviceGetEncoderUtilization(gpu_handle)[0]
                sample.dec = pynvml.nvmlDeviceGetDecoderUtilization(gpu_handle)[0]
                sample.tmp = pynvml.nvmlDeviceGetTemperature(gpu_handle, pynvml.NVML_TEMPERATURE_GPU)
                sample.fan = pynvml.nvmlDeviceGetFanSpeed(gpu_handle)
                # NVML reports milliwatts; convert to watts.
                sample.usedPower = pynvml.nvmlDeviceGetPowerUsage(gpu_handle) / 1000.0
                sample.totPower = pynvml.nvmlDeviceGetPowerManagementLimit(gpu_handle) / 1000.0
                samples.append(sample)
            except pynvml.NVMLError as e:
                # A device may deny a query; report and keep sampling the rest.
                # FIX: removed the redundant `pass` after print().
                print(e)
        return samples

    def close(self) -> None:
        """Shut down the NVML session."""
        pynvml.nvmlShutdown()

View File

@@ -0,0 +1,57 @@
import time
import pynvml
from typing import List
from .base import BaseSamplerState, BaseSampler
class NVMLSamplerState(BaseSamplerState):
    """Per-process GPU utilization sample reported by NVML accounting."""

    def __init__(self) -> None:
        super().__init__()
        self.gpu:int = None  # device index the process ran on
        self.sm:int = None   # SM utilization, percent
        self.mem:int = None  # memory utilization, percent
        self.enc:int = None  # encoder utilization, percent
        self.dec:int = None  # decoder utilization, percent

    def __repr__(self) -> str:
        own = f"{self.gpu} {self.sm} {self.mem} {self.enc} {self.dec}"
        return f"NVMLSamplerState {own} {super().__repr__()}"
class NVMLSampler(BaseSampler):
    """Samples per-process GPU utilization via NVML process accounting."""

    def __init__(self) -> None:
        super().__init__(name="NVMLSampler")
        pynvml.nvmlInit()
        self.deviceCount:int = pynvml.nvmlDeviceGetCount()
        self.nvDevices:List = [pynvml.nvmlDeviceGetHandleByIndex(idx) for idx in range(self.deviceCount)]

    def run(self) -> None:
        """Nothing to launch; NVML is polled in ``sample``."""
        return

    def sample(self, time_stamp:float) -> List[NVMLSamplerState]:
        """Return per-process utilization samples from the last *time_stamp* seconds.

        Args:
            time_stamp: look-back window in seconds.
        """
        samples = []
        # NVML expects the cutoff as an integer microsecond timestamp; the
        # original expression produced a float for float time_stamp, which
        # the ctypes binding rejects. Cast once here.
        last_seen = int(time.time_ns() // 1000 - 1_000_000 * time_stamp)
        for gpu_idx in range(self.deviceCount):
            gpu_handle = self.nvDevices[gpu_idx]
            try:
                processes = pynvml.nvmlDeviceGetProcessUtilization(gpu_handle, last_seen)
                for process in processes:
                    state = NVMLSamplerState()
                    state.task = None
                    state.pid = process.pid
                    state.cpu = None
                    state.timestamp = process.timeStamp
                    state.message = None
                    state.gpu = gpu_idx
                    state.sm = process.smUtil
                    state.mem = process.memUtil
                    state.enc = process.encUtil
                    state.dec = process.decUtil
                    samples.append(state)
            except pynvml.NVMLError:
                # NVML raises (e.g. NOT_FOUND) when a device has no recent
                # process activity; treat that as "no samples" for this GPU.
                pass
        return samples

    def close(self) -> None:
        """Shut down the NVML session."""
        pynvml.nvmlShutdown()

3
eacgm/webui/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
# Package-relative imports: bare "from reader import ..." only resolves when
# the package directory itself is on sys.path, and fails under normal
# "import eacgm.webui" usage.
from .reader import log_reader
from .connect import database
from .insert import push_log

19
eacgm/webui/connect.py Normal file
View File

@@ -0,0 +1,19 @@
# connect to mysql database
import mysql.connector
class database:
    """Thin wrapper around one MySQL connection that runs one statement at a time."""

    def __init__(self, ip, port, user, pwd, database) -> None:
        self.conn = mysql.connector.connect(
            host = ip,
            port = port,
            user = user,
            password = pwd,
            database = database
        )
        self.cursor = self.conn.cursor()

    def exec(self, cmd: str):
        """Execute *cmd*, commit, and return its rows (empty list for non-SELECT).

        NOTE(review): callers interpolate values directly into *cmd*;
        switch to parameterized queries for externally supplied values.
        """
        self.cursor.execute(cmd)
        # fetchall() on a statement that produces no result set (INSERT/
        # UPDATE/DDL) raises InterfaceError in mysql-connector; only fetch
        # when the cursor actually holds rows.
        result = self.cursor.fetchall() if self.cursor.with_rows else []
        self.conn.commit()
        return result

113
eacgm/webui/insert.py Normal file
View File

@@ -0,0 +1,113 @@
# insert data into mysql database
import argparse
from reader import log_reader
from reader import log_reader
from connect import database
import time
def get_col_num(db) -> int:
    """Number of columns currently in grafana.CudaEvent (time included)."""
    query = (
        "SELECT COUNT(*) FROM information_schema.COLUMNS "
        "where `TABLE_SCHEMA` = 'grafana' and `TABLE_NAME` = 'CudaEvent';"
    )
    return db.exec(query)[0][0]
def lts_cuda_event(db) -> list:
    """Return the event columns of the most recent CudaEvent row.

    When the table is empty, return a row-width list of ``None`` so the
    caller treats "no history" and "all slots free" identically.
    """
    rows = db.exec("SELECT * FROM grafana.`CudaEvent` ORDER BY time DESC LIMIT 1;")
    if rows:
        return list(rows[0][1:])
    return [None] * (get_col_num(db) - 1)
def lts_event_cnt(db) -> dict:
    """Map event name -> running start/end balance from grafana.events."""
    rows = db.exec("""SELECT * FROM grafana.events;""")
    return {name: cnt for name, cnt in rows}
def add_col(db):
    """Append one more eventN column to CudaEvent (N = current column count)."""
    next_idx = get_col_num(db)
    db.exec(f"""ALTER TABLE grafana.`CudaEvent` ADD COLUMN event{next_idx} CHAR(255)""")
def del_col(db, col_num):
    """Drop column event<col_num> from CudaEvent."""
    db.exec(f"""ALTER TABLE grafana.`CudaEvent` DROP COLUMN event{col_num};""")
def add_empty(max_time, db):
    """Insert an all-NULL row at *max_time* so timelines close their last span."""
    width = get_col_num(db)
    db.exec(f"""INSERT INTO grafana.`CudaEvent` VALUES ({max_time}, {','.join(['NULL'] * (width - 1))})""")
def push_log(db, log):
    """Replay parsed log events into grafana.CudaEvent and grafana.events.

    *log* is a list of {'time', 'op', 'name'} dicts.  Every running event
    occupies one eventN column of CudaEvent from its 'start' row until its
    'end' row; columns are allocated on demand (add_col) and reused once
    freed.  Rows are batched into one multi-row INSERT that is flushed
    before any schema change and once at the end.

    NOTE(review): event names are interpolated directly into SQL; safe
    only while names come from trusted log files.
    """
    max_time = 0
    ## latest cuda event
    cuda_event = lts_cuda_event(db)
    ## latest event cnt
    event_cnt = lts_event_cnt(db)
    # The bare INSERT prefix is 37 characters; len(cmd) > 37 therefore
    # means at least one pending row tuple has been appended.
    cmd = f"INSERT INTO grafana.CudaEvent VALUES "
    for line_idx, l in enumerate(log):
        if l['op'] == 'start':
            # Bump the per-name start/end balance.
            if l['name'] in event_cnt:
                event_cnt[l['name']] += 1
            else:
                event_cnt[l["name"]] = 1
            # Claim the first free column slot for this event.
            empty_col = False
            i = 0
            for e in cuda_event:
                if e is None:
                    cuda_event[i] = l['name']
                    empty_col = True
                    break
                i += 1
            if not empty_col:
                # No free slot: flush pending rows (they match the old,
                # narrower schema), then widen the table by one column.
                if len(cmd) > 37:
                    cmd = cmd[:-1] + ";"
                    # print(cmd)
                    # print('------')
                    db.exec(cmd)
                    cmd = f"INSERT INTO grafana.CudaEvent VALUES "
                add_col(db)
                cuda_event.append(l['name'])
        elif l['op'] == 'end':
            if l['name'] in event_cnt:
                if event_cnt[l["name"]] == 0:
                    # More 'end's than 'start's seen for this name.
                    print(f"[!]: in line {line_idx + 1}: event {l['name']} ended more than starting")
                    #raise ValueError(f"in line {line_idx + 1}: event {l['name']} ended more than starting")
                    continue
                event_cnt[l["name"]] -= 1
                # Free the most recently claimed slot holding this name.
                for i, e in enumerate(cuda_event[::-1]):
                    if e == l["name"]:
                        cuda_event[len(cuda_event)- 1 - i] = None
                        break
            if l["name"] not in event_cnt:
                # 'end' for a name that never started.
                print(f"[!]: in line {line_idx + 1}: event {l['name']} ended without starting")
                # raise ValueError(f"in line {line_idx + 1}: event {l['name']} ended without starting")
                continue
        else:
            raise ValueError(f"in line {line_idx + 1}: unknown operation {l['op']}")
        # Append one row snapshot: (time, current slot occupancy ...).
        tmp_cmd = f"({l['time']}, "
        max_time = max(max_time, float(l['time']))
        for e in cuda_event:
            if e is None:
                tmp_cmd += "NULL, "
            else:
                tmp_cmd += f"'{e}', "
        tmp_cmd = tmp_cmd[:-2] + "),"
        cmd += tmp_cmd
    # Flush the remaining batched rows, if any.
    if len(cmd) > 37:
        cmd = cmd[:-1] + ";"
        # print(cmd)
        # print("------")
        db.exec(cmd)
    # print(cuda_event)
    # print(event_cnt)
    add_empty(max_time,db)

13
eacgm/webui/reader.py Normal file
View File

@@ -0,0 +1,13 @@
def log_reader(path):
    """Parse a sampler log file into a list of {'time', 'op', 'name'} dicts.

    Each log line is space-separated; field 3 is the timestamp, field 5
    the operation ('start'/'end') and field 6 the event name.  Blank or
    malformed lines (fewer than 7 fields) are skipped instead of raising
    IndexError as the original did.
    """
    with open(path, 'r') as f:
        lines = f.readlines()
    ret = []
    for line in lines:
        fields = line.strip().split(' ')
        if len(fields) < 7:
            continue
        ret.append({'time': fields[3], 'op': fields[5], 'name': fields[6]})
    return ret

View File

@@ -0,0 +1,41 @@
version: '2.1'
services:
mysql:
build:
context: ./mysql
dockerfile: dockerfile
ports:
- "3306:3306"
volumes:
- ../volumes/mysql/data:/var/lib/mysql
environment:
- "MYSQL_ROOT_PASSWORD=adminpwd"
container_name: gf-mysql
networks:
- gf-network
grafana:
build:
context: ./grafana
dockerfile: dockerfile
container_name: gf-grafana
ports:
- "3000:3000"
environment:
- "GF_SECURITY_ADMIN_PASSWORD=admin"
depends_on:
- mysql
networks:
- gf-network
links:
- mysql
networks:
gf-network:
driver: bridge
ipam:
driver: default
config:
- subnet: 192.168.114.0/24
gateway: 192.168.114.254

View File

@@ -0,0 +1,2 @@
FROM grafana/grafana
COPY --chown=grafana:grafana grafana.db /var/lib/grafana/grafana.db

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,5 @@
FROM mysql:8.0
COPY ./init.sql /docker-entrypoint-initdb.d/
EXPOSE 3306

View File

@@ -0,0 +1,62 @@
-- Development credentials used by the monitoring scripts.
-- NOTE(review): this password is also hard-coded in the Python clients;
-- rotate both together.
CREATE USER 'node1' @'%' IDENTIFIED BY 'mysql114514';
GRANT ALL PRIVILEGES ON *.* TO 'node1' @'%' WITH GRANT OPTION;
FLUSH PRIVILEGES;
-- grafana database
CREATE DATABASE IF NOT EXISTS grafana;
-- state timeline
-- CudaEvent rows snapshot which event occupies each column at a moment;
-- insert.py widens the table with extra eventN columns on demand.
CREATE TABLE IF NOT EXISTS grafana.CudaEvent (
    time DOUBLE,
    event1 CHAR(255),
    event2 CHAR(255),
    event3 CHAR(255)
);
-- Running start/end balance per event name.
CREATE TABLE IF NOT EXISTS grafana.events (
    name CHAR(255) PRIMARY KEY,
    cnt INT
);
-- top
-- Instant utilization gauges sampled by top.py.
CREATE Table IF NOT EXISTS grafana.gauge (
    TIME DATETIME,
    cpu DOUBLE,
    mem DOUBLE,
    gpu_load DOUBLE,
    gpu_mem DOUBLE
);
-- System RAM in MiB.
CREATE TABLE IF NOT EXISTS grafana.memory (
    TIME DATETIME,
    total DOUBLE,
    used DOUBLE
);
-- GPU memory totals as reported by GPUtil.
CREATE TABLE IF NOT EXISTS grafana.gpumem (
    TIME DATETIME,
    total DOUBLE,
    used DOUBLE
);
-- Disk throughput in MiB/s.
CREATE TABLE IF NOT EXISTS grafana.diskio (
    TIME DATETIME,
    read_rate DOUBLE,
    write_rate DOUBLE
);
-- Network throughput in MiB/s.
CREATE TABLE IF NOT EXISTS grafana.netio (
    TIME DATETIME,
    send_rate DOUBLE,
    recv_rate DOUBLE
);
-- ollamanet
-- Per-interval in/out rates written by tailf.py.
CREATE TABLE IF NOT EXISTS grafana.ollamanet (
    time DATETIME,
    request DOUBLE,
    token DOUBLE
);
-- Cumulative packet counts per remote endpoint.
CREATE TABLE IF NOT EXISTS grafana.ipport (
    ipport CHAR(255) PRIMARY KEY,
    cnt INT
);

9
grafana/launch.sh Normal file
View File

@@ -0,0 +1,9 @@
# Install host dependencies, then bring up the MySQL + Grafana stack.
sudo apt install tcpdump make
pip install mysql-connector-python psutil GPUtil
echo -e "\x1b[32m[+] Successfully installed required packages\x1b[0m"
# Start the containers defined under compose/ (gf-mysql, gf-grafana).
cd compose && docker-compose up -d && cd ..
echo -e "\x1b[32m[+] Successfully launched docker containers gf-mysql and gf-grafana\x1b[0m"
echo -e "\x1b[32m[+] grafana is now available at http://127.0.0.1:3000 \x1b[0m"
echo -e "\x1b[32m[+] default username: admin, password: admin \x1b[0m"

8
grafana/service.sh Normal file
View File

@@ -0,0 +1,8 @@
# Start the background collectors: the ollama network listener (via its
# Makefile) and the top.py system poller.
cd src/ollamanet && make run && cd ../..
echo -e "\x1b[32m[+] Successfully launched ollamanet\x1b[0m"
cd src/top
# Output goes to src/top/log/top.log (path is relative to src/top).
nohup python top.py > log/top.log 2>&1 &
cd ../..
echo -e "\x1b[32m[+] Successfully launched top\x1b[0m"

View File

@@ -0,0 +1,29 @@
# connect to mysql database
import mysql.connector
class database:
    """Thin wrapper around one MySQL connection that runs one statement at a time."""

    def __init__(self, ip, port, user, pwd, database) -> None:
        self.conn = mysql.connector.connect(
            host = ip,
            port = port,
            user = user,
            password = pwd,
            database = database
        )
        self.cursor = self.conn.cursor()

    def exec(self, cmd: str):
        """Execute *cmd*, commit, and return its rows (empty list for non-SELECT).

        NOTE(review): callers interpolate values directly into *cmd*;
        switch to parameterized queries for externally supplied values.
        """
        self.cursor.execute(cmd)
        # fetchall() on a statement that produces no result set (INSERT/
        # UPDATE/DDL) raises InterfaceError in mysql-connector; only fetch
        # when the cursor actually holds rows.
        result = self.cursor.fetchall() if self.cursor.with_rows else []
        self.conn.commit()
        return result
if __name__ == '__main__':
    # Manual smoke test: connect with the development credentials
    # provisioned by the MySQL init.sql script.
    db = database(
        ip="127.0.0.1",
        port=3306,
        user="node1",
        pwd="mysql114514",
        database="grafana",
    )

View File

@@ -0,0 +1,13 @@
-- Reset the ollama-traffic tables; all collected rates and peer counts are lost.
DROP Table IF EXISTS grafana.ollamanet;
DROP TABLE if EXISTS grafana.ipport;
-- Per-interval in/out rates written by tailf.py.
CREATE TABLE IF NOT EXISTS grafana.ollamanet
(
    time DATETIME,
    request DOUBLE,
    token DOUBLE
);
-- Cumulative packet counts per remote endpoint.
CREATE TABLE IF NOT EXISTS grafana.ipport
(
    ipport CHAR(255) PRIMARY KEY,
    cnt INT
);

View File

@@ -0,0 +1,37 @@
#!/bin/bash
# Capture ollama traffic (port 11434) and append 5-second summaries to
# trace.txt: each block is "<epoch> <packets>", "<in> <out>", one
# "<addr> <count>" line per endpoint, then a "---" separator.
sudo tcpdump -i any port 11434 -n -l | awk '
BEGIN {
    start_time = systime()
    packets = 0
    inp = 0
    out = 0
}
{
    # Field 3 of this tcpdump output is the packet direction (In/Out).
    if ($3 == "In"){
        inp++
    }else{
        out++
    }
    packets++
    current_time = systime()
    # Count both endpoints: source ($5) and destination ($7 minus its
    # trailing punctuation character).
    table[$5]++
    dest = substr($7, 1, length($7) - 1)
    table[dest]++
    if (current_time - start_time >= 5) {
        start_time = current_time
        timestamp = strftime("%Y-%m-%d %H:%M:%S", current_time)
        print current_time, packets >> "trace.txt"
        print inp, out >> "trace.txt"
        for (i in table) {
            print i, table[i] >> "trace.txt"
        }
        print "---" >> "trace.txt"
        fflush("trace.txt")
        # Reset the window counters for the next interval.
        packets = 0
        inp=0
        out=0
        delete table
    }
}'

View File

@@ -0,0 +1,3 @@
# Launch the tcpdump listener and the MySQL tail follower in the background.
run:
	nohup ./listen.sh > log/listen.log 2>&1 &
	nohup python tailf.py > log/tailf.log 2>&1 &

View File

@@ -0,0 +1,85 @@
import time
import os
import argparse
from connect import database
# Flush interval in seconds, and the newest block timestamp seen so far;
# both are rebound via ``global`` in main() / tail_f().
interval = 5
max_time = 0
def tail_f(args, db, filename):
    """Yield new lines appended to *filename*, like ``tail -f``.

    While idle for more than ``interval`` seconds, insert a zero-rate row
    into <database>.ollamanet so the dashboard shows explicit gaps.
    """
    with open(filename, 'r') as file:
        # Seek to the end of the file; only newly appended content is read.
        file.seek(0, 2)
        global max_time
        while True:
            # Read the next line, if any.
            line = file.readline()
            if not line:
                time.sleep(1)  # no new line: pause one second, then retry
                ts = int(time.time())
                if ts - max_time > interval:
                    db.exec(f"""INSERT INTO {args.database}.ollamanet VALUES (NOW(), 0, 0)""")
                    max_time = ts
                continue
            yield line
def main(db:database, args):
    """Follow the tcpdump summary file and push rates into MySQL.

    Blocks of lines terminated by "---" are parsed: line 0 is
    "<timestamp> <packets>", line 1 is "<in> <out>", the remaining lines
    are "<ip.port> <count>" pairs accumulated into <database>.ipport.
    """
    global interval, max_time
    log_file = args.file
    interval = args.interval
    if not os.path.exists(log_file):
        os.system(f"touch {log_file}")
    buf = []
    for line in tail_f(args, db, log_file):
        line = line.strip()
        if line.strip() == "---":
            # End of one block: buf[0] = "<ts> <pkts>", buf[1] = "<in> <out>".
            l0 = buf[0].split(' ')
            ts = int(l0[0])
            max_time = max(max_time, ts)
            cnt = int(l0[1]) / interval
            l1 = buf[1].split(' ')
            recv = int(l1[0]) / interval
            send = int(l1[1]) / interval
            # print(f"{ts} {cnt} {recv} {send}")
            # print(buf)
            db.exec(f"""INSERT INTO {args.database}.ollamanet VALUES (NOW(), {recv}, {send});""")
            # NOTE(review): the last buffered line is skipped by this bound
            # (i < len(buf) - 1) — confirm that is intentional.
            i = 2
            while i < len(buf) - 1:
                l = buf[i].split(' ')
                ipport = l[0]
                # Drop the trailing ".port" component, keeping the IP part.
                ipport = ipport[:ipport.rfind('.')]
                i += 1
                if ipport == args.local:
                    continue
                cnt = int(l[1])
                all = db.exec(f"""SELECT cnt from {args.database}.ipport where ipport='{ipport}';""")
                if not all:
                    all = cnt
                    db.exec(f"""INSERT INTO {args.database}.ipport VALUES ('{ipport}', {cnt});""")
                else:
                    all = all[0][0]
                    all += cnt
                    db.exec(f"""UPDATE {args.database}.ipport SET cnt={all} where ipport='{ipport}';""")
            buf = []
            continue
        buf.append(line)
if __name__ == "__main__":
    # CLI entry point; defaults match the docker-compose MySQL service.
    parser = argparse.ArgumentParser()
    parser.add_argument('--file', type=str, default='trace.txt', help='log file')
    parser.add_argument('--interval', type=int, default=5, help='interval (s)')
    parser.add_argument('--ip', type=str, default='127.0.0.1', help='ip')
    parser.add_argument('--port', type=int, default=3306, help='port')
    parser.add_argument('--user', type=str, default='node1', help='user')
    parser.add_argument('--password', type=str, default='mysql114514', help='password')
    parser.add_argument("--database", type=str, default="grafana", help="database")
    # Local listening endpoint ("ip.port") excluded from the peer table.
    parser.add_argument("--local", type=str, default="127.0.0.1.11434")
    args = parser.parse_args()
    db = database(args.ip, args.port, args.user, args.password, args.database)
    main(db, args)

View File

@@ -0,0 +1,60 @@
1722135153 93
34 59
100.77.22.47.18099 6
100.77.22.47.18188 87
100.82.183.119.11434 93
---
1722135153 93
34 59
100.77.22.47.18099 1
100.77.22.47.18188 92
100.82.183.119.11434 93
---
1722135155 93
33 60
100.77.22.47.18188 93
100.82.183.119.11434 93
---
1722135155 1
1 0
100.82.183.119.11434 1
100.77.22.47.17946 1
---
1722135230 62
23 39
100.82.183.119.11434 62
100.77.22.47.17946 62
---
1722135230 93
36 57
100.77.22.47.18188 19
100.82.183.119.11434 93
100.77.22.47.17946 74
---
1722135230 93
35 58
100.77.22.47.18188 14
100.82.183.119.11434 93
100.77.22.47.17946 79
---
1722135231 94
35 59
100.77.22.47.18188 12
100.82.183.119.11434 94
100.77.22.47.17946 82
---
1722137005 1
1 0
100.82.183.119.11434 1
100.77.22.47.8112 1
---
1722137235 126
44 82
100.82.183.119.11434 126
100.77.22.47.8112 126
---
1722137236 1
1 0
100.82.183.119.11434 1
100.77.22.47.55880 1
---

View File

@@ -0,0 +1,29 @@
# connect to mysql database
import mysql.connector
class database:
    """Thin wrapper around one MySQL connection that runs one statement at a time."""

    def __init__(self, ip, port, user, pwd, database) -> None:
        self.conn = mysql.connector.connect(
            host = ip,
            port = port,
            user = user,
            password = pwd,
            database = database
        )
        self.cursor = self.conn.cursor()

    def exec(self, cmd: str):
        """Execute *cmd*, commit, and return its rows (empty list for non-SELECT).

        NOTE(review): callers interpolate values directly into *cmd*;
        switch to parameterized queries for externally supplied values.
        """
        self.cursor.execute(cmd)
        # fetchall() on a statement that produces no result set (INSERT/
        # UPDATE/DDL) raises InterfaceError in mysql-connector; only fetch
        # when the cursor actually holds rows.
        result = self.cursor.fetchall() if self.cursor.with_rows else []
        self.conn.commit()
        return result
if __name__ == '__main__':
    # Manual smoke test: connect with the development credentials
    # provisioned by the MySQL init.sql script.
    db = database(
        ip="127.0.0.1",
        port=3306,
        user="node1",
        pwd="mysql114514",
        database="grafana",
    )

View File

@@ -0,0 +1,14 @@
-- WARNING: database grafana will be cleared if exists
DROP DATABASE IF EXISTS grafana;
CREATE DATABASE IF NOT EXISTS grafana;
drop TABLE if EXISTS grafana.CudaEvent;
-- Event-timeline table; insert.py widens it with extra eventN columns on demand.
CREATE TABLE IF NOT EXISTS grafana.CudaEvent(
    time DOUBLE,
    event1 CHAR(255),
    event2 CHAR(255),
    event3 CHAR(255)
);
-- Running start/end balance per event name.
CREATE TABLE IF NOT EXISTS grafana.events(
    name CHAR(255) PRIMARY KEY,
    cnt INT
);

View File

@@ -0,0 +1,120 @@
# insert data into mysql database
import argparse
from log_reader import reader, ollama_reader
from log_reader import reader
from connect import database
import time
def get_col_num(db) -> int:
    """Number of columns currently in grafana.CudaEvent (time included)."""
    query = (
        "SELECT COUNT(*) FROM information_schema.COLUMNS "
        "where `TABLE_SCHEMA` = 'grafana' and `TABLE_NAME` = 'CudaEvent';"
    )
    return db.exec(query)[0][0]
def lts_cuda_event(db) -> list:
    """Return the event columns of the most recent CudaEvent row.

    When the table is empty, return a row-width list of ``None`` so the
    caller treats "no history" and "all slots free" identically.
    """
    rows = db.exec("SELECT * FROM grafana.`CudaEvent` ORDER BY time DESC LIMIT 1;")
    if rows:
        return list(rows[0][1:])
    return [None] * (get_col_num(db) - 1)
def lts_event_cnt(db) -> dict:
    """Map event name -> running start/end balance from grafana.events."""
    rows = db.exec("""SELECT * FROM grafana.events;""")
    return {name: cnt for name, cnt in rows}
def add_col(db):
    """Append one more eventN column to CudaEvent (N = current column count)."""
    next_idx = get_col_num(db)
    db.exec(f"""ALTER TABLE grafana.`CudaEvent` ADD COLUMN event{next_idx} CHAR(255)""")
def del_col(db, col_num):
    """Drop column event<col_num> from CudaEvent."""
    db.exec(f"""ALTER TABLE grafana.`CudaEvent` DROP COLUMN event{col_num};""")
def add_empty(max_time, db):
    """Insert an all-NULL row at *max_time* so timelines close their last span."""
    width = get_col_num(db)
    db.exec(f"""INSERT INTO grafana.`CudaEvent` VALUES ({max_time}, {','.join(['NULL'] * (width - 1))})""")
def push_log(db, log):
    """Replay parsed log events into grafana.CudaEvent and grafana.events.

    *log* is a list of {'time', 'op', 'name'} dicts.  Every running event
    occupies one eventN column of CudaEvent from its 'start' row until its
    'end' row; columns are allocated on demand (add_col) and reused once
    freed.  Rows are batched into one multi-row INSERT that is flushed
    before any schema change and once at the end.

    NOTE(review): event names are interpolated directly into SQL; safe
    only while names come from trusted log files.
    """
    max_time = 0
    ## latest cuda event
    cuda_event = lts_cuda_event(db)
    ## latest event cnt
    event_cnt = lts_event_cnt(db)
    # The bare INSERT prefix is 37 characters; len(cmd) > 37 therefore
    # means at least one pending row tuple has been appended.
    cmd = f"INSERT INTO grafana.CudaEvent VALUES "
    for line_idx, l in enumerate(log):
        if l['op'] == 'start':
            # Bump the per-name start/end balance.
            if l['name'] in event_cnt:
                event_cnt[l['name']] += 1
            else:
                event_cnt[l["name"]] = 1
            # Claim the first free column slot for this event.
            empty_col = False
            i = 0
            for e in cuda_event:
                if e is None:
                    cuda_event[i] = l['name']
                    empty_col = True
                    break
                i += 1
            if not empty_col:
                # No free slot: flush pending rows (they match the old,
                # narrower schema), then widen the table by one column.
                if len(cmd) > 37:
                    cmd = cmd[:-1] + ";"
                    # print(cmd)
                    # print('------')
                    db.exec(cmd)
                    cmd = f"INSERT INTO grafana.CudaEvent VALUES "
                add_col(db)
                cuda_event.append(l['name'])
        elif l['op'] == 'end':
            if l['name'] in event_cnt:
                if event_cnt[l["name"]] == 0:
                    # More 'end's than 'start's seen for this name.
                    print(f"[!]: in line {line_idx + 1}: event {l['name']} ended more than starting")
                    #raise ValueError(f"in line {line_idx + 1}: event {l['name']} ended more than starting")
                    continue
                event_cnt[l["name"]] -= 1
                # Free the most recently claimed slot holding this name.
                for i, e in enumerate(cuda_event[::-1]):
                    if e == l["name"]:
                        cuda_event[len(cuda_event)- 1 - i] = None
                        break
            if l["name"] not in event_cnt:
                # 'end' for a name that never started.
                print(f"[!]: in line {line_idx + 1}: event {l['name']} ended without starting")
                # raise ValueError(f"in line {line_idx + 1}: event {l['name']} ended without starting")
                continue
        else:
            raise ValueError(f"in line {line_idx + 1}: unknown operation {l['op']}")
        # Append one row snapshot: (time, current slot occupancy ...).
        tmp_cmd = f"({l['time']}, "
        max_time = max(max_time, float(l['time']))
        for e in cuda_event:
            if e is None:
                tmp_cmd += "NULL, "
            else:
                tmp_cmd += f"'{e}', "
        tmp_cmd = tmp_cmd[:-2] + "),"
        cmd += tmp_cmd
    # Flush the remaining batched rows, if any.
    if len(cmd) > 37:
        cmd = cmd[:-1] + ";"
        # print(cmd)
        # print("------")
        db.exec(cmd)
    # print(cuda_event)
    # print(event_cnt)
    add_empty(max_time,db)
def main(ip:str="127.0.0.1", port:int=3306, user:str="node1", pwd:str="mysql114514", data_base:str="grafana", table:str="CudaEvent", log_file:str="log/transformer.log"):
    """Parse *log_file* and push its events into MySQL.

    NOTE(review): ``table`` is accepted but unused; push_log always
    targets grafana.CudaEvent.
    """
    log = reader(log_file)
    db = database(ip, port, user, pwd, data_base)
    push_log(db, log)


if __name__ == '__main__':
    main()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
def reader(path):
    """Parse a sampler log file into a list of {'time', 'op', 'name'} dicts.

    Each log line is space-separated; field 3 is the timestamp, field 5
    the operation ('start'/'end') and field 6 the event name.  Blank or
    malformed lines (fewer than 7 fields) are skipped instead of raising
    IndexError as the original did.
    """
    with open(path, 'r') as f:
        lines = f.readlines()
    ret = []
    for line in lines:
        fields = line.strip().split(' ')
        if len(fields) < 7:
            continue
        ret.append({'time': fields[3], 'op': fields[5], 'name': fields[6]})
    return ret
def ollama_reader(path):
    """Parse an ollama trace log into a list of {'time', 'op', 'name'} dicts.

    Field 0 is the timestamp, field 2 the phase marker ('B' begins an
    event, anything else ends it) and field 3 the event name.  Blank or
    malformed lines (fewer than 4 fields) are skipped instead of raising
    IndexError as the original did.
    """
    with open(path, 'r') as f:
        lines = f.readlines()
    ret = []
    for line in lines:
        fields = line.strip().split(' ')
        if len(fields) < 4:
            continue
        ret.append({
            'time': fields[0],
            'op': "start" if fields[2] == "B" else "end",
            'name': fields[3],
        })
    return ret
if __name__ == '__main__':
    # Manual smoke test against a local sample log.
    data = reader('log/transformer.log')
    print(data)

View File

@@ -0,0 +1,29 @@
# connect to mysql database
import mysql.connector
class database:
    """Thin wrapper around one MySQL connection that runs one statement at a time."""

    def __init__(self, ip, port, user, pwd, database) -> None:
        self.conn = mysql.connector.connect(
            host = ip,
            port = port,
            user = user,
            password = pwd,
            database = database
        )
        self.cursor = self.conn.cursor()

    def exec(self, cmd: str):
        """Execute *cmd*, commit, and return its rows (empty list for non-SELECT).

        NOTE(review): callers interpolate values directly into *cmd*;
        switch to parameterized queries for externally supplied values.
        """
        self.cursor.execute(cmd)
        # fetchall() on a statement that produces no result set (INSERT/
        # UPDATE/DDL) raises InterfaceError in mysql-connector; only fetch
        # when the cursor actually holds rows.
        result = self.cursor.fetchall() if self.cursor.with_rows else []
        self.conn.commit()
        return result
if __name__ == '__main__':
    # Manual smoke test: connect with the development credentials
    # provisioned by the MySQL init.sql script.
    db = database(
        ip="127.0.0.1",
        port=3306,
        user="node1",
        pwd="mysql114514",
        database="grafana",
    )

View File

@@ -0,0 +1,32 @@
-- Reset the system-metric tables written by top.py; history is lost.
DROP TABLE IF EXISTS grafana.gauge;
DROP TABLE IF EXISTS grafana.memory;
DROP TABLE IF EXISTS grafana.gpumem;
DROP TABLE IF EXISTS grafana.diskio;
DROP TABLE IF EXISTS grafana.netio;
-- Instant utilization gauges (percentages / load factors).
CREATE Table IF NOT EXISTS grafana.gauge(
    TIME DATETIME,
    cpu DOUBLE,
    mem DOUBLE,
    gpu_load DOUBLE,
    gpu_mem DOUBLE
);
-- System RAM in MiB (see top.py get_mem_total/get_mem_used).
CREATE TABLE IF NOT EXISTS grafana.memory(
    TIME DATETIME,
    total DOUBLE,
    used DOUBLE
);
-- GPU memory totals as reported by GPUtil.
CREATE TABLE IF NOT EXISTS grafana.gpumem(
    TIME DATETIME,
    total DOUBLE,
    used DOUBLE
);
-- Disk throughput in MiB/s.
CREATE TABLE IF NOT EXISTS grafana.diskio(
    TIME DATETIME,
    read_rate DOUBLE,
    write_rate DOUBLE
);
-- Network throughput in MiB/s.
CREATE TABLE IF NOT EXISTS grafana.netio(
    TIME DATETIME,
    send_rate DOUBLE,
    recv_rate DOUBLE
);

96
grafana/src/top/top.py Normal file
View File

@@ -0,0 +1,96 @@
import psutil
from connect import database
import GPUtil
from time import sleep
from time import time
def avg(lst):
    """Arithmetic mean of a non-empty sequence."""
    total = sum(lst)
    return total / len(lst)
# print(f"{avg(psutil.cpu_percent(interval=0.5, percpu=True))}%")
def get_cpu_percent():
    """Average CPU utilization (percent) across all cores over 0.5 s."""
    per_core = psutil.cpu_percent(interval=0.5, percpu=True)
    return avg(per_core)
def get_mem_percent():
    """System RAM utilization, percent."""
    return psutil.virtual_memory().percent


def get_mem_total():
    """Total system RAM in MiB."""
    return psutil.virtual_memory().total / (1024 * 1024)


def get_mem_used():
    """Used system RAM in MiB."""
    return psutil.virtual_memory().used / (1024 * 1024)
# Baselines for the rate computation: counters and wall-clock at the last call.
disk_io_start = psutil.disk_io_counters()
last_time = time()


def get_disk_io_rate():
    """Bytes/sec (read, write) since the previous call (or module import)."""
    global disk_io_start, last_time
    counters = psutil.disk_io_counters()
    now = time()
    elapsed = now - last_time
    read_rate = (counters.read_bytes - disk_io_start.read_bytes) / elapsed
    write_rate = (counters.write_bytes - disk_io_start.write_bytes) / elapsed
    disk_io_start = counters
    last_time = now
    return read_rate, write_rate
# Baselines for the network-rate computation.
net_io_start = psutil.net_io_counters()
last_time_net = time()


def get_network_traffic():
    """Bytes/sec (sent, received) since the previous call (or module import)."""
    global net_io_start, last_time_net
    counters = psutil.net_io_counters()
    now = time()
    elapsed = now - last_time_net
    send_rate = (counters.bytes_sent - net_io_start.bytes_sent) / elapsed
    recv_rate = (counters.bytes_recv - net_io_start.bytes_recv) / elapsed
    net_io_start = counters
    last_time_net = now
    return send_rate, recv_rate
def get_gpu():
    """
    Returns: gpu load, gpu memory percentage, gpu memory used, gpu memory total, gpu temperature

    Always returns a 5-tuple.  The original returned only two values when
    no GPU was present, which crashed the caller's 5-way unpack in main().
    """
    GPUs = GPUtil.getGPUs()
    if len(GPUs) == 0:
        return 0, 0, 0, 0, 0
    return GPUs[0].load, GPUs[0].memoryUtil, GPUs[0].memoryUsed, GPUs[0].memoryTotal, GPUs[0].temperature
def main(ip:str="127.0.0.1", port:int=3306, user:str="node1", pwd:str="mysql114514", data_base:str="grafana", log_file:str="log/transformer.log", flush:int=10):
    """Poll CPU/RAM/GPU/disk/network metrics forever, inserting into MySQL.

    One gauge/memory/gpumem row is written per cycle; after sleeping
    *flush* seconds the disk and network rates (measured over that sleep)
    are written.  NOTE(review): ``log_file`` is accepted but unused here.
    """
    db = database(
        ip=ip,
        port=port,
        user=user,
        pwd=pwd,
        database=data_base,
    )
    while True:
        cpu_percent = get_cpu_percent()
        mem_percent = get_mem_percent()
        gpu_load, gpu_mem_percent, gpu_mem_used, gpu_mem_total, gpu_temp = get_gpu()
        db.exec(
            f"""INSERT INTO {data_base}.gauge (time, cpu, mem, gpu_load, gpu_mem) VALUES (NOW(), {cpu_percent}, {mem_percent}, {gpu_load}, {gpu_mem_percent});"""
        )
        db.exec(
            f"""INSERT INTO {data_base}.memory (time, total, used) VALUES (NOW(), {get_mem_total()}, {get_mem_used()});"""
        )
        db.exec(
            f"""INSERT INTO {data_base}.gpumem (time, total, used) VALUES (NOW(), {gpu_mem_total}, {gpu_mem_used});"""
        )
        # Sleep first so the IO rates below cover the full flush window.
        sleep(flush)
        read_rate, write_rate = get_disk_io_rate()
        db.exec(
            f"""INSERT INTO {data_base}.diskio (time, read_rate, write_rate) VALUES (NOW(), {read_rate / 1024/1024}, {write_rate / 1024/1024});"""
        )
        send_rate, recv_rate = get_network_traffic()
        db.exec(
            f"""INSERT INTO {data_base}.netio (time, send_rate, recv_rate) VALUES (NOW(), {send_rate / 1024/1024}, {recv_rate / 1024/1024});"""
        )


if __name__ == "__main__":
    main()

45
grafana/stop.sh Normal file
View File

@@ -0,0 +1,45 @@
# Stop the Grafana/MySQL containers and the background monitoring processes.
cd compose
docker-compose stop
echo -e "\x1b[32m[+]Successfully stopped docker containers gf-mysql and gf-grafana\x1b[0m"

# Kill every process whose command line matches $1.
# (The original repeated this find/kill sequence three times verbatim.)
kill_by_name() {
    PROCESS_NAME="$1"
    PIDS=$(ps aux | grep $PROCESS_NAME | grep -v grep | awk '{print $2}')
    # Report when no matching process was found.
    if [ -z "$PIDS" ]; then
        echo "Unable to find $PROCESS_NAME"
    fi
    # Kill the processes that were found.
    for PID in $PIDS; do
        sudo kill $PID
        echo "Killed process $PID#$PROCESS_NAME"
    done
}

kill_by_name "tailf.py"
kill_by_name "listen.sh"
kill_by_name "top.py"

12
pyproject.toml Normal file
View File

@@ -0,0 +1,12 @@
[project]
name = "eACGM"
version = "0.1.0"
description = "eACGM: An eBPF-based Automated Comprehensive Governance and Monitoring framework for AI/ML systems. (IWQoS 2025)"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"bcc>=0.1.10",
"pymysql>=1.1.1",
"pynvml>=12.0.0",
"tqdm>=4.67.1",
]