Go死锁检测与预防 - Golang高级面试题

死锁是并发编程中的经典问题，Go提供了运行时死锁检测和多种预防机制。本章深入探讨死锁的检测方法、预防策略和诊断技术。

📋 重点面试题

面试题 1：Go运行时死锁检测机制

难度级别：⭐⭐⭐⭐⭐
考察范围：并发编程/运行时机制
技术标签：deadlock detection runtime goroutine blocking concurrency debugging

详细解答

1. Go运行时死锁检测原理

基础实现 / 实际应用

package main

import (
    "context"
    "fmt"
    "runtime"
    "sync"
    "sync/atomic"
    "time"
    "unsafe"
)

// demonstrateGoDeadlockDetection prints a summary of the Go runtime's built-in
// deadlock detector and then runs the three follow-up demonstrations.
//
// Background on the runtime detector:
//
//  1. Trigger condition: every goroutine is blocked, none is runnable, and the
//     program cannot make progress.
//  2. When it fires: the scheduler cannot find a runnable goroutine, all Ps are
//     idle, and the system is completely stalled.
//  3. What is reported: "fatal error: all goroutines are asleep - deadlock!",
//     followed by the stack trace of every goroutine, showing where and why
//     each one is blocked.
//  4. Limitations: only global deadlocks are detected; a deadlock among a
//     subset of goroutines is not, and neither is a livelock.
func demonstrateGoDeadlockDetection() {
    fmt.Println("=== Go运行时死锁检测机制 ===")

    summary := []string{
        "Go运行时死锁检测特点：",
        "1. 自动检测全局死锁",
        "2. 提供详细的goroutine堆栈信息",
        "3. 只在所有goroutine阻塞时触发",
        "4. 无法检测局部死锁和活锁",
    }
    for _, line := range summary {
        fmt.Println(line)
    }

    // Run the demos in order: deadlock categories, detector limitations,
    // and how to read a deadlock report.
    demos := []func(){
        demonstrateDeadlockTypes,
        demonstrateDetectionLimitations,
        demonstrateDeadlockInformation,
    }
    for _, demo := range demos {
        demo()
    }
}

// demonstrateDeadlockTypes prints three illustrative deadlock snippets (a
// channel send with no receiver, a mutex locked twice by one goroutine, and
// two goroutines locking two mutexes in opposite order) and then runs the
// timeout-protected versions in demonstrateSafeDeadlockExamples.
//
// The snippets are only printed as text; running them for real would deadlock.
func demonstrateDeadlockTypes() {
    fmt.Println("\n--- 不同类型的死锁 ---")

    // printLines writes each line with fmt.Println, in the order given.
    printLines := func(lines ...string) {
        for _, line := range lines {
            fmt.Println(line)
        }
    }

    printLines(
        "1. Channel死锁示例:",
        "```go",
        "func channelDeadlock() {",
        "    ch := make(chan int)",
        "    ch <- 42  // 阻塞：无缓冲channel无接收者",
        "}",
        "```",
    )

    printLines(
        "\n2. Mutex死锁示例:",
        "```go",
        "func mutexDeadlock() {",
        "    var mu sync.Mutex",
        "    mu.Lock()",
        "    mu.Lock()  // 死锁：同一goroutine重复加锁",
        "}",
        "```",
    )

    printLines(
        "\n3. 循环等待死锁示例:",
        "```go",
        "func cyclicDeadlock() {",
        "    var mu1, mu2 sync.Mutex",
        "    go func() {",
        "        mu1.Lock(); mu2.Lock()  // 顺序1",
        "        defer mu1.Unlock(); defer mu2.Unlock()",
        "    }()",
        "    go func() {",
        "        mu2.Lock(); mu1.Lock()  // 顺序2：相反",
        "        defer mu2.Unlock(); defer mu1.Unlock()",
        "    }()",
        "}",
        "```",
    )

    // Safe variant: the same patterns guarded by timeouts so nothing hangs.
    demonstrateSafeDeadlockExamples()
}

// demonstrateSafeDeadlockExamples reproduces three classic deadlock patterns
// (channel send without a receiver, re-locking a held mutex, and circular lock
// acquisition) while bounding every blocking step with a timer, so the
// function always returns.
//
// Each timer has exactly one receiver. Sharing one timer channel between two
// receivers is unsafe because a timer delivers a single value: whichever
// receiver loses the race would wait forever.
func demonstrateSafeDeadlockExamples() {
    fmt.Println("\n安全死锁演示（使用超时）:")

    // probeLock reports whether mu could be acquired within d. The probe
    // goroutine releases the mutex immediately after acquiring it, so the
    // probe never keeps the lock. After a timeout the goroutine stays parked
    // until mu becomes free, then exits (the result channel is buffered).
    probeLock := func(mu *sync.Mutex, d time.Duration) bool {
        acquired := make(chan struct{}, 1)
        go func() {
            mu.Lock()
            mu.Unlock()
            acquired <- struct{}{}
        }()

        timer := time.NewTimer(d)
        defer timer.Stop()

        select {
        case <-acquired:
            return true
        case <-timer.C:
            return false
        }
    }

    // 1. Channel deadlock: a send on an unbuffered channel with no receiver.
    func() {
        done := make(chan struct{})
        go func() {
            // Signal completion on every path, not only on timeout.
            defer close(done)

            ch := make(chan int)
            timer := time.NewTimer(100 * time.Millisecond)
            defer timer.Stop()

            select {
            case ch <- 42:
                fmt.Println("发送成功")
            case <-timer.C:
                fmt.Println("检测到channel死锁模式")
            }
        }()
        <-done
    }()

    // 2. Mutex deadlock: try to lock a mutex that is already held.
    func() {
        var mu sync.Mutex
        mu.Lock()
        defer mu.Unlock()

        if probeLock(&mu, 100*time.Millisecond) {
            fmt.Println("获取锁成功")
        } else {
            fmt.Println("检测到mutex重入死锁模式")
            fmt.Println("Mutex重入超时，避免死锁")
        }
    }()

    // 3. Circular wait: two goroutines take two locks in opposite order.
    func() {
        var mu1, mu2 sync.Mutex
        var wg sync.WaitGroup

        wg.Add(2)

        // Goroutine 1 takes lock 1, then probes lock 2.
        go func() {
            defer wg.Done()
            mu1.Lock()
            defer mu1.Unlock()

            fmt.Println("Goroutine 1: 获得锁1，尝试获取锁2")

            if probeLock(&mu2, 50*time.Millisecond) {
                fmt.Println("Goroutine 1: 成功获取锁2")
            } else {
                fmt.Println("Goroutine 1: 获取锁2超时")
            }
        }()

        // Goroutine 2 takes lock 2, then probes lock 1.
        go func() {
            defer wg.Done()
            time.Sleep(10 * time.Millisecond) // small head start for goroutine 1

            mu2.Lock()
            defer mu2.Unlock()

            fmt.Println("Goroutine 2: 获得锁2，尝试获取锁1")

            if probeLock(&mu1, 50*time.Millisecond) {
                fmt.Println("Goroutine 2: 成功获取锁1")
            } else {
                fmt.Println("Goroutine 2: 获取锁1超时，避免循环等待")
            }
        }()

        wg.Wait()
    }()
}

// demonstrateDetectionLimitations prints the limitations of the runtime
// deadlock detector (global deadlocks only, no livelock detection, no
// resource-leak detection) and then runs the partial-deadlock and livelock
// demonstrations.
func demonstrateDetectionLimitations() {
    fmt.Println("\n--- 死锁检测的限制 ---")

    limitations := []string{
        "Go运行时死锁检测的限制：",

        // 1. Only global deadlocks are detected.
        "\n1. 只检测全局死锁:",
        "   - 必须所有goroutine都阻塞",
        "   - 部分goroutine死锁不会被检测",

        // 2. Livelocks are not detected.
        "\n2. 不检测活锁:",
        "   - goroutine在运行但无进展",
        "   - 需要外部监控检测",

        // 3. Resource leaks are not detected.
        "\n3. 不检测资源泄漏:",
        "   - goroutine泄漏",
        "   - 资源未释放",
    }
    for _, line := range limitations {
        fmt.Println(line)
    }

    // Partial-deadlock scenario first, then the livelock scenario.
    demonstratePartialDeadlock()
    demonstrateLivelock()
}

// demonstratePartialDeadlock shows a deadlock the runtime does not report:
// two goroutines block forever on channels nobody sends to, while a third
// goroutine keeps working. Goroutines A and B are left blocked on purpose when
// the function returns; the final line prints the resulting goroutine count.
func demonstratePartialDeadlock() {
    fmt.Println("\n部分死锁演示（不会被检测）:")

    // Unbuffered channels that never receive a value.
    first, second := make(chan int), make(chan int)

    // worker is the healthy goroutine: three steps, 100ms apart.
    var workers sync.WaitGroup
    worker := func() {
        defer workers.Done()
        for step := 0; step < 3; step++ {
            fmt.Printf("工作goroutine: %d\n", step)
            time.Sleep(100 * time.Millisecond)
        }
    }

    // blockedA and blockedB each wait on a channel that is never written.
    blockedA := func() {
        fmt.Println("Goroutine A: 等待channel 1")
        <-first // waits forever
        fmt.Println("Goroutine A: 完成")
    }
    blockedB := func() {
        fmt.Println("Goroutine B: 等待channel 2")
        <-second // waits forever
        fmt.Println("Goroutine B: 完成")
    }

    workers.Add(1)
    go worker()
    go blockedA()
    go blockedB()

    // Only the healthy goroutine is waited for.
    workers.Wait()

    fmt.Println("主goroutine完成，但A和B仍在等待（部分死锁）")
    fmt.Printf("当前goroutine数量: %d\n", runtime.NumGoroutine())
}

// demonstrateLivelock runs two goroutines that spin for 200ms, each bumping
// its own counter and yielding the processor every 100 iterations. Both exit
// when the context deadline expires, and the function waits for them.
func demonstrateLivelock() {
    fmt.Println("\n活锁演示:")

    ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
    defer cancel()

    var wg sync.WaitGroup

    // spin is the body shared by both goroutines. Each call owns its counter;
    // the two format strings carry the goroutine's label.
    spin := func(yieldFormat, exitFormat string) {
        defer wg.Done()

        count := 0
        for {
            select {
            case <-ctx.Done():
                fmt.Printf(exitFormat, count)
                return
            default:
            }

            count++
            if count%100 == 0 {
                fmt.Printf(yieldFormat, count)
                runtime.Gosched() // voluntarily give up the CPU
            }
        }
    }

    // Two goroutines keep yielding to each other.
    wg.Add(2)
    go spin("Goroutine 1 让步，计数: %d\n", "Goroutine 1 退出，计数: %d\n")
    go spin("Goroutine 2 让步，计数: %d\n", "Goroutine 2 退出，计数: %d\n")

    wg.Wait()
    fmt.Println("活锁演示完成（goroutine在运行但可能无实际进展）")
}

// demonstrateDeadlockInformation prints a sample runtime deadlock report,
// an explanation of each part of it, and then the table of common blocking
// states from demonstrateBlockingStates.
func demonstrateDeadlockInformation() {
    fmt.Println("\n--- 死锁信息解读 ---")

    // Sample report, printed verbatim inside a fenced block.
    sampleReport := []string{
        "典型的Go死锁报告格式:",
        "```",
        "fatal error: all goroutines are asleep - deadlock!",
        "",
        "goroutine 1 [chan send]:",
        "main.channelDeadlock()",
        "    /path/to/file.go:10 +0x50",
        "main.main()",
        "    /path/to/file.go:5 +0x20",
        "",
        "goroutine 2 [chan receive]:",
        "main.receiver()",
        "    /path/to/file.go:15 +0x30",
        "created by main.main",
        "    /path/to/file.go:8 +0x40",
        "```",
    }

    // Field-by-field explanation of the report.
    explanation := []string{
        "\n信息解读:",
        "1. 'fatal error' - 致命错误，程序退出",
        "2. 'all goroutines are asleep' - 所有goroutine都阻塞",
        "3. '[chan send]' - goroutine阻塞在channel发送",
        "4. '[chan receive]' - goroutine阻塞在channel接收",
        "5. 栈跟踪显示阻塞的具体位置",
        "6. 'created by' - 显示goroutine的创建位置",
    }

    for _, section := range [][]string{sampleReport, explanation} {
        for _, line := range section {
            fmt.Println(line)
        }
    }

    // Follow up with the list of blocking states.
    demonstrateBlockingStates()
}

// demonstrateBlockingStates prints a table of goroutine blocking states and
// a short description of each.
//
// The entries are kept in a slice rather than a map so the table is printed
// in the same order on every run; Go randomizes map iteration order.
func demonstrateBlockingStates() {
    fmt.Println("\n常见的goroutine阻塞状态:")

    states := []struct {
        state       string
        description string
    }{
        {"chan send", "阻塞在channel发送操作"},
        {"chan receive", "阻塞在channel接收操作"},
        {"sync.Mutex.Lock", "阻塞在互斥锁获取"},
        {"sync.RWMutex.RLock", "阻塞在读写锁读锁获取"},
        {"sync.RWMutex.Lock", "阻塞在读写锁写锁获取"},
        {"sync.WaitGroup.Wait", "阻塞在WaitGroup等待"},
        {"sync.Cond.Wait", "阻塞在条件变量等待"},
        {"select", "阻塞在select语句"},
        {"IO wait", "阻塞在I/O操作"},
        {"semacquire", "阻塞在信号量获取"},
        {"sleep", "阻塞在time.Sleep"},
    }

    for _, s := range states {
        fmt.Printf("  %-20s: %s\n", s.state, s.description)
    }
}

// demonstrateDeadlockPrevention runs one demo for each deadlock-prevention
// strategy.
//
// The four basic strategies each break one necessary condition of deadlock:
//
//  1. Mutual exclusion: use lock-free data structures, or atomic operations
//     instead of locks.
//  2. Hold and wait: acquire all resources at once, or release what is held
//     before requesting again.
//  3. No preemption: use timeouts, or allow resources to be preempted.
//  4. Circular wait: impose an order on resources and always lock in that
//     order.
func demonstrateDeadlockPrevention() {
    fmt.Println("\n=== 死锁预防策略 ===")

    // One demo per strategy, run in order.
    strategies := []func(){
        demonstrateLockOrdering,
        demonstrateTimeoutMechanism,
        demonstrateResourceAcquisition,
        demonstrateLockFreeApproach,
    }
    for _, run := range strategies {
        run()
    }
}

// demonstrateLockOrdering shows how a global lock order prevents circular
// waits. It prints an inconsistent-order counter-example, then runs two
// goroutines that request the same three mutexes in different argument order;
// lockInOrder sorts them by id before locking, so both lock in the same order.
//
// lockInOrder is a closure because Go does not allow a named function
// declaration inside a function body.
func demonstrateLockOrdering() {
    fmt.Println("\n--- 锁排序预防死锁 ---")

    // Wrong way: inconsistent lock order.
    fmt.Println("错误的加锁顺序示例（会导致死锁）:")
    fmt.Println("```go")
    fmt.Println("func badLockOrder() {")
    fmt.Println("    go func() { mu1.Lock(); mu2.Lock() }()  // 顺序1")
    fmt.Println("    go func() { mu2.Lock(); mu1.Lock() }()  // 顺序2")
    fmt.Println("}")
    fmt.Println("```")

    // Right way: every mutex carries an id that defines the lock order.
    type OrderedMutex struct {
        id int
        mu sync.Mutex
    }

    // lockInOrder locks the given mutexes in ascending id order and returns a
    // function that unlocks them in reverse order. The caller's slice is not
    // modified. Each mutex must be passed at most once.
    lockInOrder := func(mutexes ...*OrderedMutex) func() {
        sorted := make([]*OrderedMutex, len(mutexes))
        copy(sorted, mutexes)

        // Insertion sort by id; the list is tiny.
        for i := 1; i < len(sorted); i++ {
            for j := i; j > 0 && sorted[j-1].id > sorted[j].id; j-- {
                sorted[j-1], sorted[j] = sorted[j], sorted[j-1]
            }
        }

        for _, m := range sorted {
            m.mu.Lock()
        }

        return func() {
            for i := len(sorted) - 1; i >= 0; i-- {
                sorted[i].mu.Unlock()
            }
        }
    }

    mu1 := &OrderedMutex{id: 1}
    mu2 := &OrderedMutex{id: 2}
    mu3 := &OrderedMutex{id: 3}

    var wg sync.WaitGroup

    // Both goroutines end up locking in the same order.
    wg.Add(2)

    go func() {
        defer wg.Done()
        fmt.Println("Goroutine 1: 开始获取锁（按顺序）")
        unlock := lockInOrder(mu3, mu1, mu2) // argument order does not matter
        defer unlock()

        fmt.Println("Goroutine 1: 获得所有锁，工作中...")
        time.Sleep(50 * time.Millisecond)
        fmt.Println("Goroutine 1: 完成工作")
    }()

    go func() {
        defer wg.Done()
        time.Sleep(10 * time.Millisecond)

        fmt.Println("Goroutine 2: 开始获取锁（按顺序）")
        unlock := lockInOrder(mu2, mu3, mu1) // argument order does not matter
        defer unlock()

        fmt.Println("Goroutine 2: 获得所有锁，工作中...")
        time.Sleep(50 * time.Millisecond)
        fmt.Println("Goroutine 2: 完成工作")
    }()

    wg.Wait()
    fmt.Println("锁排序演示完成，无死锁")
}

// demonstrateTimeoutMechanism shows lock acquisition bounded by a timeout.
// Two goroutines take two locks in opposite order; whichever cannot get its
// second lock in time gives up and releases its first lock, so the circular
// wait resolves itself.
//
// TimeoutMutex is a one-slot channel semaphore. Racing a goroutine that calls
// sync.Mutex.Lock against a timer is not safe: after a timeout that goroutine
// still acquires the mutex later and nobody unlocks it. With a channel, an
// abandoned attempt simply never happens.
//
// The helpers are closures because Go does not allow method or function
// declarations inside a function body.
func demonstrateTimeoutMechanism() {
    fmt.Println("\n--- 超时机制预防死锁 ---")

    type TimeoutMutex struct {
        sem  chan struct{} // holds one token while the mutex is locked
        name string
    }

    newTimeoutMutex := func(name string) *TimeoutMutex {
        return &TimeoutMutex{sem: make(chan struct{}, 1), name: name}
    }

    // tryLock acquires tm, waiting at most timeout. It reports whether the
    // lock was obtained; on false nothing is held.
    tryLock := func(tm *TimeoutMutex, timeout time.Duration) bool {
        timer := time.NewTimer(timeout)
        defer timer.Stop()

        select {
        case tm.sem <- struct{}{}:
            return true
        case <-timer.C:
            return false
        }
    }

    // unlock releases tm. Unlocking a mutex that is not held is a programming
    // error and panics instead of blocking forever.
    unlock := func(tm *TimeoutMutex) {
        select {
        case <-tm.sem:
        default:
            panic("unlock of unlocked TimeoutMutex " + tm.name)
        }
    }

    // safeOperation takes first then second, each bounded by timeout, and
    // does the work only if both were obtained. Every lock it took is
    // released before it returns.
    safeOperation := func(first, second *TimeoutMutex, timeout time.Duration) bool {
        if !tryLock(first, timeout) {
            fmt.Printf("获取锁 %s 超时\n", first.name)
            return false
        }
        defer unlock(first)

        fmt.Printf("获得锁 %s\n", first.name)

        if !tryLock(second, timeout) {
            fmt.Printf("获取锁 %s 超时\n", second.name)
            return false
        }
        defer unlock(second)

        fmt.Printf("获得锁 %s\n", second.name)

        // The work that needs both locks.
        fmt.Printf("执行需要 %s 和 %s 的操作\n", first.name, second.name)
        time.Sleep(30 * time.Millisecond)

        return true
    }

    mu1 := newTimeoutMutex("mutex1")
    mu2 := newTimeoutMutex("mutex2")

    var wg sync.WaitGroup

    // The two goroutines request the locks in opposite order.
    wg.Add(2)

    go func() {
        defer wg.Done()
        fmt.Println("Goroutine 1: 尝试 mutex1 -> mutex2")
        if safeOperation(mu1, mu2, 100*time.Millisecond) {
            fmt.Println("Goroutine 1: 操作成功")
        } else {
            fmt.Println("Goroutine 1: 操作失败，避免了死锁")
        }
    }()

    go func() {
        defer wg.Done()
        time.Sleep(20 * time.Millisecond)

        fmt.Println("Goroutine 2: 尝试 mutex2 -> mutex1")
        if safeOperation(mu2, mu1, 100*time.Millisecond) {
            fmt.Println("Goroutine 2: 操作成功")
        } else {
            fmt.Println("Goroutine 2: 操作失败，避免了死锁")
        }
    }()

    wg.Wait()
    fmt.Println("超时机制演示完成")
}

// demonstrateResourceAcquisition shows all-or-nothing resource acquisition,
// which breaks the hold-and-wait condition: a goroutine gets every resource
// it asked for, or none of them.
//
// The helpers are closures because Go does not allow method or function
// declarations inside a function body. acquireAll marks each resource through
// tryAcquire and undoes the marks if any request fails, including a request
// for an id the manager does not know or one that lists the same id twice.
func demonstrateResourceAcquisition() {
    fmt.Println("\n--- 资源一次性获取 ---")

    type Resource struct {
        id    int
        inUse bool
        mu    sync.Mutex // guards inUse
        name  string
    }

    type ResourceManager struct {
        resources []*Resource
        mu        sync.Mutex // serializes multi-resource acquire/release
    }

    // tryAcquire marks r as in use and reports true, or reports false if r
    // was already in use.
    tryAcquire := func(r *Resource) bool {
        r.mu.Lock()
        defer r.mu.Unlock()

        if r.inUse {
            return false
        }
        r.inUse = true
        return true
    }

    // release marks r as free.
    release := func(r *Resource) {
        r.mu.Lock()
        defer r.mu.Unlock()
        r.inUse = false
    }

    newResourceManager := func() *ResourceManager {
        return &ResourceManager{
            resources: []*Resource{
                {id: 1, name: "resource1"},
                {id: 2, name: "resource2"},
                {id: 3, name: "resource3"},
            },
        }
    }

    // acquireAll acquires every resource named in ids, or none of them.
    // On failure it returns (nil, false) and nothing is left acquired.
    acquireAll := func(rm *ResourceManager, ids []int) ([]*Resource, bool) {
        rm.mu.Lock()
        defer rm.mu.Unlock()

        acquired := make([]*Resource, 0, len(ids))
        for _, id := range ids {
            var target *Resource
            for _, r := range rm.resources {
                if r.id == id {
                    target = r
                    break
                }
            }

            if target == nil || !tryAcquire(target) {
                // Roll back what this call has taken so far.
                for _, r := range acquired {
                    release(r)
                }
                return nil, false
            }
            acquired = append(acquired, target)
        }

        return acquired, true
    }

    // releaseAll frees every resource in resources.
    releaseAll := func(rm *ResourceManager, resources []*Resource) {
        rm.mu.Lock()
        defer rm.mu.Unlock()

        for _, r := range resources {
            release(r)
        }
    }

    manager := newResourceManager()
    var wg sync.WaitGroup

    wg.Add(2)

    go func() {
        defer wg.Done()
        fmt.Println("Goroutine 1: 尝试获取资源 [1, 2]")

        if resources, ok := acquireAll(manager, []int{1, 2}); ok {
            fmt.Println("Goroutine 1: 成功获取资源 [1, 2]")
            defer releaseAll(manager, resources)

            time.Sleep(100 * time.Millisecond)
            fmt.Println("Goroutine 1: 完成工作，释放资源")
        } else {
            fmt.Println("Goroutine 1: 获取资源失败")
        }
    }()

    go func() {
        defer wg.Done()
        time.Sleep(50 * time.Millisecond)

        fmt.Println("Goroutine 2: 尝试获取资源 [2, 3]")

        if resources, ok := acquireAll(manager, []int{2, 3}); ok {
            fmt.Println("Goroutine 2: 成功获取资源 [2, 3]")
            defer releaseAll(manager, resources)

            time.Sleep(100 * time.Millisecond)
            fmt.Println("Goroutine 2: 完成工作，释放资源")
        } else {
            fmt.Println("Goroutine 2: 获取资源失败，等待资源释放")

            // Wait, then retry once.
            time.Sleep(150 * time.Millisecond)
            if resources, ok := acquireAll(manager, []int{2, 3}); ok {
                fmt.Println("Goroutine 2: 重试成功获取资源 [2, 3]")
                defer releaseAll(manager, resources)

                time.Sleep(50 * time.Millisecond)
                fmt.Println("Goroutine 2: 完成工作，释放资源")
            }
        }
    }()

    wg.Wait()
    fmt.Println("资源一次性获取演示完成")
}

// demonstrateLockFreeApproach avoids deadlock by not locking at all. Ten
// goroutines concurrently bump an atomic counter and push to and pop from a
// compare-and-swap stack; the function then prints the final count and how
// many elements were left on the stack.
//
// sync/atomic and unsafe must be imported at file level; Go has no
// function-scope import. The operations are closures because methods cannot
// be declared on types defined inside a function.
func demonstrateLockFreeApproach() {
    fmt.Println("\n--- 无锁方法预防死锁 ---")

    // Lock-free counter.
    type LockFreeCounter struct {
        value int64
    }

    increment := func(c *LockFreeCounter) int64 {
        return atomic.AddInt64(&c.value, 1)
    }

    get := func(c *LockFreeCounter) int64 {
        return atomic.LoadInt64(&c.value)
    }

    // Lock-free stack: a singly linked list whose head is swapped with CAS.
    type node struct {
        value int
        next  unsafe.Pointer // *node
    }

    type LockFreeStack struct {
        head unsafe.Pointer // *node
    }

    // push retries until it installs the new node as head.
    push := func(s *LockFreeStack, value int) {
        fresh := &node{value: value}
        for {
            top := atomic.LoadPointer(&s.head)
            fresh.next = top
            if atomic.CompareAndSwapPointer(&s.head, top, unsafe.Pointer(fresh)) {
                return
            }
        }
    }

    // pop removes and returns the top value, or (0, false) if the stack is
    // empty. It retries when another goroutine changed head in between.
    pop := func(s *LockFreeStack) (int, bool) {
        for {
            top := atomic.LoadPointer(&s.head)
            if top == nil {
                return 0, false
            }

            topNode := (*node)(top)
            next := atomic.LoadPointer(&topNode.next)
            if atomic.CompareAndSwapPointer(&s.head, top, next) {
                return topNode.value, true
            }
        }
    }

    counter := &LockFreeCounter{}
    stack := &LockFreeStack{}

    var wg sync.WaitGroup
    const numGoroutines = 10
    const numOperations = 1000

    wg.Add(numGoroutines)

    // Hammer both structures from several goroutines at once.
    for i := 0; i < numGoroutines; i++ {
        go func(id int) {
            defer wg.Done()

            for j := 0; j < numOperations; j++ {
                count := increment(counter)

                push(stack, id*numOperations+j)

                if j%100 == 0 {
                    pop(stack) // value is not needed; this only exercises pop
                }

                if j%200 == 0 {
                    fmt.Printf("Goroutine %d: 当前计数 %d\n", id, count)
                }
            }
        }(i)
    }

    wg.Wait()

    finalCount := get(counter)
    fmt.Printf("最终计数: %d (预期: %d)\n", finalCount, numGoroutines*numOperations)

    // Drain the stack and count what was left.
    poppedCount := 0
    for {
        if _, ok := pop(stack); !ok {
            break
        }
        poppedCount++
    }
    fmt.Printf("从栈中弹出 %d 个元素\n", poppedCount)
    fmt.Println("无锁方法演示完成，无死锁风险")
}

面试题 3：死锁诊断和监控工具

难度级别：⭐⭐⭐⭐⭐
考察范围：故障诊断/系统监控
技术标签：deadlock diagnosis monitoring debugging tools runtime analysis

详细解答

1. 死锁诊断工具和技术

// demonstrateDeadlockDiagnosis runs the deadlock diagnosis and monitoring
// demos.
//
// Tools and techniques covered:
//
//  1. Runtime information: runtime.Stack() for goroutine stacks,
//     runtime.NumGoroutine() for the goroutine count, and pprof for
//     performance and goroutine analysis.
//  2. Custom monitoring: timeout detection, resource-usage monitoring, and
//     deadlock pattern recognition.
//  3. Third-party tooling: go-deadlock for enhanced detection, the trace tool
//     for execution tracing, and integration with monitoring systems.
func demonstrateDeadlockDiagnosis() {
    fmt.Println("\n=== 死锁诊断和监控 ===")

    // One demo per diagnosis technique, run in order.
    techniques := []func(){
        demonstrateRuntimeDiagnosis,
        demonstrateCustomMonitoring,
        demonstrateAdvancedDiagnosis,
    }
    for _, run := range techniques {
        run()
    }
}

// demonstrateRuntimeDiagnosis samples runtime state while two goroutines
// exchange values over two channels with timeouts on every step.
//
// Every 50ms a monitor goroutine prints runtime.NumGoroutine() and scans a
// runtime.Stack dump of all goroutines for "chan send" / "chan receive".
// The monitor is stopped and waited for before the function returns, so it
// cannot print after the completion message.
func demonstrateRuntimeDiagnosis() {
    fmt.Println("\n--- 运行时诊断工具 ---")

    ch1 := make(chan int)
    ch2 := make(chan int)
    var wg sync.WaitGroup

    // The context bounds the monitor's lifetime even if the workers stall.
    ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond)
    defer cancel()

    monitorDone := make(chan struct{})
    go func() {
        defer close(monitorDone)

        ticker := time.NewTicker(50 * time.Millisecond)
        defer ticker.Stop()

        // One buffer reused across ticks; a dump longer than this is
        // truncated by runtime.Stack.
        buf := make([]byte, 1024*10)

        for {
            select {
            case <-ctx.Done():
                return
            case <-ticker.C:
                numGoroutines := runtime.NumGoroutine()
                stackSize := runtime.Stack(buf, true)

                fmt.Printf("监控: Goroutine数量=%d\n", numGoroutines)

                // Simplified analysis: look for channel wait states.
                if stackSize > 0 {
                    stackInfo := string(buf[:stackSize])
                    if contains(stackInfo, "chan send") || contains(stackInfo, "chan receive") {
                        fmt.Println("检测到可能的channel阻塞")
                    }
                }
            }
        }
    }()

    // Potential deadlock: A sends on ch1 then receives from ch2, while B
    // sends on ch2 then receives from ch1.
    wg.Add(2)

    go func() {
        defer wg.Done()
        fmt.Println("Goroutine A: 向ch1发送，等待ch2")

        select {
        case ch1 <- 1:
            fmt.Println("Goroutine A: ch1发送成功")
        case <-time.After(100 * time.Millisecond):
            fmt.Println("Goroutine A: ch1发送超时")
        }

        select {
        case <-ch2:
            fmt.Println("Goroutine A: 从ch2接收成功")
        case <-time.After(100 * time.Millisecond):
            fmt.Println("Goroutine A: 从ch2接收超时")
        }
    }()

    go func() {
        defer wg.Done()
        time.Sleep(20 * time.Millisecond)

        fmt.Println("Goroutine B: 向ch2发送，等待ch1")

        select {
        case ch2 <- 2:
            fmt.Println("Goroutine B: ch2发送成功")
        case <-time.After(100 * time.Millisecond):
            fmt.Println("Goroutine B: ch2发送超时")
        }

        select {
        case <-ch1:
            fmt.Println("Goroutine B: 从ch1接收成功")
        case <-time.After(100 * time.Millisecond):
            fmt.Println("Goroutine B: 从ch1接收超时")
        }
    }()

    wg.Wait()

    // Stop the monitor and wait for it to exit.
    cancel()
    <-monitorDone

    fmt.Println("运行时诊断演示完成")
}

// demonstrateCustomMonitoring implements a small wait-for-graph deadlock
// monitor and drives it with two simulated goroutines that request two
// resources in opposite order, which produces a circular-wait alert.
//
// The monitor only records bookkeeping; requestResource never blocks the
// caller. An edge waiter -> holder is added when a request has to wait, and a
// cycle in those edges is reported as an alert.
//
// The helpers are closures, and the types are declared before use, because
// Go does not allow method or function declarations inside a function body
// and local types must be declared before they are referenced.
func demonstrateCustomMonitoring() {
    fmt.Println("\n--- 自定义死锁监控 ---")

    type Resource struct {
        name    string
        holder  string
        waiters []string
    }

    type DeadlockAlert struct {
        Type        string
        Description string
        Cycle       []string
        Timestamp   time.Time
    }

    type DeadlockMonitor struct {
        resources map[string]*Resource
        waitGraph map[string][]string // waiter -> holders it is waiting on
        mu        sync.RWMutex
        alerts    chan DeadlockAlert
    }

    newDeadlockMonitor := func() *DeadlockMonitor {
        return &DeadlockMonitor{
            resources: make(map[string]*Resource),
            waitGraph: make(map[string][]string),
            alerts:    make(chan DeadlockAlert, 10),
        }
    }

    // getOrCreateResource returns the record for name, creating it on first
    // use. The caller must hold dm.mu.
    getOrCreateResource := func(dm *DeadlockMonitor, name string) *Resource {
        if res, ok := dm.resources[name]; ok {
            return res
        }
        res := &Resource{name: name}
        dm.resources[name] = res
        return res
    }

    // removeEdge deletes one from -> to edge, dropping the map entry when no
    // edges remain. The caller must hold dm.mu.
    removeEdge := func(dm *DeadlockMonitor, from, to string) {
        edges := dm.waitGraph[from]
        for i, target := range edges {
            if target == to {
                edges = append(edges[:i], edges[i+1:]...)
                break
            }
        }
        if len(edges) == 0 {
            delete(dm.waitGraph, from)
            return
        }
        dm.waitGraph[from] = edges
    }

    // dfs walks the wait graph from node and returns the first cycle found
    // as a path whose first and last elements are equal, or nil.
    var dfs func(dm *DeadlockMonitor, node string, visited, recStack map[string]bool, path []string) []string
    dfs = func(dm *DeadlockMonitor, node string, visited, recStack map[string]bool, path []string) []string {
        visited[node] = true
        recStack[node] = true
        path = append(path, node)

        for _, neighbor := range dm.waitGraph[node] {
            if !visited[neighbor] {
                if cycle := dfs(dm, neighbor, visited, recStack, path); len(cycle) > 0 {
                    return cycle
                }
                continue
            }
            if !recStack[neighbor] {
                continue
            }
            // neighbor is on the current path: a cycle.
            for i, n := range path {
                if n == neighbor {
                    // Copy so the result does not alias the working path.
                    cycle := append([]string(nil), path[i:]...)
                    return append(cycle, neighbor)
                }
            }
        }

        recStack[node] = false
        return nil
    }

    // detectCycle returns a cycle in the wait graph, or nil if there is none.
    // The caller must hold dm.mu.
    detectCycle := func(dm *DeadlockMonitor) []string {
        visited := make(map[string]bool)
        recStack := make(map[string]bool)

        for node := range dm.waitGraph {
            if visited[node] {
                continue
            }
            if cycle := dfs(dm, node, visited, recStack, nil); len(cycle) > 0 {
                return cycle
            }
        }
        return nil
    }

    // requestResource records that goroutineID wants resourceName. A free
    // resource is granted at once; otherwise the requester is queued, a wait
    // edge is added, and an alert is queued if that closes a cycle.
    requestResource := func(dm *DeadlockMonitor, goroutineID, resourceName string) {
        dm.mu.Lock()
        defer dm.mu.Unlock()

        res := getOrCreateResource(dm, resourceName)

        if res.holder == "" {
            res.holder = goroutineID
            fmt.Printf("Monitor: %s 获得资源 %s\n", goroutineID, resourceName)
            return
        }

        res.waiters = append(res.waiters, goroutineID)
        dm.waitGraph[goroutineID] = append(dm.waitGraph[goroutineID], res.holder)

        fmt.Printf("Monitor: %s 等待资源 %s (持有者: %s)\n",
            goroutineID, resourceName, res.holder)

        cycle := detectCycle(dm)
        if len(cycle) == 0 {
            return
        }

        alert := DeadlockAlert{
            Type:        "circular_wait",
            Description: "检测到循环等待",
            Cycle:       cycle,
            Timestamp:   time.Now(),
        }
        select {
        case dm.alerts <- alert:
        default:
            // Alert queue is full; drop this alert rather than block.
        }
    }

    // releaseResource records that goroutineID gave up resourceName. A call
    // from anyone but the current holder is ignored. The resource passes to
    // the first waiter, and only the wait edges for this resource change:
    // the new holder's edge is removed and the remaining waiters are
    // retargeted at the new holder.
    releaseResource := func(dm *DeadlockMonitor, goroutineID, resourceName string) {
        dm.mu.Lock()
        defer dm.mu.Unlock()

        res := getOrCreateResource(dm, resourceName)
        if res.holder != goroutineID {
            return
        }
        res.holder = ""

        if len(res.waiters) == 0 {
            fmt.Printf("Monitor: %s 释放资源 %s\n", goroutineID, resourceName)
            return
        }

        nextHolder := res.waiters[0]
        res.waiters = res.waiters[1:]
        res.holder = nextHolder

        removeEdge(dm, nextHolder, goroutineID)
        for _, waiter := range res.waiters {
            removeEdge(dm, waiter, goroutineID)
            if waiter != nextHolder { // never record a self-edge
                dm.waitGraph[waiter] = append(dm.waitGraph[waiter], nextHolder)
            }
        }

        fmt.Printf("Monitor: %s 释放资源 %s，分配给 %s\n",
            goroutineID, resourceName, nextHolder)
    }

    // startMonitoring prints alerts until ctx is done. The returned channel
    // is closed when the printing goroutine has exited.
    startMonitoring := func(ctx context.Context, dm *DeadlockMonitor) <-chan struct{} {
        done := make(chan struct{})
        go func() {
            defer close(done)
            for {
                select {
                case <-ctx.Done():
                    return
                case alert := <-dm.alerts:
                    fmt.Printf("🚨 死锁告警: %s\n", alert.Description)
                    fmt.Printf("   循环: %v\n", alert.Cycle)
                    fmt.Printf("   时间: %v\n", alert.Timestamp)
                }
            }
        }()
        return done
    }

    monitor := newDeadlockMonitor()
    ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
    defer cancel()

    monitorDone := startMonitoring(ctx, monitor)

    var wg sync.WaitGroup

    // G1 takes A then B; G2 takes B then A.
    wg.Add(2)

    go func() {
        defer wg.Done()
        requestResource(monitor, "G1", "ResourceA")
        time.Sleep(50 * time.Millisecond)
        requestResource(monitor, "G1", "ResourceB")
        time.Sleep(100 * time.Millisecond)
        releaseResource(monitor, "G1", "ResourceB")
        releaseResource(monitor, "G1", "ResourceA")
    }()

    go func() {
        defer wg.Done()
        time.Sleep(30 * time.Millisecond)
        requestResource(monitor, "G2", "ResourceB")
        time.Sleep(50 * time.Millisecond)
        requestResource(monitor, "G2", "ResourceA")
        time.Sleep(100 * time.Millisecond)
        releaseResource(monitor, "G2", "ResourceA")
        releaseResource(monitor, "G2", "ResourceB")
    }()

    wg.Wait()

    // Stop the alert printer and wait for it to exit.
    cancel()
    <-monitorDone

    fmt.Println("自定义监控演示完成")
}

// LockEvent is one lock-related observation reported to an
// AdvancedDeadlockDetector.
type LockEvent struct {
    Timestamp   time.Time
    GoroutineID string
    ResourceID  string
    EventType   string // "acquire", "release", or "wait"
    StackTrace  string
}

// GoroutineInfo is the detector's current view of a single goroutine.
type GoroutineInfo struct {
    ID           string
    State        string // "running" after an acquire, "waiting" after a wait
    HeldLocks    []string
    WaitingFor   string // resource currently waited on; "" when not waiting
    StackTrace   string
    LastActivity time.Time
}

// holds reports whether the goroutine currently holds the named lock.
func (g *GoroutineInfo) holds(lock string) bool {
    for _, held := range g.HeldLocks {
        if held == lock {
            return true
        }
    }
    return false
}

// snapshot returns a copy of g whose HeldLocks slice does not alias g's, so a
// later "release" event (which edits the slice in place) cannot change it.
func (g *GoroutineInfo) snapshot() GoroutineInfo {
    cp := *g
    cp.HeldLocks = append([]string(nil), g.HeldLocks...)
    return cp
}

// DeadlockReport describes one finding produced by AnalyzeDeadlocks.
type DeadlockReport struct {
    Type       string   // "Circular Wait" or "Long Wait"
    Cycle      []string // closed path such as [G1 G2 G1]; empty for "Long Wait"
    Timestamp  time.Time
    Goroutines []GoroutineInfo // each involved goroutine exactly once
}

// AdvancedDeadlockDetector records lock events and derives a wait-for graph
// between goroutines from them. All exported methods are safe for concurrent
// use; unexported methods expect mu to be held by the caller.
type AdvancedDeadlockDetector struct {
    mu           sync.Mutex
    lockEvents   []LockEvent
    goroutines   map[string]*GoroutineInfo
    order        []string            // goroutine IDs in first-seen order, for deterministic iteration
    dependencies map[string][]string // waiter ID -> IDs of goroutines holding the awaited lock
}

// NewAdvancedDeadlockDetector returns an empty, ready-to-use detector.
func NewAdvancedDeadlockDetector() *AdvancedDeadlockDetector {
    return &AdvancedDeadlockDetector{
        goroutines:   make(map[string]*GoroutineInfo),
        dependencies: make(map[string][]string),
    }
}

// RecordEvent appends event to the history, updates the state of the goroutine
// it refers to, and rebuilds the wait-for graph. Events with an unrecognised
// EventType only refresh the goroutine's LastActivity and StackTrace.
func (d *AdvancedDeadlockDetector) RecordEvent(event LockEvent) {
    d.mu.Lock()
    defer d.mu.Unlock()

    d.lockEvents = append(d.lockEvents, event)

    g, known := d.goroutines[event.GoroutineID]
    if !known {
        g = &GoroutineInfo{ID: event.GoroutineID}
        d.goroutines[event.GoroutineID] = g
        d.order = append(d.order, event.GoroutineID)
    }
    g.LastActivity = event.Timestamp
    g.StackTrace = event.StackTrace

    switch event.EventType {
    case "acquire":
        g.HeldLocks = append(g.HeldLocks, event.ResourceID)
        g.State = "running"
        g.WaitingFor = ""

    case "release":
        // Drop the first matching entry from the held-lock list.
        for i, lock := range g.HeldLocks {
            if lock == event.ResourceID {
                g.HeldLocks = append(g.HeldLocks[:i], g.HeldLocks[i+1:]...)
                break
            }
        }

    case "wait":
        g.State = "waiting"
        g.WaitingFor = event.ResourceID
    }

    d.updateDependencies()
}

// updateDependencies rebuilds the wait-for graph from scratch: a waiting
// goroutine depends on every other goroutine that holds the lock it waits for.
// Each edge is recorded once, in first-seen goroutine order.
func (d *AdvancedDeadlockDetector) updateDependencies() {
    d.dependencies = make(map[string][]string)

    for _, waiterID := range d.order {
        waiter := d.goroutines[waiterID]
        if waiter.WaitingFor == "" {
            continue
        }
        for _, holderID := range d.order {
            if holderID == waiterID {
                continue
            }
            if d.goroutines[holderID].holds(waiter.WaitingFor) {
                d.dependencies[waiterID] = append(d.dependencies[waiterID], holderID)
            }
        }
    }
}

// AnalyzeDeadlocks returns one "Circular Wait" report per cycle found in the
// wait-for graph, followed by one "Long Wait" report per goroutine that has
// been in the "waiting" state for more than five seconds since its last
// recorded event. Reports contain snapshots, not live detector state.
func (d *AdvancedDeadlockDetector) AnalyzeDeadlocks() []DeadlockReport {
    const longWaitThreshold = 5 * time.Second

    d.mu.Lock()
    defer d.mu.Unlock()

    var reports []DeadlockReport
    now := time.Now()

    for _, cycle := range d.findCycles() {
        report := DeadlockReport{
            Type:      "Circular Wait",
            Cycle:     cycle,
            Timestamp: now,
        }
        // The cycle is a closed path whose last element repeats the first;
        // skip that closing element so each goroutine is listed once.
        for _, gid := range cycle[:len(cycle)-1] {
            if info, ok := d.goroutines[gid]; ok {
                report.Goroutines = append(report.Goroutines, info.snapshot())
            }
        }
        reports = append(reports, report)
    }

    for _, gid := range d.order {
        g := d.goroutines[gid]
        if g.State == "waiting" && now.Sub(g.LastActivity) > longWaitThreshold {
            reports = append(reports, DeadlockReport{
                Type:       "Long Wait",
                Timestamp:  now,
                Goroutines: []GoroutineInfo{g.snapshot()},
            })
        }
    }

    return reports
}

// findCycles runs a depth-first search from every not-yet-visited goroutine,
// in first-seen order, and collects at most one cycle per search root.
func (d *AdvancedDeadlockDetector) findCycles() [][]string {
    var cycles [][]string
    visited := make(map[string]bool, len(d.goroutines))
    recStack := make(map[string]bool, len(d.goroutines))

    for _, gid := range d.order {
        if visited[gid] {
            continue
        }
        if cycle := d.dfsCycle(gid, visited, recStack, nil); len(cycle) > 0 {
            cycles = append(cycles, cycle)
        }
    }

    return cycles
}

// dfsCycle explores the wait-for graph from gid. path holds the nodes on the
// current DFS branch and recStack marks exactly those nodes. It returns the
// first cycle found as a closed path (first element repeated at the end), or
// nil when no cycle is reachable from gid.
func (d *AdvancedDeadlockDetector) dfsCycle(gid string, visited, recStack map[string]bool, path []string) []string {
    visited[gid] = true
    recStack[gid] = true
    // Unmark on every exit, including the early returns below, so recStack
    // never carries stale entries into a search from another root.
    defer func() { recStack[gid] = false }()
    path = append(path, gid)

    for _, dep := range d.dependencies[gid] {
        if !visited[dep] {
            if cycle := d.dfsCycle(dep, visited, recStack, path); len(cycle) > 0 {
                return cycle
            }
            continue
        }
        if !recStack[dep] {
            continue
        }
        // dep is on the current branch: the cycle runs from dep's position
        // in path back to dep. Copy it so the result does not alias path.
        for i, node := range path {
            if node == dep {
                cycle := make([]string, 0, len(path)-i+1)
                cycle = append(cycle, path[i:]...)
                return append(cycle, dep)
            }
        }
    }

    return nil
}

// demonstrateAdvancedDiagnosis feeds a classic two-goroutine lock-order
// inversion into an AdvancedDeadlockDetector and prints the resulting reports.
func demonstrateAdvancedDiagnosis() {
    fmt.Println("\n--- 高级诊断技术 ---")

    detector := NewAdvancedDeadlockDetector()

    // G1 holds Lock1 and waits for Lock2 while G2 holds Lock2 and waits for
    // Lock1. Timestamps are offsets from a single base instant.
    base := time.Now()
    events := []LockEvent{
        {Timestamp: base, GoroutineID: "G1", ResourceID: "Lock1", EventType: "acquire", StackTrace: "stack1"},
        {Timestamp: base.Add(10 * time.Millisecond), GoroutineID: "G2", ResourceID: "Lock2", EventType: "acquire", StackTrace: "stack2"},
        {Timestamp: base.Add(20 * time.Millisecond), GoroutineID: "G1", ResourceID: "Lock2", EventType: "wait", StackTrace: "stack1"},
        {Timestamp: base.Add(30 * time.Millisecond), GoroutineID: "G2", ResourceID: "Lock1", EventType: "wait", StackTrace: "stack2"},
    }

    for _, event := range events {
        detector.RecordEvent(event)
        fmt.Printf("记录事件: %s %s %s\n", event.GoroutineID, event.EventType, event.ResourceID)
    }

    for _, report := range detector.AnalyzeDeadlocks() {
        fmt.Printf("诊断报告: %s\n", report.Type)
        if len(report.Cycle) > 0 {
            fmt.Printf("  循环: %v\n", report.Cycle)
        }
        fmt.Printf("  涉及%d个goroutine\n", len(report.Goroutines))
    }

    fmt.Println("高级诊断演示完成")
}

// contains reports whether substr occurs anywhere within s, comparing bytes.
// An empty substr is found in every string, including the empty string.
func contains(s, substr string) bool {
    width := len(substr)
    if width > len(s) {
        return false
    }
    // Slide a window of len(substr) bytes across s.
    for start := 0; start+width <= len(s); start++ {
        if s[start:start+width] == substr {
            return true
        }
    }
    return false
}

// main runs the three demonstrations in a fixed order: runtime deadlock
// detection, deadlock prevention, then deadlock diagnosis.
func main() {
    demos := []func(){
        demonstrateGoDeadlockDetection,
        demonstrateDeadlockPrevention,
        demonstrateDeadlockDiagnosis,
    }
    for _, run := range demos {
        run()
    }
}

🎯 核心知识点总结

Go运行时死锁检测要点

检测条件: 所有goroutine都处于阻塞状态且无法继续
检测时机: 调度器无法找到可运行的goroutine时
报告信息: 详细的goroutine堆栈和阻塞位置
检测限制: 只能检测全局死锁，无法检测局部死锁和活锁

死锁预防策略要点

锁排序: 统一的资源获取顺序避免循环等待
超时机制: 使用超时避免无限等待
资源一次性获取: 原子性获取所有需要的资源
无锁方法: 使用原子操作和无锁数据结构

死锁诊断技术要点

运行时信息: 使用runtime包获取goroutine状态
自定义监控: 构建资源依赖图检测循环等待
事件记录: 记录锁获取/释放事件进行分析
模式识别: 识别常见的死锁模式和长时间等待

最佳实践要点

设计原则: 在设计阶段考虑死锁预防
监控体系: 建立完善的死锁监控和告警机制
测试验证: 通过压力测试验证并发安全性
工具使用: 熟练使用各种死锁检测和诊断工具

🔍 面试准备建议

理解检测原理: 深入掌握Go运行时死锁检测机制
掌握预防策略: 熟练运用各种死锁预防技术
诊断技能: 学会使用工具诊断和分析死锁问题
实践经验: 在项目中积累并发编程和死锁处理经验
系统思维: 从系统设计角度考虑死锁预防和处理

Go死锁检测与预防 - Golang高级面试题

📋 重点面试题

面试题 1：Go运行时死锁检测机制

详细解答

1. Go运行时死锁检测原理

面试题 3：死锁诊断和监控工具

详细解答

1. 死锁诊断工具和技术

🎯 核心知识点总结

Go运行时死锁检测要点

死锁预防策略要点

死锁诊断技术要点

最佳实践要点

🔍 面试准备建议

Go死锁检测与预防 - Golang高级面试题

📋 重点面试题

面试题 1：Go运行时死锁检测机制

详细解答

1. Go运行时死锁检测原理

面试题 3：死锁诊断和监控工具

详细解答

1. 死锁诊断工具和技术

🎯 核心知识点总结

Go运行时死锁检测要点

死锁预防策略要点

死锁诊断技术要点

最佳实践要点

🔍 面试准备建议