From 6b65ffb728e3a0c7d9068ee3a439e5c22ee21e55 Mon Sep 17 00:00:00 2001 From: shanshanzhong Date: Mon, 1 Dec 2025 21:24:11 -0800 Subject: [PATCH] =?UTF-8?q?fix(user):=20=E4=BF=AE=E5=A4=8D=E8=A7=A3?= =?UTF-8?q?=E7=BB=91=E8=AE=BE=E5=A4=87=E6=8E=A5=E5=8F=A3=E7=9A=84502?= =?UTF-8?q?=E9=94=99=E8=AF=AF=E5=92=8C=E5=AE=89=E5=85=A8=E6=96=AD=E8=A8=80?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修复不安全类型断言可能导致panic的问题,将Redis清理移出事务并添加超时控制 增加代理层超时配置和详细日志,提升接口稳定性 --- .../排查并修复解绑设备接口产生 502 的问题.md | 33 +++++++++ etc/nginx.conf | 12 +++- .../logic/public/user/bindInviteCodeLogic.go | 2 +- .../logic/public/user/getDeviceListLogic.go | 7 +- .../public/user/getDeviceListLogic_test.go | 17 +++++ .../logic/public/user/unbindDeviceLogic.go | 70 +++++++++++-------- .../public/user/unbindDeviceLogic_test.go | 18 +++++ 7 files changed, 127 insertions(+), 32 deletions(-) create mode 100644 .trae/documents/排查并修复解绑设备接口产生 502 的问题.md create mode 100644 internal/logic/public/user/getDeviceListLogic_test.go create mode 100644 internal/logic/public/user/unbindDeviceLogic_test.go diff --git a/.trae/documents/排查并修复解绑设备接口产生 502 的问题.md b/.trae/documents/排查并修复解绑设备接口产生 502 的问题.md new file mode 100644 index 0000000..6010f0a --- /dev/null +++ b/.trae/documents/排查并修复解绑设备接口产生 502 的问题.md @@ -0,0 +1,33 @@ +## 结论与定位 +- 解绑接口路径:`/v1/public/user/unbind_device`,路由注册于 `internal/handler/routes.go:836-838`,处理器在 `internal/handler/public/user/unbindDeviceHandler.go:11-25`。 +- 业务逻辑:`internal/logic/public/user/unbindDeviceLogic.go:36-141`,进行设备所属用户迁移、认证记录更新、可能删除旧用户以及 Redis 缓存清理。 +- 返回封装:所有接口错误均走 JSON 200 包装,见 `pkg/result/httpResult.go:12-33`,因此应用层不会主动返回 502。 +- Nginx 反向代理:API 代理到 `127.0.0.1:8080`,见 `etc/nginx.conf:233-260`;未设置显式的 `proxy_read_timeout` 等超时参数,502 多发生在上游超时或连接被复位。 + +## 高概率问题点 +- 非安全类型断言导致潜在 panic:`internal/logic/public/user/unbindDeviceLogic.go:38` 直接 `.(*user.User)` 断言,若上下文未注入用户(Token 缺失/失效、链路异常),将发生 panic。其他多数逻辑使用安全断言并兜底(如 `internal/logic/public/user/unbindOAuthLogic.go:31-36`)。 +- 同类不安全用法还出现在 `internal/logic/public/user/getDeviceListLogic.go:31-33`(你反馈其他接口正常,但它也可能受影响,建议同修)。 +- 事务中混入外部 IO:在 DB 事务闭包内进行 Redis 删除(`internal/logic/public/user/unbindDeviceLogic.go:125-131`),若外部 IO 波动叠加数据库锁等待,整体耗时可能逼近或超过代理默认超时,引发 502。 + +## 修复方案 +- 将所有 `CtxKeyUser` 获取统一改为安全断言: + - 失败时返回 `InvalidAccess` 业务错误而非 panic,参考 `unbindOAuth` 的处理方式;修改位置:`internal/logic/public/user/unbindDeviceLogic.go:36-43`、`internal/logic/public/user/getDeviceListLogic.go:31-33`。 +- 将 Redis 缓存清理移出事务闭包,并添加超时控制: + - 事务仅做数据库一致性操作;在事务成功后再进行缓存删除,失败则不删;为 Redis 操作设置短超时,避免阻塞主流程。 +- 增强可观测性: + - 在解绑入口和事务前后增加结构化日志,包含 `device_id`、`user_id`、事务耗时、Redis 耗时、错误栈;方便精确定位是否存在偶发长耗时。 +- 代理层稳健性: + - 在 Nginx 针对 API 站点增加 `proxy_connect_timeout 10s; proxy_send_timeout 60s; proxy_read_timeout 60s;`,并开启 `proxy_next_upstream timeout`;避免上游短抖动即直接 502。 + +## 验证计划 +- 单元测试:构造上下文缺失用户的请求,确认不再 panic,返回业务错误码而非 502。 +- 事务耗时压测:模拟高并发解绑,观察事务与 Redis 操作耗时分布,确认在代理超时阈值内。 +- 端到端验证: + - 使用有效与无效 Token、不同设备 ID 连续调用解绑,确认响应始终为 200 JSON 包装(或业务错误),不出现 502。 + - 对比 Nginx 错误日志,验证 502 消失。 + +## 变更范围 +- 代码改动:`internal/logic/public/user/unbindDeviceLogic.go`、`internal/logic/public/user/getDeviceListLogic.go` 增加安全断言与错误返回;调整 Redis 清理至事务外并加超时;补充必要日志。 +- 配置更新:`etc/nginx.conf` 的 API server 段增加代理超时与上游重试策略。 + +请确认以上方案,我将按该方案实施、编写必要函数注释并补充测试,完成后提供验证结果与影响评估。 \ No newline at end of file diff --git a/etc/nginx.conf b/etc/nginx.conf index afed473..ebcc4fb 100644 --- a/etc/nginx.conf +++ b/etc/nginx.conf @@ -43,6 +43,10 @@ server { proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; + proxy_connect_timeout 10s; + proxy_send_timeout 60s; + proxy_read_timeout 60s; + proxy_next_upstream timeout; } } @@ -257,9 +261,13 @@ server { proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; - proxy_http_version 1.1; + proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection "upgrade"; + proxy_connect_timeout 10s; + proxy_send_timeout 60s; + proxy_read_timeout 60s; + proxy_next_upstream timeout; } } # de99e242子域名指向3001 (管理界面) @@ -298,4 +306,4 @@ server { proxy_ssl_server_name off; proxy_ssl_name $proxy_host; } -} \ No newline at end of file +} diff --git a/internal/logic/public/user/bindInviteCodeLogic.go b/internal/logic/public/user/bindInviteCodeLogic.go index 4486ae7..c705965 100644 --- a/internal/logic/public/user/bindInviteCodeLogic.go +++ b/internal/logic/public/user/bindInviteCodeLogic.go @@ -54,7 +54,7 @@ func (l *BindInviteCodeLogic) BindInviteCode(req *types.BindInviteCodeRequest) e // 检查是否是自己的邀请码 if referrer.Id == currentUser.Id { - return errors.Wrapf(xerr.NewErrCode(xerr.InvalidParams), "cannot bind your own invite code") + return errors.Wrapf(xerr.NewErrCodeMsg(xerr.InviteCodeError, "不允许绑定自己"), "cannot bind your own invite code") } // 更新用户的RefererId diff --git a/internal/logic/public/user/getDeviceListLogic.go b/internal/logic/public/user/getDeviceListLogic.go index 12b4432..3aa590f 100644 --- a/internal/logic/public/user/getDeviceListLogic.go +++ b/internal/logic/public/user/getDeviceListLogic.go @@ -11,6 +11,8 @@ import ( "github.com/perfect-panel/server/pkg/constant" "github.com/perfect-panel/server/pkg/logger" "github.com/perfect-panel/server/pkg/tool" + "github.com/perfect-panel/server/pkg/xerr" + "github.com/pkg/errors" ) type GetDeviceListLogic struct { @@ -29,7 +31,10 @@ func NewGetDeviceListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Get } func (l *GetDeviceListLogic) GetDeviceList() (resp *types.GetDeviceListResponse, err error) { - userInfo := l.ctx.Value(constant.CtxKeyUser).(*user.User) + userInfo, ok := l.ctx.Value(constant.CtxKeyUser).(*user.User) + if !ok { + return nil, errors.Wrapf(xerr.NewErrCode(xerr.InvalidAccess), "Invalid Access") + } list, count, err := l.svcCtx.UserModel.QueryDeviceList(l.ctx, userInfo.Id) if err != nil { return nil, err diff --git a/internal/logic/public/user/getDeviceListLogic_test.go b/internal/logic/public/user/getDeviceListLogic_test.go new file mode 100644 index 0000000..aed7fab --- /dev/null +++ b/internal/logic/public/user/getDeviceListLogic_test.go @@ -0,0 +1,17 @@ +package user + +import ( + "context" + "testing" + + "github.com/perfect-panel/server/internal/svc" +) + +func TestGetDeviceList_MissingUserContext(t *testing.T) { + l := NewGetDeviceListLogic(context.Background(), &svc.ServiceContext{}) + _, err := l.GetDeviceList() + if err == nil { + t.Fatalf("expected error when user context missing") + } +} + diff --git a/internal/logic/public/user/unbindDeviceLogic.go b/internal/logic/public/user/unbindDeviceLogic.go index caf1488..3647ca2 100644 --- a/internal/logic/public/user/unbindDeviceLogic.go +++ b/internal/logic/public/user/unbindDeviceLogic.go @@ -35,18 +35,24 @@ func NewUnbindDeviceLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Unbi func (l *UnbindDeviceLogic) UnbindDevice(req *types.UnbindDeviceRequest) error { // 获取当前 token 登录的用户 - userInfo := l.ctx.Value(constant.CtxKeyUser).(*user.User) + u, ok := l.ctx.Value(constant.CtxKeyUser).(*user.User) + if !ok { + return errors.Wrapf(xerr.NewErrCode(xerr.InvalidAccess), "Invalid Access") + } // 查询解绑设备是否存在 device, err := l.svcCtx.UserModel.FindOneDevice(l.ctx, req.Id) if err != nil { return errors.Wrapf(xerr.NewErrCode(xerr.DeviceNotExist), "find device") } - if device.UserId != userInfo.Id { - return errors.Wrapf(xerr.NewErrCode(xerr.InvalidParams), "device not belong to user") - } - - err = l.svcCtx.DB.Transaction(func(tx *gorm.DB) error { + if device.UserId != u.Id { + return errors.Wrapf(xerr.NewErrCode(xerr.InvalidParams), "device not belong to user") + } + l.Infow("开始解绑设备", + logger.Field("device_identifier", device.Identifier), + logger.Field("user_id", u.Id)) + start := time.Now() + err = l.svcCtx.DB.Transaction(func(tx *gorm.DB) error { // 1. 查询设备记录 var device user.Device err = tx.Model(&device).Where("id = ?", req.Id).First(&device).Error @@ -77,13 +83,13 @@ func (l *UnbindDeviceLogic) UnbindDevice(req *types.UnbindDeviceRequest) error { } // 3.2 记录注册日志 - registerLog := log.Register{ - AuthMethod: "device", - Identifier: device.Identifier, - RegisterIP: device.Ip, - UserAgent: device.UserAgent, - Timestamp: time.Now().UnixMilli(), - } + registerLog := log.Register{ + AuthMethod: "device", + Identifier: device.Identifier, + RegisterIP: device.Ip, + UserAgent: device.UserAgent, + Timestamp: time.Now().UnixMilli(), + } content, _ := registerLog.Marshal() if err := tx.Create(&log.SystemLog{ Type: log.TypeRegister.Uint8(), @@ -123,20 +129,28 @@ func (l *UnbindDeviceLogic) UnbindDevice(req *types.UnbindDeviceRequest) error { } // 6. 清理缓存 - deviceCacheKey := fmt.Sprintf("%v:%v", config.DeviceCacheKeyKey, device.Identifier) - if sessionId, err := l.svcCtx.Redis.Get(l.ctx, deviceCacheKey).Result(); err == nil && sessionId != "" { - _ = l.svcCtx.Redis.Del(l.ctx, deviceCacheKey).Err() - sessionIdCacheKey := fmt.Sprintf("%v:%v", config.SessionIdKey, sessionId) - _ = l.svcCtx.Redis.Del(l.ctx, sessionIdCacheKey).Err() - } + l.Infow("设备解绑并迁移成功", + logger.Field("device_identifier", device.Identifier), + logger.Field("old_user_id", device.UserId), + logger.Field("new_user_id", newUser.Id)) - l.Infow("设备解绑并迁移成功", - logger.Field("device_identifier", device.Identifier), - logger.Field("old_user_id", device.UserId), - logger.Field("new_user_id", newUser.Id)) - - return nil - }) - - return err + return nil + }) + if err != nil { + return err + } + duration := time.Since(start) + identifier := device.Identifier + ctx, cancel := context.WithTimeout(l.ctx, 2*time.Second) + defer cancel() + deviceCacheKey := fmt.Sprintf("%v:%v", config.DeviceCacheKeyKey, identifier) + if sessionId, rerr := l.svcCtx.Redis.Get(ctx, deviceCacheKey).Result(); rerr == nil && sessionId != "" { + _ = l.svcCtx.Redis.Del(ctx, deviceCacheKey).Err() + sessionIdCacheKey := fmt.Sprintf("%v:%v", config.SessionIdKey, sessionId) + _ = l.svcCtx.Redis.Del(ctx, sessionIdCacheKey).Err() + } + l.Infow("设备解绑完成", + logger.Field("device_identifier", identifier), + logger.Field("elapsed_ms", duration.Milliseconds())) + return nil } diff --git a/internal/logic/public/user/unbindDeviceLogic_test.go b/internal/logic/public/user/unbindDeviceLogic_test.go new file mode 100644 index 0000000..987a96f --- /dev/null +++ b/internal/logic/public/user/unbindDeviceLogic_test.go @@ -0,0 +1,18 @@ +package user + +import ( + "context" + "testing" + + "github.com/perfect-panel/server/internal/svc" + "github.com/perfect-panel/server/internal/types" +) + +func TestUnbindDevice_MissingUserContext(t *testing.T) { + l := NewUnbindDeviceLogic(context.Background(), &svc.ServiceContext{}) + err := l.UnbindDevice(&types.UnbindDeviceRequest{Id: 1}) + if err == nil { + t.Fatalf("expected error when user context missing") + } +} +