人工介入与审核

张开发
2026/4/15 11:43:08 15 分钟阅读

分享文章

人工介入与审核
特别是当你的 Agent 拥有“执行代码”(Tester 沙盒),甚至未来拥有“修改文件”、“操作数据库”的权限时,我们绝对不能让它完全脱缰。它必须在执行危险动作前,把控制权交还给人类。

在 LangGraph 中,实现这个功能极其优雅。我们不需要拆毁现有的图结构,只需要用到两个核心武器:interrupt_before(断点拦截)和 update_state(状态篡改)。

让我们分两步,对你的后端和前端进行一次“微创手术”。

第一步:后端微调(在沙盒前踩刹车)

打开你写好的 test6.py 文件,拉到最底部图编译(compile)的地方。我们只需要加上一行代码,告诉图引擎:“每次跑到 tester 节点之前,你都必须给我停下来!”

# test6.py(约 220 行左右,修改图的编译参数)
memory = MemorySaver()
graph = builder.compile(
    checkpointer=memory,
    interrupt_before=["tester"]  # ✨ 【核心新增】在进入 tester 前强行挂起
)
print("Skills Creator 智能体引擎启动完毕\n")

就这么简单!后端引擎的修改已经全部完成。现在,图引擎一旦流转到 coder -> tester 这条边上,就会自动进入休眠状态。

第二步:前端大改造(渲染审批控制台)

当图被挂起时,我们需要在 Streamlit 前端页面上捕获这个“暂停状态”,并渲染出审批按钮。我们需要对 app.py 的结构进行优化。因为 Streamlit 是基于事件重新运行的,当图暂停时,我们要锁住用户的聊天输入框,强制用户先处理审批。

请将你的 app.py 替换为以下结构:

import sys
import os
import uuid
import streamlit as st

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# 注:原文的 page_icon 图标字符在抓取时丢失,此处为占位图标
st.set_page_config(page_title="Skills Creator", page_icon="🤖", layout="wide")
st.title("Skills Creator — AI 代码生成引擎")
st.caption("输入你的需求,AI 将自动规划、编写,并由你审核后进行沙盒测试。")


@st.cache_resource(show_spinner="正在启动引擎,请稍候...")
def load_graph():
    from test7 import graph
    return graph


graph = load_graph()

# ──────────────────────────────────────────
# Session State 初始化
# phase: idle | running | awaiting_approval | resuming | done
# ──────────────────────────────────────────
defaults = {
    "phase": "idle",
    "thread_config": None,
    "logs": [],
    "pending_code": "",
    "pending_test_code": "",
    "final_code": "",
    "final_test_code": "",
    "final_iter": 0,
    "final_success": False,
    "history": [],
    "user_req": "",
    "review_round": 0,
}
for k, v in defaults.items():
    if k not in st.session_state:
        st.session_state[k] = v

# ──────────────────────────────────────────
# 侧边栏:历史任务
# ──────────────────────────────────────────
with st.sidebar:
    st.header("历史任务")
    if st.session_state.history:
        for i, item in enumerate(reversed(st.session_state.history)):
            label = item["req"][:28] + "..." 
if len(item[req]) 28 else item[req] with st.expander(f任务 {len(st.session_state.history) - i}: {label}, expandedFalse): st.markdown(f**状态**: {✅ 成功 if item[success] else ❌ 放弃/失败}) st.markdown(f**迭代次数**: {item.get(iterations, -)}) if item.get(code): st.code(item[code], languagepython) else: st.info(暂无历史任务) phase st.session_state.phase NODE_LABELS { planner: 规划中Planner, coder: 编写/修复代码Coder, tools: 搜索资料Tools, tester: 沙盒测试Tester, } def render_logs(): if st.session_state.logs: with st.expander(执行日志, expandedTrue): st.markdown(\n.join(st.session_state.logs)) # ══════════════════════════════════════════ # IDLE显示输入表单 # ══════════════════════════════════════════ if phase idle: col_input, col_guide st.columns([1, 1], gaplarge) with col_input: st.subheader(需求输入) user_req st.text_area( 描述你想要的 Python 功能, height180, placeholder例如写一个函数判断字符串是否为回文忽略大小写和空格提供至少 3 个测试用例。, ) if st.button( 开始生成, typeprimary, disablednot user_req.strip(), use_container_widthTrue): st.session_state.phase running st.session_state.user_req user_req.strip() st.session_state.logs [] st.session_state.review_round 0 st.session_state.thread_config {configurable: {thread_id: ftask_{uuid.uuid4().hex[:8]}}} st.rerun() with col_guide: st.subheader(工作流程) st.markdown( | 阶段 | 说明 | |------|------| | Planner | AI 将需求拆解为开发步骤 | | Coder | AI 编写业务代码和测试代码 | | ⏸️ **人工审核** | **你来检查代码可修改后再批准** | | Tester | 在沙盒中运行测试失败则循环修复 | ) # ══════════════════════════════════════════ # RUNNING执行图直到中断点 # ══════════════════════════════════════════ elif phase running: st.info(⏳ AI 正在规划和编写代码请稍候...) 
log_box st.empty() initial_input { user_requirement: st.session_state.user_req, iteration_count: 0, execution_logs: [], } logs st.session_state.logs try: for event in graph.stream(initial_input, configst.session_state.thread_config, stream_modeupdates): for node_name, node_output in event.items(): label NODE_LABELS.get(node_name, node_name) logs.append(f**✓** {label}) if node_name planner and plan in node_output: for j, s in enumerate(node_output[plan]): logs.append(f - 步骤 {j 1}: {s}) elif node_name coder and node_output.get(current_code): logs.append( - 代码已生成等待审核) log_box.markdown(\n.join(logs)) # 检查是否在 tester 前被中断 snapshot graph.get_state(st.session_state.thread_config) if snapshot.next and tester in snapshot.next: vals snapshot.values st.session_state.pending_code vals.get(current_code, ) st.session_state.pending_test_code vals.get(current_test_code, ) st.session_state.review_round 1 st.session_state.phase awaiting_approval else: # 意外直接完成 vals snapshot.values st.session_state.final_code vals.get(current_code, ) st.session_state.final_test_code vals.get(current_test_code, ) st.session_state.final_iter vals.get(iteration_count, 0) st.session_state.final_success vals.get(error_message) is None st.session_state.phase done except Exception as e: st.error(f执行出错: {e}) st.session_state.phase idle st.rerun() # ══════════════════════════════════════════ # AWAITING_APPROVAL人工审核代码 # ══════════════════════════════════════════ elif phase awaiting_approval: round_num st.session_state.review_round st.warning(f⏸️ 第 {round_num} 轮审核AI 已完成代码编写请检查后决定是否进行沙盒测试。) render_logs() st.markdown(---) st.subheader(代码审核区可直接修改) col_code, col_test st.columns([1, 1], gaplarge) with col_code: st.markdown(**业务代码**) edited_code st.text_area( 业务代码, valuest.session_state.pending_code, height400, label_visibilitycollapsed, keyfcode_editor_{round_num}, ) with col_test: st.markdown(**测试代码**) edited_test st.text_area( 测试代码, valuest.session_state.pending_test_code, height400, label_visibilitycollapsed, 
keyftest_editor_{round_num}, ) st.markdown(---) col_approve, col_reject st.columns([1, 1]) with col_approve: if st.button(✅ 批准并进行沙盒测试, typeprimary, use_container_widthTrue): # 将用户可能修改过的代码写回图状态 graph.update_state(st.session_state.thread_config, { current_code: edited_code, current_test_code: edited_test, }) st.session_state.logs.append(f\n** 人工审核通过第 {round_num} 轮**进入沙盒测试...) st.session_state.phase resuming st.rerun() with col_reject: if st.button(❌ 放弃此次生成, use_container_widthTrue): st.session_state.history.append({ req: st.session_state.user_req, code: st.session_state.pending_code, test_code: st.session_state.pending_test_code, iterations: round_num, success: False, }) st.session_state.phase idle st.rerun() # ══════════════════════════════════════════ # RESUMING批准后继续执行 # ══════════════════════════════════════════ elif phase resuming: st.info( 正在进行沙盒测试请稍候...) log_box st.empty() logs st.session_state.logs try: for event in graph.stream(None, configst.session_state.thread_config, stream_modeupdates): for node_name, node_output in event.items(): label NODE_LABELS.get(node_name, node_name) logs.append(f**✓** {label}) if node_name tester: err node_output.get(error_message) if err is None: logs.append( - ✅ 测试通过) else: logs.append( - ❌ 测试失败AI 正在分析错误...) 
log_box.markdown(\n.join(logs)) # 检查执行后状态是否再次在 tester 前中断修复循环 snapshot graph.get_state(st.session_state.thread_config) if snapshot.next and tester in snapshot.next: # AI 修复了代码需要再次人工审核 vals snapshot.values st.session_state.pending_code vals.get(current_code, ) st.session_state.pending_test_code vals.get(current_test_code, ) st.session_state.review_round 1 st.session_state.logs.append(f\n** AI 已修复代码进入第 {st.session_state.review_round} 轮审核**) st.session_state.phase awaiting_approval else: # 真正完成 vals snapshot.values st.session_state.final_code vals.get(current_code, ) st.session_state.final_test_code vals.get(current_test_code, ) st.session_state.final_iter vals.get(iteration_count, 0) st.session_state.final_success vals.get(error_message) is None st.session_state.phase done except Exception as e: st.error(f测试执行出错: {e}) st.session_state.phase idle st.rerun() # ══════════════════════════════════════════ # DONE展示最终结果 # ══════════════════════════════════════════ elif phase done: if st.session_state.final_success: st.success(✅ 所有测试通过代码交付完成) else: st.warning(⚠️ 已达最大迭代次数以下为最新版本代码。) render_logs() st.markdown(---) tab1, tab2 st.tabs([ 业务代码, 测试代码]) with tab1: st.code(st.session_state.final_code or 未生成, languagepython) with tab2: st.code(st.session_state.final_test_code or 未生成, languagepython) st.markdown(f**总迭代次数**: {st.session_state.final_iter}) st.markdown(---) if st.button( 开始新任务, typeprimary): st.session_state.history.append({ req: st.session_state.user_req, code: st.session_state.final_code, test_code: st.session_state.final_test_code, iterations: st.session_state.final_iter, success: st.session_state.final_success, }) st.session_state.phase idle st.rerun() 架构层面的思考与解读难点解析这段代码中有一个极度硬核的 LangGraph 架构级黑魔法就在**【打回重写】**这个按钮的逻辑里业务痛点我们的图结构是Coder - Tester。现在我们在中间拦截了如果不合格我们怎么把它送回Coder呢如果修改图的连线会变得非常臃肿。巧妙借力as_node机制回忆一下你在test6.py里写的route_after_test十字路口逻辑只要 Tester 节点运行完且error_message里面有内容就会被打回给 Coder。所以我们在前端代码里写了graph.update_state(..., 
as_node="tester")。我们根本没有运行真正的 tester 函数,我们是以“人类裁判”的身份,冒名顶替了 tester 节点,向系统的 State 里写入了一条人造的错误信息:“【人工审查打回】: 你忘记导包了”。图引擎被唤醒后,它以为 tester 刚跑完并报错了,于是原封不动地触发了原来的错误路由,完美地将带有你意见的工单送回了 Coder 节点!这就是状态机引擎的最高魅力:只要你符合状态的契约(Schema),人类和机器可以随时在流水线上互换角色!现在,在终端重新运行 streamlit run app.py。输入一个需求,你会发现进度条走到 coder 后戛然而止,页面上会弹出一个极具科技感的“审批控制台”,等待你的指令!去试试看吧!

更多文章