📌 内容摘要

  • 从零开始,一步一步搭建一个功能完整的本地 AI 助手,含命令行版和 Web 版。
  • 核心功能:多轮对话、流式输出、对话历史持久化、自定义角色设定、用量统计。
  • Web 版使用 FastAPI + 原生 HTML,无需前端框架,部署简单。
  • 文末提供 Docker 容器化部署方案,一行命令启动。

一、项目概览

我们要构建的 AI 助手包含以下功能:

  • 多轮对话,Claude 能记住整个对话上下文
  • 流式输出,回复逐字显示
  • 对话历史自动保存到本地 JSON 文件,重启后可继续
  • 可配置的角色设定(System Prompt)
  • 实时用量统计(token 消耗和费用估算)
  • 命令行版本 + Web 版本

最终目录结构

claude-assistant/
├── .env                  # API Key 配置
├── .env.example          # 配置模板
├── requirements.txt      # Python 依赖
├── config.py             # 配置管理
├── assistant.py          # 核心助手逻辑
├── cli.py                # 命令行界面
├── server.py             # Web 服务器(FastAPI)
├── static/
│   └── index.html        # Web 前端页面
├── data/
│   └── history/          # 对话历史存储目录
└── Dockerfile            # 容器化配置

二、项目初始化

mkdir claude-assistant && cd claude-assistant
python -m venv venv
source venv/bin/activate      # Windows: venv\Scripts\activate

pip install anthropic fastapi uvicorn python-dotenv rich

.env 文件

ANTHROPIC_API_KEY=sk-ant-api03-你的key
DEFAULT_MODEL=claude-sonnet-4-6
MAX_TOKENS=4096
SYSTEM_PROMPT=你是一个有帮助、诚实、无害的AI助手。你的回答简洁清晰,遇到不确定的问题会主动说明。
HISTORY_DIR=./data/history

config.py — 配置管理

from dotenv import load_dotenv
import os

load_dotenv()

class Config:
    """Central application settings, sourced from environment variables."""

    # Required: fail fast at import time if the key is absent.
    API_KEY: str = os.environ["ANTHROPIC_API_KEY"]

    # Optional settings, each with a sensible fallback.
    MODEL: str = os.environ.get("DEFAULT_MODEL", "claude-sonnet-4-6")
    MAX_TOKENS: int = int(os.environ.get("MAX_TOKENS", "4096"))
    SYSTEM_PROMPT: str = os.environ.get(
        "SYSTEM_PROMPT",
        "你是一个有帮助、诚实、无害的AI助手。"
    )
    HISTORY_DIR: str = os.environ.get("HISTORY_DIR", "./data/history")

    # USD price per million tokens, per model.
    PRICING = {
        "claude-opus-4-6": {"input": 5.0, "output": 25.0},
        "claude-sonnet-4-6": {"input": 3.0, "output": 15.0},
        "claude-haiku-4-5-20251001": {"input": 1.0, "output": 5.0},
    }


config = Config()

三、核心助手逻辑(assistant.py)

import anthropic
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Generator
from config import config


class UsageTracker:
    """Accumulates token counts for one model and estimates the dollar cost."""

    def __init__(self, model: str):
        self.model = model
        self.total_input = 0
        self.total_output = 0

    def add(self, input_tokens: int, output_tokens: int):
        """Record the usage reported by a single API response."""
        self.total_input += input_tokens
        self.total_output += output_tokens

    @property
    def cost(self) -> float:
        """Estimated spend in USD; unknown models fall back to Sonnet rates."""
        rates = config.PRICING.get(self.model, {"input": 3.0, "output": 15.0})
        dollars = self.total_input * rates["input"]
        dollars += self.total_output * rates["output"]
        return dollars / 1_000_000

    def summary(self) -> str:
        """One-line, human-readable usage report."""
        return (
            f"总用量:{self.total_input:,} 输入 / "
            f"{self.total_output:,} 输出 tokens | "
            f"预估费用:${self.cost:.4f}"
        )


class ConversationHistory:
    """Conversation history with local JSON persistence.

    Each session is stored as ``<HISTORY_DIR>/<session_id>.json``.  A
    corrupted or hand-edited file is treated as empty instead of crashing
    the assistant at startup.
    """

    def __init__(self, session_id: str):
        self.session_id = session_id
        self.messages: list[dict] = []
        self.path = Path(config.HISTORY_DIR) / f"{session_id}.json"
        # Make sure the storage directory exists before the first save.
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self._load()

    def _load(self):
        """Populate ``self.messages`` from disk; tolerate unreadable files."""
        if not self.path.exists():
            return
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (json.JSONDecodeError, OSError):
            # Corrupted or unreadable history: start with a clean slate
            # rather than making the whole assistant unusable.
            return
        if isinstance(data, dict) and isinstance(data.get("messages"), list):
            self.messages = data["messages"]

    def save(self):
        """Write the full history to disk (overwrites the previous file)."""
        with open(self.path, "w", encoding="utf-8") as f:
            json.dump(
                {"session_id": self.session_id, "messages": self.messages},
                f,
                ensure_ascii=False,
                indent=2,
            )

    def add(self, role: str, content: str):
        """Append one message and persist immediately."""
        self.messages.append({"role": role, "content": content})
        self.save()

    def clear(self):
        """Drop the in-memory history and delete the backing file."""
        self.messages = []
        if self.path.exists():
            self.path.unlink()

    def __len__(self):
        return len(self.messages)


class ClaudeAssistant:
    """High-level chat wrapper around the Anthropic Messages API.

    Ties together the API client, the persisted conversation history,
    and the usage tracker for one session.
    """

    def __init__(self, session_id: str = "default", system: str = ""):
        self.client = anthropic.Anthropic(api_key=config.API_KEY)
        self.history = ConversationHistory(session_id)
        # An explicit system prompt overrides the configured default.
        self.system = system or config.SYSTEM_PROMPT
        self.usage = UsageTracker(config.MODEL)

    def chat_stream(self, user_input: str) -> Generator[str, None, None]:
        """Stream the assistant's reply, yielding text chunks as they arrive.

        The user turn is persisted before the request and rolled back if
        the API call fails, so the saved history never ends with an
        unanswered user message after an error.
        """
        self.history.add("user", user_input)
        full_reply = ""

        try:
            with self.client.messages.stream(
                model=config.MODEL,
                max_tokens=config.MAX_TOKENS,
                system=self.system,
                # The user turn was just appended, so the history already
                # contains the full request payload.
                messages=self.history.messages,
            ) as stream:
                for text in stream.text_stream:
                    full_reply += text
                    yield text

                final = stream.get_final_message()
                self.usage.add(
                    final.usage.input_tokens,
                    final.usage.output_tokens,
                )
        except Exception:
            # Roll back the dangling user turn so a retry starts clean.
            self.history.messages.pop()
            self.history.save()
            raise

        self.history.add("assistant", full_reply)

    def chat(self, user_input: str) -> str:
        """Blocking variant: consume the stream and return the full reply."""
        return "".join(self.chat_stream(user_input))

    def reset(self):
        """Clear this session's history (memory and on-disk file)."""
        self.history.clear()

    @property
    def stats(self) -> str:
        """Human-readable usage summary for this session."""
        return self.usage.summary()

四、命令行界面(cli.py)

from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
from rich.prompt import Prompt
from rich.live import Live
from rich.text import Text
import sys
from assistant import ClaudeAssistant
from config import config

console = Console()


def print_welcome():
    """Show the startup banner with model info and basic usage hints."""
    banner = (
        f"[bold]Claude AI 助手[/bold]\n"
        f"模型:{config.MODEL}\n"
        f"[dim]输入 /help 查看命令,/quit 退出[/dim]"
    )
    console.print(Panel(banner, border_style="cyan"))


def handle_command(cmd: str, assistant: ClaudeAssistant) -> bool:
    """处理斜杠命令,返回是否继续对话"""
    match cmd.strip().lower():
        case "/quit" | "/exit" | "/q":
            console.print(f"\n[dim]{assistant.stats}[/dim]")
            console.print("[yellow]再见![/yellow]")
            sys.exit(0)

        case "/reset" | "/clear":
            assistant.reset()
            console.print("[green]对话已清空[/green]")

        case "/stats":
            console.print(f"[dim]{assistant.stats}[/dim]")

        case "/history":
            msgs = assistant.history.messages
            if not msgs:
                console.print("[dim]暂无对话记录[/dim]")
            else:
                for msg in msgs:
                    role_color = "cyan" if msg["role"] == "user" else "green"
                    console.print(
                        f"[{role_color}]{msg['role']}:[/{role_color}] "
                        f"{msg['content'][:100]}..."
                    )

        case "/help":
            console.print(
                "[bold]可用命令:[/bold]\n"
                "  /reset   清空对话历史\n"
                "  /stats   查看用量统计\n"
                "  /history 查看对话记录\n"
                "  /quit    退出程序"
            )

        case _:
            console.print(f"[red]未知命令:{cmd}[/red]")

    return True


def main():
    """Interactive REPL: prompt, dispatch slash commands, stream replies."""
    print_welcome()
    assistant = ClaudeAssistant()

    # Resuming an earlier session: the history file was loaded on construction.
    if len(assistant.history) > 0:
        console.print(
            f"[dim]已加载 {len(assistant.history)} 条历史记录[/dim]\n"
        )

    while True:
        try:
            user_input = Prompt.ask("\n[cyan]你[/cyan]")
        except (KeyboardInterrupt, EOFError):
            # Ctrl+C / Ctrl+D exits the loop cleanly.
            console.print("\n[yellow]已退出[/yellow]")
            break

        if not user_input.strip():
            continue

        if user_input.startswith("/"):
            handle_command(user_input, assistant)
            continue

        # Stream the reply to the terminal, repainting as chunks arrive.
        console.print("\n[green]Claude[/green]:", end="")
        full_reply = ""

        with Live(Text(""), refresh_per_second=20, console=console) as live:
            for chunk in assistant.chat_stream(user_input):
                full_reply += chunk
                live.update(Text(full_reply))

        # If the reply looks like Markdown, print a rendered version after the
        # plain-text stream.  NOTE(review): this shows the reply twice —
        # confirm that is intentional.
        if any(c in full_reply for c in ["#", "```", "**", "- "]):
            console.print()
            console.print(Markdown(full_reply))


if __name__ == "__main__":
    main()

五、Web 服务器(server.py)

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
import json
from pathlib import Path
from assistant import ClaudeAssistant

app = FastAPI(title="Claude Assistant")

# Serve the front-end assets (static/index.html and friends).
app.mount("/static", StaticFiles(directory="static"), name="static")

# One ClaudeAssistant per session_id, created lazily by get_assistant().
sessions: dict[str, ClaudeAssistant] = {}

def get_assistant(session_id: str = "default") -> ClaudeAssistant:
    """Return the per-session assistant, creating it on first access."""
    assistant = sessions.get(session_id)
    if assistant is None:
        assistant = ClaudeAssistant(session_id=session_id)
        sessions[session_id] = assistant
    return assistant


class ChatRequest(BaseModel):
    """Request body for the chat endpoints."""

    message: str  # the user's new message
    session_id: str = "default"  # which conversation to append to


@app.get("/")
async def index():
    """Serve the single-page front end."""
    page = "static/index.html"
    return FileResponse(page)


@app.post("/api/chat/stream")
async def chat_stream(req: ChatRequest):
    """Stream the assistant's reply as Server-Sent Events.

    Emits one ``{"text": ...}`` event per chunk, then a final
    ``{"done": true, "stats": ...}`` event.  On failure an
    ``{"error": ...}`` event is sent before the stream closes, instead of
    silently truncating the stream.
    """
    assistant = get_assistant(req.session_id)

    def generate():
        try:
            for chunk in assistant.chat_stream(req.message):
                yield f"data: {json.dumps({'text': chunk}, ensure_ascii=False)}\n\n"
        except Exception as e:
            # Surface the failure to the client; the front end can show it.
            yield f"data: {json.dumps({'error': str(e)}, ensure_ascii=False)}\n\n"
            return
        # ensure_ascii=False keeps the Chinese stats text readable on the
        # wire, consistent with the chunk events above.
        yield f"data: {json.dumps({'done': True, 'stats': assistant.stats}, ensure_ascii=False)}\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            # Disable proxy buffering (nginx) so chunks flush immediately.
            "X-Accel-Buffering": "no",
        },
    )


@app.post("/api/reset")
async def reset(session_id: str = "default"):
    """Clear the conversation history for the given session."""
    get_assistant(session_id).reset()
    return {"status": "ok", "message": "对话已清空"}


@app.get("/api/history")
async def history(session_id: str = "default"):
    """Return every stored message for the given session."""
    msgs = get_assistant(session_id).history.messages
    return {"messages": msgs}


@app.get("/api/stats")
async def stats(session_id: str = "default"):
    """Return the token-usage summary for the given session."""
    summary = get_assistant(session_id).stats
    return {"stats": summary}


if __name__ == "__main__":
    # Dev entry point; in production run: uvicorn server:app --host ... --port ...
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

六、Web 前端(static/index.html)

<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Claude 本地助手</title>
<style>
  * { box-sizing: border-box; margin: 0; padding: 0; }
  body { font-family: system-ui, sans-serif; background: #f5f5f5;
         display: flex; flex-direction: column; height: 100vh; }
  header { background: #1a1208; color: #fff; padding: 14px 20px;
           display: flex; justify-content: space-between; align-items: center; }
  header h1 { font-size: 16px; font-weight: 600; }
  header button { background: rgba(255,255,255,0.15); color: #fff;
                  border: none; padding: 6px 14px; border-radius: 4px;
                  cursor: pointer; font-size: 13px; }
  #messages { flex: 1; overflow-y: auto; padding: 20px; display: flex;
              flex-direction: column; gap: 16px; }
  .msg { max-width: 75%; padding: 12px 16px; border-radius: 12px;
         font-size: 14px; line-height: 1.7; white-space: pre-wrap; }
  .user { background: #1a6b6b; color: #fff; align-self: flex-end;
          border-bottom-right-radius: 4px; }
  .assistant { background: #fff; color: #1a1208; align-self: flex-start;
               border: 1px solid #e0e0e0; border-bottom-left-radius: 4px; }
  .typing::after { content: "▋"; animation: blink .7s infinite; }
  @keyframes blink { 50% { opacity: 0; } }
  footer { background: #fff; border-top: 1px solid #e0e0e0;
           padding: 14px 20px; display: flex; gap: 10px; }
  #input { flex: 1; padding: 10px 14px; border: 1px solid #ddd;
           border-radius: 8px; font-size: 14px; resize: none; height: 44px;
           max-height: 120px; outline: none; font-family: inherit; }
  #input:focus { border-color: #1a6b6b; }
  #send { background: #1a6b6b; color: #fff; border: none; padding: 10px 20px;
          border-radius: 8px; cursor: pointer; font-size: 14px; font-weight: 500; }
  #send:disabled { opacity: 0.5; cursor: not-allowed; }
  #stats { font-size: 11px; color: #888; padding: 4px 20px;
           background: #fafafa; border-top: 1px solid #f0f0f0; }
</style>
</head>
<body>
<header>
  <h1>Claude 本地助手</h1>
  <button onclick="resetChat()">清空对话</button>
</header>
<div id="messages"></div>
<div id="stats">就绪</div>
<footer>
  <textarea id="input" placeholder="输入消息,Enter 发送,Shift+Enter 换行"></textarea>
  <button id="send" onclick="sendMessage()">发送</button>
</footer>

<script>
// Cached DOM handles and the (single) session identifier.
const messagesEl = document.getElementById("messages");
const inputEl = document.getElementById("input");
const sendBtn = document.getElementById("send");
const statsEl = document.getElementById("stats");
const SESSION_ID = "default";

// Append a chat bubble for `role` ("user" | "assistant"), scroll it into
// view, and return the element so the caller can keep filling it.
function addMessage(role, content = "") {
  const bubble = document.createElement("div");
  bubble.className = `msg ${role}`;
  bubble.textContent = content;
  messagesEl.appendChild(bubble);
  messagesEl.scrollTop = messagesEl.scrollHeight;
  return bubble;
}

// Send the textarea content to the streaming endpoint and render the reply
// incrementally as SSE "data: {json}" events arrive.
async function sendMessage() {
  const text = inputEl.value.trim();
  if (!text || sendBtn.disabled) return;  // ignore empty input / double-send

  inputEl.value = "";
  inputEl.style.height = "44px";
  sendBtn.disabled = true;

  addMessage("user", text);
  const replyEl = addMessage("assistant");
  replyEl.classList.add("typing");  // blinking-cursor effect while streaming

  try {
    const res = await fetch("/api/chat/stream", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ message: text, session_id: SESSION_ID }),
    });

    // Read the SSE body manually, one network chunk at a time.
    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "", fullText = "";

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? "";  // keep any partial line for the next chunk

      for (const line of lines) {
        if (!line.startsWith("data: ")) continue;
        try {
          const data = JSON.parse(line.slice(6));
          if (data.text) {
            fullText += data.text;
            replyEl.textContent = fullText;
            messagesEl.scrollTop = messagesEl.scrollHeight;
          }
          if (data.done) {
            // Final event carries the usage summary.
            statsEl.textContent = data.stats || "";
          }
        } catch {}  // skip malformed JSON lines
      }
    }
  } catch (e) {
    replyEl.textContent = "请求失败:" + e.message;
  } finally {
    replyEl.classList.remove("typing");
    sendBtn.disabled = false;
    inputEl.focus();
  }
}

// Wipe the server-side session, then clear the UI.
async function resetChat() {
  const url = `/api/reset?session_id=${SESSION_ID}`;
  await fetch(url, { method: "POST" });
  messagesEl.innerHTML = "";
  statsEl.textContent = "对话已清空";
}

// Enter sends the message; Shift+Enter inserts a newline.
inputEl.addEventListener("keydown", (e) => {
  if (e.key === "Enter" && !e.shiftKey) {
    e.preventDefault();
    sendMessage();
  }
});

// Auto-grow the textarea with its content, capped at 120px.
inputEl.addEventListener("input", () => {
  inputEl.style.height = "44px";
  inputEl.style.height = Math.min(inputEl.scrollHeight, 120) + "px";
});
</script>
</body>
</html>

七、Docker 部署(Dockerfile)

FROM python:3.11-slim

WORKDIR /app

# Install dependencies first so this layer is cached across code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# NOTE(review): COPY . . will also bake .env into the image unless a
# .dockerignore excludes it — confirm secrets stay out of the build context.
COPY . .

# Ensure runtime directories exist even before the first request.
RUN mkdir -p data/history static

EXPOSE 8000

CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]
# requirements.txt
anthropic
fastapi
uvicorn[standard]
python-dotenv
rich
# 构建镜像
docker build -t claude-assistant .

# 启动容器(挂载 .env 和历史数据目录)
docker run -d \
  --name claude-assistant \
  -p 8000:8000 \
  --env-file .env \
  -v $(pwd)/data:/app/data \
  claude-assistant

# 查看日志
docker logs -f claude-assistant

八、启动与使用

命令行版本

python cli.py

Web 版本

python server.py
# 浏览器访问 http://localhost:8000
✅ 常用扩展方向
这套基础架构可以向多个方向延伸:换掉 HTML 前端改用 React/Vue、加入用户登录和多用户隔离、接入数据库(SQLite/PostgreSQL)替换 JSON 文件存储历史、添加文件上传和文档解析功能、或者接入 RAG 让助手能检索私有知识库。

常见问题

Q:历史对话文件存在哪里?
默认存储在 ./data/history/ 目录,每个 session 对应一个 JSON 文件,文件名为 {session_id}.json。可在 .env 中修改 HISTORY_DIR 路径。

Q:如何修改 AI 角色设定?
修改 .env 中的 SYSTEM_PROMPT 即可,重启服务生效。也可以在代码中给 ClaudeAssistant 传入 system 参数,实现每个 session 使用不同的角色设定。

Q:对话历史太长会有问题吗?
Claude Sonnet 4.6 支持 100 万 token 上下文,日常使用很难触达上限。但如果历史很长,每次请求的费用会增加(因为输入 token 包含全部历史)。可以在 ClaudeAssistant 中加入历史截断逻辑,只保留最近 N 轮对话。

总结

这套架构把 Claude API 封装成了一个可独立运行的 AI 助手:核心逻辑在 assistant.py,命令行和 Web 是两种不同的交互层,互不影响。本地 JSON 文件存储历史对话,无需数据库,部署简单。Docker 镜像一行命令启动,适合个人使用或小团队内部部署。