Overview
With the rise of large language models (LLMs), frontend developers increasingly need to integrate AI capabilities into web applications. This article walks through how to integrate LLM APIs into a frontend project while delivering a smooth user experience.
Basic Architecture Design
Security Considerations
Never expose your API key directly in the frontend! The correct architecture is:
User's browser → your backend service → LLM API
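In a Nuxt 3 project this means the key lives only in server-side configuration. Here is a minimal sketch using runtimeConfig (the config key name is illustrative; the route below reads process.env directly, which works just as well):

// nuxt.config.ts
export default defineNuxtConfig({
  runtimeConfig: {
    // Server-only by default: never bundled into client code
    openaiApiKey: process.env.OPENAI_API_KEY
  }
})

// Inside a server route it could then be read with:
// const { openaiApiKey } = useRuntimeConfig(event)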
Nuxt 3 Server Route Example
// server/api/chat.post.ts
import { OpenAI } from 'openai'

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY
})

export default defineEventHandler(async (event) => {
  const body = await readBody(event)
  const { messages, stream = false } = body

  if (stream) {
    // Streaming response
    const response = await openai.chat.completions.create({
      model: 'gpt-4-turbo-preview',
      messages,
      stream: true
    })

    setHeader(event, 'Content-Type', 'text/event-stream')
    setHeader(event, 'Cache-Control', 'no-cache')
    setHeader(event, 'Connection', 'keep-alive')

    const encoder = new TextEncoder()
    return new ReadableStream({
      async start(controller) {
        for await (const chunk of response) {
          const content = chunk.choices[0]?.delta?.content || ''
          if (content) {
            controller.enqueue(
              encoder.encode(`data: ${JSON.stringify({ content })}\n\n`)
            )
          }
        }
        controller.enqueue(encoder.encode('data: [DONE]\n\n'))
        controller.close()
      }
    })
  }

  // Non-streaming response
  const response = await openai.chat.completions.create({
    model: 'gpt-4-turbo-preview',
    messages
  })

  return response.choices[0].message
})
Handling Streaming Responses on the Frontend
Receiving SSE with fetch and a ReadableStream
EventSource only supports GET requests, so the composable below reads the POST response's SSE stream through fetch and a ReadableStream reader instead.
// composables/useChat.ts
interface Message {
  role: 'user' | 'assistant' | 'system'
  content: string
}

export function useChat() {
  const messages = ref<Message[]>([])
  const isLoading = ref(false)
  const currentResponse = ref('')

  async function sendMessage(content: string) {
    messages.value.push({ role: 'user', content })
    isLoading.value = true
    currentResponse.value = ''

    try {
      const response = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          messages: messages.value,
          stream: true
        })
      })

      const reader = response.body?.getReader()
      const decoder = new TextDecoder()

      while (true) {
        const { done, value } = await reader!.read()
        if (done) break

        // stream: true keeps multi-byte characters intact across chunk boundaries
        const chunk = decoder.decode(value, { stream: true })
        const lines = chunk.split('\n')

        for (const line of lines) {
          if (line.startsWith('data: ')) {
            const data = line.slice(6)
            if (data === '[DONE]') continue
            try {
              const parsed = JSON.parse(data)
              currentResponse.value += parsed.content
            } catch (e) {
              console.error('Parse error:', e)
            }
          }
        }
      }

      messages.value.push({
        role: 'assistant',
        content: currentResponse.value
      })
    } finally {
      isLoading.value = false
    }
  }

  return { messages, isLoading, currentResponse, sendMessage }
}
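Here is a hypothetical usage sketch showing how the composable wires into a chat component (the component name and markup are illustrative):

<!-- components/ChatBox.vue (illustrative) -->
<script setup lang="ts">
const { messages, isLoading, currentResponse, sendMessage } = useChat()
const input = ref('')

async function onSubmit() {
  const text = input.value.trim()
  if (!text || isLoading.value) return
  input.value = ''
  await sendMessage(text)
}
</script>

<template>
  <div>
    <p v-for="(m, i) in messages" :key="i">
      <strong>{{ m.role }}:</strong> {{ m.content }}
    </p>
    <!-- Partial answer while tokens are still streaming in -->
    <p v-if="isLoading">{{ currentResponse }}</p>
    <form @submit.prevent="onSubmit">
      <input v-model="input" placeholder="Ask something..." />
      <button :disabled="isLoading">Send</button>
    </form>
  </div>
</template>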
Typewriter Effect Component
<script setup lang="ts">
interface Props {
  text: string
  speed?: number
}

const props = withDefaults(defineProps<Props>(), {
  speed: 30
})

const displayText = ref('')
const isTyping = ref(false)

watch(() => props.text, async (newText) => {
  isTyping.value = true
  displayText.value = ''
  for (let i = 0; i < newText.length; i++) {
    displayText.value += newText[i]
    await new Promise(r => setTimeout(r, props.speed))
  }
  isTyping.value = false
})
</script>

<template>
  <div class="prose dark:prose-invert">
    <span v-html="displayText"></span>
    <span v-if="isTyping" class="animate-pulse">▌</span>
  </div>
</template>
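Note that the watcher restarts typing from scratch whenever text changes, so this component is best suited to animating a message after it has finished streaming; while tokens are still arriving you may prefer to render currentResponse directly. Assuming the component above is saved as components/TypewriterText.vue (the name is illustrative), usage looks like:

<!-- finishedText is a placeholder for whatever string you want animated -->
<TypewriterText :text="finishedText" :speed="20" />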
Token Control and Cost Optimization
Token Counting Utilities
// utils/tokenizer.ts
import { encode } from 'gpt-tokenizer'

export function countTokens(text: string): number {
  return encode(text).length
}

export function estimateCost(
  inputTokens: number,
  outputTokens: number,
  model: string
): number {
  // Prices in USD per 1K tokens (check current provider pricing)
  const pricing: Record<string, { input: number; output: number }> = {
    'gpt-4-turbo': { input: 0.01, output: 0.03 },
    'gpt-3.5-turbo': { input: 0.0005, output: 0.0015 },
    'claude-3-opus': { input: 0.015, output: 0.075 }
  }
  const rates = pricing[model] || pricing['gpt-3.5-turbo']
  return (inputTokens * rates.input + outputTokens * rates.output) / 1000
}
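A quick illustrative usage, assuming both helpers are imported from utils/tokenizer.ts:

// Rough cost estimate for a single exchange (numbers are illustrative)
const inputTokens = countTokens('Explain SSE in one short paragraph.')
const estimatedOutput = 300 // guess at the reply length
const cost = estimateCost(inputTokens, estimatedOutput, 'gpt-4-turbo')
// cost = (inputTokens * 0.01 + 300 * 0.03) / 1000, in USD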
Context Window Management
// utils/contextManager.ts
import { countTokens } from './tokenizer'

// Mirrors the Message shape used in useChat.ts
interface Message {
  role: string
  content: string
}

const MAX_CONTEXT_TOKENS = 8000
const SYSTEM_PROMPT_TOKENS = 500
const RESPONSE_RESERVE = 2000

export function trimContext(messages: Message[]): Message[] {
  const available = MAX_CONTEXT_TOKENS - SYSTEM_PROMPT_TOKENS - RESPONSE_RESERVE
  let totalTokens = 0
  const result: Message[] = []

  // Keep messages starting from the most recent
  for (let i = messages.length - 1; i >= 0; i--) {
    const tokens = countTokens(messages[i].content)
    if (totalTokens + tokens > available) break
    result.unshift(messages[i])
    totalTokens += tokens
  }

  return result
}
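In the server route, the trimming step sits just before the completion call. A hedged sketch (systemPrompt is an assumed variable holding your system prompt, not part of the earlier example):

// Illustrative: trim history before forwarding it to the model
const trimmed = trimContext(messages)
const response = await openai.chat.completions.create({
  model: 'gpt-4-turbo-preview',
  messages: [{ role: 'system', content: systemPrompt }, ...trimmed]
})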
Error Handling and Retries
// utils/retry.ts
export async function retryWithBackoff<T>(
  fn: () => Promise<T>,
  maxRetries = 3,
  baseDelay = 1000
): Promise<T> {
  let lastError: Error | null = null

  for (let i = 0; i < maxRetries; i++) {
    try {
      return await fn()
    } catch (error: any) {
      lastError = error

      // Error types that should not be retried
      if (error.status === 401 || error.status === 403) {
        throw error
      }

      // Rate limiting: use exponential backoff
      if (error.status === 429) {
        const delay = baseDelay * Math.pow(2, i)
        await new Promise(r => setTimeout(r, delay))
        continue
      }

      // Retry other errors with a linear backoff
      const delay = baseDelay * (i + 1)
      await new Promise(r => setTimeout(r, delay))
    }
  }

  throw lastError
}
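Wrapping the completion call is then a one-liner. An illustrative sketch, assuming the openai client and messages from the server route shown earlier:

// Illustrative: retry the completion call on transient failures
const completion = await retryWithBackoff(() =>
  openai.chat.completions.create({ model: 'gpt-4-turbo-preview', messages })
)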
Request Cancellation
// composables/useChatWithCancel.ts
export function useChatWithCancel() {
  const abortController = ref<AbortController | null>(null)

  async function sendMessage(content: string) {
    // Cancel the previous request
    if (abortController.value) {
      abortController.value.abort()
    }
    abortController.value = new AbortController()

    try {
      const response = await fetch('/api/chat', {
        method: 'POST',
        signal: abortController.value.signal,
        body: JSON.stringify({ messages: [...], stream: true })
      })
      // Handle the response...
    } catch (error: any) {
      if (error.name === 'AbortError') {
        console.log('Request cancelled')
        return
      }
      throw error
    }
  }

  function cancelRequest() {
    abortController.value?.abort()
  }

  return { sendMessage, cancelRequest }
}
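A minimal usage sketch in a component (markup is illustrative):

const { sendMessage, cancelRequest } = useChatWithCancel()
// In the template: <button @click="cancelRequest">Stop generating</button>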
Multi-Model Support
// server/api/chat.post.ts
import { OpenAI } from 'openai'
import Anthropic from '@anthropic-ai/sdk'

const providers = {
  openai: new OpenAI({ apiKey: process.env.OPENAI_API_KEY }),
  anthropic: new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY })
}

export default defineEventHandler(async (event) => {
  const { messages, provider = 'openai', model } = await readBody(event)

  switch (provider) {
    case 'openai':
      return await providers.openai.chat.completions.create({
        model: model || 'gpt-4-turbo-preview',
        messages
      })
    case 'anthropic':
      return await providers.anthropic.messages.create({
        model: model || 'claude-3-opus-20240229',
        max_tokens: 4096,
        messages
      })
    default:
      throw createError({ statusCode: 400, message: 'Unsupported model provider' })
  }
})
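The two SDKs return different response shapes (an OpenAI chat completion vs. an Anthropic message whose content is an array of blocks), so a unified interface usually normalizes the result before returning it. A hedged sketch; the normalized { role, content } shape is an assumption, not part of either SDK:

// Illustrative: normalize both providers to a common { role, content } shape
function normalizeResponse(provider: string, response: any) {
  if (provider === 'anthropic') {
    // Anthropic returns content as an array of blocks; join the text blocks
    const text = response.content
      .filter((block: any) => block.type === 'text')
      .map((block: any) => block.text)
      .join('')
    return { role: 'assistant', content: text }
  }
  // OpenAI chat completion
  return response.choices[0].message
}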
Best Practices Summary
| Aspect | Recommendation |
|---|---|
| Security | Use the API key on the server only |
| User experience | Streaming responses + typewriter effect |
| Cost control | Token counting + context trimming |
| Reliability | Exponential backoff retries + request cancellation |
| Extensibility | Multi-model abstraction + a unified interface |
Conclusion
Integrating an LLM API means balancing security, user experience, and cost control, among other concerns. With the techniques covered in this article, you can build a robust AI chat system.