A Complete Guide to Frontend Integration of LLM APIs: From Basic Calls to Streaming Responses

HTMLPAGE Team
18 min read

A detailed look at integrating large language model APIs into frontend projects, covering streaming responses, typewriter effects, and cost optimization

#LLM #OpenAI API #Streaming Responses #Frontend Integration

Overview

As large language models (LLMs) become mainstream, frontend developers increasingly need to bring AI capabilities into web applications. This article walks through how to integrate LLM APIs into a frontend project and deliver a smooth user experience.

Basic Architecture Design

Security Considerations

Never expose your API key directly in the frontend! The correct architecture is:

User's browser → your backend service → LLM API
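
In Nuxt 3, one way to enforce this is to keep the key in server-only runtime config so it is never bundled into client code. A minimal sketch (the config key name openaiApiKey is illustrative):

// nuxt.config.ts
export default defineNuxtConfig({
  runtimeConfig: {
    // Top-level keys are server-only; only runtimeConfig.public reaches the client
    openaiApiKey: process.env.OPENAI_API_KEY
  }
})

A server route can then read it via useRuntimeConfig(event).openaiApiKey instead of touching process.env directly.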

Nuxt 3 Server Route Example

// server/api/chat.post.ts
import { OpenAI } from 'openai'

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY
})

export default defineEventHandler(async (event) => {
  const body = await readBody(event)
  const { messages, stream = false } = body

  if (stream) {
    // Streaming response: proxy the OpenAI stream back as server-sent events
    const response = await openai.chat.completions.create({
      model: 'gpt-4-turbo-preview',
      messages,
      stream: true
    })

    setHeader(event, 'Content-Type', 'text/event-stream')
    setHeader(event, 'Cache-Control', 'no-cache')
    setHeader(event, 'Connection', 'keep-alive')

    const encoder = new TextEncoder()

    return new ReadableStream({
      async start(controller) {
        try {
          for await (const chunk of response) {
            const content = chunk.choices[0]?.delta?.content || ''
            if (content) {
              controller.enqueue(
                encoder.encode(`data: ${JSON.stringify({ content })}\n\n`)
              )
            }
          }
          controller.enqueue(encoder.encode('data: [DONE]\n\n'))
          controller.close()
        } catch (err) {
          // Propagate upstream errors instead of leaving the client hanging
          controller.error(err)
        }
      }
    })
  }

  // Non-streaming response
  const response = await openai.chat.completions.create({
    model: 'gpt-4-turbo-preview',
    messages
  })

  return response.choices[0].message
})

Handling Streaming Responses on the Frontend

Reading the SSE Stream with fetch

Note that EventSource only supports GET requests; since we POST a JSON body, we read the server-sent events manually with fetch and a ReadableStream reader:

// composables/useChat.ts
interface Message {
  role: 'user' | 'assistant'
  content: string
}

export function useChat() {
  const messages = ref<Message[]>([])
  const isLoading = ref(false)
  const currentResponse = ref('')

  async function sendMessage(content: string) {
    messages.value.push({ role: 'user', content })
    isLoading.value = true
    currentResponse.value = ''

    try {
      const response = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          messages: messages.value,
          stream: true
        })
      })

      const reader = response.body?.getReader()
      const decoder = new TextDecoder()
      let buffer = ''

      while (true) {
        const { done, value } = await reader!.read()
        if (done) break

        // { stream: true } keeps multi-byte characters intact across chunks
        buffer += decoder.decode(value, { stream: true })

        // An SSE event may be split across chunks, so keep the trailing
        // partial line in the buffer until the rest arrives
        const lines = buffer.split('\n')
        buffer = lines.pop() ?? ''

        for (const line of lines) {
          if (line.startsWith('data: ')) {
            const data = line.slice(6)
            if (data === '[DONE]') continue
            
            try {
              const parsed = JSON.parse(data)
              currentResponse.value += parsed.content
            } catch (e) {
              console.error('Parse error:', e)
            }
          }
        }
      }

      messages.value.push({
        role: 'assistant',
        content: currentResponse.value
      })
    } finally {
      isLoading.value = false
    }
  }

  return { messages, isLoading, currentResponse, sendMessage }
}
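
A minimal sketch of wiring the composable into a component (the markup here is illustrative, not from the composable itself):

<script setup lang="ts">
const { messages, isLoading, currentResponse, sendMessage } = useChat()
const input = ref('')

async function onSubmit() {
  const text = input.value.trim()
  if (!text || isLoading.value) return
  input.value = ''
  await sendMessage(text)
}
</script>

<template>
  <div>
    <div v-for="(m, i) in messages" :key="i">{{ m.role }}: {{ m.content }}</div>
    <!-- Show the partial answer while the stream is still arriving -->
    <p v-if="isLoading">{{ currentResponse }}</p>
    <form @submit.prevent="onSubmit">
      <input v-model="input" :disabled="isLoading" />
    </form>
  </div>
</template>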

Typewriter Effect Component

<script setup lang="ts">
interface Props {
  text: string
  speed?: number
}

const props = withDefaults(defineProps<Props>(), {
  speed: 30
})

const displayText = ref('')
const isTyping = ref(false)

// Generation counter: if text changes mid-animation, the stale loop stops
let generation = 0

watch(() => props.text, async (newText) => {
  const current = ++generation
  isTyping.value = true
  displayText.value = ''

  for (let i = 0; i < newText.length; i++) {
    if (current !== generation) return // a newer animation took over
    displayText.value += newText[i]
    await new Promise(r => setTimeout(r, props.speed))
  }

  isTyping.value = false
})
</script>

<template>
  <div class="prose dark:prose-invert">
    <!-- Plain interpolation: v-html with model output is an XSS risk -->
    <span>{{ displayText }}</span>
    <span v-if="isTyping" class="animate-pulse">▌</span>
  </div>
</template>
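
A usage sketch. The watcher restarts from an empty string every time text changes, so the component suits complete replies; for live streaming you would render currentResponse directly instead. (finishedReply below is a hypothetical ref holding one completed answer, and the component file is assumed to be components/TypeWriter.vue.)

<template>
  <TypeWriter :text="finishedReply" :speed="20" />
</template>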

Token Control and Cost Optimization

Token Counting Utility

// utils/tokenizer.ts
import { encode } from 'gpt-tokenizer'

export function countTokens(text: string): number {
  return encode(text).length
}

export function estimateCost(
  inputTokens: number,
  outputTokens: number,
  model: string
): number {
  // USD per 1K tokens; provider pricing changes over time, so verify current rates
  const pricing: Record<string, { input: number; output: number }> = {
    'gpt-4-turbo': { input: 0.01, output: 0.03 },
    'gpt-3.5-turbo': { input: 0.0005, output: 0.0015 },
    'claude-3-opus': { input: 0.015, output: 0.075 }
  }

  const rates = pricing[model] || pricing['gpt-3.5-turbo']
  return (inputTokens * rates.input + outputTokens * rates.output) / 1000
}
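
A quick sanity check of the arithmetic, with illustrative token counts:

// 1,200 input tokens and 300 output tokens on gpt-4-turbo:
// (1200 * 0.01 + 300 * 0.03) / 1000 = (12 + 9) / 1000 = 0.021 USD
const cost = estimateCost(1200, 300, 'gpt-4-turbo')
console.log(cost.toFixed(3)) // "0.021"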

Context Window Management

// utils/contextManager.ts
import { countTokens } from './tokenizer'

// Minimal message shape, matching the composable above
interface Message {
  role: 'user' | 'assistant' | 'system'
  content: string
}

const MAX_CONTEXT_TOKENS = 8000
const SYSTEM_PROMPT_TOKENS = 500
const RESPONSE_RESERVE = 2000

export function trimContext(messages: Message[]): Message[] {
  const available = MAX_CONTEXT_TOKENS - SYSTEM_PROMPT_TOKENS - RESPONSE_RESERVE
  
  let totalTokens = 0
  const result: Message[] = []

  // Keep messages starting from the most recent
  for (let i = messages.length - 1; i >= 0; i--) {
    const tokens = countTokens(messages[i].content)
    if (totalTokens + tokens > available) break
    
    result.unshift(messages[i])
    totalTokens += tokens
  }

  return result
}
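
On the server, this typically runs right before the completion call, for example in the chat route shown earlier (the import path assumes Nuxt's ~ source alias):

// server/api/chat.post.ts
import { trimContext } from '~/utils/contextManager'

const trimmed = trimContext(messages)
const response = await openai.chat.completions.create({
  model: 'gpt-4-turbo-preview',
  messages: trimmed
})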

Error Handling and Retries

// utils/retry.ts
export async function retryWithBackoff<T>(
  fn: () => Promise<T>,
  maxRetries = 3,
  baseDelay = 1000
): Promise<T> {
  let lastError: Error | null = null

  for (let i = 0; i < maxRetries; i++) {
    try {
      return await fn()
    } catch (error: any) {
      lastError = error
      
      // Auth errors are not retryable
      if (error.status === 401 || error.status === 403) {
        throw error
      }

      // Rate limited: back off exponentially
      if (error.status === 429) {
        const delay = baseDelay * Math.pow(2, i)
        await new Promise(r => setTimeout(r, delay))
        continue
      }

      // Retry other errors with a linear delay
      const delay = baseDelay * (i + 1)
      await new Promise(r => setTimeout(r, delay))
    }
  }

  throw lastError
}
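
Wrapping the upstream call is then a one-liner:

const completion = await retryWithBackoff(() =>
  openai.chat.completions.create({
    model: 'gpt-4-turbo-preview',
    messages
  })
)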

Request Cancellation

// composables/useChatWithCancel.ts
export function useChatWithCancel() {
  const abortController = ref<AbortController | null>(null)

  async function sendMessage(content: string) {
    // Cancel any request that is still in flight
    if (abortController.value) {
      abortController.value.abort()
    }

    abortController.value = new AbortController()

    try {
      const response = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        signal: abortController.value.signal,
        body: JSON.stringify({
          messages: [{ role: 'user', content }],
          stream: true
        })
      })
      // handle the streamed response as in useChat...
    } catch (error: any) {
      if (error.name === 'AbortError') {
        console.log('Request cancelled')
        return
      }
      throw error
    }
  }

  function cancelRequest() {
    abortController.value?.abort()
  }

  return { sendMessage, cancelRequest }
}

Multi-Model Support

// server/api/chat.post.ts
import { OpenAI } from 'openai'
import Anthropic from '@anthropic-ai/sdk'

const providers = {
  openai: new OpenAI({ apiKey: process.env.OPENAI_API_KEY }),
  anthropic: new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY })
}

export default defineEventHandler(async (event) => {
  const { messages, provider = 'openai', model } = await readBody(event)

  switch (provider) {
    case 'openai':
      return await providers.openai.chat.completions.create({
        model: model || 'gpt-4-turbo-preview',
        messages
      })

    case 'anthropic':
      // Note: Anthropic expects the system prompt via a top-level `system`
      // field, not as a message with role 'system'
      return await providers.anthropic.messages.create({
        model: model || 'claude-3-opus-20240229',
        max_tokens: 4096,
        messages
      })

    default:
      throw createError({ statusCode: 400, message: 'Unsupported model provider' })
  }
})
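
The two SDKs also return different response shapes (OpenAI puts the reply text in choices[0].message.content, Anthropic in an array of content blocks), so a truly unified interface usually adds a small normalization step. A minimal sketch (NormalizedReply is a hypothetical type, not part of either SDK):

interface NormalizedReply {
  content: string
  model: string
}

function normalizeReply(provider: string, res: any): NormalizedReply {
  if (provider === 'anthropic') {
    // Anthropic: text lives in content blocks of type 'text'
    const text = (res.content ?? [])
      .filter((block: any) => block.type === 'text')
      .map((block: any) => block.text)
      .join('')
    return { content: text, model: res.model }
  }
  // OpenAI: text lives in the first choice's message
  return { content: res.choices?.[0]?.message?.content ?? '', model: res.model }
}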

Best Practices Summary

- Security: use the API key on the server side only
- User experience: streaming responses plus a typewriter effect
- Cost control: token counting plus context trimming
- Reliability: exponential-backoff retries plus request cancellation
- Extensibility: multi-model abstraction plus a unified interface

Conclusion

Integrating an LLM API means balancing security, user experience, and cost control. With the techniques covered in this article, you can build a robust AI chat system.

