diff --git a/frontend/src/views/admin/SystemSettings.vue b/frontend/src/views/admin/SystemSettings.vue
index 60dfb33..a63425b 100644
--- a/frontend/src/views/admin/SystemSettings.vue
+++ b/frontend/src/views/admin/SystemSettings.vue
@@ -470,20 +470,68 @@
         title="流式输出"
         description="配置流式响应的输出效果"
       >
-        <div class="flex items-center space-x-2">
-          <Checkbox
-            id="stream-smoothing-enabled"
-            v-model:checked="systemConfig.stream_smoothing_enabled"
-          />
+        <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
+          <div class="md:col-span-2">
+            <div class="flex items-center space-x-2">
+              <Checkbox
+                id="stream-smoothing-enabled"
+                v-model:checked="systemConfig.stream_smoothing_enabled"
+              />
+              <div>
+                <Label
+                  for="stream-smoothing-enabled"
+                  class="cursor-pointer"
+                >
+                  启用平滑输出
+                </Label>
+                <p class="text-xs text-muted-foreground">
+                  将上游返回的大块内容拆分成小块，模拟打字效果
+                </p>
+              </div>
+            </div>
+          </div>
+
           <div>
             <Label
-              for="stream-smoothing-enabled"
-              class="cursor-pointer"
+              for="stream-smoothing-chunk-size"
+              class="block text-sm font-medium"
             >
-              启用平滑输出
+              每块字符数
             </Label>
-            <p class="text-xs text-muted-foreground">
-              自动根据文本长度调整输出速度：短文本逐字符输出（打字感更强），长文本按块输出（避免卡顿）
+            <Input
+              id="stream-smoothing-chunk-size"
+              v-model.number="systemConfig.stream_smoothing_chunk_size"
+              type="number"
+              min="1"
+              max="100"
+              placeholder="20"
+              class="mt-1"
+              :disabled="!systemConfig.stream_smoothing_enabled"
+            />
+            <p class="mt-1 text-xs text-muted-foreground">
+              每次输出的字符数量（1-100）
+            </p>
+          </div>
+
+          <div>
+            <Label
+              for="stream-smoothing-delay-ms"
+              class="block text-sm font-medium"
+            >
+              输出间隔 (毫秒)
+            </Label>
+            <Input
+              id="stream-smoothing-delay-ms"
+              v-model.number="systemConfig.stream_smoothing_delay_ms"
+              type="number"
+              min="1"
+              max="100"
+              placeholder="8"
+              class="mt-1"
+              :disabled="!systemConfig.stream_smoothing_enabled"
+            />
+            <p class="mt-1 text-xs text-muted-foreground">
+              每块之间的延迟毫秒数（1-100）
             </p>
           </div>
         </div>
@@ -838,6 +886,8 @@ interface SystemConfig {
   audit_log_retention_days: number
   // 流式输出
   stream_smoothing_enabled: boolean
+  stream_smoothing_chunk_size: number
+  stream_smoothing_delay_ms: number
 }
 
 const loading = ref(false)
@@ -889,6 +939,8 @@ const systemConfig = ref<SystemConfig>({
   audit_log_retention_days: 30,
   // 流式输出
   stream_smoothing_enabled: false,
+  stream_smoothing_chunk_size: 20,
+  stream_smoothing_delay_ms: 8,
 })
 
 // 计算属性：KB 和 字节 之间的转换
@@ -947,6 +999,8 @@ async function loadSystemConfig() {
       'audit_log_retention_days',
       // 流式输出
       'stream_smoothing_enabled',
+      'stream_smoothing_chunk_size',
+      'stream_smoothing_delay_ms',
     ]
 
     for (const key of configs) {
@@ -1060,6 +1114,16 @@ async function saveSystemConfig() {
         value: systemConfig.value.stream_smoothing_enabled,
         description: '是否启用流式平滑输出'
       },
+      {
+        key: 'stream_smoothing_chunk_size',
+        value: systemConfig.value.stream_smoothing_chunk_size,
+        description: '流式平滑输出每个小块的字符数'
+      },
+      {
+        key: 'stream_smoothing_delay_ms',
+        value: systemConfig.value.stream_smoothing_delay_ms,
+        description: '流式平滑输出每个小块之间的延迟毫秒数'
+      },
     ]
 
     const promises = configItems.map(item =>
diff --git a/src/api/admin/providers/models.py b/src/api/admin/providers/models.py
index 8b84ca6..9b56366 100644
--- a/src/api/admin/providers/models.py
+++ b/src/api/admin/providers/models.py
@@ -9,6 +9,7 @@ from fastapi import APIRouter, Depends, Request
 from sqlalchemy.orm import Session, joinedload
 
 from src.api.base.admin_adapter import AdminApiAdapter
+from src.api.base.models_service import invalidate_models_list_cache
 from src.api.base.pipeline import ApiRequestPipeline
 from src.core.exceptions import InvalidRequestException, NotFoundException
 from src.core.logger import logger
@@ -419,4 +420,8 @@ class AdminBatchAssignModelsToProviderAdapter(AdminApiAdapter):
             f"Batch assigned {len(success)} GlobalModels to provider {provider.name} by {context.user.username}"
         )
 
+        # 清除 /v1/models 列表缓存
+        if success:
+            await invalidate_models_list_cache()
+
         return BatchAssignModelsToProviderResponse(success=success, errors=errors)
diff --git a/src/api/base/models_service.py b/src/api/base/models_service.py
index d4aeb67..ee9cdfa 100644
--- a/src/api/base/models_service.py
+++ b/src/api/base/models_service.py
@@ -55,6 +55,23 @@ async def _set_cached_models(api_formats: list[str], models: list["ModelInfo"])
         logger.warning(f"[ModelsService] 缓存写入失败: {e}")
 
 
+async def invalidate_models_list_cache() -> None:
+    """
+    Delete the cached /v1/models list entry for each known API format.
+
+    Intended to run after a model is created, updated or deleted; delete failures are logged, not raised.
+    """
+    # One cache key per API format, built from the shared key prefix
+    all_formats = ["CLAUDE", "OPENAI", "GEMINI"]
+    for fmt in all_formats:
+        cache_key = f"{_CACHE_KEY_PREFIX}:{fmt}"
+        try:
+            await CacheService.delete(cache_key)
+            logger.debug(f"[ModelsService] 已清除缓存: {cache_key}")
+        except Exception as e:
+            logger.warning(f"[ModelsService] 清除缓存失败 {cache_key}: {e}")
+
+
 @dataclass
 class ModelInfo:
     """统一的模型信息结构"""
diff --git a/src/api/handlers/base/chat_handler_base.py b/src/api/handlers/base/chat_handler_base.py
index 417aeec..ceb10d3 100644
--- a/src/api/handlers/base/chat_handler_base.py
+++ b/src/api/handlers/base/chat_handler_base.py
@@ -32,7 +32,7 @@ from src.api.handlers.base.parsers import get_parser_for_format
 from src.api.handlers.base.request_builder import PassthroughRequestBuilder
 from src.api.handlers.base.response_parser import ResponseParser
 from src.api.handlers.base.stream_context import StreamContext
-from src.api.handlers.base.stream_processor import StreamProcessor, StreamSmoothingConfig
+from src.api.handlers.base.stream_processor import StreamProcessor
 from src.api.handlers.base.stream_telemetry import StreamTelemetryRecorder
 from src.api.handlers.base.utils import build_sse_headers
 from src.config.settings import config
@@ -52,7 +52,6 @@ from src.models.database import (
     User,
 )
 from src.services.provider.transport import build_provider_url
-from src.services.system.config import SystemConfigService
 
 
 
@@ -298,18 +297,11 @@ class ChatHandlerBase(BaseMessageHandler, ABC):
         def update_streaming_status() -> None:
             self._update_usage_to_streaming_with_ctx(ctx)
 
-        # 读取流式平滑输出开关
-        smoothing_enabled = bool(
-            SystemConfigService.get_config(self.db, "stream_smoothing_enabled", False)
-        )
-        smoothing_config = StreamSmoothingConfig(enabled=smoothing_enabled)
-
         # 创建流处理器
         stream_processor = StreamProcessor(
             request_id=self.request_id,
             default_parser=self.parser,
             on_streaming_start=update_streaming_status,
-            smoothing_config=smoothing_config,
         )
 
         # 定义请求函数
@@ -387,11 +379,8 @@ class ChatHandlerBase(BaseMessageHandler, ABC):
                 http_request.is_disconnected,
             )
 
-            # 创建平滑输出流（如果启用）
-            smoothed_stream = stream_processor.create_smoothed_stream(monitored_stream)
-
             return StreamingResponse(
-                smoothed_stream,
+                monitored_stream,
                 media_type="text/event-stream",
                 headers=build_sse_headers(),
                 background=background_tasks,
diff --git a/src/api/handlers/base/cli_handler_base.py b/src/api/handlers/base/cli_handler_base.py
index e4b863e..0d8ca16 100644
--- a/src/api/handlers/base/cli_handler_base.py
+++ b/src/api/handlers/base/cli_handler_base.py
@@ -34,9 +34,7 @@ from src.api.handlers.base.base_handler import (
 from src.api.handlers.base.parsers import get_parser_for_format
 from src.api.handlers.base.request_builder import PassthroughRequestBuilder
 from src.api.handlers.base.stream_context import StreamContext
-from src.api.handlers.base.stream_processor import create_smoothed_stream
 from src.api.handlers.base.utils import build_sse_headers
-from src.services.system.config import SystemConfigService
 
 # 直接从具体模块导入，避免循环依赖
 from src.api.handlers.base.response_parser import (
@@ -354,17 +352,8 @@ class CliMessageHandlerBase(BaseMessageHandler):
             # 创建监控流
             monitored_stream = self._create_monitored_stream(ctx, stream_generator)
 
-            # 创建平滑输出流（如果启用）
-            smoothing_enabled = bool(
-                SystemConfigService.get_config(self.db, "stream_smoothing_enabled", False)
-            )
-            if smoothing_enabled:
-                final_stream = create_smoothed_stream(monitored_stream)
-            else:
-                final_stream = monitored_stream
-
             return StreamingResponse(
-                final_stream,
+                monitored_stream,
                 media_type="text/event-stream",
                 headers=build_sse_headers(),
                 background=background_tasks,
diff --git a/src/api/handlers/base/stream_processor.py b/src/api/handlers/base/stream_processor.py
index 07fbe42..275de5d 100644
--- a/src/api/handlers/base/stream_processor.py
+++ b/src/api/handlers/base/stream_processor.py
@@ -12,7 +12,6 @@
 import asyncio
 import codecs
 import json
-import math
 from dataclasses import dataclass
 from typing import Any, AsyncGenerator, Callable, Optional
 
@@ -37,6 +36,8 @@ class StreamSmoothingConfig:
     """流式平滑输出配置"""
 
     enabled: bool = False
+    chunk_size: int = 20
+    delay_ms: int = 8
 
 
 class StreamProcessor:
@@ -47,13 +48,6 @@ class StreamProcessor:
     从 ChatHandlerBase 中提取，使其职责更加单一。
     """
 
-    # 平滑输出参数
-    CHUNK_SIZE = 20  # 每块字符数
-    MIN_DELAY_MS = 8  # 长文本延迟（毫秒）
-    MAX_DELAY_MS = 15  # 短文本延迟（毫秒）
-    SHORT_TEXT_THRESHOLD = 20  # 短文本阈值
-    LONG_TEXT_THRESHOLD = 100  # 长文本阈值
-
     def __init__(
         self,
         request_id: str,
@@ -548,10 +542,10 @@ class StreamProcessor:
 
                 # 只有内容长度大于 1 才需要平滑处理
                 if content and len(content) > 1 and extractor:
-                    # 计算动态延迟
-                    delay_seconds = self._calculate_delay(len(content))
+                    # 获取配置的延迟
+                    delay_seconds = self._calculate_delay()
 
-                    # 智能拆分
+                    # 拆分内容
                     content_chunks = self._split_content(content)
 
                     for i, sub_content in enumerate(content_chunks):
@@ -610,40 +604,24 @@ class StreamProcessor:
 
         return None, None
 
-    def _calculate_delay(self, text_length: int) -> float:
-        """
-        根据文本长度计算动态延迟（秒）
-
-        短文本使用较大延迟（打字感更强），长文本使用较小延迟（避免卡顿）。
-        中间长度使用对数插值平滑过渡。
-        """
-        if text_length <= self.SHORT_TEXT_THRESHOLD:
-            return self.MAX_DELAY_MS / 1000.0
-        if text_length >= self.LONG_TEXT_THRESHOLD:
-            return self.MIN_DELAY_MS / 1000.0
-
-        # 对数插值：平滑过渡
-        ratio = math.log(text_length / self.SHORT_TEXT_THRESHOLD) / math.log(
-            self.LONG_TEXT_THRESHOLD / self.SHORT_TEXT_THRESHOLD
-        )
-        delay_ms = self.MAX_DELAY_MS - ratio * (self.MAX_DELAY_MS - self.MIN_DELAY_MS)
-        return delay_ms / 1000.0
+    def _calculate_delay(self) -> float:
+        """Return the configured smoothing delay, converted from milliseconds to seconds."""
+        return self.smoothing_config.delay_ms / 1000.0
 
     def _split_content(self, content: str) -> list[str]:
         """
         按块拆分文本
-
-        统一使用 CHUNK_SIZE 拆分，通过动态延迟控制打字感。
         """
+        chunk_size = self.smoothing_config.chunk_size
         text_length = len(content)
 
-        if text_length <= self.CHUNK_SIZE:
+        if text_length <= chunk_size:
             return [content]
 
-        # 统一按块拆分
+        # 按块拆分
         chunks = []
-        for i in range(0, text_length, self.CHUNK_SIZE):
-            chunks.append(content[i : i + self.CHUNK_SIZE])
+        for i in range(0, text_length, chunk_size):
+            chunks.append(content[i : i + chunk_size])
         return chunks
 
     async def _cleanup(
@@ -664,6 +642,8 @@ class StreamProcessor:
 
 async def create_smoothed_stream(
     stream_generator: AsyncGenerator[bytes, None],
+    chunk_size: int = 20,
+    delay_ms: int = 8,
 ) -> AsyncGenerator[bytes, None]:
     """
     独立的平滑流生成函数
@@ -672,11 +652,13 @@ async def create_smoothed_stream(
 
     Args:
         stream_generator: 原始流生成器
+        chunk_size: 每块字符数
+        delay_ms: 每块之间的延迟毫秒数
 
     Yields:
         平滑处理后的响应数据块
     """
-    processor = _LightweightSmoother()
+    processor = _LightweightSmoother(chunk_size=chunk_size, delay_ms=delay_ms)
     async for chunk in processor.smooth(stream_generator):
         yield chunk
 
@@ -688,13 +670,9 @@ class _LightweightSmoother:
     只包含平滑输出所需的最小逻辑，不依赖 StreamProcessor 的其他功能。
     """
 
-    CHUNK_SIZE = 20
-    MIN_DELAY_MS = 8
-    MAX_DELAY_MS = 15
-    SHORT_TEXT_THRESHOLD = 20
-    LONG_TEXT_THRESHOLD = 100
-
-    def __init__(self) -> None:
+    def __init__(self, chunk_size: int = 20, delay_ms: int = 8) -> None:
+        self.chunk_size = chunk_size
+        self.delay_ms = delay_ms
         self._extractors: dict[str, ContentExtractor] = {}
 
     def _get_extractor(self, format_name: str) -> Optional[ContentExtractor]:
@@ -715,21 +693,14 @@ class _LightweightSmoother:
                     return content, extractor
         return None, None
 
-    def _calculate_delay(self, text_length: int) -> float:
-        if text_length <= self.SHORT_TEXT_THRESHOLD:
-            return self.MAX_DELAY_MS / 1000.0
-        if text_length >= self.LONG_TEXT_THRESHOLD:
-            return self.MIN_DELAY_MS / 1000.0
-        ratio = math.log(text_length / self.SHORT_TEXT_THRESHOLD) / math.log(
-            self.LONG_TEXT_THRESHOLD / self.SHORT_TEXT_THRESHOLD
-        )
-        return (self.MAX_DELAY_MS - ratio * (self.MAX_DELAY_MS - self.MIN_DELAY_MS)) / 1000.0
+    def _calculate_delay(self) -> float:
+        return self.delay_ms / 1000.0  # configured milliseconds -> seconds
 
     def _split_content(self, content: str) -> list[str]:
         text_length = len(content)
-        if text_length <= self.CHUNK_SIZE:
+        if text_length <= self.chunk_size:
             return [content]
-        return [content[i : i + self.CHUNK_SIZE] for i in range(0, text_length, self.CHUNK_SIZE)]
+        return [content[i : i + self.chunk_size] for i in range(0, text_length, self.chunk_size)]
 
     async def smooth(
         self, stream_generator: AsyncGenerator[bytes, None]
@@ -772,7 +743,7 @@ class _LightweightSmoother:
                 content, extractor = self._detect_format_and_extract(data)
 
                 if content and len(content) > 1 and extractor:
-                    delay_seconds = self._calculate_delay(len(content))
+                    delay_seconds = self._calculate_delay()
                     content_chunks = self._split_content(content)
 
                     for i, sub_content in enumerate(content_chunks):
diff --git a/src/services/model/service.py b/src/services/model/service.py
index 1be69d1..764701c 100644
--- a/src/services/model/service.py
+++ b/src/services/model/service.py
@@ -13,6 +13,7 @@ from src.core.exceptions import InvalidRequestException, NotFoundException
 from src.core.logger import logger
 from src.models.api import ModelCreate, ModelResponse, ModelUpdate
 from src.models.database import Model, Provider
+from src.api.base.models_service import invalidate_models_list_cache
 from src.services.cache.invalidation import get_cache_invalidation_service
 from src.services.cache.model_cache import ModelCacheService
 
@@ -75,6 +76,10 @@ class ModelService:
                 )
 
             logger.info(f"创建模型成功: provider={provider.name}, model={model.provider_model_name}, global_model_id={model.global_model_id}")
+
+            # 清除 /v1/models 列表缓存
+            asyncio.create_task(invalidate_models_list_cache())
+
             return model
 
         except IntegrityError as e:
@@ -197,6 +202,9 @@ class ModelService:
                 cache_service = get_cache_invalidation_service()
                 cache_service.on_model_changed(model.provider_id, model.global_model_id)
 
+            # 清除 /v1/models 列表缓存
+            asyncio.create_task(invalidate_models_list_cache())
+
             logger.info(f"更新模型成功: id={model_id}, 最终 supports_vision: {model.supports_vision}, supports_function_calling: {model.supports_function_calling}, supports_extended_thinking: {model.supports_extended_thinking}")
             return model
         except IntegrityError as e:
@@ -261,6 +269,9 @@ class ModelService:
                 cache_service = get_cache_invalidation_service()
                 cache_service.on_model_changed(cache_info["provider_id"], cache_info["global_model_id"])
 
+            # 清除 /v1/models 列表缓存
+            asyncio.create_task(invalidate_models_list_cache())
+
             logger.info(f"删除模型成功: id={model_id}, provider_model_name={cache_info['provider_model_name']}, "
                 f"global_model_id={cache_info['global_model_id'][:8] if cache_info['global_model_id'] else 'None'}...")
         except Exception as e:
@@ -295,6 +306,9 @@ class ModelService:
             cache_service = get_cache_invalidation_service()
             cache_service.on_model_changed(model.provider_id, model.global_model_id)
 
+        # 清除 /v1/models 列表缓存
+        asyncio.create_task(invalidate_models_list_cache())
+
         status = "可用" if is_available else "不可用"
         logger.info(f"更新模型可用状态: id={model_id}, status={status}")
         return model
@@ -358,6 +372,9 @@ class ModelService:
                 for model in created_models:
                     db.refresh(model)
                 logger.info(f"批量创建 {len(created_models)} 个模型成功")
+
+                # 清除 /v1/models 列表缓存
+                asyncio.create_task(invalidate_models_list_cache())
             except IntegrityError as e:
                 db.rollback()
                 logger.error(f"批量创建模型失败: {str(e)}")
diff --git a/src/services/system/config.py b/src/services/system/config.py
index 3ff9e32..cfa9210 100644
--- a/src/services/system/config.py
+++ b/src/services/system/config.py
@@ -83,6 +83,14 @@ class SystemConfigService:
             "value": False,
             "description": "是否启用流式平滑输出，自动根据文本长度调整输出速度",
         },
+        "stream_smoothing_chunk_size": {
+            "value": 20,
+            "description": "流式平滑输出每个小块的字符数",
+        },
+        "stream_smoothing_delay_ms": {
+            "value": 8,
+            "description": "流式平滑输出每个小块之间的延迟毫秒数",
+        },
     }
 
     @classmethod