常见YAML配置内容

基础应用配置

server:
  port: 8080  # HTTP listen port
  servlet:
    # Must be nested under server.servlet; a root-level "servlet" key is not
    # a Spring Boot property and would be ignored.
    context-path: /api  # context path

spring:
  application:
    name: order-service  # application name
  profiles:
    active: dev  # active profile (dev/test/prod)

数据源配置(如 MySQL + HikariCP)

# JDBC data source with HikariCP pool settings.
spring:
  datasource:
    url: jdbc:mysql://db-host:3306/mydb
    username: admin
    password: ${DB_PASSWORD}  # resolved from the DB_PASSWORD environment variable
    driver-class-name: com.mysql.cj.jdbc.Driver
    hikari:
      maximum-pool-size: 20
      connection-timeout: 30000

ORM框架配置(如 JPA/Hibernate)

# JPA / Hibernate settings.
spring:
  jpa:
    hibernate:
      ddl-auto: validate  # validate entities against the table structure
    show-sql: false
    properties:
      hibernate:
        dialect: org.hibernate.dialect.MySQL8Dialect
        format_sql: true

缓存配置(如 Redis)

# Redis-backed cache; the password comes from the REDIS_PWD placeholder.
spring:
  cache:
    type: redis
  redis:
    host: redis-host
    port: 6379
    password: ${REDIS_PWD}
    lettuce:
      pool:
        max-active: 8

消息队列(如 RabbitMQ/Kafka)

# RabbitMQ connection; the password comes from the RABBIT_PWD placeholder.
spring:
  rabbitmq:
    host: mq-host
    port: 5672
    username: user
    password: ${RABBIT_PWD}
    virtual-host: /prod

安全配置(如 OAuth2/JWT)

security:
  jwt:
    secret: ${JWT_SECRET}  # resolved from the JWT_SECRET environment variable
    expiration: 86400000  # token lifetime in milliseconds

日志配置(Logback/Log4j2)

logging:
  level:
    root: INFO
    com.example.order: DEBUG  # per-package log level
  file:
    name: logs/app.log
  pattern:
    console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n"

自定义业务配置

# Custom application properties (not Spring Boot built-ins).
app:
  payment:
    gateway-url: https://pay.gateway/api
    timeout-ms: 5000
  feature-flags:
    new-checkout-enabled: true  # feature toggle

云服务集成(如 AWS S3)

# AWS S3 integration; both credentials are resolved from placeholders.
cloud:
  aws:
    s3:
      bucket-name: my-company-bucket
    credentials:
      access-key: ${AWS_ACCESS_KEY}
      secret-key: ${AWS_SECRET_KEY}

加载过程:

  • Spring Boot启动时,会自动加载application.yml(或application.properties)文件
  • 如果存在Profile特定的配置文件(如application-dev.yml),则会根据激活的Profile加载对应的配置文件
  • 配置内容会被绑定到Spring Environment对象中,可以通过@Value注解或@ConfigurationProperties注解来访问
  • 启动时指定Profile:java -jar app.jar --spring.profiles.active=prod
@Component
public class MyService{
	// The configuration value can be injected directly into a bean field.
	@Value("${external.service.api-key}")
	private String apiKey;
}
// A group of related properties can be bound to a single POJO.
/**
 * Holds the properties under the "external.service" prefix.
 * The accessors are required: setters are how values are bound onto this
 * class, and callers read the values through the getters.
 */
@Configuration
@ConfigurationProperties(prefix = "external.service")
public class ExternalServiceConfig{
	private String apiKey;
	private String url;

	public String getApiKey() {
		return apiKey;
	}

	public void setApiKey(String apiKey) {
		this.apiKey = apiKey;
	}

	public String getUrl() {
		return url;
	}

	public void setUrl(String url) {
		this.url = url;
	}
}


// Then inject the configuration class wherever it is needed:
@Service
public class SomeService {
    private final ExternalServiceConfig config;

    // Constructor injection of the bound configuration object.
    public SomeService(ExternalServiceConfig config) {
        this.config = config;
    }

    public void callApi() {
        // Reads the bound value through the configuration object's getter.
        String apiKey = config.getApiKey();
        // ...
    }
}

项目实战

# Core Spring Boot prefix: everything below is a standard Spring Boot property
# or is bound through Spring Boot auto-configuration.
spring:
  application:
    # Application name, used for service discovery, logging, etc.
    name: ai-orchestration-service

  # Data source
  datasource:
    url: 'jdbc:mysql://${DB_HOST:localhost}:3306/ai_orchestration'
    username: '${DB_USER:ai_admin}'
    password: '${DB_PASSWORD:changeit}'
    hikari:
      maximum-pool-size: 20
      minimum-idle: 5
      connection-timeout: 30000

  # Redis
  redis:
    host: '${REDIS_HOST:localhost}'
    port: '${REDIS_PORT:6379}'
    password: '${REDIS_PASSWORD:}'
    lettuce:
      pool:
        max-active: 20
        max-idle: 10
        min-idle: 5

  # Async task execution
  task:
    execution:
      pool:
        core-size: 10
        max-size: 50
        queue-capacity: 1000

# AI orchestration settings. This is a custom prefix that normally maps to a
# project-defined configuration class, e.g. an AiOrchestrationProperties class
# annotated with @ConfigurationProperties(prefix = "ai.orchestration"), so that
# Spring Boot binds everything under ai.orchestration onto its fields.
ai:
  orchestration:
    # Execution policy
    execution:
      default-timeout-ms: 30000
      max-retries: 3
      backoff-delay-ms: 1000
      fallback-enabled: true

    # Model routing
    routing:
      cache-enabled: true
      cache-ttl: 300000  # 5 minutes
      strategy: loadBalancing
      cost-weight: 0.3
      performance-weight: 0.4
      accuracy-weight: 0.3

    # Monitoring
    monitoring:
      enabled: true
      metrics-prefix: ai.orchestration
      export-interval: 60000  # 1 minute
      slow-query-threshold-ms: 5000
      error-threshold-percent: 10

    # Security
    security:
      input-validation: true
      output-sanitization: true
      max-input-length: 10000
      # Single quotes keep the backslashes literal; "\d" inside double quotes
      # is an invalid YAML escape sequence and fails to parse.
      sensitive-patterns:
        - '\d{4}-\d{4}-\d{4}-\d{4}'  # credit card number
        - '\d{18}'                    # national ID number
        - '\d{11}'                    # mobile phone number

    # Compliance
    compliance:
      audit-enabled: true
      audit-retention-days: 365
      data-residency: CHINA
      model-blacklist:
        - gpt-4  # models blocked for compliance reasons
      content-filter:
        enabled: true
        provider: local
        categories:
          - hate
          - violence
          - financial_risk
          - privacy_violation

# Circuit breaker settings. "resilience4j" is the prefix of the Resilience4j
# library and covers circuit breakers, retries and time limiters.
# Resilience4j ships auto-configuration, so in Spring Boot these properties
# are what the instances are created from.
resilience4j:
  circuitbreaker:
    instances:
      aiOrchestration:
        sliding-window-size: 100
        failure-rate-threshold: 50
        wait-duration-in-open-state: '60s'
        permitted-number-of-calls-in-half-open-state: 10
        automatic-transition-from-open-to-half-open-enabled: true
        record-exceptions:
          - java.io.IOException
          - java.util.concurrent.TimeoutException
          - com.bank.ai.orchestration.exception.OrchestrationException

  retry:
    instances:
      aiModelRetry:
        max-attempts: 3
        wait-duration: '1s'
        retry-exceptions:
          - java.io.IOException
          - java.util.concurrent.TimeoutException

  timelimiter:
    instances:
      aiServiceTimeout:
        timeout-duration: '30s'

# Monitoring. "management" is the Spring Boot Actuator prefix, used to
# configure monitoring endpoints, metrics export, etc.
management:
  endpoints:
    web:
      exposure:
        include: "health,metrics,prometheus,circuitbreakers"
  metrics:
    export:
      prometheus:
        enabled: true
    tags:
      # Both tag values are resolved from placeholders.
      application: ${spring.application.name}
      environment: ${ENVIRONMENT:dev}
  endpoint:
    health:
      show-details: always
    metrics:
      enabled: true

# Logging: log levels, log file and rotation, output patterns.
logging:
  level:
    com.bank.ai.orchestration: DEBUG
    org.springframework: INFO
  file:
    name: logs/ai-orchestration.log
  logback:
    rollingpolicy:
      # Rotation settings live here. The older logging.file.max-size and
      # logging.file.max-history keys are deprecated aliases of these two,
      # so they are not repeated under logging.file.
      max-file-size: 100MB
      max-history: 30
  pattern:
    console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n"
    file: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n"

Spring基础配置

# Excerpt: the application name property read by the code below.
spring:
  application:
    name: ai-orchestration-service

代码读取方式:

// 1. Read the property with the @Value annotation
@Value("${spring.application.name}")
private String appName;

// 2. Read the property through the Environment interface
@Autowired
private Environment env;
String appName = env.getProperty("spring.application.name");

数据源配置

# The data source keys are only picked up under the "spring" prefix
# (spring.datasource.*); a root-level "datasource" key is not bound.
spring:
  datasource:
    url: jdbc:mysql://${DB_HOST:localhost}:3306/ai_orchestration
    username: ${DB_USER:ai_admin}
    password: ${DB_PASSWORD:changeit}

工作原理:

  1. Spring Boot 自动配置 DataSource Bean
  2. HikariCP 连接池自动初始化
  3. ${} 语法支持环境变量覆盖(如:DB_HOST=mysql-server)
/**
 * Shows that the configured data source can be injected without any extra wiring.
 */
@Service
public class DataService{

	// The DataSource built from the spring.datasource.* properties.
	@Autowired
	private DataSource dataSource;

	// JdbcTemplate backed by the configured data source.
	// (Was "@Autowored", which does not compile.)
	@Autowired
	private JdbcTemplate jdbcTemplate;
}
/**
 * Writes values to Redis using the TTL configured under ai.orchestration.routing.cache-ttl.
 */
@Service
public class CacheService{
	// Auto-configured from the spring.redis.* properties.
	@Autowired
	private RedisTemplate<String,Object> redisTemplate;

	// Source of the cache TTL; the original referenced "properties" without declaring it.
	@Autowired
	private AiOrchestrationProperties properties;

	/**
	 * Stores {@code value} under {@code key}, expiring after the configured cache TTL.
	 * The original method used undeclared "key" and "value" variables, so they are
	 * now explicit parameters.
	 *
	 * @param key   Redis key
	 * @param value value to cache
	 */
	public void setValue(String key, Object value){
		long ttlMillis = properties.getRouting().getCacheTtl();
		redisTemplate.opsForValue().set(key, value, ttlMillis, TimeUnit.MILLISECONDS);
	}
}
/**
 * Example of an asynchronous method executed on the configured task executor.
 */
@Service
public class AsyncService{

	/**
	 * Runs asynchronously on the pool configured under spring.task.execution.pool.*.
	 *
	 * @return a future holding the processing result
	 */
	@Async
	public CompletableFuture<String> processAsync(){
		// Asynchronous work goes here; the method must return a future, which the
		// original body omitted (compile error).
		String result = "";
		return CompletableFuture.completedFuture(result);
	}
}

自定义配置(AI 编排)

# Excerpt: custom property read by the configuration class below.
ai:
  orchestration:
    execution:
      default-timeout-ms: 30000

代码读取方式:

//推荐使用:@ConfigurationProperties
@Configuration
@ConfigurationProperties(prefix = "ai.orchestration")
@Data
public class AiOrchestrationProperties{
	
	private Execution execution;
	private Routing routing
	private Monitoring monitoring;
	
	@Data
	public static class Execution{
		private long defaultTimeoutMs;
		private int maxRetries;
		private long backoffDelayMs;
		private boolean fallbackEnabled;
	}

	//其他配置类……略
	
}
@Service
public class OrchestrationService{

	// Bound configuration object for the ai.orchestration prefix.
	@Autowired
	private AiOrchestrationProperties properties;
	
	public void execute(){
		// Reads the default-timeout-ms value through the typed getters.
		long timeout = properties.getExecution().getDefaultTimeoutMs();
	}
}

自定义业务配置

# Custom business properties; app.payment.* is bound by PaymentConfig below.
app:
  payment:
    gateway-url: https://pay.gateway/api
    timeout-ms: 5000
  feature-flags:
    new-checkout-enabled: true  # feature toggle
// Validate the configuration with JSR-303 constraints.
/**
 * Properties under the "app.payment" prefix, validated at binding time.
 * Setters are required for the values to be bound onto the fields; the original
 * class had none, so the constraints would have been checked against
 * unset fields.
 * NOTE(review): the class must also be registered as a bean (for example through
 * @EnableConfigurationProperties) — not visible in this snippet, confirm.
 */
@ConfigurationProperties(prefix = "app.payment")
@Validated
public class PaymentConfig{
	@NotBlank
	private String gatewayUrl;

	@Min(1000)
	private int timeoutMs;

	public String getGatewayUrl() {
		return gatewayUrl;
	}

	public void setGatewayUrl(String gatewayUrl) {
		this.gatewayUrl = gatewayUrl;
	}

	public int getTimeoutMs() {
		return timeoutMs;
	}

	public void setTimeoutMs(int timeoutMs) {
		this.timeoutMs = timeoutMs;
	}
}

第三方Resilience4j 熔断器配置

# Excerpt: settings for the circuit breaker instance named "aiOrchestration".
resilience4j:
  circuitbreaker:
    instances:
      aiOrchestration:
        sliding-window-size: 100

自动生效机制

// The Resilience4j Spring Boot starter builds a CircuitBreakerRegistry from the
// resilience4j.circuitbreaker.instances.* properties.
@Bean
public CircuitBreaker aiOrchestrationCircuitBreaker(
		io.github.resilience4j.circuitbreaker.CircuitBreakerRegistry circuitBreakerRegistry){
	// Look the instance up in the registry so the settings under
	// resilience4j.circuitbreaker.instances.aiOrchestration are applied.
	// CircuitBreaker.ofDefaults(...) would ignore them and use library defaults.
	return circuitBreakerRegistry.circuitBreaker("aiOrchestration");
}

// Use the @CircuitBreaker annotation to guard a method with the named instance.
@CircuitBreaker(name = "aiOrchestration")
public Response callExternalService(){
	// Business logic goes here. A method declared to return Response must return
	// or throw; the original empty body did not compile.
	throw new UnsupportedOperationException("callExternalService is not implemented yet");
}

监控配置(Spring Boot Actuator)

# Excerpt: Actuator endpoints exposed over HTTP.
management:
  endpoints:
    web:
      exposure:
        include: "health,metrics,prometheus"

自动生效:

  • 访问 /actuator/health 查看健康状态
  • 访问 /actuator/metrics 查看指标
  • Prometheus 自动抓取 /actuator/prometheus

日志配置

# Excerpt: package log level and log file location.
logging:
  level:
    com.bank.ai.orchestration: DEBUG
  file:
    name: logs/ai-orchestration.log

生效方式:Spring Boot 自动配置 Logback/Log4j2

如何加载独立的yml文件

模型注册配置(model-registry.yaml)

  • 该文件定义了几个模型列表和路由策略,
  • 非SpringBoot默认配置文件(application.yml),需要使用@ConfigurationProperties配合@PropertySource
  • @PropertySource默认不支持YAML,Spring Boot也没有提供现成的YamlPropertySourceFactory,需要自己实现PropertySourceFactory并通过factory属性指定(见下文自定义的YamlPropertySourceFactory);Spring Boot 2.4及以上版本也可以改用spring.config.import导入额外的YAML文件

处理方式:
方式一:将配置放在application.yml中,作为其中的一部分。使用SpringBoot的@ConfigurationProperties默认支持从application.yml读取
方式二:对于独立的yml文件,通过@ConfigurationProperties和@PropertySource结合,并自定义一个YAML解析器(例如使用SnakeYAML)
方式三:使用@ConfigurationProperties和@Configuration,然后在@Configuration中使用@Bean方法读取yml文件

# Registered models; each list entry describes one model.
models:
  - modelId: gpt-4-1106-preview   # model configuration data structure
    modelType: GPT_4
    provider: OPENAI
    capabilities:
      - TEXT_GENERATION
      - CODE_GENERATION
      - COMPLEX_REASONING
    costPerToken:
      input: 0.00003
      output: 0.00006
    performance:
      avgLatency: 1200
      p95Latency: 2500
      accuracy: 0.95
    limits:
      maxTokens: 128000
      requestsPerMinute: 5000
    compliance:
      dataResidency: GLOBAL
      privacyCertifications:
        - SOC2
        - GDPR
    endpoints:
      primary: https://api.openai.com/v1/chat/completions
      backup: https://api.openai-backup.com/v1/chat/completions
    healthCheck:
      path: /health
      interval: 60000
      timeout: 5000
  
  # Anthropic entry; only a primary endpoint is given, with no backup or healthCheck.
  - modelId: claude-3-opus
    modelType: CLAUDE_3
    provider: ANTHROPIC
    capabilities:
      - TEXT_GENERATION
      - LONG_CONTEXT
      - CREATIVE_WRITING
    costPerToken:
      input: 0.000075
      output: 0.0003
    performance:
      avgLatency: 2000
      p95Latency: 4000
      accuracy: 0.96
    limits:
      maxTokens: 200000
      requestsPerMinute: 1000
    compliance:
      dataResidency: GLOBAL
      privacyCertifications:
        - SOC2
    endpoints:
      primary: https://api.anthropic.com/v1/messages
  
  # Baidu entry with dataResidency CHINA; only a primary endpoint is given.
  - modelId: wenxin-ernie-4.0
    modelType: ERNIE_4
    provider: BAIDU
    capabilities:
      - TEXT_GENERATION
      - CHINESE_OPTIMIZED
      - FINANCIAL_DOMAIN
    costPerToken:
      input: 0.00002
      output: 0.00004
    performance:
      avgLatency: 800
      p95Latency: 1500
      accuracy: 0.92
    limits:
      maxTokens: 32000
      requestsPerMinute: 10000
    compliance:
      dataResidency: CHINA
      privacyCertifications:
        - CSL
        - PIPL
    endpoints:
      primary: https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions
  
  # Aliyun entry with dataResidency CHINA; only a primary endpoint is given.
  - modelId: qwen-72b-chat
    modelType: QWEN_72B
    provider: ALIYUN
    capabilities:
      - TEXT_GENERATION
      - CHINESE_OPTIMIZED
      - CODE_GENERATION
    costPerToken:
      input: 0.000015
      output: 0.00003
    performance:
      avgLatency: 1500
      p95Latency: 3000
      accuracy: 0.90
    limits:
      maxTokens: 32000
      requestsPerMinute: 5000
    compliance:
      dataResidency: CHINA
      privacyCertifications:
        - CSL
    endpoints:
      primary: https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation

routingStrategies:   # routing strategy configuration
  costOptimized:
    priority: COST
    weight: 0.7
    fallbackModel: qwen-72b-chat
  
  performanceOptimized:
    priority: LATENCY
    weight: 0.8
    fallbackModel: wenxin-ernie-4.0
  
  accuracyOptimized:
    priority: ACCURACY
    weight: 0.9
    fallbackModel: gpt-4-1106-preview
  
  # This strategy has no priority; it carries an algorithm and per-model weights instead.
  loadBalancing:
    algorithm: ROUND_ROBIN
    weights:
      gpt-4-1106-preview: 0.3
      claude-3-opus: 0.2
      wenxin-ernie-4.0: 0.3
      qwen-72b-chat: 0.2

circuitBreaker:    # circuit breaker configuration
  failureThreshold: 5
  successThreshold: 3
  timeoutMs: 60000
  halfOpenMaxCalls: 10

一、模型配置类

/**
 * Typed representation of model-registry.yaml: the model list, the named routing
 * strategies and the circuit breaker thresholds.
 *
 * No prefix is used because the YAML file declares "models", "routingStrategies"
 * and "circuitBreaker" at the root; a "model-registry" prefix would match none
 * of those keys and leave every field null.
 */
@Data
@ConfigurationProperties
public class ModelRegistryProperties {

    /** Entries of the "models" list. */
    private List<ModelConfig> models;

    /**
     * Entries of "routingStrategies", keyed by strategy name. The field was
     * misspelled "routingStragegies", which neither matches the YAML key nor
     * yields the getRoutingStrategies() accessor that callers use.
     */
    private Map<String, RoutingStrategy> routingStrategies;

    /** Values of the "circuitBreaker" section. */
    private CircuitBreakerConfig circuitBreaker;

    /** One entry of the "models" list. */
    @Data
    public static class ModelConfig {
        private String modelId;
        private ModelType modelType;
        private Provider provider;
        private List<Capability> capabilities;
        private CostPerToken costPerToken;
        private Performance performance;
        private Limits limits;
        private Compliance compliance;
        private Endpoints endpoints;
        private HealthCheck healthCheck;

        @Data
        public static class CostPerToken {
            private BigDecimal input;
            private BigDecimal output;
        }

        @Data
        public static class Performance {
            private int avgLatency;
            private int p95Latency;
            private double accuracy;
        }

        @Data
        public static class Limits {
            private int maxTokens;
            private int requestsPerMinute;
        }

        @Data
        public static class Compliance {
            private DataResidency dataResidency;
            private List<String> privacyCertifications;
        }

        @Data
        public static class Endpoints {
            private String primary;
            private String backup;
        }

        @Data
        public static class HealthCheck {
            private String path;
            private long interval;
            private long timeout;
        }
    }

    /** One named entry of "routingStrategies". */
    @Data
    public static class RoutingStrategy {
        private Priority priority;
        private double weight;
        private String fallbackModel;
        private Algorithm algorithm; // used for load balancing
        private Map<String, Double> weights; // model id -> weight
    }

    /** Values of the "circuitBreaker" section. */
    @Data
    public static class CircuitBreakerConfig {
        private int failureThreshold;
        private int successThreshold;
        private long timeoutMs;
        private int halfOpenMaxCalls;
    }

    // Enum definitions
    public enum ModelType { GPT_4, CLAUDE_3, ERNIE_4, QWEN_72B }
    public enum Provider { OPENAI, ANTHROPIC, BAIDU, ALIYUN }
    public enum Capability {
        TEXT_GENERATION, CODE_GENERATION, COMPLEX_REASONING,
        LONG_CONTEXT, CREATIVE_WRITING, CHINESE_OPTIMIZED,
        FINANCIAL_DOMAIN
    }
    public enum DataResidency { GLOBAL, CHINA, EU, US }
    public enum Priority { COST, LATENCY, ACCURACY }
    public enum Algorithm { ROUND_ROBIN, WEIGHTED, LEAST_CONNECTIONS }
}

动态配置文件更新,修改配置文件后自动刷新

/**
 * Declares the model registry properties as a refreshable bean.
 * Only the bean method carries @RefreshScope. Putting the scope on the
 * @Configuration class as well is redundant here and can lead to surprising
 * behaviour, because the configuration class itself would become a scoped proxy.
 */
@Configuration
public class DynamicModelRegistry{
	@Bean
	@RefreshScope
	public ModelRegistryProperties modelRegistryProperties(){
		// Refresh-scoped so the bean is recreated after a configuration refresh.
		return new ModelRegistryProperties();
	}
}
/**
 * Refresh-scoped controller that reads a feature flag from configuration.
 */
@RefreshScope
@RestController
public class ConfigController{
	// None of the configuration shown defines app.feature-flags.new-ui, and a
	// placeholder without a default cannot be resolved when the key is missing.
	// Defaulting to false keeps the flag off until it is configured.
	@Value("${app.feature-flags.new-ui:false}")
	private boolean newUiEnabled;
}

二、配置文件加载方式

方式1:使用 @ConfigurationProperties

// Loads classpath:model-registry.yaml through the custom YamlPropertySourceFactory
// and enables binding of ModelRegistryProperties.
@Configuration
@EnableConfigurationProperties(ModelRegistryProperties.class)
@PropertySource(value = "classpath:model-registry.yaml", factory = YamlPropertySourceFactory.class)
public class ModelRegistryConfig{
	// Builds the ModelRegistry from the bound properties.
	@Bean
    public ModelRegistry modelRegistry(ModelRegistryProperties properties) {
        return new ModelRegistry(properties);
    }
}

方式2:自定义 YamlPropertySourceFactory(方式1中 @PropertySource 的 factory 属性引用的就是这个类,两者需配合使用)

/**
 * PropertySourceFactory that lets @PropertySource load a YAML file by converting
 * it to java.util.Properties.
 */
public class YamlPropertySourceFactory implements PropertySourceFactory {

    /**
     * @param name     explicit property source name, may be null
     * @param resource the YAML resource to load
     * @return a property source holding the flattened YAML content
     * @throws IOException if the resource does not exist
     */
    @Override
    public PropertySource<?> createPropertySource(String name, EncodedResource resource) throws IOException {
        // Report a missing file as FileNotFoundException instead of letting the
        // factory bean fail later with a wrapped, less specific exception.
        if (!resource.getResource().exists()) {
            throw new java.io.FileNotFoundException(
                "YAML resource not found: " + resource.getResource().getDescription());
        }

        YamlPropertiesFactoryBean factory = new YamlPropertiesFactoryBean();
        factory.setResources(resource.getResource());

        // getObject() is nullable; fall back to an empty set of properties.
        Properties properties = factory.getObject();
        if (properties == null) {
            properties = new Properties();
        }

        // A property source needs a non-empty name, and getFilename() may be null.
        String sourceName = name;
        if (sourceName == null) {
            sourceName = resource.getResource().getFilename();
        }
        if (sourceName == null) {
            sourceName = resource.getResource().getDescription();
        }

        return new PropertiesPropertySource(sourceName, properties);
    }
}

三、核心业务类实现

模型注册表服务

@Service
@Slf4j
public class ModelRegistryService{
	
	@Autowired
	private ModelRegistryProperties properties;

	private Map<String, ModelConfig> modelCache = new ConcurrentHashMap<>();
    private Map<String, RoutingStrategy> routingCache = new ConcurrentHashMap<>();
	
	@PostConstruct
	public void init(){
		//初始化模型缓存
		properties.getModels().forEach(model -> {
			modelCache.put(model.getModelId(),model);
			log.info("Registered model: {}, provider: {}", 
                model.getModelId(), model.getProvider());
		});

		//初始化路由策略
		routingCache.putAll(properties.getRoutingStrategies());
	}

	public ModelConfig getModel(String modelId){
		ModelConfig model = modelCache.get(modelId);
		if(model == null){
			throw new ModelNotFoundException("Model not found: " + modelId);
		}
		return model;
	}

	public List<ModelConfig> getModelsByCapability(Capalibity capability){
		return properties.getModels().stream()
			.filter((model -> model.getCapabilities().contains(capability))
			.collect(Collectors.toList());
	}

	public List<ModelConfig> getModelsByCompliance(DataResidency residency) {
        return properties.getModels().stream()
            .filter(model -> model.getCompliance().getDataResidency() == residency)
            .collect(Collectors.toList());
    }
    
    public RoutingStrategy getRoutingStrategy(String strategyName) {
        RoutingStrategy strategy = routingCache.get(strategyName);
        if (strategy == null) {
            throw new IllegalArgumentException("Routing strategy not found: " + strategyName);
        }
        return strategy;
    }
}

模型路由服务

/**
 * Picks a model according to a named routing strategy.
 */
@Service
public class ModelRoutingService {

    @Autowired
    private ModelRegistryService modelRegistryService;

    @Autowired
    private ModelRegistryProperties properties;

    /** Cursor for round-robin selection, shared by all callers. */
    private final AtomicInteger roundRobinIndex = new AtomicInteger(0);

    /**
     * Selects a model according to the named strategy.
     *
     * @param strategyName name of a configured routing strategy
     * @param context      request context (currently not consulted)
     * @return the selected model
     * @throws IllegalArgumentException if the strategy is unknown or cannot be applied
     * @throws IllegalStateException    if no models are configured for round robin
     */
    public ModelConfig selectModel(String strategyName, Context context) {
        RoutingStrategy strategy = modelRegistryService.getRoutingStrategy(strategyName);

        // A load-balancing strategy declares no priority, and switching on a
        // null enum throws NullPointerException, so handle that case first.
        if (strategy.getPriority() == null) {
            return selectByLoadBalancing(strategy);
        }

        return switch (strategy.getPriority()) {
            case COST -> selectByCost(strategy);
            case LATENCY -> selectByLatency(strategy);
            case ACCURACY -> selectByAccuracy(strategy);
            default -> selectByLoadBalancing(strategy);
        };
    }

    /** Configured models in declaration order; never null. */
    private List<ModelConfig> availableModels() {
        List<ModelConfig> models = properties.getModels();
        return models != null ? models : List.of();
    }

    /** Cheapest model by input + output cost per token; falls back to the strategy's fallback model. */
    private ModelConfig selectByCost(RoutingStrategy strategy) {
        return availableModels().stream()
            .min(Comparator.comparing(model ->
                model.getCostPerToken().getInput().add(model.getCostPerToken().getOutput())))
            .orElseGet(() -> modelRegistryService.getModel(strategy.getFallbackModel()));
    }

    /** Model with the lowest average latency; falls back to the strategy's fallback model. */
    private ModelConfig selectByLatency(RoutingStrategy strategy) {
        return availableModels().stream()
            .min(Comparator.comparing(model -> model.getPerformance().getAvgLatency()))
            .orElseGet(() -> modelRegistryService.getModel(strategy.getFallbackModel()));
    }

    /** Model with the highest accuracy; falls back to the strategy's fallback model. */
    private ModelConfig selectByAccuracy(RoutingStrategy strategy) {
        return availableModels().stream()
            .max(Comparator.comparing(model -> model.getPerformance().getAccuracy()))
            .orElseGet(() -> modelRegistryService.getModel(strategy.getFallbackModel()));
    }

    /** Applies the strategy's load-balancing algorithm. */
    private ModelConfig selectByLoadBalancing(RoutingStrategy strategy) {
        if (strategy.getAlgorithm() == Algorithm.ROUND_ROBIN) {
            return getNextModelRoundRobin();
        } else if (strategy.getAlgorithm() == Algorithm.WEIGHTED) {
            return selectByWeightedRandom(strategy.getWeights());
        }
        // Returning null here would only surface later as a NullPointerException
        // in the caller; fail with a message that names the problem instead.
        throw new IllegalArgumentException(
            "Unsupported load balancing algorithm: " + strategy.getAlgorithm());
    }

    /** Returns the next model in declaration order, wrapping around at the end. */
    private ModelConfig getNextModelRoundRobin() {
        List<ModelConfig> models = availableModels();
        if (models.isEmpty()) {
            // Avoids the division by zero the modulo would otherwise hit.
            throw new IllegalStateException("No models are configured");
        }
        // floorMod keeps the index in range even after the counter overflows.
        int index = Math.floorMod(roundRobinIndex.getAndIncrement(), models.size());
        return models.get(index);
    }

    /** Picks a model at random, proportionally to its weight. */
    private ModelConfig selectByWeightedRandom(Map<String, Double> weights) {
        if (weights == null || weights.isEmpty()) {
            throw new IllegalArgumentException("Weighted routing requires a non-empty weights map");
        }
        double totalWeight = weights.values().stream().mapToDouble(Double::doubleValue).sum();
        if (!(totalWeight > 0.0)) {
            // nextDouble(bound) rejects non-positive bounds.
            throw new IllegalArgumentException("Weighted routing requires a positive total weight");
        }
        double random = ThreadLocalRandom.current().nextDouble(totalWeight);

        double cumulative = 0.0;
        String lastModelId = null;
        for (Map.Entry<String, Double> entry : weights.entrySet()) {
            lastModelId = entry.getKey();
            cumulative += entry.getValue();
            // Strict comparison so that a zero-weight entry can never be chosen.
            if (random < cumulative) {
                return modelRegistryService.getModel(lastModelId);
            }
        }
        // Only reachable through floating-point rounding: use the last entry.
        return modelRegistryService.getModel(lastModelId);
    }
}

熔断器管理器

@Component
public class CircuitBreakerManager {
    
    @Autowired
    private ModelRegistryProperties properties;
    
    private Map<String, CircuitBreaker> circuitBreakers = new ConcurrentHashMap<>();
    
    @PostConstruct
    public void initCircuitBreakers() {
        CircuitBreakerConfig config = properties.getCircuitBreaker();
        
        modelRegistryService.getAllModels().forEach(model -> {
            CircuitBreaker circuitBreaker = CircuitBreaker.of(model.getModelId(), 
                CircuitBreakerConfig.custom()
                    .failureRateThreshold(50.0f)
                    .slidingWindowSize(config.getFailureThreshold())
                    .minimumNumberOfCalls(10)
                    .waitDurationInOpenState(Duration.ofMillis(config.getTimeoutMs()))
                    .permittedNumberOfCallsInHalfOpenState(config.getHalfOpenMaxCalls())
                    .build());
                    
            circuitBreakers.put(model.getModelId(), circuitBreaker);
        });
    }
    
    public <T> T executeWithCircuitBreaker(String modelId, Supplier<T> supplier) {
        CircuitBreaker circuitBreaker = circuitBreakers.get(modelId);
        if (circuitBreaker == null) {
            return supplier.get();
        }
        
        return circuitBreaker.executeSupplier(supplier);
    }
    
    public CircuitBreaker.State getCircuitBreakerState(String modelId) {
        CircuitBreaker circuitBreaker = circuitBreakers.get(modelId);
        return circuitBreaker != null ? circuitBreaker.getState() : null;
    }
}

四、应用

控制器中使用

/**
 * REST endpoints for model selection and cost estimation.
 */
@RestController
@RequestMapping("/api/models")
public class ModelController {

    @Autowired
    private ModelRegistryService modelRegistryService;

    @Autowired
    private ModelRoutingService routingService;

    /**
     * Selects a model for the given strategy, optionally restricted to models
     * that offer {@code capability}.
     */
    @GetMapping("/select")
    public ModelConfig selectModel(
            @RequestParam String strategy,
            @RequestParam(required = false) Capability capability) {

        if (capability != null) {
            // Restrict the candidates to models with the requested capability.
            List<ModelConfig> models = modelRegistryService.getModelsByCapability(capability);
            return selectBestModel(models, strategy);
        }

        // Otherwise apply the routing strategy to all models.
        return routingService.selectModel(strategy, new Context());
    }

    /** Cost of a call: input price * inputTokens + output price * outputTokens. */
    @GetMapping("/cost")
    public BigDecimal calculateCost(@RequestParam String modelId,
                                   @RequestParam int inputTokens,
                                   @RequestParam int outputTokens) {
        ModelConfig model = modelRegistryService.getModel(modelId);
        return model.getCostPerToken().getInput()
            .multiply(BigDecimal.valueOf(inputTokens))
            .add(model.getCostPerToken().getOutput()
                .multiply(BigDecimal.valueOf(outputTokens)));
    }

    /**
     * Picks the best candidate according to the strategy's priority. The original
     * code called this method without defining it. Strategies without a priority
     * fall back to the first candidate.
     *
     * @throws IllegalArgumentException if there are no candidates or the strategy is unknown
     */
    private ModelConfig selectBestModel(List<ModelConfig> models, String strategy) {
        if (models == null || models.isEmpty()) {
            throw new IllegalArgumentException("No candidate models to select from");
        }
        var priority = modelRegistryService.getRoutingStrategy(strategy).getPriority();
        if (priority == null) {
            return models.get(0);
        }
        return switch (priority) {
            case COST -> java.util.Collections.min(models, java.util.Comparator.comparing(
                (ModelConfig m) -> m.getCostPerToken().getInput().add(m.getCostPerToken().getOutput())));
            case LATENCY -> java.util.Collections.min(models, java.util.Comparator.comparingInt(
                (ModelConfig m) -> m.getPerformance().getAvgLatency()));
            case ACCURACY -> java.util.Collections.max(models, java.util.Comparator.comparingDouble(
                (ModelConfig m) -> m.getPerformance().getAccuracy()));
            default -> models.get(0);
        };
    }
}

在业务逻辑中使用

/**
 * Routes a request to a model and performs the call behind a circuit breaker.
 */
@Service
public class AIService {

    @Autowired
    private CircuitBreakerManager circuitBreakerManager;

    @Autowired
    private ModelRoutingService routingService;

    /**
     * Selects a model for {@code strategy} and calls it through the model's
     * circuit breaker.
     *
     * @throws IllegalStateException if routing yields no model
     */
    public Response process(String strategy, Request request) {
        // Select the model.
        ModelConfig selectedModel = routingService.selectModel(strategy, request.getContext());
        if (selectedModel == null) {
            // Fail with a clear message instead of a NullPointerException below.
            throw new IllegalStateException("No model selected for strategy: " + strategy);
        }

        // Protect the call with the circuit breaker.
        return circuitBreakerManager.executeWithCircuitBreaker(
            selectedModel.getModelId(),
            () -> callModelAPI(selectedModel, request)
        );
    }

    /** Placeholder for the actual model invocation. */
    private Response callModelAPI(ModelConfig model, Request request) {
        // Call the concrete model API here:
        //  - model.getEndpoints().getPrimary() gives the endpoint
        //  - model.getLimits().getMaxTokens() gives the limit to validate against
        // A method declared to return Response must return or throw; the original
        // comment-only body did not compile.
        throw new UnsupportedOperationException(
            "callModelAPI is not implemented for model: " + model.getModelId());
    }
}
Logo

有“AI”的1024 = 2048,欢迎大家加入2048 AI社区

更多推荐