AI编程-----彩票蓝球

简单的分类函数

he___H

146人浏览 · 2026-01-30 21:27:37

he___H · 2026-01-30 21:27:37 发布

提示：文章写完后，目录可以自动生成，如何生成可参考右边的帮助文档

文章目录

前言
一、彩票------蓝球
二、结果
彩票期望值计算器
每张成本: 2 元，中奖金额: 5 元
总结

前言

一、彩票------蓝球

import joblib
import requests
import numpy as np
import time
import pandas as pd
from bs4 import BeautifulSoup
from sklearn.utils import compute_class_weight


def _parse_ssq_rows(html, limit):
    """Parse one yearly result page into draw records.

    Args:
        html: Raw HTML text of the yearly results page.
        limit: Maximum number of draws to return.

    Returns:
        ``None`` when the page contains no ``<tbody class="list-tr">`` table;
        otherwise a list (at most ``limit`` long) of
        ``[issue, draw_date, red1, ..., red6, blue]`` string records,
        collected from the last table row upwards.
    """
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody', class_='list-tr')
    if tbody is None:
        return None

    draws = []
    # Walk the table bottom-up: the page is assumed to list the newest draw
    # last (this mirrors the original traversal order) - TODO confirm.
    for row in reversed(tbody.find_all('tr')):
        if len(draws) >= limit:
            break

        tds = row.find_all('td')
        if len(tds) < 3:
            continue

        numbers_div = tds[2].find('div')
        if numbers_div is None:
            # Malformed row: skip it instead of aborting the whole year.
            continue

        red_balls = [span.text.strip() for span in numbers_div.find_all('span', class_='red')]
        blue_spans = numbers_div.find_all('span', class_='blue')
        if len(red_balls) != 6 or not blue_spans:
            continue

        issue = tds[0].text.strip()
        draw_date = tds[1].text.strip()
        draws.append([issue, draw_date] + red_balls + [blue_spans[0].text.strip()])

    return draws


def crawl_ssq_history(self=None, count=None):
    """Scrape the blue-ball numbers of the most recent double-colour-ball draws.

    Yearly result pages are requested from the current calendar year
    backwards until ``count`` draws have been collected or the year 2000
    is passed.

    Args:
        self: Legacy first positional parameter. This is a module-level
            function and existing call sites pass the wanted number of draws
            here (``crawl_ssq_history(70)``), so an ``int`` in this position
            is used as the count when ``count`` itself is not given. Any
            other value is ignored.
        count: Number of most recent draws to return. Defaults to 70.

    Returns:
        A list of blue-ball numbers ordered from oldest to newest, holding at
        most ``count`` entries. Empty when nothing could be fetched.
    """
    if count is None:
        # bool is a subclass of int; do not treat True/False as a count.
        if isinstance(self, int) and not isinstance(self, bool):
            count = self
        else:
            count = 70
    if count <= 0:
        return []

    url = "https://kaijiang.78500.cn/ssq/"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0'
    }
    all_data = []

    # Start with the current year so the script needs no yearly manual edit.
    current_year = time.localtime().tm_year
    periods_needed = count

    while periods_needed > 0 and current_year >= 2000:
        params = {
            "startqi": "",
            "endqi": "",
            "year": str(current_year),
            "action": "years"
        }
        try:
            # A timeout keeps one stalled connection from hanging the script.
            response = requests.post(url, data=params, headers=headers, timeout=15)
        except requests.RequestException as e:
            print(f"爬取{current_year}年数据时发生错误：{e}")
        else:
            # Random pause between requests to stay polite to the server.
            time.sleep(np.random.uniform(1, 3))
            if response.status_code != 200:
                print(f"请求{current_year}年数据失败，状态码：{response.status_code}")
            else:
                draws = _parse_ssq_rows(response.text, periods_needed)
                if draws is None:
                    print(f"未找到{current_year}年的数据表格")
                else:
                    all_data.extend(draws)
                    periods_needed -= len(draws)

        print(f"已处理{current_year}年数据，还需{periods_needed}期")
        current_year -= 1

    # Chronological order (oldest first), keyed on the issue number string.
    all_data.sort(key=lambda x: x[0])

    columns = ['期号', '开奖日期', '红球1', '红球2', '红球3', '红球4', '红球5', '红球6', '蓝球']
    df = pd.DataFrame(all_data, columns=columns)

    # Convert every ball column to numbers; unparsable cells become NaN and
    # the affected rows are dropped below.
    for col in columns[2:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)

    # Only the blue-ball column is returned, trimmed to the requested length.
    blue_balls = df['蓝球'].tolist()

    return blue_balls[-count:]


# Fetch the 70 most recent blue-ball numbers
blue_balls = crawl_ssq_history(70)
print(f"最近70期蓝球号码: {blue_balls}")

# Disabled variant that requests the 30 most recent blue-ball numbers
#blue_balls_30 = crawl_ssq_history(30)

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# The blue-ball history used below is the `blue_balls` list built earlier.

# Sliding-window length: the previous 3 draws are the features for the next.
window_size = 3

# Feature set X: every run of `window_size` consecutive numbers.
# Target set y: the number that immediately follows each run.
X = [
    blue_balls[start:start + window_size]
    for start in range(len(blue_balls) - window_size)
]
y = [
    blue_balls[start + window_size]
    for start in range(len(blue_balls) - window_size)
]

# Put the samples in a DataFrame purely for a readable preview.
lag_columns = [f'lag_{lag}' for lag in range(1, window_size + 1)]
features_df = pd.DataFrame(X, columns=lag_columns)
features_df['target'] = y
print("构建的训练数据预览（前5行）：")
print(features_df.head())

# Train/test split, kept for the optional evaluation snippet further down.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# When the goal is prediction rather than evaluation, fit on every sample.
print("使用所有数据训练模型（不划分测试集）...")

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X, y)

# Optional evaluation (disabled): fit on the training split and compare
# training accuracy with test accuracy.
# model.fit(X_train, y_train)
#
# train_score = model.score(X_train, y_train)
# test_score = model.score(X_test, y_test)
# print(f"\n模型训练完成！")
# print(f"训练集准确率: {train_score:.4f}")
# print(f"测试集准确率: {test_score:.4f}")

# Predict the next draw from the most recent `window_size` numbers.
latest_features = blue_balls[-window_size:]
next_blue_pred = model.predict([latest_features])[0]
prediction_proba = model.predict_proba([latest_features])

print(f"\n使用所有{len(X)}个样本训练")
print(f"模型识别的类别数: {len(model.classes_)}")
print(f"预测的下期蓝球: {next_blue_pred}")

# Map each class the model knows to its predicted probability.
prob_dict = dict(zip(model.classes_, prediction_proba[0]))

print("\n所有蓝球号码概率:")
for ball_num in range(1, 17):
    if ball_num not in prob_dict:
        # This number never appeared as a target, so the model has no class for it.
        print(f"蓝球 {ball_num:2d}: 0.0000 (模型未学习)")
        continue
    print(f"蓝球 {ball_num:2d}: {prob_dict[ball_num]:.4f}")

print(f"\n模型训练完成!")
print(f"模型识别的类别数量: {len(model.classes_)}")
print(f"模型识别的类别: {sorted(model.classes_)}")
#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Parameters for the expected-value calculation
COST_PER_TICKET = 2  # cost of one ticket, in yuan
PRIZE = 5  # payout for a winning ticket, in yuan


# 1. Expected value of buying a single ticket
def calculate_single_expectation(probabilities=None, prize=None, cost_per_ticket=None):
    """Print and return the expected net gain of buying the single best ticket.

    The number with the highest probability is chosen. Winning yields
    ``prize - cost_per_ticket``; losing yields ``-cost_per_ticket``.

    Args:
        probabilities: Mapping of ball number to probability. Defaults to the
            module-level ``prob_dict``.
        prize: Payout of a winning ticket. Defaults to the module-level
            ``PRIZE``.
        cost_per_ticket: Price of one ticket. Defaults to the module-level
            ``COST_PER_TICKET``.

    Returns:
        A tuple ``(expectation, best_number)``.

    Raises:
        ValueError: If ``probabilities`` is empty.
    """
    # Fall back to the script's globals only when the caller gave nothing,
    # so the no-argument call keeps working as before.
    if probabilities is None:
        probabilities = prob_dict
    if prize is None:
        prize = PRIZE
    if cost_per_ticket is None:
        cost_per_ticket = COST_PER_TICKET
    if not probabilities:
        raise ValueError("probabilities must contain at least one number")

    print("=== 买一张彩票的期望计算 ===\n")

    # Pick the number with the highest probability.
    best_num, best_prob = max(probabilities.items(), key=lambda x: x[1])

    print(f"最佳选择: 蓝球 {best_num} 号")
    print(f"中奖概率: {best_prob:.4f}")
    print(f"不中概率: {1 - best_prob:.4f}")

    # Net result of each outcome.
    win_net = prize - cost_per_ticket
    lose_net = -cost_per_ticket

    # Probability-weighted average of the two outcomes.
    expectation = best_prob * win_net + (1 - best_prob) * lose_net

    print(f"\n收益情况:")
    print(f"  中奖收益: {prize} - {cost_per_ticket} = {win_net} 元")
    print(f"  不中收益: -{cost_per_ticket} 元")

    print(f"\n期望值计算:")
    print(f"  E = {best_prob:.4f} × {win_net} + {1 - best_prob:.4f} × {lose_net}")
    print(f"    = {best_prob * win_net:.4f} + {(1 - best_prob) * lose_net:.4f}")
    print(f"    = {expectation:.4f} 元")

    return expectation, best_num


# 2. Expected value of buying two tickets
def calculate_double_expectation(probabilities=None, prize=None, cost_per_ticket=None):
    """Print and return the expected net gain of buying the two best tickets.

    The two numbers with the highest probabilities are chosen. Only one
    number can be drawn, so the win probability is the sum of the two and a
    win pays ``prize`` once against the cost of two tickets.

    Args:
        probabilities: Mapping of ball number to probability. Defaults to the
            module-level ``prob_dict``.
        prize: Payout of a winning ticket. Defaults to the module-level
            ``PRIZE``.
        cost_per_ticket: Price of one ticket. Defaults to the module-level
            ``COST_PER_TICKET``.

    Returns:
        A tuple ``(expectation, (first_number, second_number))``.

    Raises:
        ValueError: If ``probabilities`` holds fewer than two numbers.
    """
    # Fall back to the script's globals only when the caller gave nothing,
    # so the no-argument call keeps working as before.
    if probabilities is None:
        probabilities = prob_dict
    if prize is None:
        prize = PRIZE
    if cost_per_ticket is None:
        cost_per_ticket = COST_PER_TICKET
    if len(probabilities) < 2:
        raise ValueError("probabilities must contain at least two numbers")

    print("\n\n=== 买两张彩票的期望计算 ===\n")

    # Two most probable numbers; the stable sort keeps mapping order on ties.
    sorted_numbers = sorted(probabilities.items(), key=lambda x: x[1], reverse=True)
    (num1, prob1), (num2, prob2) = sorted_numbers[:2]

    print(f"最佳选择: 蓝球 {num1} 号和 {num2} 号")
    print(f"{num1}号概率: {prob1:.4f}")
    print(f"{num2}号概率: {prob2:.4f}")

    # The two outcomes are mutually exclusive, so their probabilities add.
    win_prob = prob1 + prob2
    lose_prob = 1 - win_prob

    print(f"\n中奖概率: {win_prob:.4f}")
    print(f"不中概率: {lose_prob:.4f}")

    # Net result of each outcome.
    total_cost = 2 * cost_per_ticket
    win_net = prize - total_cost
    lose_net = -total_cost

    # Probability-weighted average of the two outcomes.
    expectation = win_prob * win_net + lose_prob * lose_net

    print(f"\n收益情况:")
    print(f"  总成本: 2 × {cost_per_ticket} = {total_cost} 元")
    print(f"  中奖净收益: {prize} - {total_cost} = {win_net} 元")
    print(f"  不中损失: -{total_cost} 元")

    print(f"\n期望值计算:")
    print(f"  E = {win_prob:.4f} × {win_net} + {lose_prob:.4f} × {lose_net}")
    print(f"    = {win_prob * win_net:.4f} + {lose_prob * lose_net:.4f}")
    print(f"    = {expectation:.4f} 元")

    return expectation, (num1, num2)


print("彩票期望值计算器")
print("=" * 50)
print(f"每张成本: {COST_PER_TICKET} 元")
print(f"中奖金额: {PRIZE} 元")
print("=" * 50)

# Expected value of one ticket
single_exp, best_single = calculate_single_expectation()

# Expected value of two tickets
double_exp, best_double = calculate_double_expectation()

# Side-by-side summary; the second figure is expectation divided by money spent.
print("\n\n=== 结果对比 ===")
print(f"最佳单张: 买蓝球 {best_single} 号")
print(f"  期望值: {single_exp:.4f} 元")
print(f"  奖金/花费_越大越好: {single_exp / COST_PER_TICKET:.4f} 元")

print(f"\n最佳双张: 买蓝球 {best_double[0]} 和 {best_double[1]} 号")
print(f"  期望值: {double_exp:.4f} 元")
print(f"  奖金/花费_越大越好: {double_exp / (2 * COST_PER_TICKET):.4f} 元")

print(f"\n对比分析:")
expectation_gap = single_exp - double_exp
if abs(expectation_gap) < 1e-9:
    # Equal expectations: do not claim that either option is better.
    print("  ✓ 两种方案期望值相同")
elif expectation_gap > 0:
    print(f"  ✓ 买一张更优 (多 {expectation_gap:.4f} 元)")
else:
    print(f"  ✓ 买两张更优 (多 {-expectation_gap:.4f} 元)")

# Option 2: load a previously saved model with joblib and print its prediction
# (labelled as the "previous version" result in the output).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files you created yourself.
try:
    loaded_model = joblib.load("blue_ball_model.pkl")
except FileNotFoundError:
    # This script never saves the model, so the file may simply not exist;
    # skip the comparison instead of crashing after all results are printed.
    print("未找到 blue_ball_model.pkl，跳过上一版模型的预测")
else:
    # Feed exactly as many trailing draws as the model was fitted on. Falls
    # back to the whole history when the model does not expose
    # `n_features_in_`.
    # NOTE(review): assumes the saved model takes the most recent draws as
    # lag features, like the model trained above - confirm.
    n_features = getattr(loaded_model, "n_features_in_", len(blue_balls))
    predictions = loaded_model.predict([blue_balls[-n_features:]])
    print("预测结果上一版：", predictions)

二、结果

E:\caipiao\.venv\Scripts\python.exe E:\caipiao\blue_ball.py
已处理2026年数据，还需57期
已处理2025年数据，还需0期
最近70期蓝球号码: [16, 11, 15, 4, 9, 6, 11, 11, 6, 8, 11, 11, 14, 5, 15, 14, 5, 11, 11, 15, 14, 10, 7, 15, 2, 7, 6, 15, 16, 6, 1, 3, 8, 12, 1, 7, 2, 9, 11, 11, 6, 12, 13, 11, 13, 1, 12, 10, 11, 16, 1, 9, 7, 16, 12, 3, 14, 15, 2, 16, 10, 10, 15, 1, 10, 10, 12, 4, 8, 1]
构建的训练数据预览（前5行）：
   lag_1  lag_2  lag_3  target
0     16     11     15       4
1     11     15      4       9
2     15      4      9       6
3      4      9      6      11
4      9      6     11      11
使用所有数据训练模型（不划分测试集）...

使用所有67个样本训练
模型识别的类别数: 16
预测的下期蓝球: 7

所有蓝球号码概率:
蓝球 1: 0.0400
蓝球 2: 0.0000
蓝球 3: 0.0800
蓝球 4: 0.0000
蓝球 5: 0.0000
蓝球 6: 0.0000
蓝球 7: 0.2200
蓝球 8: 0.0600
蓝球 9: 0.1400
蓝球 10: 0.0500
蓝球 11: 0.2000
蓝球 12: 0.0300
蓝球 13: 0.0000
蓝球 14: 0.0100
蓝球 15: 0.1600
蓝球 16: 0.0100

模型训练完成!
模型识别的类别数量: 16
模型识别的类别: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16)]