统信系统小程序（五）python通过OCR自动旋转图片程序

hnxaoli

776人浏览 · 2025-08-22 13:44:44
hnxaoli · 2025-08-22 13:44:44 发布
'''
这是为了方便使用统信系统自制的小程序。
通过pytesseract模块识别图片中的几个字，来猜测图片的正确方向。然后旋转。
这个图片批量旋转的python程序，支持png、jpg、bmp、tiff等格式，通过tkinter输入参数，界面尺寸300*500，底色淡黄色，一键处理的按键是糖果绿色，字体：fangsong ti，字号12号。
功能1：可打开文件夹自动识别所有图片、也可手动对图片批量选择后，自动逐个识别图片文件，可手动选择另存、覆盖、加扩展名3种选项，默认另存到rotation文件夹。可勾选旋转90、180、270度、OCR旋转，默认选择OCR旋转。
功能2：点击识别旋转后，如果选择了另存，则逐个对每个图片执行OCR，在识别到至少3个汉字或者英文、数值的方向后，将图片旋转到其向上的方向，如果有几个文件OCR识别不出则在界面显示，随后逐个按照其原名保存到旋转文件夹，遇到无需旋转的也把源文件复制进rotation文件夹。如果选择了覆盖则替换原文件，如果选择了加扩展名，则在原文件夹将源文件旋转后增加后缀另存，在界面中提示保存的文件数。
功能3：点击停止，则结束正在进行的操作。
'''
# -*- coding: utf-8 -*-
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
from PIL import Image, ImageEnhance, ImageFilter
import pytesseract
import re
import threading
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
import queue

# Point Tesseract at its language-data directory (adjust to the local install path).
os.environ['TESSDATA_PREFIX'] = '/usr/share/tesseract-ocr/4.00/tessdata/'


# Alternative location: os.environ['TESSDATA_PREFIX'] = '/usr/share/tessdata/'

class ImageBatchRotationApp:
    def __init__(self, root, default_path="/home/huanghe/Desktop/"):
        """Configure the main window, create the option state and build the UI.

        Args:
            root: The Tk root window hosting the application.
            default_path: Folder shown in the path entry at start-up. The
                default keeps the location the tool was written for; pass a
                different folder to run it on another machine or account.
        """
        self.root = root
        self.root.title("图片批量旋转")
        self.root.geometry("300x750")  # tall enough for every option row
        self.root.configure(bg="#ffffcc")  # light-yellow background

        # Folder / file selection state.
        self.default_path = default_path
        self.selected_path = self.default_path
        self.selected_images = []

        # Option state bound to the widgets built in create_widgets().
        self.save_mode = tk.StringVar(value="save_to_folder")  # save into the rotation folder by default
        self.rotation_mode = tk.StringVar(value="ocr")  # OCR-driven rotation by default
        self.save_original = tk.BooleanVar(value=True)  # "未识别图单独另存" checkbox, on by default
        self.whiten_option = tk.BooleanVar(value=False)  # whitening, off by default
        self.remove_shadow_option = tk.BooleanVar(value=True)  # shadow removal, on by default
        self.micro_rotation_center = tk.BooleanVar(value=False)  # "center" micro-rotation
        self.micro_rotation_near = tk.BooleanVar(value=False)  # "nearest" micro-rotation
        self.gray_threshold = tk.IntVar(value=200)  # gray threshold used by the whitening code

        # Run-time bookkeeping for a processing pass.
        self.running = False
        self.unrecognized_files = []  # files OCR could not orient
        self.processed_count = 0  # images processed so far
        # Must be known before create_widgets(), which enables/disables the OCR checkbox.
        self.ocr_available = self.check_ocr_availability()
        self.total_images = 0  # images in the current pass

        self.create_widgets()

    def check_ocr_availability(self):
        """Return True when pytesseract can report a Tesseract version.

        Any failure of the version query is printed and reported as False
        instead of being raised.
        """
        available = True
        try:
            # The version query is used as a cheap probe of the OCR backend.
            pytesseract.get_tesseract_version()
        except Exception as err:
            print(f"OCR不可用: {err}")
            available = False
        return available

    def create_widgets(self):
        """Build every widget of the main window, from top to bottom.

        Grid rows of the main frame: path label/entry (0-1), scan and select
        buttons (2-3), save options (4-5), rotation options (6-7), action
        buttons (8), progress counter and bar (9-10), the list of
        unrecognised files (11) and the result line (12).
        """
        bg = "#ffffcc"  # light yellow shared by all themed widgets
        green = "#90ee90"  # highlight colour of the main action buttons
        font12 = ("fangsong ti", 12)
        font10 = ("fangsong ti", 10)

        def themed(widget_cls, parent, **options):
            # Create a widget that carries the shared background and 12pt font.
            return widget_cls(parent, bg=bg, font=font12, **options)

        # Main container.
        main_frame = tk.Frame(self.root, bg=bg)
        main_frame.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)

        # Folder path: label, entry and browse button.
        themed(tk.Label, main_frame, text="图片路径:").grid(row=0, column=0, sticky="w", pady=5)

        path_frame = tk.Frame(main_frame, bg=bg)
        path_frame.grid(row=1, column=0, columnspan=2, sticky="ew", pady=5)
        main_frame.columnconfigure(1, weight=1)

        self.path_entry = tk.Entry(path_frame, font=font12)
        self.path_entry.insert(0, self.default_path)
        self.path_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)

        tk.Button(path_frame, text="浏览", font=font12,
                  command=self.browse_path).pack(side=tk.RIGHT, padx=(5, 0))

        # Scan-folder button.
        tk.Button(main_frame, text="扫描文件夹", font=font12,
                  command=self.open_folder).grid(row=2, column=0, columnspan=2, pady=5)

        # Manual image selection button.
        tk.Button(main_frame, text="选择图片", bg=green, font=font12,
                  command=self.select_images).grid(row=3, column=0, columnspan=2, pady=5)

        # Save-mode section.
        themed(tk.Label, main_frame, text="保存模式:").grid(row=4, column=0, sticky="w", pady=5)

        mode_frame = tk.Frame(main_frame, bg=bg)
        mode_frame.grid(row=5, column=0, columnspan=2, sticky="w", pady=5)

        save_choices = (
            ("另存到rotation文件夹", "save_to_folder"),
            ("覆盖原文件", "overwrite"),
            ("加扩展名另存", "add_suffix"),
        )
        for caption, mode_value in save_choices:
            themed(tk.Radiobutton, mode_frame, text=caption,
                   variable=self.save_mode, value=mode_value).pack(anchor="w")

        # Checkbox bound to save_original; ticked by default.
        self.save_original_check = themed(tk.Checkbutton, mode_frame, text="未识别图单独另存",
                                          variable=self.save_original)
        self.save_original_check.pack(anchor="w")
        self.save_original_check.select()

        # Whitening checkbox.
        self.whiten_check = themed(tk.Checkbutton, mode_frame, text="刷白优化(文字图片)",
                                   variable=self.whiten_option)
        self.whiten_check.pack(anchor="w")

        # Shadow-removal checkbox; ticked by default.
        self.remove_shadow_check = themed(tk.Checkbutton, mode_frame, text="除阴影",
                                          variable=self.remove_shadow_option)
        self.remove_shadow_check.pack(anchor="w")
        self.remove_shadow_check.select()

        # Gray-threshold slider (150-250).
        themed(tk.Label, mode_frame, text="灰色阈值:").pack(anchor="w")

        self.gray_threshold_scale = tk.Scale(mode_frame, from_=150, to=250, orient=tk.HORIZONTAL,
                                             variable=self.gray_threshold, bg=bg,
                                             font=font10, length=200)
        self.gray_threshold_scale.pack(anchor="w")

        # Rotation-mode section.
        themed(tk.Label, main_frame, text="旋转模式:").grid(row=6, column=0, sticky="w", pady=5)

        rotation_frame = tk.Frame(main_frame, bg=bg)
        rotation_frame.grid(row=7, column=0, columnspan=2, sticky="w", pady=5)

        # OCR checkbox toggles rotation_mode between "ocr" and "fixed".
        self.ocr_check = themed(tk.Checkbutton, rotation_frame, text="OCR旋转",
                                variable=self.rotation_mode, onvalue="ocr", offvalue="fixed",
                                command=self.on_rotation_mode_change)
        self.ocr_check.pack(anchor="w")

        if self.ocr_available:
            self.ocr_check.select()
        else:
            # Without a usable OCR backend the option is locked to fixed rotation
            # and a red hint is shown.
            self.ocr_check.config(state=tk.DISABLED)
            self.rotation_mode.set("fixed")
            tk.Label(rotation_frame, text="OCR不可用，请安装tesseract-ocr",
                     bg=bg, fg="red", font=font10).pack(anchor="w")

        # Micro-rotation options.
        themed(tk.Label, rotation_frame, text="微旋转模式:").pack(anchor="w", pady=(5, 0))

        micro_frame = tk.Frame(rotation_frame, bg=bg)
        micro_frame.pack(anchor="w")

        self.center_rotation_check = themed(tk.Checkbutton, micro_frame, text="中心微旋转",
                                            variable=self.micro_rotation_center,
                                            command=self.on_micro_rotation_change)
        self.center_rotation_check.pack(side=tk.LEFT)

        self.near_rotation_check = themed(tk.Checkbutton, micro_frame, text="就近微旋转",
                                          variable=self.micro_rotation_near,
                                          command=self.on_micro_rotation_change)
        self.near_rotation_check.pack(side=tk.LEFT, padx=(10, 0))

        # Fixed-angle checkboxes, each with its own variable.
        angle_frame = tk.Frame(rotation_frame, bg=bg)
        angle_frame.pack(anchor="w")

        self.angle90_var = tk.BooleanVar()
        self.angle180_var = tk.BooleanVar()
        self.angle270_var = tk.BooleanVar()

        self.angle90_check = themed(tk.Checkbutton, angle_frame, text="90度",
                                    variable=self.angle90_var,
                                    command=self.on_angle_change)
        self.angle90_check.pack(side=tk.LEFT)

        self.angle180_check = themed(tk.Checkbutton, angle_frame, text="180度",
                                     variable=self.angle180_var,
                                     command=self.on_angle_change)
        self.angle180_check.pack(side=tk.LEFT, padx=(10, 0))

        self.angle270_check = themed(tk.Checkbutton, angle_frame, text="270度",
                                     variable=self.angle270_var,
                                     command=self.on_angle_change)
        self.angle270_check.pack(side=tk.LEFT, padx=(10, 0))

        # Action buttons.
        button_frame = tk.Frame(main_frame, bg=bg)
        button_frame.grid(row=8, column=0, columnspan=2, pady=10)

        tk.Button(button_frame, text="一键处理", bg=green, font=font12,
                  command=self.start_processing).pack(side=tk.LEFT, padx=10)

        tk.Button(button_frame, text="停止", font=font12,
                  command=self.stop_processing).pack(side=tk.LEFT, padx=10)

        # Processed / total counter.
        self.count_label = themed(tk.Label, main_frame, text="处理进度: 0 / 0")
        self.count_label.grid(row=9, column=0, columnspan=2, pady=5)

        # Progress bar (0-100).
        self.progress_var = tk.DoubleVar()
        self.progress_bar = ttk.Progressbar(main_frame, variable=self.progress_var, maximum=100)
        self.progress_bar.grid(row=10, column=0, columnspan=2, pady=5, sticky="ew")

        # Text box listing files that could not be recognised.
        unrecognized_frame = tk.Frame(main_frame, bg=bg)
        unrecognized_frame.grid(row=11, column=0, columnspan=2, pady=5, sticky="ew")

        themed(tk.Label, unrecognized_frame, text="无法识别:").pack(anchor="w")

        self.unrecognized_text = tk.Text(unrecognized_frame, height=6, font=font10)
        self.unrecognized_text.pack(fill=tk.X, expand=True)

        # Result line.
        self.result_label = themed(tk.Label, main_frame, text="")
        self.result_label.grid(row=12, column=0, columnspan=2, pady=5)

    def browse_path(self):
        """Ask for a folder and mirror the choice in the path entry."""
        chosen = filedialog.askdirectory(initialdir=self.selected_path)
        if not chosen:
            return  # dialog cancelled: keep the current folder

        self.selected_path = chosen
        entry = self.path_entry
        entry.delete(0, tk.END)
        entry.insert(0, chosen)

    def open_folder(self):
        """Collect the supported image files found directly in the chosen folder.

        A folder typed into the path entry takes precedence over the last
        folder picked through a dialog, so manual edits of the entry are
        honoured. The scan is not recursive. The result replaces
        ``self.selected_images`` and its size is reported in a message box.
        """
        # Honour a path the user typed by hand; the entry is otherwise never read.
        typed_path = self.path_entry.get().strip()
        if typed_path:
            self.selected_path = typed_path

        # isdir (not exists): os.listdir would raise on a path that is a regular file.
        if not os.path.isdir(self.selected_path):
            messagebox.showerror("错误", "指定的路径不存在")
            return

        image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
        candidates = (
            os.path.join(self.selected_path, name)
            for name in os.listdir(self.selected_path)
            if name.lower().endswith(image_extensions)
        )
        # Skip directories that merely carry an image-like name.
        self.selected_images = [path for path in candidates if os.path.isfile(path)]

        messagebox.showinfo("提示", f"自动找到 {len(self.selected_images)} 张图片")

    def select_images(self):
        """Let the user hand-pick image files and remember their folder.

        On a non-empty selection the file list replaces
        ``self.selected_images``; the directory of the first file becomes the
        current path (and is shown in the path entry), and the number of
        selected files is reported in a message box.
        """
        picked = filedialog.askopenfilenames(
            initialdir=self.selected_path,
            filetypes=[("Image files", "*.jpg *.jpeg *.png *.bmp *.tiff")]
        )
        if not picked:
            return  # dialog cancelled

        self.selected_images = list(picked)

        # Follow the selection: use the first file's directory as current path.
        folder = os.path.dirname(picked[0])
        if folder:
            self.selected_path = folder
            self.path_entry.delete(0, tk.END)
            self.path_entry.insert(0, self.selected_path)

        messagebox.showinfo("提示", f"已选择 {len(self.selected_images)} 张图片")

    def on_rotation_mode_change(self):
        """Callback of the OCR-rotation checkbox; currently a deliberate no-op.

        Kept as an extension point for logic that should run on a mode change.
        """
        return None

    def on_angle_change(self):
        """当角度勾选框改变时调用"""
        # 如果勾选了任何角度，则自动取消OCR旋转
        if (self.angle90_var.get() or
                self.angle180_var.get() or
                self.angle270_var.get()):
            self.rotation_mode.set("fixed")
        # 如果没有勾选任何角度，则自动选中OCR旋转（如果OCR可用）
        elif (not self.angle90_var.get() and
              not self.angle180_var.get() and
              not self.angle270_var.get()):
            if self.ocr_available:
                self.rotation_mode.set("ocr")

    def on_micro_rotation_change(self):
        """当微旋转选项改变时调用"""
        # 确保两个微旋转选项互斥
        if self.micro_rotation_center.get():
            self.micro_rotation_near.set(False)
        elif self.micro_rotation_near.get():
            self.micro_rotation_center.set(False)

    def start_processing(self):
        """开始处理图片"""
        if not self.running:
            self.running = True
            self.unrecognized_files = []
            self.processed_count = 0
            self.total_images = len(self.selected_images)
            self.unrecognized_text.delete(1.0, tk.END)
            self.result_label.config(text="")
            self.count_label.config(text=f"处理进度: {self.processed_count} / {self.total_images}")
            self.progress_var.set(0)
            threading.Thread(target=self.process_images, daemon=True).start()

    def stop_processing(self):
        """停止处理"""
        self.running = False

    def process_images(self):
        """处理图片的线程函数 - 并行处理版本"""
        try:
            # 如果没有图片，则自动查找
            if not self.selected_images:
                self.root.after(0, lambda: messagebox.showwarning("警告", "未找到任何图片文件"))
                self.running = False
                return

            # 根据保存模式处理图片
            mode = self.save_mode.get()
            rotation_mode = self.rotation_mode.get()

            # 使用线程池并行处理图片
            max_workers = min(4, len(self.selected_images))  # 最多4个线程，或图片数量
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                # 提交所有任务
                future_to_image = {
                    executor.submit(self.process_single_image, image_path, mode, rotation_mode): image_path
                    for image_path in self.selected_images
                }

                # 处理完成的任务
                for future in as_completed(future_to_image):
                    if not self.running:
                        break

                    try:
                        result = future.result()
                        if result:
                            self.processed_count += 1
                            # 更新进度
                            progress = (self.processed_count / self.total_images) * 100
                            self.root.after(0, lambda p=progress: self.progress_var.set(p))
                            # 更新处理数显示
                            self.root.after(0, lambda: self.count_label.config(
                                text=f"处理进度: {self.processed_count} / {self.total_images}"))
                    except Exception as e:
                        print(f"处理图片时出错: {e}")

            # 显示无法识别的文件
            if self.unrecognized_files:
                self.root.after(0, self.show_unrecognized_files)

            if self.running:
                self.root.after(0, lambda: self.result_label.config(text=f"已保存 {self.processed_count} 个文件"))
                # 增加处理完毕后的弹窗提示
                self.root.after(0, lambda: messagebox.showinfo("处理完成",
                                                               f"图片处理完成！\n已保存 {self.processed_count} 个文件"))
            else:
                self.root.after(0, lambda: messagebox.showinfo("提示", "操作已停止"))

        except Exception as e:
            self.root.after(0, lambda: messagebox.showerror("错误", f"处理过程中出现错误: {str(e)}"))
        finally:
            self.running = False
            self.selected_images = []
            self.root.after(0, lambda: self.progress_var.set(0))

    def process_single_image(self, image_path, mode, rotation_mode):
        """处理单张图片"""
        try:
            self.rotate_image(image_path, mode, rotation_mode)
            return True
        except Exception as e:
            print(f"处理图片 {image_path} 时出错: {str(e)}")
            return False

    def detect_text_regions(self, img_array):
        """Return a boolean mask of likely text areas in a 2-D grayscale array.

        A non-border pixel is a text seed when it is darker than 80 and its
        3x3 neighbourhood (itself included) holds at least three pixels darker
        than 100. Every seed marks the 7x7 block around it, clipped to the
        image. The mask is used to keep scan-line removal away from text.

        The work is done with whole-array numpy operations; a per-pixel Python
        loop is far too slow for scanned pages.

        Args:
            img_array: 2-D array of gray levels.

        Returns:
            Boolean array with the shape of ``img_array``; all False when the
            image is too small to have interior pixels or on any error.
        """
        try:
            height, width = img_array.shape

            # Only interior pixels can be seeds, so tiny images have none.
            if height < 3 or width < 3:
                return np.zeros_like(img_array, dtype=bool)

            pixels = np.asarray(img_array)

            # Number of "dark" (< 100) pixels in each 3x3 neighbourhood.
            dark = np.pad(pixels < 100, 1, mode="constant").astype(np.int32)
            dark_neighbors = np.zeros((height, width), dtype=np.int32)
            for dy in range(3):
                for dx in range(3):
                    dark_neighbors += dark[dy:dy + height, dx:dx + width]

            seeds = (pixels < 80) & (dark_neighbors >= 3)
            # Border pixels are never evaluated as seeds.
            seeds[[0, -1], :] = False
            seeds[:, [0, -1]] = False

            # Grow every seed into a 7x7 block: a square dilation splits into a
            # vertical pass followed by a horizontal pass.
            padded_seeds = np.pad(seeds, 3, mode="constant")
            vertical = np.zeros((height, width + 6), dtype=bool)
            for dy in range(7):
                vertical |= padded_seeds[dy:dy + height, :]
            text_mask = np.zeros((height, width), dtype=bool)
            for dx in range(7):
                text_mask |= vertical[:, dx:dx + width]

            return text_mask
        except Exception as e:
            print(f"文字区域检测出错: {e}")
            return np.zeros_like(img_array, dtype=bool)

    def line_fit(self, points):
        """Fit ``y = m * x + c`` to the points by ordinary least squares.

        Used instead of sklearn's RANSAC to avoid the extra dependency.

        Args:
            points: Sequence of ``[x, y]`` pairs.

        Returns:
            ``(m, c)`` — slope and intercept — or ``(None, None)`` when fewer
            than two points are given or the solver rejects the input.
        """
        if len(points) < 2:
            return None, None

        coords = np.array(points)
        x = coords[:, 0]
        y = coords[:, 1]

        # Design matrix [x, 1] for the linear model.
        A = np.vstack([x, np.ones(len(x))]).T
        try:
            m, c = np.linalg.lstsq(A, y, rcond=None)[0]
        except (np.linalg.LinAlgError, ValueError, TypeError):
            # Solver failure or non-numeric input; a bare `except` would also
            # have swallowed KeyboardInterrupt and SystemExit.
            return None, None
        return m, c  # slope and intercept

    def calculate_distance_to_line(self, point, slope, intercept):
        """Return the perpendicular distance from ``point`` to ``y = slope*x + intercept``.

        Args:
            point: ``(x, y)`` pair.
            slope: Line slope, or None when no line is available.
            intercept: Line intercept.

        Returns:
            The distance, or ``float('inf')`` when ``slope`` is None.
        """
        if slope is None:
            return float('inf')

        px, py = point
        # Line in general form a*x + b*y + c = 0 with a = slope, b = -1,
        # c = intercept; the distance is |a*x + b*y + c| / sqrt(a^2 + b^2).
        numerator = abs(slope * px - py + intercept)
        return numerator / np.sqrt(slope ** 2 + 1)

    def detect_scan_lines_advanced(self, img_array, text_mask):
        """使用直线拟合检测扫描斜线（不依赖sklearn）"""
        try:
            height, width = img_array.shape

            # 定义边缘区域（2mm约为图像宽度/高度的1/50）
            edge_margin_x = max(1, width // 50)
            edge_margin_y = max(1, height // 50)

            # 收集边缘区域的灰色像素点
            edge_points = []
            for y in range(height):
                for x in range(width):
                    # 检查是否在边缘区域且为灰色像素（避免黑色文字）
                    if ((x < edge_margin_x or x > width - edge_margin_x or
                         y < edge_margin_y or y > height - edge_margin_y) and
                            img_array[y, x] > 80 and img_array[y, x] < 180 and  # 灰色范围
                            not text_mask[y, x]):  # 不在文字区域
                        edge_points.append([x, y])

            if len(edge_points) < 20:
                return np.zeros_like(img_array, dtype=bool)

            # 简单线性拟合
            slope, intercept = self.line_fit(edge_points)

            if slope is None or intercept is None:
                # 拟合失败，使用备用方法
                return self.detect_scan_lines_backup(img_array, text_mask, edge_margin_x, edge_margin_y)

            # 检查斜率是否符合扫描线特征（不是接近水平或垂直）
            if abs(slope) < 0.1 or abs(slope) > 10:
                # 斜率不符合要求，使用备用方法
                return self.detect_scan_lines_backup(img_array, text_mask, edge_margin_x, edge_margin_y)

            # 创建扫描线掩码
            scan_line_mask = np.zeros_like(img_array, dtype=bool)

            # 标记直线附近的点为扫描线（基于距离阈值）
            distance_threshold = 10  # 10像素范围内的点认为属于同一条线
            for point in edge_points:
                x, y = point
                distance = self.calculate_distance_to_line(point, slope, intercept)
                if distance <= distance_threshold:
                    # 检查该点是否为灰色且不在文字区域
                    if (img_array[y, x] > 80 and img_array[y, x] < 180 and
                            not text_mask[y, x]):
                        # 标记该点及其周围区域
                        for dy in range(-5, 6):
                            for dx in range(-5, 6):
                                ny, nx = y + dy, x + dx
                                if 0 <= ny < height and 0 <= nx < width:
                                    scan_line_mask[ny, nx] = True

            return scan_line_mask

        except Exception as e:
            print(f"高级扫描线检测出错: {e}")
            # 出错时使用备用方法
            try:
                height, width = img_array.shape
                edge_margin_x = max(1, width // 50)
                edge_margin_y = max(1, height // 50)
                return self.detect_scan_lines_backup(img_array, text_mask, edge_margin_x, edge_margin_y)
            except:
                return np.zeros_like(img_array, dtype=bool)

    def detect_scan_lines_backup(self, img_array, text_mask, edge_margin_x, edge_margin_y):
        """Fallback scan-line detector based on per-row / per-column gray counts.

        A pixel counts as gray when 80 < value < 180 and it is not text. Rows
        and columns holding more than 1.5 times the average number of gray
        pixels are treated as scan-line candidates, and the gray pixels on
        them are marked. Computed with whole-array numpy operations instead of
        per-pixel Python loops.

        Args:
            img_array: 2-D array of gray levels.
            text_mask: Boolean array of the same shape flagging text pixels.
            edge_margin_x: Unused; kept for interface compatibility.
            edge_margin_y: Unused; kept for interface compatibility.

        Returns:
            Boolean mask with the shape of ``img_array``; all False on error.
        """
        try:
            height, width = img_array.shape
            pixels = np.asarray(img_array)

            not_text = np.logical_not(np.asarray(text_mask, dtype=bool))
            gray = (pixels > 80) & (pixels < 180) & not_text

            # Gray-pixel count of every row and every column.
            row_counts = gray.sum(axis=1)
            col_counts = gray.sum(axis=0)

            avg_row_count = row_counts.mean() if height > 0 else 0
            avg_col_count = col_counts.mean() if width > 0 else 0

            # Rows / columns clearly above the average are line candidates.
            busy_rows = row_counts > avg_row_count * 1.5
            busy_cols = col_counts > avg_col_count * 1.5

            return gray & (busy_rows[:, np.newaxis] | busy_cols[np.newaxis, :])

        except Exception as e:
            print(f"备用扫描线检测出错: {e}")
            return np.zeros_like(img_array, dtype=bool)

    def remove_scan_lines(self, img_array, scan_line_mask, text_mask):
        """Paint over scan-line pixels with the median of their clean surroundings.

        For every masked pixel the pixels within 10 rows/columns that are
        neither scan line nor text are collected; with more than five of them
        their median replaces the pixel. Otherwise the search is repeated
        within 20 rows/columns, and the pixel is left untouched when that also
        yields too few values. Neighbourhoods are taken as array windows
        rather than per-pixel Python loops.

        Args:
            img_array: 2-D array of gray levels (not modified).
            scan_line_mask: Boolean array marking pixels to replace; assumed
                to have the shape of ``img_array``.
            text_mask: Boolean array marking text pixels; same shape assumed.

        Returns:
            A repaired copy of ``img_array``, or ``img_array`` itself on error.
        """
        try:
            result_array = img_array.copy()
            height, width = img_array.shape

            # Pixels usable as replacement sources: neither scan line nor text.
            usable = np.logical_not(np.logical_or(scan_line_mask, text_mask))

            for y, x in np.argwhere(scan_line_mask):
                # Try the near neighbourhood first, then a wider one.
                for radius in (10, 20):
                    window = (slice(max(0, y - radius), y + radius + 1),
                              slice(max(0, x - radius), x + radius + 1))
                    samples = img_array[window][usable[window]]
                    if samples.size > 5:
                        result_array[y, x] = np.median(samples)
                        break

            return result_array
        except Exception as e:
            print(f"扫描线去除出错: {e}")
            return img_array

    def detect_and_remove_scan_lines(self, image):
        """Detect slanted scan artefacts and paint them over.

        Detection runs on a grayscale copy. For a colour image only the
        repaired pixels are written back (as neutral gray) into an RGB copy of
        the original, so the remaining pixels keep their colour. Returning the
        grayscale working copy converted back to the original mode would strip
        all colour and leave nothing for the red-text enhancement that
        ``whiten_image`` performs afterwards.

        Args:
            image: PIL image in any mode.

        Returns:
            A new PIL image in the original mode, or ``image`` itself when an
            error occurs.
        """
        try:
            original_mode = image.mode

            # Work on a grayscale copy.
            gray_image = image.convert('L') if original_mode != 'L' else image.copy()
            img_array = np.array(gray_image)

            # Text areas are protected from the repair.
            text_mask = self.detect_text_regions(img_array)
            scan_line_mask = self.detect_scan_lines_advanced(img_array, text_mask)
            result_array = self.remove_scan_lines(img_array, scan_line_mask, text_mask)
            result_array = result_array.astype(np.uint8)

            if original_mode == 'L':
                return Image.fromarray(result_array)

            repaired = result_array != img_array
            if not repaired.any():
                # Nothing changed. Return a copy, never the input object, so
                # callers that edit the result in place do not alter their input.
                return image.copy()

            # Write only the repaired pixels into the colour data.
            rgb_array = np.array(image.convert('RGB'))
            rgb_array[repaired] = result_array[repaired][:, np.newaxis]
            result_image = Image.fromarray(rgb_array)

            if original_mode != 'RGB':
                result_image = result_image.convert(original_mode)

            return result_image

        except Exception as e:
            print(f"斜线检测和去除出错: {e}")
            return image  # fall back to the untouched input

    def remove_shadow_areas(self, image):
        """Turn large connected gray regions (shadows) white.

        Pixels whose luminance lies strictly between 100 and the configured
        gray threshold are grouped into connected regions; every region larger
        than one "average character cell" (1/30 of the width times 1/30 of the
        height) is painted white.

        Args:
            image: PIL image in any mode.

        Returns:
            A new PIL image in the original mode, or ``image`` itself when an
            error occurs.
        """
        try:
            original_mode = image.mode

            # Work on an RGB copy.
            rgb_image = image.convert('RGB') if original_mode != 'RGB' else image.copy()
            img_array = np.array(rgb_image)
            height, width, channels = img_array.shape

            # Size of an average character cell, assuming about 30 lines of
            # about 30 characters on the page.
            min_shadow_area = (width // 30) * (height // 30)

            # Luminance used for the gray test.
            gray_img = np.dot(img_array[..., :3], [0.2989, 0.5870, 0.1140])

            gray_threshold_val = self.gray_threshold.get()
            gray_areas = (gray_img > 100) & (gray_img < gray_threshold_val)

            labeled_array, num_features = self.label_connected_components(gray_areas.astype(int))

            # Size of every region in one pass instead of one full-image scan
            # per label. Label 0 is the background and never counts as shadow.
            region_sizes = np.bincount(labeled_array.ravel(), minlength=num_features + 1)
            is_shadow_label = region_sizes > min_shadow_area
            is_shadow_label[0] = False

            # No separate "contains black" test is needed: every pixel of a
            # region has luminance above 100 by construction, so a check for
            # values below 80 can never succeed.
            shadow_mask = is_shadow_label[labeled_array]

            # Paint the shadow pixels white.
            img_array[shadow_mask] = 255

            result_image = Image.fromarray(img_array.astype(np.uint8))

            # Restore the original mode.
            if result_image.mode != original_mode:
                result_image = result_image.convert(original_mode)

            return result_image

        except Exception as e:
            print(f"阴影去除出错: {e}")
            return image  # fall back to the untouched input

    def label_connected_components(self, binary_array):
        """标记连通区域"""
        try:
            from scipy.ndimage import label
            structure = np.ones((3, 3), dtype=int)  # 8连通
            labeled_array, num_features = label(binary_array, structure)
            return labeled_array, num_features
        except ImportError:
            # 如果没有scipy，使用简单的4连通标记
            return self.simple_label_connected_components(binary_array)

    def simple_label_connected_components(self, binary_array):
        """简单的连通区域标记"""
        labeled = np.zeros_like(binary_array, dtype=int)
        label_count = 0
        height, width = binary_array.shape

        # 第一次扫描：标记初步的区域
        for y in range(height):
            for x in range(width):
                if binary_array[y, x] and labeled[y, x] == 0:
                    label_count += 1
                    self.flood_fill(labeled, binary_array, y, x, label_count)

        return labeled, label_count

    def flood_fill(self, labeled, binary_array, y, x, label_value):
        """Label the 4-connected region containing ``(y, x)`` in place.

        Iterative flood fill with an explicit stack: every reachable pixel
        that is set in ``binary_array`` and still 0 in ``labeled`` receives
        ``label_value``.
        """
        rows, cols = binary_array.shape
        pending = [(y, x)]

        while pending:
            row, col = pending.pop()

            if not (0 <= row < rows and 0 <= col < cols):
                continue  # outside the image
            if not binary_array[row, col] or labeled[row, col] != 0:
                continue  # background, or already labelled

            labeled[row, col] = label_value
            # Visit the four direct neighbours (up, down, left, right).
            pending.extend((
                (row - 1, col),
                (row + 1, col),
                (row, col - 1),
                (row, col + 1),
            ))

    def whiten_image(self, image):
        """Clean up a text-heavy scan: white background, crisp black/red text.

        Pipeline:
          1. ``detect_and_remove_scan_lines`` and, when the shadow option is
             ticked, ``remove_shadow_areas`` (both defined elsewhere in this
             class).
          2. Per-pixel classification on the RGB data, first match wins:
             * light and nearly colourless (brightness above the configured
               gray threshold, channel differences < 30) -> pure white;
             * red text (r > 120, g < 100, b < 100, r leads g and b by more
               than 20) -> red +30, green/blue -20, clamped to 0..255;
             * dark and nearly colourless (brightness < 80) -> pure black.
          3. Light unsharp mask, then a 3x3 median filter.
          4. Conversion back to the mode the image came in with.

        The classification is vectorised with NumPy instead of a Python loop
        over every pixel, and works on a separate array, so the image passed
        in is not modified by this step.

        Args:
            image: ``PIL.Image.Image`` to process.

        Returns:
            The processed image, or the unmodified input image if any step
            fails (the error is printed, never raised).
        """
        # Bound before the try block so the error path always has something to
        # return, even if the very first attribute access fails.
        original_image = image
        try:
            original_mode = image.mode
            # Private copy: the helper steps below may edit ``image``.
            original_image = image.copy()

            image = self.detect_and_remove_scan_lines(image)

            if self.remove_shadow_option.get():
                image = self.remove_shadow_areas(image)

            if image.mode != 'RGB':
                image = image.convert('RGB')

            gray_threshold_val = self.gray_threshold.get()

            # int16 so that channel sums and differences cannot wrap around
            # the way uint8 arithmetic would.
            rgb = np.asarray(image).astype(np.int16)
            red, green, blue = rgb[..., 0], rgb[..., 1], rgb[..., 2]
            brightness = (red + green + blue) / 3

            near_gray = (
                (np.abs(red - green) < 30)
                & (np.abs(green - blue) < 30)
                & (np.abs(red - blue) < 30)
            )
            to_white = (brightness > gray_threshold_val) & near_gray
            reddish = (
                ~to_white
                & (red > 120) & (green < 100) & (blue < 100)
                & (red - green > 20) & (red - blue > 20)
            )
            to_black = ~to_white & ~reddish & (brightness < 80) & near_gray

            cleaned = rgb.copy()
            cleaned[to_white] = 255
            cleaned[to_black] = 0
            # Boost saturation of red text; the clip below applies the
            # min(255, ...) / max(0, ...) limits.
            cleaned[reddish] += np.array([30, -20, -20], dtype=np.int16)
            np.clip(cleaned, 0, 255, out=cleaned)

            whitened = Image.fromarray(cleaned.astype(np.uint8))
            # Carry the metadata dictionary over to the rebuilt image.
            whitened.info = dict(image.info)
            image = whitened

            # Slight sharpening to firm up glyph edges.
            image = image.filter(ImageFilter.UnsharpMask(radius=1, percent=50, threshold=3))

            # Median filter to drop isolated speckles.
            image = image.filter(ImageFilter.MedianFilter(size=3))

            # Hand back the same mode the caller gave us.
            if original_mode != 'RGB':
                image = image.convert(original_mode)

            return image

        except Exception as e:
            # Best-effort step: report and fall back to the untouched input.
            print(f"刷白处理出错: {e}")
            return original_image

    def rotate_image(self, image_path, mode, rotation_mode):
        """Rotate one image file and save the result according to ``mode``.

        Steps:
          1. Load the file fully and release the file handle.
          2. Pick the rotation angle: from ``detect_text_orientation_advanced``
             when ``rotation_mode == "ocr"`` and OCR is available, otherwise
             from the fixed-angle checkboxes.  If OCR detection raises, the
             file name is recorded in ``self.unrecognized_files`` (once) and
             the image is kept unrotated.
          3. Rotate (micro-rotation when one of those options is ticked,
             otherwise an expanding rotation); an angle of 0 skips rotation.
          4. Apply ``whiten_image`` once, to the final image, when the whiten
             option is ticked.
          5. Save through ``save_rotated_image``.

        Args:
            image_path: Path of the image to process.
            mode: Save mode forwarded to ``save_rotated_image``.
            rotation_mode: ``"ocr"`` for OCR-based detection; any other value
                uses the fixed-angle checkboxes.

        Returns:
            None.  Errors are printed, never raised.
        """
        try:
            # copy() forces the pixel data to load, so the file can be closed
            # before it is possibly overwritten.
            with Image.open(image_path) as source:
                original_format = source.format
                original_mode = source.mode
                original_image = source.copy()
            print(f"原始图片格式: {original_format}")

            if rotation_mode == "ocr" and self.ocr_available:
                # Only the detection call is guarded here: rotation or save
                # problems are not OCR failures and go to the outer handler.
                try:
                    rotation_angle = self.detect_text_orientation_advanced(image_path)
                except Exception as e:
                    print(f"OCR识别失败 {image_path}: {str(e)}")
                    file_name = os.path.basename(image_path)
                    # The detector may already have recorded this file.
                    if file_name not in self.unrecognized_files:
                        self.unrecognized_files.append(file_name)
                    rotation_angle = 0  # still save the (optionally whitened) image
            else:
                rotation_angle = self._fixed_rotation_angle()

            if rotation_angle == 0:
                rotated_image = original_image
            elif self.micro_rotation_center.get() or self.micro_rotation_near.get():
                rotated_image = self.micro_rotate_image(original_image, rotation_angle)
            else:
                rotated_image = original_image.rotate(rotation_angle, expand=True,
                                                      resample=Image.Resampling.LANCZOS)

            # Whitening is the expensive step, so run it exactly once, on the
            # image that is actually going to be saved.
            if self.whiten_option.get():
                rotated_image = self.whiten_image(rotated_image)

            self.save_rotated_image(rotated_image, image_path, mode, original_format, original_mode)

        except Exception as e:
            print(f"处理图片 {image_path} 时出错: {str(e)}")

    def _fixed_rotation_angle(self):
        """Combine the ticked fixed-angle checkboxes into one rotation angle.

        The 90 box contributes -90, the 180 box +180 and the 270 box +90; the
        sum is normalised into the range (-180, 180].
        """
        rotation_angle = 0
        if self.angle90_var.get():
            rotation_angle -= 90
        if self.angle180_var.get():
            rotation_angle += 180
        if self.angle270_var.get():
            rotation_angle += 90

        # Python's % already yields 0..359, so only the upper half needs folding.
        rotation_angle %= 360
        if rotation_angle > 180:
            rotation_angle -= 360
        return rotation_angle

    def micro_rotate_image(self, image, angle):
        """Rotate ``image`` by ``angle`` for the micro-rotation options.

        With either micro-rotation checkbox ticked the image is turned about
        its geometric centre using bicubic resampling and keeps its canvas
        size.  The "near" variant currently uses the same centre pivot as the
        "center" variant.  With neither ticked, an expanding Lanczos rotation
        is used instead.

        Args:
            image: ``PIL.Image.Image`` to rotate.
            angle: Angle handed to ``Image.rotate``.

        Returns:
            The rotated image.
        """
        width, height = image.size

        if not (self.micro_rotation_center.get() or self.micro_rotation_near.get()):
            # Neither micro option selected: plain rotation with canvas growth.
            return image.rotate(angle, expand=True, resample=Image.Resampling.LANCZOS)

        pivot = (width / 2, height / 2)
        return image.rotate(angle, center=pivot, resample=Image.Resampling.BICUBIC)

    def save_original_image(self, image_path):
        """保存原图到rotation文件夹下的original子文件夹"""
        try:
            # 创建original子文件夹
            original_folder = os.path.join(self.selected_path, "rotation", "未识别")
            if not os.path.exists(original_folder):
                os.makedirs(original_folder)

            # 构造保存路径
            save_path = os.path.join(original_folder, os.path.basename(image_path))

            # 如果原图已存在，则添加序号
            if os.path.exists(save_path):
                name, ext = os.path.splitext(os.path.basename(image_path))
                counter = 1
                while os.path.exists(os.path.join(original_folder, f"{name}_{counter}{ext}")):
                    counter += 1
                save_path = os.path.join(original_folder, f"{name}_{counter}{ext}")

            # 复制原图到目标位置
            with Image.open(image_path) as img:
                img.save(save_path)

        except Exception as e:
            print(f"保存原图 {image_path} 时出错: {str(e)}")

    def save_rotated_image(self, rotated_image, original_path, mode, original_format, original_mode):
        """保存旋转后的图片"""
        if mode == "save_to_folder":
            # 另存到rotation文件夹
            rotation_folder = os.path.join(self.selected_path, "rotation")
            if not os.path.exists(rotation_folder):
                os.makedirs(rotation_folder)
            save_path = os.path.join(rotation_folder, os.path.basename(original_path))
            self.save_image_with_quality(rotated_image, save_path, original_format, original_path, original_mode)

        elif mode == "overwrite":
            # 覆盖原文件
            self.save_image_with_quality(rotated_image, original_path, original_format, original_path, original_mode)

        elif mode == "add_suffix":
            # 加扩展名另存
            name, ext = os.path.splitext(original_path)
            save_path = f"{name}_rotated{ext}"
            self.save_image_with_quality(rotated_image, save_path, original_format, original_path, original_mode)

    def save_image_with_quality(self, image, save_path, original_format, original_path, original_mode):
        """Save ``image`` with settings chosen from the source file.

        The source file is inspected for its size, pixel mode and metadata:

        * The image is converted back to the source file's mode, except that a
          colour (RGB/RGBA) result for a grayscale source stays in colour.
        * ``.jpg`` / ``.jpeg`` sources are written as optimised JPEG with a
          quality picked from the source size (>5 MB: 75, >3 MB: 80,
          >1 MB: 85, otherwise 90) and the source's ``icc_profile`` / ``exif``
          entries when present.
        * ``.png`` sources are written as optimised PNG.
        * Everything else is saved with Pillow's defaults.

        If any of that fails, the error is printed and a plain
        ``image.save(save_path)`` is attempted instead.

        Args:
            image: Image to write.
            save_path: Destination path.
            original_format: Not used by this method.
            original_path: Source file the settings are derived from.
            original_mode: Not used by this method; the mode is re-read from
                the source file.
        """
        try:
            source_bytes = os.path.getsize(original_path)

            with Image.open(original_path) as source:
                source_info = source.info
                source_mode = source.mode

            # A grayscale source whose result became colour keeps its colour;
            # every other mode mismatch is converted back.
            keep_colour = source_mode == 'L' and image.mode in ['RGB', 'RGBA']
            if image.mode != source_mode and not keep_colour:
                image = image.convert(source_mode)

            extension = os.path.splitext(original_path)[1].lower()

            if extension == '.png':
                image.save(save_path, 'PNG', optimize=True)
            elif extension in ('.jpg', '.jpeg'):
                # Larger sources get a lower quality so the output size stays
                # in the same ballpark.
                megabyte = 1024 * 1024
                quality = 90
                for floor_mb, candidate in ((5, 75), (3, 80), (1, 85)):
                    if source_bytes > floor_mb * megabyte:
                        quality = candidate
                        break

                jpeg_options = {
                    'format': 'JPEG',
                    'quality': quality,
                    'optimize': True,
                    'subsampling': 1,
                }
                # Carry over the metadata that matters, when the source has it.
                jpeg_options.update(
                    (key, source_info[key])
                    for key in ('icc_profile', 'exif')
                    if key in source_info
                )
                image.save(save_path, **jpeg_options)
            else:
                image.save(save_path)

        except Exception as e:
            print(f"保存出错，使用默认方法: {e}")
            image.save(save_path)

    def detect_text_orientation_advanced(self, image_path):
        """Detect the text orientation of an image via Tesseract OSD.

        Tesseract's ``Rotate:`` field is the *clockwise* rotation needed to
        bring the page upright, whereas ``PIL.Image.Image.rotate`` turns an
        image *counter-clockwise*.  The value is therefore converted with
        ``(360 - degrees) % 360`` so the result can be handed straight to
        ``Image.rotate`` (0 and 180 are unaffected; 90 and 270 swap).

        The primary attempt runs OSD on the file itself.  If that raises, a
        second attempt goes through ``_get_orientation_with_confidence``,
        which runs OSD on a downscaled grayscale copy, so the retry is not
        just a repeat of the call that has already failed.

        Args:
            image_path: Path of the image to analyse.

        Returns:
            Counter-clockwise angle in degrees (0, 90, 180 or 270 for the
            values Tesseract reports); 0 when the OSD text has no ``Rotate:``
            entry.

        Raises:
            Exception: Whatever the fallback attempt raised, after the file
                name has been recorded in ``self.unrecognized_files``.
        """
        try:
            osd = pytesseract.image_to_osd(image_path)
            print(f"OSD结果: {osd}")

            found = re.search(r'Rotate: (\d+)', osd)
            degrees = int(found.group(1)) if found else 0
            print(f"OCR检测到的角度: {degrees}度")

        except Exception as e:
            print(f"OCR检测主方法失败: {e}")
            try:
                # Fallback: OSD on a preprocessed (downscaled, grayscale) copy.
                degrees, _confidence = self._get_orientation_with_confidence(image_path)
                print(f"备用方法检测到的角度: {degrees}度")
            except Exception as e2:
                file_name = os.path.basename(image_path)
                # Callers may record the failure as well; keep the list free
                # of duplicates.
                if file_name not in self.unrecognized_files:
                    self.unrecognized_files.append(file_name)
                print(f"OCR检测失败 {image_path}: {str(e2)}")
                raise  # bare raise keeps the original traceback

        # Clockwise (Tesseract) -> counter-clockwise (PIL).
        return (360 - degrees) % 360

    def _get_orientation_with_confidence(self, image_path):
        """Run Tesseract OSD on a downscaled grayscale copy of the image.

        The copy is shrunk to fit within 1000x1000 pixels and converted to
        mode ``L`` before being passed to ``image_to_osd`` with ``--psm 0``.

        Args:
            image_path: Path of the image to analyse.

        Returns:
            ``(degrees, confidence)``: the ``Rotate`` value and the
            ``Orientation confidence`` value from the OSD text, each 0 when
            its line is missing.
        """
        with Image.open(image_path) as source:
            preview = source.copy()
            preview.thumbnail((1000, 1000), Image.Resampling.LANCZOS)
            if preview.mode != 'L':
                preview = preview.convert('L')

        osd = pytesseract.image_to_osd(preview, config='--psm 0')

        # Marker -> converter, checked in this order; a line is consumed by
        # the first marker it contains.
        fields = (('Rotate: ', int), ('Orientation confidence: ', float))
        readings = {'Rotate: ': 0, 'Orientation confidence: ': 0}

        for line in osd.split('\n'):
            for marker, convert in fields:
                if marker in line:
                    readings[marker] = convert(line.split(': ')[1])
                    break

        return readings['Rotate: '], readings['Orientation confidence: ']

    def _enhanced_orientation_detection(self, image_path):
        """Run Tesseract OSD on a contrast-boosted preview of the image.

        The preview is a copy shrunk to fit within 1000x1000 pixels, converted
        to grayscale and contrast-enhanced by a factor of 1.3.

        Args:
            image_path: Path of the image to analyse.

        Returns:
            ``(degrees, confidence)`` parsed from the OSD text (each 0 when
            its line is missing), or ``(0, 0)`` if OSD itself fails.

        Raises:
            Exception: Errors while opening or preprocessing the image are not
                caught here; only the OSD step is best-effort.
        """
        with Image.open(image_path) as image:
            enhanced_image = image.copy()
            enhanced_image.thumbnail((1000, 1000), Image.Resampling.LANCZOS)
            if enhanced_image.mode != 'L':
                enhanced_image = enhanced_image.convert('L')
            enhancer = ImageEnhance.Contrast(enhanced_image)
            enhanced_image = enhancer.enhance(1.3)

        try:
            osd = pytesseract.image_to_osd(enhanced_image, config='--psm 0')
            lines = osd.split('\n')

            degrees = 0
            confidence = 0

            for line in lines:
                if 'Rotate: ' in line:
                    degrees = int(line.split(': ')[1])
                elif 'Orientation confidence: ' in line:
                    confidence = float(line.split(': ')[1])

            return degrees, confidence
        except Exception as e:
            # Best-effort: report why OSD failed instead of hiding it, and do
            # not swallow KeyboardInterrupt / SystemExit as a bare except would.
            print(f"增强方向检测失败: {e}")
            return 0, 0

    def _verify_orientation_result(self, image_path, angle):
        """Check a proposed rotation by re-running OSD on the rotated image.

        A copy of the image is rotated by ``angle`` with ``Image.rotate``,
        shrunk to fit within 500x500 pixels, converted to grayscale and passed
        to Tesseract OSD.

        Args:
            image_path: Path of the image to check.
            angle: Rotation angle to try.

        Returns:
            True when the first ``Rotate`` value reported for the rotated copy
            is smaller than 90 in magnitude.  Also True when OSD reports no
            ``Rotate`` line or when anything fails, so an inconclusive check
            never vetoes the rotation.
        """
        try:
            with Image.open(image_path) as image:
                test_image = image.copy()
                test_image = test_image.rotate(angle, expand=True, resample=Image.Resampling.LANCZOS)

                # A small preview is enough for orientation and much faster.
                test_image.thumbnail((500, 500), Image.Resampling.LANCZOS)
                if test_image.mode != 'L':
                    test_image = test_image.convert('L')

                osd = pytesseract.image_to_osd(test_image, config='--psm 0')
                lines = osd.split('\n')

                for line in lines:
                    if 'Rotate: ' in line:
                        result_angle = int(line.split(': ')[1])
                        # An upright page should need (close to) no rotation.
                        return abs(result_angle) < 90

            return True  # no Rotate line: treat as inconclusive
        except Exception as e:
            # Inconclusive rather than failed; KeyboardInterrupt / SystemExit
            # are no longer swallowed as they were by the bare except.
            print(f"方向验证失败: {e}")
            return True

    def show_unrecognized_files(self):
        """在界面显示无法识别的文件"""
        if self.unrecognized_files:
            self.unrecognized_text.delete(1.0, tk.END)
            for file in self.unrecognized_files:
                self.unrecognized_text.insert(tk.END, file + "\n")


if __name__ == "__main__":
    # Script entry point: create the Tk root window, build the application on
    # top of it, then hand control to the Tk event loop.
    root = tk.Tk()
    app = ImageBatchRotationApp(root)
    root.mainloop()
2048 AI社区
有“AI”的1024 = 2048，欢迎大家加入2048 AI社区
更多推荐
Expect脚本实战：多条件匹配与防卡死技巧
在自动化测试和系统初始化中，我们常常使用 Expect 脚本来完成一系列交互操作，比如自动输入密码、监听输出并作出响应。然而，实际项目中可能遇到多步骤输出匹配的场景：只有在先匹配到某一行输出，再匹配到另一行输出时，才需要做出响应。同时，如果 “testX failed” 出现了，但 “set params error” 没有在合理时间内出现，不要卡死，而是打印警告并继续。这个写法的问题在于：如
2048 AI社区
AI模型版本控制的标签管理：架构师的技巧
在AI模型生命周期中，版本控制是保障模型可追溯性、协作效率与生产可靠性的核心环节。而标签管理作为版本控制的"语义接口"，其设计质量直接决定了团队对模型版本的理解、检索与复用能力。本文从架构师视角出发，结合第一性原理与MLOps实践，系统阐述AI模型标签管理的理论框架、架构设计、实现机制与高级考量。通过拆解标签的"唯一标识+语义描述"本质，提出四维标签模型。