Win10程序（二）OCR识别简单程序

rotation_label = tk.Label(main_frame, text="旋转模式:", bg="#ffffcc", font=("fangsong ti", 12))path_label = tk.Label(main_frame, text="图片路径:", bg="#ffffcc", font=("fangsong ti", 12))select_button = tk.But
hnxaoli

904人浏览 · 2025-08-30 10:15:26
hnxaoli · 2025-08-30 10:15:26 发布
'''
这是为了方便使用统信系统自制的小程序。
通过pytesseract模块识别图片中的几个字，来猜测图片的正确方向。然后旋转。
访问下边网址，先下载Tesseract-OCR，并安装在默认路径；如果改变了安装路径，代码中 os.environ 的 TESSDATA_PREFIX 路径也要相应修改。
Index of /tesseract
这个图片批量旋转的python程序，支持jpg、jpeg、png、bmp、tiff等格式，通过tkinter输入参数，界面尺寸300*600，底色淡黄色，一键处理的按键是糖果绿色，字体：fangsong ti，字号12号。
功能1：可打开文件夹自动识别所有图片、也可手动对图片批量选择后，自动逐个识别图片文件，可手动选择另存、覆盖、加扩展名3种选项，默认另存到rotation文件夹。可勾选旋转90、180、270度、OCR旋转，默认选择OCR旋转。
功能2：点击识别旋转后，如果选择了另存，则逐个对每个图片执行OCR，在识别到至少3个汉字或者英文、数值的方向后，将图片旋转到其向上的方向，如果有几个文件OCR识别不出则在界面显示，随后逐个按照其原名保存到旋转文件夹，遇到无需旋转的也把源文件复制进rotation文件夹。如果选择了覆盖则替换原文件，如果选择了加扩展名，则在原文件夹将源文件旋转后增加后缀另存，在界面中提示保存的文件数。
功能3：点击停止，则结束正在进行的操作。
'''
# -*- coding: utf-8 -*-
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
from PIL import Image
import pytesseract
import re
import threading
# Point Tesseract at its language-data directory (adjust to the actual install location).
os.environ['TESSDATA_PREFIX'] = 'C:\\Program Files\\Tesseract-OCR\\tessdata\\'  # Linux alternative: '/usr/share/tesseract-ocr/4.00/tessdata/'

# Another possible Linux setting: os.environ['TESSDATA_PREFIX'] = '/usr/share/tessdata/'

class ImageBatchRotationApp:
    def __init__(self, root):
        self.root = root
        self.root.title("图片批量ocr旋转")
        self.root.geometry("300x600")
        self.root.configure(bg="#ffffcc")  # 淡黄色背景

        # 设置默认打开路径
        self.set_default_path()
        #self.default_path = "/home/huanghe/Desktop/"
        self.selected_path = self.default_path
        self.selected_images = []
        self.save_mode = tk.StringVar(value="save_to_folder")  # 默认另存到rotation文件夹
        self.rotation_mode = tk.StringVar(value="ocr")  # 默认OCR旋转
        self.running = False
        self.unrecognized_files = []  # 无法识别的文件列表
        self.processed_count = 0  # 已处理文件数
        self.ocr_available = self.check_ocr_availability()  # 检查OCR是否可用

        # 创建界面组件
        self.create_widgets()
    def set_default_path(self):
        # 检查操作系统并设置默认路径
        if os.name == 'posix':  # Linux
            self.default_path = "/home/huanghe/Desktop/"
        else:  # Windows
            self.default_path = "C:\\Users\\1\\Desktop\\"

        # 如果路径不存在，使用当前目录
        if not os.path.exists(self.default_path):
            self.default_path = "./"
    def check_ocr_availability(self):
        """检查OCR是否可用"""
        try:
            # 尝试执行一个简单的OCR操作
            pytesseract.get_tesseract_version()
            return True
        except Exception as e:
            print(f"OCR不可用: {e}")
            return False

    def create_widgets(self):
        # 主框架
        main_frame = tk.Frame(self.root, bg="#ffffcc")
        main_frame.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)

        # 路径选择
        path_label = tk.Label(main_frame, text="图片路径:", bg="#ffffcc", font=("fangsong ti", 12))
        path_label.grid(row=0, column=0, sticky="w", pady=5)

        path_frame = tk.Frame(main_frame, bg="#ffffcc")
        path_frame.grid(row=1, column=0, columnspan=2, sticky="ew", pady=5)
        main_frame.columnconfigure(1, weight=1)

        self.path_entry = tk.Entry(path_frame, font=("fangsong ti", 12))
        self.path_entry.insert(0, self.default_path)
        self.path_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)

        browse_button = tk.Button(path_frame, text="浏览", font=("fangsong ti", 12),
                                  command=self.browse_path)
        browse_button.pack(side=tk.RIGHT, padx=(5, 0))

        # 打开文件夹按钮
        open_folder_button = tk.Button(main_frame, text="打开文件夹", font=("fangsong ti", 12),
                                       command=self.open_folder)
        open_folder_button.grid(row=2, column=0, columnspan=2, pady=5)

        # 图片选择按钮
        select_button = tk.Button(main_frame, text="选择图片", font=("fangsong ti", 12),
                                  command=self.select_images)
        select_button.grid(row=3, column=0, columnspan=2, pady=5)

        # 保存模式选择
        mode_label = tk.Label(main_frame, text="保存模式:", bg="#ffffcc", font=("fangsong ti", 12))
        mode_label.grid(row=4, column=0, sticky="w", pady=5)

        mode_frame = tk.Frame(main_frame, bg="#ffffcc")
        mode_frame.grid(row=5, column=0, columnspan=2, sticky="w", pady=5)

        save_radio = tk.Radiobutton(mode_frame, text="另存到rotation文件夹",
                                    variable=self.save_mode, value="save_to_folder",
                                    bg="#ffffcc", font=("fangsong ti", 12))
        save_radio.pack(anchor="w")

        overwrite_radio = tk.Radiobutton(mode_frame, text="覆盖原文件",
                                         variable=self.save_mode, value="overwrite",
                                         bg="#ffffcc", font=("fangsong ti", 12))
        overwrite_radio.pack(anchor="w")

        suffix_radio = tk.Radiobutton(mode_frame, text="加扩展名另存",
                                      variable=self.save_mode, value="add_suffix",
                                      bg="#ffffcc", font=("fangsong ti", 12))
        suffix_radio.pack(anchor="w")

        # 旋转模式选择
        rotation_label = tk.Label(main_frame, text="旋转模式:", bg="#ffffcc", font=("fangsong ti", 12))
        rotation_label.grid(row=6, column=0, sticky="w", pady=5)

        rotation_frame = tk.Frame(main_frame, bg="#ffffcc")
        rotation_frame.grid(row=7, column=0, columnspan=2, sticky="w", pady=5)

        # OCR旋转勾选框
        self.ocr_check = tk.Checkbutton(rotation_frame, text="OCR旋转",
                                        variable=self.rotation_mode, onvalue="ocr", offvalue="fixed",
                                        bg="#ffffcc", font=("fangsong ti", 12),
                                        command=self.on_rotation_mode_change)
        self.ocr_check.pack(anchor="w")

        # 根据OCR可用性设置默认选项
        if self.ocr_available:
            self.ocr_check.select()  # 默认选中
        else:
            self.ocr_check.config(state=tk.DISABLED)
            self.rotation_mode.set("fixed")
            # 显示OCR不可用提示
            ocr_warning = tk.Label(rotation_frame, text="OCR不可用，请安装tesseract-ocr",
                                   bg="#ffffcc", fg="red", font=("fangsong ti", 10))
            ocr_warning.pack(anchor="w")

        # 固定角度勾选框
        angle_frame = tk.Frame(rotation_frame, bg="#ffffcc")
        angle_frame.pack(anchor="w")

        # 为每个角度创建独立的变量
        self.angle90_var = tk.BooleanVar()
        self.angle180_var = tk.BooleanVar()
        self.angle270_var = tk.BooleanVar()

        self.angle90_check = tk.Checkbutton(angle_frame, text="90度",
                                            variable=self.angle90_var,
                                            bg="#ffffcc", font=("fangsong ti", 12),
                                            command=self.on_angle_change)
        self.angle90_check.pack(side=tk.LEFT)

        self.angle180_check = tk.Checkbutton(angle_frame, text="180度",
                                             variable=self.angle180_var,
                                             bg="#ffffcc", font=("fangsong ti", 12),
                                             command=self.on_angle_change)
        self.angle180_check.pack(side=tk.LEFT, padx=(10, 0))

        self.angle270_check = tk.Checkbutton(angle_frame, text="270度",
                                             variable=self.angle270_var,
                                             bg="#ffffcc", font=("fangsong ti", 12),
                                             command=self.on_angle_change)
        self.angle270_check.pack(side=tk.LEFT, padx=(10, 0))

        # 控制按钮框架
        button_frame = tk.Frame(main_frame, bg="#ffffcc")
        button_frame.grid(row=8, column=0, columnspan=2, pady=10)

        # 一键处理按钮 (糖果绿色)
        process_button = tk.Button(button_frame, text="一键处理", bg="#90ee90",
                                   font=("fangsong ti", 12), command=self.start_processing)
        process_button.pack(side=tk.LEFT, padx=10)

        # 停止按钮
        stop_button = tk.Button(button_frame, text="停止", font=("fangsong ti", 12),
                                command=self.stop_processing)
        stop_button.pack(side=tk.LEFT, padx=10)

        # 无法识别文件显示框
        unrecognized_frame = tk.Frame(main_frame, bg="#ffffcc")
        unrecognized_frame.grid(row=9, column=0, columnspan=2, pady=5, sticky="ew")

        unrecognized_label = tk.Label(unrecognized_frame, text="无法识别:", bg="#ffffcc",
                                      font=("fangsong ti", 12))
        unrecognized_label.pack(anchor="w")

        self.unrecognized_text = tk.Text(unrecognized_frame, height=6, font=("fangsong ti", 10))
        self.unrecognized_text.pack(fill=tk.X, expand=True)

        # 处理结果标签
        self.result_label = tk.Label(main_frame, text="", bg="#ffffcc", font=("fangsong ti", 12))
        self.result_label.grid(row=10, column=0, columnspan=2, pady=5)

    def browse_path(self):
        """浏览选择路径"""
        path = filedialog.askdirectory(initialdir=self.selected_path)
        if path:
            self.selected_path = path
            self.path_entry.delete(0, tk.END)
            self.path_entry.insert(0, path)

    def open_folder(self):
        """打开文件夹并自动识别图片"""
        if not os.path.exists(self.selected_path):
            messagebox.showerror("错误", "指定的路径不存在")
            return

        self.selected_images = []
        image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
        for file in os.listdir(self.selected_path):
            if file.lower().endswith(image_extensions):
                self.selected_images.append(os.path.join(self.selected_path, file))

        messagebox.showinfo("提示", f"自动找到 {len(self.selected_images)} 张图片")

    def select_images(self):
        """手动选择图片"""
        files = filedialog.askopenfilenames(
            initialdir=self.selected_path,
            filetypes=[("Image files", "*.jpg *.jpeg *.png *.bmp *.tiff")]
        )
        if files:
            self.selected_images = list(files)
            messagebox.showinfo("提示", f"已选择 {len(self.selected_images)} 张图片")

    def on_rotation_mode_change(self):
        """当旋转模式改变时调用"""
        pass  # 可以在这里添加逻辑

    def on_angle_change(self):
        """当角度勾选框改变时调用"""
        # 如果勾选了任何角度，则自动取消OCR旋转
        if (self.angle90_var.get() or
                self.angle180_var.get() or
                self.angle270_var.get()):
            self.rotation_mode.set("fixed")
        # 如果没有勾选任何角度，则自动选中OCR旋转（如果OCR可用）
        elif (not self.angle90_var.get() and
              not self.angle180_var.get() and
              not self.angle270_var.get()):
            if self.ocr_available:
                self.rotation_mode.set("ocr")

    def start_processing(self):
        """开始处理图片"""
        if not self.running:
            self.running = True
            self.unrecognized_files = []
            self.processed_count = 0
            self.unrecognized_text.delete(1.0, tk.END)
            self.result_label.config(text="")
            threading.Thread(target=self.process_images, daemon=True).start()

    def stop_processing(self):
        """Request cancellation by clearing the flag the worker loop checks before each image."""
        self.running = False

    def process_images(self):
        """处理图片的线程函数"""
        try:
            # 如果没有图片，则自动查找
            if not self.selected_images:
                self.root.after(0, lambda: messagebox.showwarning("警告", "未找到任何图片文件"))
                self.running = False
                return

            # 根据保存模式处理图片
            mode = self.save_mode.get()
            rotation_mode = self.rotation_mode.get()

            for image_path in self.selected_images:
                if not self.running:
                    break

                self.rotate_image(image_path, mode, rotation_mode)
                self.processed_count += 1

            # 显示无法识别的文件
            if self.unrecognized_files:
                self.root.after(0, self.show_unrecognized_files)

            if self.running:
                self.root.after(0, lambda: self.result_label.config(text=f"已保存 {self.processed_count} 个文件"))
                self.root.after(0, lambda: messagebox.showinfo("完成", "图片处理完成"))
            else:
                self.root.after(0, lambda: messagebox.showinfo("提示", "操作已停止"))

        except Exception as e:
            self.root.after(0, lambda: messagebox.showerror("错误", f"处理过程中出现错误: {str(e)}"))
        finally:
            self.running = False
            self.selected_images = []

    def rotate_image(self, image_path, mode, rotation_mode):
        """旋转单张图片 - 优化版"""
        try:
            # 打开图片
            image = Image.open(image_path)

            # 获取原始图片格式和质量信息
            original_format = image.format
            print(f"原始图片格式: {original_format}")

            # 根据旋转模式确定旋转角度
            if rotation_mode == "ocr" and self.ocr_available:
                rotation_angle = self.detect_text_orientation_advanced(image_path)
            else:
                # 固定角度旋转，根据勾选的角度计算
                rotation_angle = 0
                if self.angle90_var.get():
                    rotation_angle -= 90
                if self.angle180_var.get():
                    rotation_angle += 180
                if self.angle270_var.get():
                    rotation_angle += 90
                # 规范化角度到-360到360之间
                rotation_angle = rotation_angle % 360
                if rotation_angle > 180:
                    rotation_angle -= 360
                elif rotation_angle < -180:
                    rotation_angle += 360

            # 旋转图片
            if rotation_angle != 0:
                # 使用高质量的重采样算法
                rotated_image = image.rotate(rotation_angle, expand=True, resample=Image.Resampling.LANCZOS)
            else:
                rotated_image = image

            # 根据保存模式保存图片，保持原始质量
            if mode == "save_to_folder":
                # 另存到rotation文件夹
                rotation_folder = os.path.join(self.selected_path, "rotation")
                if not os.path.exists(rotation_folder):
                    os.makedirs(rotation_folder)
                save_path = os.path.join(rotation_folder, os.path.basename(image_path))
                self.save_image_with_quality(rotated_image, save_path, original_format, image_path)

            elif mode == "overwrite":
                # 覆盖原文件
                self.save_image_with_quality(rotated_image, image_path, original_format, image_path)

            elif mode == "add_suffix":
                # 加扩展名另存
                name, ext = os.path.splitext(image_path)
                save_path = f"{name}_rotated{ext}"
                self.save_image_with_quality(rotated_image, save_path, original_format, image_path)

        except Exception as e:
            print(f"处理图片 {image_path} 时出错: {str(e)}")

    def save_image_with_quality(self, image, save_path, original_format, original_path):
        """以实用方式保存图片，尽量维持原始大小"""
        try:
            # 获取原始文件大小
            original_size = os.path.getsize(original_path)

            # 打开原始图片获取信息
            with Image.open(original_path) as original_image:
                original_info = original_image.info
                original_mode = original_image.mode

            # 确保处理后的图片模式与原始图片一致
            if image.mode != original_mode:
                image = image.convert(original_mode)

            # 根据文件扩展名确定保存格式
            _, ext = os.path.splitext(original_path)
            ext = ext.lower()

            if ext in ['.jpg', '.jpeg']:
                # 根据原始文件大小选择质量
                if original_size > 5 * 1024 * 1024:  # >5MB
                    quality = 75
                elif original_size > 3 * 1024 * 1024:  # >3MB
                    quality = 80
                elif original_size > 1 * 1024 * 1024:  # >1MB
                    quality = 85
                else:
                    quality = 90

                save_params = {
                    'format': 'JPEG',
                    'quality': quality,
                    'optimize': True,
                    'subsampling': 1
                }

                # 保留重要元数据
                for key in ['icc_profile', 'exif']:
                    if key in original_info:
                        save_params[key] = original_info[key]

                image.save(save_path, **save_params)

            elif ext == '.png':
                image.save(save_path, 'PNG', optimize=True)
            else:
                image.save(save_path)

        except Exception as e:
            print(f"保存出错，使用默认方法: {e}")
            image.save(save_path)

    def detect_text_orientation_advanced(self, image_path):
        """文字方向检测 - 修正版"""
        try:
            # 使用image_to_osd进行方向检测
            osd = pytesseract.image_to_osd(image_path)
            print(f"OSD结果: {osd}")

            # 解析方向信息
            lines = osd.split('\n')
            degrees = 0

            for line in lines:
                if 'Rotate: ' in line:
                    degrees = int(line.split(': ')[1])
                    break

            print(f"OCR检测到的角度: {degrees}度")

            # 重要：Tesseract的Rotate值表示需要将图像旋转多少度才能使文字正向
            # 我们需要直接使用这个值，因为PIL的rotate函数会正确处理
            # 但需要确保在正确的方向上旋转
            return degrees

        except Exception as e:
            print(f"OCR检测主方法失败: {e}")
            # 备用方法
            try:
                osd = pytesseract.image_to_osd(image_path)
                lines = osd.split('\n')
                degrees = 0

                for line in lines:
                    if 'Rotate: ' in line:
                        degrees = int(line.split(': ')[1])
                        break

                print(f"备用方法检测到的角度: {degrees}度")
                return degrees

            except Exception as e2:
                # 如果OCR完全失败，添加到无法识别列表
                self.unrecognized_files.append(os.path.basename(image_path))
                print(f"OCR检测失败 {image_path}: {str(e2)}")
                return 0

    def _get_orientation_with_confidence(self, image_path):
        """获取方向和置信度"""
        with Image.open(image_path) as image:
            temp_image = image.copy()
            temp_image.thumbnail((1000, 1000), Image.Resampling.LANCZOS)
            if temp_image.mode != 'L':
                temp_image = temp_image.convert('L')

        osd = pytesseract.image_to_osd(temp_image, config='--psm 0')
        lines = osd.split('\n')

        degrees = 0
        confidence = 0

        for line in lines:
            if 'Rotate: ' in line:
                degrees = int(line.split(': ')[1])
            elif 'Orientation confidence: ' in line:
                confidence = float(line.split(': ')[1])

        return degrees, confidence

    def _enhanced_orientation_detection(self, image_path):
        """增强方向检测"""
        with Image.open(image_path) as image:
            # 创建对比度增强版本
            from PIL import ImageEnhance
            enhanced_image = image.copy()
            enhanced_image.thumbnail((1000, 1000), Image.Resampling.LANCZOS)
            if enhanced_image.mode != 'L':
                enhanced_image = enhanced_image.convert('L')
            enhancer = ImageEnhance.Contrast(enhanced_image)
            enhanced_image = enhancer.enhance(1.3)

        try:
            osd = pytesseract.image_to_osd(enhanced_image, config='--psm 0')
            lines = osd.split('\n')

            degrees = 0
            confidence = 0

            for line in lines:
                if 'Rotate: ' in line:
                    degrees = int(line.split(': ')[1])
                elif 'Orientation confidence: ' in line:
                    confidence = float(line.split(': ')[1])

            return degrees, confidence
        except:
            return 0, 0

    def _verify_orientation_result(self, image_path, angle):
        """验证方向检测结果"""
        try:
            with Image.open(image_path) as image:
                # 模拟旋转后的效果
                test_image = image.copy()
                test_image = test_image.rotate(angle, expand=True, resample=Image.Resampling.LANCZOS)

                # 缩小图像进行快速验证
                test_image.thumbnail((500, 500), Image.Resampling.LANCZOS)
                if test_image.mode != 'L':
                    test_image = test_image.convert('L')

                # 快速检测旋转后图像的方向
                osd = pytesseract.image_to_osd(test_image, config='--psm 0')
                lines = osd.split('\n')

                for line in lines:
                    if 'Rotate: ' in line:
                        result_angle = int(line.split(': ')[1])
                        # 旋转后的图像应该接近0度
                        return result_angle == 0 or abs(result_angle) < 90

            return True  # 默认返回True避免误判
        except:
            return True  # 验证失败也返回True

    def show_unrecognized_files(self):
        """在界面显示无法识别的文件"""
        if self.unrecognized_files:
            self.unrecognized_text.delete(1.0, tk.END)
            for file in self.unrecognized_files:
                self.unrecognized_text.insert(tk.END, file + "\n")


if __name__ == "__main__":
    # Script entry point: create the Tk root window, build the app on it, run the event loop.
    root = tk.Tk()
    app = ImageBatchRotationApp(root)
    root.mainloop()