PDF文件批量删除空白页(带UI界面+成品)
界面上有两个路径输入框:一个用于选择需要处理的PDF文件,另一个用于选择处理后文件的保存位置。
制作背景:由于有大量Excel文件需要打印,我先把这些Excel合并后再生成PDF,以查看格式是否发生变化。结果合并出的PDF里有一大堆空白页,手动根本删不完;在网上找方法,大部分也只是教怎么在预览时逐页删除。于是就制作了这个批量删除空白页的小软件。刚好同事也需要,但是她没有Python环境,所以干脆用tkinter做了个简单的UI并打包。打包后文件有点大(约60M),这一点没有再优化,将就着能用!
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter.ttk import Progressbar
import pdfplumber
from PyPDF2 import PdfReader, PdfWriter
import threading


class _ProgressRelay:
    """Thread-safe holder for the latest progress percentage.

    Exposes the same ``set``/``get`` surface as a Tk variable so a worker
    thread can report progress without touching any Tk object; the Tk
    thread polls ``get`` and forwards the value to the real variable.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._value = 0.0

    def set(self, value):
        """Store the most recent progress value."""
        with self._lock:
            self._value = value

    def get(self):
        """Return the most recently stored progress value."""
        with self._lock:
            return self._value


def is_blank_page(page):
    """Return True when a pdfplumber page has no visible text and no images.

    Only extracted text and embedded images are inspected, so a page whose
    sole content is something else is still reported as blank.
    """
    text = page.extract_text()
    if text and not text.isspace():
        return False
    return not page.images


def remove_blank_pages(input_pdf_path, output_pdf_path, progress_var, total_pages):
    """Copy every non-blank page of the input PDF into a new output PDF.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Path the filtered PDF is written to.
        progress_var: Any object with a ``set(float)`` method; it receives
            the completion percentage (0-100) after each page.
        total_pages: Page count used as the progress denominator. When it is
            0 (or otherwise falsy) the page count of the input is used.
    """
    reader = PdfReader(input_pdf_path)
    writer = PdfWriter()
    # Guard against a zero denominator in the percentage computation.
    page_total = total_pages or len(reader.pages)
    # Tk widgets may only be touched from the thread running the main loop.
    on_main_thread = threading.current_thread() is threading.main_thread()

    with pdfplumber.open(input_pdf_path) as pdf:
        for i, page in enumerate(pdf.pages):
            if not is_blank_page(page):
                writer.add_page(reader.pages[i])
            # Report progress after every page, kept or dropped.
            progress_var.set((i + 1) / page_total * 100)
            if on_main_thread:
                # Force a redraw only when called synchronously from the
                # GUI thread; a worker thread must not call into Tk.
                root.update_idletasks()

    with open(output_pdf_path, "wb") as output_pdf:
        writer.write(output_pdf)


def select_input_file():
    """Ask for the PDF to process and put its path in the input entry."""
    file_path = filedialog.askopenfilename(filetypes=[("PDF 文件", "*.pdf")])
    if file_path:
        input_entry.delete(0, tk.END)
        input_entry.insert(0, file_path)


def select_output_path():
    """Ask where to save the result and put the path in the output entry."""
    file_path = filedialog.asksaveasfilename(
        defaultextension=".pdf", filetypes=[("PDF 文件", "*.pdf")]
    )
    if file_path:
        output_entry.delete(0, tk.END)
        output_entry.insert(0, file_path)


def process_pdf():
    """Validate the chosen paths and remove blank pages in a worker thread.

    The worker reports progress through a ``_ProgressRelay``; the GUI thread
    polls it every 100 ms, updates the progress bar, and shows either the
    success dialog or the error raised by the worker once it has finished.
    """
    input_pdf_path = input_entry.get()
    output_pdf_path = output_entry.get()
    if not input_pdf_path or not output_pdf_path:
        messagebox.showerror("错误", "请选择输入和输出路径。")
        return

    try:
        total_pages = len(PdfReader(input_pdf_path).pages)
    except Exception as e:
        messagebox.showerror("错误", f"发生了一个错误: {str(e)}")
        return

    # Reset the progress bar.
    progress_var.set(0)
    progress_bar['maximum'] = 100
    progress_bar['value'] = 0
    # Prevent a second job from being started while this one is running.
    process_button.config(state=tk.DISABLED)

    relay = _ProgressRelay()
    failures = []

    def worker():
        # Exceptions raised in a thread do not propagate to the caller, so
        # capture them here and let the GUI thread report them.
        try:
            remove_blank_pages(input_pdf_path, output_pdf_path, relay, total_pages)
        except Exception as exc:
            failures.append(exc)

    # Run in a thread so the GUI does not freeze.
    thread = threading.Thread(target=worker)
    thread.start()

    def check_thread():
        # Runs on the GUI thread: safe place to touch Tk variables/widgets.
        progress_var.set(relay.get())
        if thread.is_alive():
            root.after(100, check_thread)  # keep polling
            return
        process_button.config(state=tk.NORMAL)
        if failures:
            messagebox.showerror("错误", f"发生了一个错误: {str(failures[0])}")
        else:
            messagebox.showinfo("成功", "空白页移除成功!")

    root.after(100, check_thread)


# Create the main window.
root = tk.Tk()
root.title("PDF 空白页移除工具")

# Input file selection.
input_label = tk.Label(root, text="选择要处理的 PDF 文件:")
input_label.pack(pady=5)
input_entry = tk.Entry(root, width=50)
input_entry.pack(pady=5)
input_button = tk.Button(root, text="浏览...", command=select_input_file)
input_button.pack(pady=5)

# Output file selection.
output_label = tk.Label(root, text="选择保存位置:")
output_label.pack(pady=5)
output_entry = tk.Entry(root, width=50)
output_entry.pack(pady=5)
output_button = tk.Button(root, text="浏览...", command=select_output_path)
output_button.pack(pady=5)

# Progress bar.
progress_var = tk.DoubleVar()
progress_bar = Progressbar(root, variable=progress_var, maximum=100)
progress_bar.pack(pady=20, fill=tk.X)

# Process button.
process_button = tk.Button(root, text="开始移除空白页", command=process_pdf)
process_button.pack(pady=20)

# Run the main loop.
root.mainloop()