diff --git a/PDFWorkflowManager/PDFWorkflowManager/MainForm.cs b/PDFWorkflowManager/PDFWorkflowManager/MainForm.cs index f8104d6..89fc53d 100644 --- a/PDFWorkflowManager/PDFWorkflowManager/MainForm.cs +++ b/PDFWorkflowManager/PDFWorkflowManager/MainForm.cs @@ -1,9 +1,6 @@ -using BitMiracle.LibTiff.Classic; -using ImageMagick; +using ImageMagick; using Microsoft.WindowsAPICodePack.Dialogs; -using PdfSharp.Drawing; -using PdfSharp.Pdf; -using PdfSharp.Pdf.IO; +using PdfSharp.Fonts; using RCEU_PDFWorkflowManager; using System; using System.Collections.Generic; @@ -15,9 +12,7 @@ using System.Linq; using System.Reflection; using System.Security.Cryptography; using System.Threading; -using System.Threading.Tasks; using System.Windows.Forms; -using Tesseract; using ImageFormat = System.Drawing.Imaging.ImageFormat; @@ -175,6 +170,8 @@ namespace PDFWorkflowManager cmbBanner.SelectedIndex = Properties.Settings.Default.Banner; txtPDFAuthor.Text = Properties.Settings.Default.PDFMetaAuthor; radioSortNormal.Checked = true; + GlobalFontSettings.FontResolver = new SystemFontResolver(); + // cmbLanguage Populate #region cmbLanguage @@ -702,286 +699,48 @@ namespace PDFWorkflowManager return null; } - private async Task convertToPdf(string[] strFiles, string outputDir, string selectedLanguage) - { - string outputPdf = Path.Combine(outputDir, "FinalDocument.pdf"); - - // Count total pages for progress bar - int totalPages = 0; - foreach (var tiffFile in strFiles) - { - using (var image = BitMiracle.LibTiff.Classic.Tiff.Open(tiffFile, "r")) - { - if (image != null) - totalPages += image.NumberOfDirectories(); - } - } - - toolStripProgressBar1.Maximum = totalPages; - toolStripProgressBar1.Value = 0; - toolStripStatusLabel1.Text = "Converting files to PDF..."; - - var converter = new TiffToPdfConverter(); - await Task.Run(() => - { - converter.ConvertTiffToPdfA(strFiles, Path.Combine(outputDir, "FinalDocument.pdf"), selectedLanguage, () => - { - if (toolStripProgressBar1.Control.InvokeRequired) - { - toolStripProgressBar1.Control.Invoke((MethodInvoker)delegate - { - toolStripProgressBar1.Value++; - }); - } - else - { - toolStripProgressBar1.Value++; - } - }); - }); - toolStripStatusLabel1.Text = "Conversion complete!"; - - } - - - - private void ConvertTiffToPdfAWithOcr(string[] tiffFiles, string outputPdfPath, string ocrLanguage) - { - PdfDocument pdf = new PdfDocument(); - pdf.Info.Title = "Converted TIFF to PDF/A"; - pdf.Info.Creator = "RCEU_PDFWorkflowManager"; - - foreach (var tiffFile in tiffFiles) - { - using (Tiff image = Tiff.Open(tiffFile, "r")) - { - int pageCount = image.NumberOfDirectories(); - for (int page = 0; page < pageCount; page++) - { - image.SetDirectory((short)page); - - int width = image.GetField(TiffTag.IMAGEWIDTH)[0].ToInt(); - int height = image.GetField(TiffTag.IMAGELENGTH)[0].ToInt(); - - - int[] raster = new int[height * width]; // 32-bit pixels - image.ReadRGBAImage(width, height, raster); - - - using (var bmp = new System.Drawing.Bitmap(width, height, System.Drawing.Imaging.PixelFormat.Format32bppArgb)) - { - var bmpData = bmp.LockBits( - new System.Drawing.Rectangle(0, 0, width, height), - System.Drawing.Imaging.ImageLockMode.WriteOnly, - bmp.PixelFormat); - - System.Runtime.InteropServices.Marshal.Copy(raster, 0, bmpData.Scan0, raster.Length); - bmp.UnlockBits(bmpData); - - PdfPage pagePdf = pdf.AddPage(); - pagePdf.Width = XUnit.FromPoint(width); - pagePdf.Height = XUnit.FromPoint(height); - - using (XGraphics gfx = XGraphics.FromPdfPage(pagePdf)) - { - // Save Bitmap to a temporary PNG - string tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".png"); - bmp.Save(tempPath, System.Drawing.Imaging.ImageFormat.Png); - - // Load PNG into XImage - using (XImage img = XImage.FromFile(tempPath)) - { - gfx.DrawImage(img, 0, 0, width, height); - } - - // Delete temporary file - File.Delete(tempPath); - } - - // Perform OCR to extract text - string extractedText = PerformOcr(bmp, ocrLanguage); - // Overlay OCR text onto the PDF page - OverlayTextOntoPdfPage(pdf, pagePdf, extractedText); - } - } - } - } - - pdf.Save(outputPdfPath); - } - - private void MergePdfs(string[] pdfFiles, string outputPdfPath) - { - PdfDocument outputPdf = new PdfDocument(); - - foreach (var pdfFile in pdfFiles) - { - PdfDocument inputPdf = PdfReader.Open(pdfFile, PdfDocumentOpenMode.Import); - foreach (PdfPage page in inputPdf.Pages) - { - outputPdf.AddPage(page); - } - } - - outputPdf.Save(outputPdfPath); - } - - - private string PerformOcr(System.Drawing.Bitmap image, string language) - { - using (var engine = new TesseractEngine(@"./tessdata", language, EngineMode.Default)) - { - using (var page = engine.Process(image)) - { - return page.GetText(); - } - } - } - - private void OverlayTextOntoPdfPage(PdfDocument pdf, PdfPage page, string text) - { - // Implement text overlay logic here - } - - private async Task prepConvertToTempOutdir(string[] strFiles) - { - try - { - await semaphore.WaitAsync(); - string tempJpg300Dir = Path.Combine(tempDir, "jpg300"); - string tempJpg150Dir = Path.Combine(tempDir, "jpg150"); - Directory.CreateDirectory(tempJpg300Dir); - Directory.CreateDirectory(tempJpg150Dir); - string outputFile = ""; - int filecount = strFiles.Count() * 2; - - toolStripProgressBar1.Maximum = filecount; - toolStripProgressBar1.Value = 0; - toolStripStatusLabel1.Text = "Converting files to jpg."; - - List tasks = new List(); - - await Task.Run(() => - { - Parallel.ForEach(strFiles, async inputFile => - { - outputFile = Path.Combine(tempJpg300Dir, Path.GetFileNameWithoutExtension(inputFile) + ".jpg"); - convertToJpeg(inputFile, outputFile, 85, 300); - - //Interlocked.Increment(ref toolStripProgressBar1.Value); - //UpdateProgressBar(); - }); - }); - - await Task.Run(() => - { - Parallel.ForEach(strFiles, async inputFile => - { - outputFile = Path.Combine(tempJpg150Dir, Path.GetFileNameWithoutExtension(inputFile) + ".jpg"); - convertToJpeg(inputFile, outputFile, 85, 150); - - //Interlocked.Increment(ref toolStripProgressBar1.Value); - //UpdateProgressBar(); - }); - }); - await Task.WhenAll(tasks); - } - finally - { - semaphore.Release(); // Release semaphore - } - } - - private void UpdateProgressBar() - { - if (toolStripProgressBar1.Control.InvokeRequired) - { - toolStripProgressBar1.Control.Invoke((MethodInvoker)delegate - { - toolStripProgressBar1.Value++; - }); - } - else - { - toolStripProgressBar1.Value++; - } - } private async void btnConvertToPDF_Click(object sender, EventArgs e) { try { - string[] strFiles = Directory.GetFiles(workOutDir, "*.tif"); - int sourceFileCount = strFiles.Length; string outputDir = Path.Combine(tempDir, "output"); Directory.CreateDirectory(outputDir); - string sourceDir = workOutDir; - var selectedLanguage = "eng"; - if (checkLanguage.Checked == true) + // Determine OCR language + string selectedLanguage = "eng"; + if (checkLanguage.Checked && cmbLanguage.SelectedItem != null) { - var language = (dynamic)cmbLanguage.SelectedItem; - selectedLanguage = language.Value; + dynamic lang = cmbLanguage.SelectedItem; + selectedLanguage = lang.Value; } - else + else if (!string.IsNullOrWhiteSpace(txtLanguages.Text)) { string[] langArray = txtLanguages.Text.Split(new string[] { "; " }, StringSplitOptions.RemoveEmptyEntries); - selectedLanguage = String.Join("+", - trainLanguage - .Where(kv => langArray - .Contains(kv.Key)) - .Select(kv => kv.Value) - ); + selectedLanguage = string.Join("+", + trainLanguage.Where(kv => langArray.Contains(kv.Key)).Select(kv => kv.Value)); } - sourceDir = prepCopyToTempOutdir(sourceFileCount, strFiles); - strFiles = Directory.GetFiles(sourceDir, "*." + strExtension); - string jpg150dir = ""; - - if (chkMagazines.Checked == true) - { - await prepConvertToTempOutdir(strFiles); - sourceDir = Path.Combine(tempDir, "jpg300"); - jpg150dir = Path.Combine(tempDir, "jpg150"); - // convert to jpg function - strExtension = "jpg"; - } - strFiles = Directory.GetFiles(sourceDir, "*." + strExtension); + var tiffConverter = new TiffToPdfConverter(); + await tiffConverter.ConvertTiffToPdfAWithOcrAsync( + workOutDir, + Path.Combine(outputDir, "output.pdf"), + selectedLanguage, + toolStripProgressBar1, + toolStripStatusLabel1, + this); - - await convertToPdf(strFiles, outputDir, selectedLanguage); - - try - { - string[] strFiles150 = Directory.GetFiles(jpg150dir, "*." + strExtension); - if (strFiles.Count() == strFiles150.Count()) - { - strFiles150 = Directory.GetFiles(jpg150dir, "*." + strExtension); - await convertToPdf(strFiles150, jpg150dir, selectedLanguage); - } - } - catch { } - - - string[] inFiles = Directory.GetFiles(sourceDir, "*." + strExtension); - string[] outFiles = Directory.GetFiles(outputDir, "*.pdf"); - txtPageCount.Text = inFiles.Length.ToString(); + txtPageCount.Text = Directory.GetFiles(workOutDir, "*.tif").Length.ToString(); btnMakePDF.Enabled = true; - - - - btnMakePDF.Enabled = true; - toolStripStatusLabel1.Text = "Converting files to pdf. Done!"; - strExtension = "tif"; // reset to tiff extension + toolStripStatusLabel1.Text = "Conversion complete!"; } - catch + catch (Exception ex) { - strExtension = "tif"; // reset to tiff extension - MessageBox.Show("No supported files in Work\\out directory!", "No supported files found!", MessageBoxButtons.OK, MessageBoxIcon.Error); - return; + MessageBox.Show($"Error during conversion: {ex.Message}", "Conversion Error", MessageBoxButtons.OK, MessageBoxIcon.Error); } } + private void btnCalcPageCount_Click(object sender, EventArgs e) { try diff --git a/PDFWorkflowManager/PDFWorkflowManager/PDF Workflow Manager.csproj b/PDFWorkflowManager/PDFWorkflowManager/PDF Workflow Manager.csproj index 1e6d3fe..e705b24 100644 --- a/PDFWorkflowManager/PDFWorkflowManager/PDF Workflow Manager.csproj +++ b/PDFWorkflowManager/PDFWorkflowManager/PDF Workflow Manager.csproj @@ -197,6 +197,7 @@ SettingsForm.cs + LanguagesForm.cs @@ -253,6 +254,7 @@ + diff --git a/PDFWorkflowManager/PDFWorkflowManager/Program.cs b/PDFWorkflowManager/PDFWorkflowManager/Program.cs index 7620176..26687c6 100644 --- a/PDFWorkflowManager/PDFWorkflowManager/Program.cs +++ b/PDFWorkflowManager/PDFWorkflowManager/Program.cs @@ -1,4 +1,5 @@ -using System; +using PdfSharp.Fonts; +using System; using System.Windows.Forms; namespace PDFWorkflowManager @@ -14,6 +15,12 @@ namespace PDFWorkflowManager Application.EnableVisualStyles(); Application.SetCompatibleTextRenderingDefault(false); Application.Run(new MainForm()); + + GlobalFontSettings.FontResolver = new SystemFontResolver(); + + Application.EnableVisualStyles(); + Application.SetCompatibleTextRenderingDefault(false); + Application.Run(new MainForm()); } } } diff --git a/PDFWorkflowManager/PDFWorkflowManager/arial.ttf b/PDFWorkflowManager/PDFWorkflowManager/arial.ttf new file mode 100644 index 0000000..8682d94 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/arial.ttf differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/sRGB.icc b/PDFWorkflowManager/PDFWorkflowManager/sRGB.icc new file mode 100644 index 0000000..cbb16d4 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/sRGB.icc differ