This commit is contained in:
tomse
2025-09-07 22:04:39 +02:00
parent 2ae7112dc1
commit 5627dae0f4
5 changed files with 35 additions and 267 deletions

View File

@@ -1,9 +1,6 @@
using BitMiracle.LibTiff.Classic;
using ImageMagick;
using ImageMagick;
using Microsoft.WindowsAPICodePack.Dialogs;
using PdfSharp.Drawing;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
using PdfSharp.Fonts;
using RCEU_PDFWorkflowManager;
using System;
using System.Collections.Generic;
@@ -15,9 +12,7 @@ using System.Linq;
using System.Reflection;
using System.Security.Cryptography;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Tesseract;
using ImageFormat = System.Drawing.Imaging.ImageFormat;
@@ -175,6 +170,8 @@ namespace PDFWorkflowManager
cmbBanner.SelectedIndex = Properties.Settings.Default.Banner;
txtPDFAuthor.Text = Properties.Settings.Default.PDFMetaAuthor;
radioSortNormal.Checked = true;
GlobalFontSettings.FontResolver = new SystemFontResolver();
// cmbLanguage Populate
#region cmbLanguage
@@ -702,286 +699,48 @@ namespace PDFWorkflowManager
return null;
}
/// <summary>
/// Converts the given files into "FinalDocument.pdf" inside <paramref name="outputDir"/>
/// on a worker thread while advancing the status-strip progress bar.
/// </summary>
/// <param name="strFiles">Input files. Each one is opened with LibTiff to count its directories (pages).</param>
/// <param name="outputDir">Directory that receives FinalDocument.pdf.</param>
/// <param name="selectedLanguage">Language string forwarded unchanged to the converter.</param>
private async Task convertToPdf(string[] strFiles, string outputDir, string selectedLanguage)
{
    string outputPdf = Path.Combine(outputDir, "FinalDocument.pdf");

    // Count total pages for progress bar
    int totalPages = 0;
    foreach (var tiffFile in strFiles)
    {
        using (var image = BitMiracle.LibTiff.Classic.Tiff.Open(tiffFile, "r"))
        {
            // Tiff.Open returns null for files it cannot read; those add nothing to the total.
            if (image != null)
            {
                totalPages += image.NumberOfDirectories();
            }
        }
    }

    toolStripProgressBar1.Maximum = totalPages;
    toolStripProgressBar1.Value = 0;
    toolStripStatusLabel1.Text = "Converting files to PDF...";

    var converter = new TiffToPdfConverter();
    await Task.Run(() =>
    {
        // NOTE(review): the callback is presumably invoked once per converted page - confirm in TiffToPdfConverter.
        converter.ConvertTiffToPdfA(strFiles, outputPdf, selectedLanguage, () =>
        {
            MethodInvoker step = delegate
            {
                // Clamp: if the callback fires more often than the counted page total
                // (e.g. the total is 0 because the inputs could not be opened as TIFF),
                // an unguarded Value++ would throw once Value exceeds Maximum.
                if (toolStripProgressBar1.Value < toolStripProgressBar1.Maximum)
                {
                    toolStripProgressBar1.Value++;
                }
            };

            // The callback runs on the worker thread, so marshal to the UI thread when required.
            if (toolStripProgressBar1.Control.InvokeRequired)
            {
                toolStripProgressBar1.Control.Invoke(step);
            }
            else
            {
                step();
            }
        });
    });

    toolStripStatusLabel1.Text = "Conversion complete!";
}
/// <summary>
/// Renders every page of every TIFF in <paramref name="tiffFiles"/> onto its own PDF page,
/// runs OCR on the page bitmap, hands the recognized text to the overlay routine and
/// saves the resulting document to <paramref name="outputPdfPath"/>.
/// </summary>
/// <param name="tiffFiles">Paths of the TIFF files to convert, in output order.</param>
/// <param name="outputPdfPath">Destination path of the generated PDF.</param>
/// <param name="ocrLanguage">Language string passed to <c>PerformOcr</c>.</param>
/// <exception cref="IOException">A TIFF file could not be opened.</exception>
/// <exception cref="InvalidDataException">A TIFF page could not be decoded.</exception>
private void ConvertTiffToPdfAWithOcr(string[] tiffFiles, string outputPdfPath, string ocrLanguage)
{
    PdfDocument pdf = new PdfDocument();
    pdf.Info.Title = "Converted TIFF to PDF/A";
    pdf.Info.Creator = "RCEU_PDFWorkflowManager";

    foreach (var tiffFile in tiffFiles)
    {
        using (Tiff image = Tiff.Open(tiffFile, "r"))
        {
            // Tiff.Open returns null instead of throwing when the file cannot be opened.
            if (image == null)
            {
                throw new IOException($"Unable to open TIFF file: {tiffFile}");
            }

            int pageCount = image.NumberOfDirectories();
            for (int page = 0; page < pageCount; page++)
            {
                image.SetDirectory((short)page);
                int width = image.GetField(TiffTag.IMAGEWIDTH)[0].ToInt();
                int height = image.GetField(TiffTag.IMAGELENGTH)[0].ToInt();

                int[] raster = new int[height * width]; // 32-bit pixels
                if (!image.ReadRGBAImage(width, height, raster))
                {
                    throw new InvalidDataException($"Unable to decode page {page} of TIFF file: {tiffFile}");
                }

                // ReadRGBAImage yields bottom-up, ABGR-packed pixels; the bitmap needs top-down ARGB.
                int[] argb = FlipAndSwizzleTiffRaster(raster, width, height);

                using (var bmp = new System.Drawing.Bitmap(width, height, System.Drawing.Imaging.PixelFormat.Format32bppArgb))
                {
                    var bmpData = bmp.LockBits(
                        new System.Drawing.Rectangle(0, 0, width, height),
                        System.Drawing.Imaging.ImageLockMode.WriteOnly,
                        bmp.PixelFormat);
                    System.Runtime.InteropServices.Marshal.Copy(argb, 0, bmpData.Scan0, argb.Length);
                    bmp.UnlockBits(bmpData);

                    PdfPage pagePdf = pdf.AddPage();
                    // One pixel is mapped to one point, as in the original implementation.
                    pagePdf.Width = XUnit.FromPoint(width);
                    pagePdf.Height = XUnit.FromPoint(height);

                    using (XGraphics gfx = XGraphics.FromPdfPage(pagePdf))
                    {
                        // Save Bitmap to a temporary PNG
                        string tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".png");
                        try
                        {
                            bmp.Save(tempPath, System.Drawing.Imaging.ImageFormat.Png);

                            // Load PNG into XImage
                            using (XImage img = XImage.FromFile(tempPath))
                            {
                                gfx.DrawImage(img, 0, 0, width, height);
                            }
                        }
                        finally
                        {
                            // Delete the temporary file even when saving or drawing fails.
                            File.Delete(tempPath);
                        }
                    }

                    // Perform OCR to extract text
                    string extractedText = PerformOcr(bmp, ocrLanguage);

                    // Overlay OCR text onto the PDF page
                    OverlayTextOntoPdfPage(pdf, pagePdf, extractedText);
                }
            }
        }
    }

    pdf.Save(outputPdfPath);
}

/// <summary>
/// Converts a LibTiff RGBA raster (rows bottom-up, pixels packed as ABGR) into a
/// top-down array of 0xAARRGGBB pixels suitable for a Format32bppArgb bitmap.
/// </summary>
private static int[] FlipAndSwizzleTiffRaster(int[] raster, int width, int height)
{
    int[] argb = new int[raster.Length];
    for (int y = 0; y < height; y++)
    {
        int sourceRow = (height - 1 - y) * width; // raster origin is the lower-left corner
        int targetRow = y * width;
        for (int x = 0; x < width; x++)
        {
            int abgr = raster[sourceRow + x];
            argb[targetRow + x] =
                (Tiff.GetA(abgr) << 24) |
                (Tiff.GetR(abgr) << 16) |
                (Tiff.GetG(abgr) << 8) |
                Tiff.GetB(abgr);
        }
    }

    return argb;
}
/// <summary>
/// Appends the pages of each PDF in <paramref name="pdfFiles"/>, in array order,
/// to a new document and saves it to <paramref name="outputPdfPath"/>.
/// </summary>
/// <param name="pdfFiles">Paths of the PDF files to merge.</param>
/// <param name="outputPdfPath">Destination path of the merged PDF.</param>
private void MergePdfs(string[] pdfFiles, string outputPdfPath)
{
    var merged = new PdfDocument();

    foreach (string sourcePath in pdfFiles)
    {
        // Import mode is required so the pages can be added to another document.
        PdfDocument source = PdfReader.Open(sourcePath, PdfDocumentOpenMode.Import);
        for (int index = 0; index < source.PageCount; index++)
        {
            merged.AddPage(source.Pages[index]);
        }
    }

    merged.Save(outputPdfPath);
}
/// <summary>
/// Runs Tesseract on <paramref name="image"/> using the trained data found in
/// "./tessdata" and returns the recognized text.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <param name="language">Language string handed to the Tesseract engine.</param>
/// <returns>The text Tesseract reports for the processed page.</returns>
private string PerformOcr(System.Drawing.Bitmap image, string language)
{
    using (var ocrEngine = new TesseractEngine(@"./tessdata", language, EngineMode.Default))
    using (var ocrPage = ocrEngine.Process(image))
    {
        string recognizedText = ocrPage.GetText();
        return recognizedText;
    }
}
/// <summary>
/// Placeholder for writing OCR text onto a PDF page. The body is currently empty,
/// so calling it has no effect on <paramref name="pdf"/> or <paramref name="page"/>.
/// </summary>
/// <param name="pdf">Document that owns the page.</param>
/// <param name="page">Page that should receive the text.</param>
/// <param name="text">Text to overlay.</param>
private void OverlayTextOntoPdfPage(PdfDocument pdf, PdfPage page, string text)
{
// Implement text overlay logic here
}
/// <summary>
/// Converts every input file to JPEG twice, once at 300 and once at 150 (the last
/// argument passed to <c>convertToJpeg</c>), writing the results to the "jpg300" and
/// "jpg150" sub-directories of <c>tempDir</c>. Calls are serialized through <c>semaphore</c>.
/// </summary>
/// <param name="strFiles">Paths of the files to convert.</param>
/// <remarks>
/// Exceptions thrown by <c>convertToJpeg</c> surface through the awaited task as an
/// <see cref="AggregateException"/> raised by <c>Parallel.ForEach</c>.
/// </remarks>
private async Task prepConvertToTempOutdir(string[] strFiles)
{
    // Acquire outside the try block so the finally clause can never release a
    // semaphore that was not actually taken (e.g. when WaitAsync itself throws).
    await semaphore.WaitAsync();
    try
    {
        string tempJpg300Dir = Path.Combine(tempDir, "jpg300");
        string tempJpg150Dir = Path.Combine(tempDir, "jpg150");
        Directory.CreateDirectory(tempJpg300Dir);
        Directory.CreateDirectory(tempJpg150Dir);

        // Two conversion passes per file.
        toolStripProgressBar1.Maximum = strFiles.Length * 2;
        toolStripProgressBar1.Value = 0;
        toolStripStatusLabel1.Text = "Converting files to jpg.";
        // NOTE(review): per-file progress updates were commented out in the original and are still not reported.

        await Task.Run(() =>
        {
            Parallel.ForEach(strFiles, inputFile =>
            {
                // The output path must be local to the iteration: a variable shared
                // between parallel iterations can be overwritten before it is used.
                string outputFile = Path.Combine(tempJpg300Dir, Path.GetFileNameWithoutExtension(inputFile) + ".jpg");
                convertToJpeg(inputFile, outputFile, 85, 300);
            });
        });

        await Task.Run(() =>
        {
            Parallel.ForEach(strFiles, inputFile =>
            {
                string outputFile = Path.Combine(tempJpg150Dir, Path.GetFileNameWithoutExtension(inputFile) + ".jpg");
                convertToJpeg(inputFile, outputFile, 85, 150);
            });
        });
    }
    finally
    {
        semaphore.Release(); // Release semaphore
    }
}
/// <summary>
/// Advances the status-strip progress bar by one step, marshalling the update
/// through the bar's hosted control when invoked from a non-UI thread.
/// </summary>
private void UpdateProgressBar()
{
    MethodInvoker advance = delegate
    {
        toolStripProgressBar1.Value++;
    };

    // Already on the owning thread: update directly.
    if (!toolStripProgressBar1.Control.InvokeRequired)
    {
        advance();
        return;
    }

    toolStripProgressBar1.Control.Invoke(advance);
}
private async void btnConvertToPDF_Click(object sender, EventArgs e)
{
try
{
string[] strFiles = Directory.GetFiles(workOutDir, "*.tif");
int sourceFileCount = strFiles.Length;
string outputDir = Path.Combine(tempDir, "output");
Directory.CreateDirectory(outputDir);
string sourceDir = workOutDir;
var selectedLanguage = "eng";
if (checkLanguage.Checked == true)
// Determine OCR language
string selectedLanguage = "eng";
if (checkLanguage.Checked && cmbLanguage.SelectedItem != null)
{
var language = (dynamic)cmbLanguage.SelectedItem;
selectedLanguage = language.Value;
dynamic lang = cmbLanguage.SelectedItem;
selectedLanguage = lang.Value;
}
else
else if (!string.IsNullOrWhiteSpace(txtLanguages.Text))
{
string[] langArray = txtLanguages.Text.Split(new string[] { "; " }, StringSplitOptions.RemoveEmptyEntries);
selectedLanguage = String.Join("+",
trainLanguage
.Where(kv => langArray
.Contains(kv.Key))
.Select(kv => kv.Value)
);
selectedLanguage = string.Join("+",
trainLanguage.Where(kv => langArray.Contains(kv.Key)).Select(kv => kv.Value));
}
sourceDir = prepCopyToTempOutdir(sourceFileCount, strFiles);
strFiles = Directory.GetFiles(sourceDir, "*." + strExtension);
string jpg150dir = "";
if (chkMagazines.Checked == true)
{
await prepConvertToTempOutdir(strFiles);
sourceDir = Path.Combine(tempDir, "jpg300");
jpg150dir = Path.Combine(tempDir, "jpg150");
// convert to jpg function
strExtension = "jpg";
}
strFiles = Directory.GetFiles(sourceDir, "*." + strExtension);
var tiffConverter = new TiffToPdfConverter();
await tiffConverter.ConvertTiffToPdfAWithOcrAsync(
workOutDir,
Path.Combine(outputDir, "output.pdf"),
selectedLanguage,
toolStripProgressBar1,
toolStripStatusLabel1,
this);
await convertToPdf(strFiles, outputDir, selectedLanguage);
try
{
string[] strFiles150 = Directory.GetFiles(jpg150dir, "*." + strExtension);
if (strFiles.Count() == strFiles150.Count())
{
strFiles150 = Directory.GetFiles(jpg150dir, "*." + strExtension);
await convertToPdf(strFiles150, jpg150dir, selectedLanguage);
}
}
catch { }
string[] inFiles = Directory.GetFiles(sourceDir, "*." + strExtension);
string[] outFiles = Directory.GetFiles(outputDir, "*.pdf");
txtPageCount.Text = inFiles.Length.ToString();
txtPageCount.Text = Directory.GetFiles(workOutDir, "*.tif").Length.ToString();
btnMakePDF.Enabled = true;
btnMakePDF.Enabled = true;
toolStripStatusLabel1.Text = "Converting files to pdf. Done!";
strExtension = "tif"; // reset to tiff extension
toolStripStatusLabel1.Text = "Conversion complete!";
}
catch
catch (Exception ex)
{
strExtension = "tif"; // reset to tiff extension
MessageBox.Show("No supported files in Work\\out directory!", "No supported files found!", MessageBoxButtons.OK, MessageBoxIcon.Error);
return;
MessageBox.Show($"Error during conversion: {ex.Message}", "Conversion Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
}
}
private void btnCalcPageCount_Click(object sender, EventArgs e)
{
try

View File

@@ -197,6 +197,7 @@
<Compile Include="SettingsForm.Designer.cs">
<DependentUpon>SettingsForm.cs</DependentUpon>
</Compile>
<Compile Include="SystemFontResolver.cs" />
<Compile Include="TiffToPdfConverter.cs" />
<EmbeddedResource Include="LanguagesForm.resx">
<DependentUpon>LanguagesForm.cs</DependentUpon>
@@ -253,6 +254,7 @@
<ItemGroup>
<Content Include="chicken_lips.ico" />
</ItemGroup>
<ItemGroup />
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>

View File

@@ -1,4 +1,5 @@
using System;
using PdfSharp.Fonts;
using System;
using System.Windows.Forms;
namespace PDFWorkflowManager
@@ -14,6 +15,12 @@ namespace PDFWorkflowManager
Application.EnableVisualStyles();
Application.SetCompatibleTextRenderingDefault(false);
Application.Run(new MainForm());
GlobalFontSettings.FontResolver = new SystemFontResolver();
Application.EnableVisualStyles();
Application.SetCompatibleTextRenderingDefault(false);
Application.Run(new MainForm());
}
}
}

Binary file not shown.

Binary file not shown.