This commit is contained in:
tomse
2025-09-07 22:04:39 +02:00
parent 2ae7112dc1
commit 5627dae0f4
5 changed files with 35 additions and 267 deletions

View File

@@ -1,9 +1,6 @@
using BitMiracle.LibTiff.Classic; using ImageMagick;
using ImageMagick;
using Microsoft.WindowsAPICodePack.Dialogs; using Microsoft.WindowsAPICodePack.Dialogs;
using PdfSharp.Drawing; using PdfSharp.Fonts;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
using RCEU_PDFWorkflowManager; using RCEU_PDFWorkflowManager;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
@@ -15,9 +12,7 @@ using System.Linq;
using System.Reflection; using System.Reflection;
using System.Security.Cryptography; using System.Security.Cryptography;
using System.Threading; using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms; using System.Windows.Forms;
using Tesseract;
using ImageFormat = System.Drawing.Imaging.ImageFormat; using ImageFormat = System.Drawing.Imaging.ImageFormat;
@@ -175,6 +170,8 @@ namespace PDFWorkflowManager
cmbBanner.SelectedIndex = Properties.Settings.Default.Banner; cmbBanner.SelectedIndex = Properties.Settings.Default.Banner;
txtPDFAuthor.Text = Properties.Settings.Default.PDFMetaAuthor; txtPDFAuthor.Text = Properties.Settings.Default.PDFMetaAuthor;
radioSortNormal.Checked = true; radioSortNormal.Checked = true;
GlobalFontSettings.FontResolver = new SystemFontResolver();
// cmbLanguage Populate // cmbLanguage Populate
#region cmbLanguage #region cmbLanguage
@@ -702,286 +699,48 @@ namespace PDFWorkflowManager
return null; return null;
} }
/// <summary>
/// Converts the given TIFF files into "FinalDocument.pdf" inside <paramref name="outputDir"/>,
/// advancing the status-strip progress bar as the converter reports progress.
/// </summary>
/// <param name="strFiles">Paths of the TIFF files to convert.</param>
/// <param name="outputDir">Directory that receives "FinalDocument.pdf".</param>
/// <param name="selectedLanguage">OCR language string handed to the converter.</param>
/// <returns>A task that completes when the background conversion has finished.</returns>
private async Task convertToPdf(string[] strFiles, string outputDir, string selectedLanguage)
{
    string outputPdf = Path.Combine(outputDir, "FinalDocument.pdf");

    // Count the TIFF directories (pages) of every input so the progress bar has a maximum.
    int totalPages = 0;
    foreach (var tiffFile in strFiles)
    {
        using (var image = BitMiracle.LibTiff.Classic.Tiff.Open(tiffFile, "r"))
        {
            // Files that could not be opened yield null and simply do not count.
            if (image != null)
            {
                totalPages += image.NumberOfDirectories();
            }
        }
    }

    toolStripProgressBar1.Maximum = totalPages;
    toolStripProgressBar1.Value = 0;
    toolStripStatusLabel1.Text = "Converting files to PDF...";

    // Single UI-side step; clamped so an extra callback cannot push Value past Maximum
    // (which would throw ArgumentOutOfRangeException).
    MethodInvoker advanceProgress = delegate
    {
        if (toolStripProgressBar1.Value < toolStripProgressBar1.Maximum)
        {
            toolStripProgressBar1.Value++;
        }
    };

    var converter = new TiffToPdfConverter();
    await Task.Run(() =>
    {
        converter.ConvertTiffToPdfA(strFiles, outputPdf, selectedLanguage, () =>
        {
            // The callback runs on the worker thread; marshal to the UI thread when needed.
            if (toolStripProgressBar1.Control.InvokeRequired)
            {
                toolStripProgressBar1.Control.Invoke(advanceProgress);
            }
            else
            {
                advanceProgress();
            }
        });
    });

    toolStripStatusLabel1.Text = "Conversion complete!";
}
/// <summary>
/// Renders every page of every TIFF in <paramref name="tiffFiles"/> onto its own PDF page,
/// runs OCR on each page image, hands the recognized text to
/// <see cref="OverlayTextOntoPdfPage"/>, and saves the result to <paramref name="outputPdfPath"/>.
/// </summary>
/// <param name="tiffFiles">Paths of the (possibly multi-page) TIFF files, in output order.</param>
/// <param name="outputPdfPath">Path of the PDF file to write.</param>
/// <param name="ocrLanguage">Language string passed to <see cref="PerformOcr"/>.</param>
/// <exception cref="IOException">A TIFF file cannot be opened or one of its pages cannot be decoded.</exception>
private void ConvertTiffToPdfAWithOcr(string[] tiffFiles, string outputPdfPath, string ocrLanguage)
{
    using (PdfDocument pdf = new PdfDocument())
    {
        pdf.Info.Title = "Converted TIFF to PDF/A";
        pdf.Info.Creator = "RCEU_PDFWorkflowManager";

        foreach (var tiffFile in tiffFiles)
        {
            using (Tiff image = Tiff.Open(tiffFile, "r"))
            {
                // Tiff.Open signals failure by returning null; fail with the file name
                // instead of a NullReferenceException further down.
                if (image == null)
                {
                    throw new IOException($"Unable to open TIFF file: {tiffFile}");
                }

                int pageCount = image.NumberOfDirectories();
                for (int page = 0; page < pageCount; page++)
                {
                    image.SetDirectory((short)page);
                    int width = image.GetField(TiffTag.IMAGEWIDTH)[0].ToInt();
                    int height = image.GetField(TiffTag.IMAGELENGTH)[0].ToInt();

                    int[] raster = new int[height * width]; // 32-bit pixels

                    // Request top-left origin so rows match the top-down bitmap layout;
                    // the plain ReadRGBAImage fills the raster bottom-up.
                    // Fully qualified: System.Windows.Forms also declares an Orientation type.
                    if (!image.ReadRGBAImageOriented(width, height, raster, BitMiracle.LibTiff.Classic.Orientation.TOPLEFT))
                    {
                        throw new IOException($"Unable to decode page {page + 1} of TIFF file: {tiffFile}");
                    }

                    // LibTiff packs pixels as ABGR, Format32bppArgb expects ARGB:
                    // keep alpha and green, swap the red and blue bytes.
                    for (int i = 0; i < raster.Length; i++)
                    {
                        int abgr = raster[i];
                        raster[i] = (abgr & unchecked((int)0xFF00FF00))
                                    | ((abgr & 0x000000FF) << 16)
                                    | ((abgr >> 16) & 0x000000FF);
                    }

                    using (var bmp = new System.Drawing.Bitmap(width, height, System.Drawing.Imaging.PixelFormat.Format32bppArgb))
                    {
                        var bmpData = bmp.LockBits(
                            new System.Drawing.Rectangle(0, 0, width, height),
                            System.Drawing.Imaging.ImageLockMode.WriteOnly,
                            bmp.PixelFormat);
                        try
                        {
                            System.Runtime.InteropServices.Marshal.Copy(raster, 0, bmpData.Scan0, raster.Length);
                        }
                        finally
                        {
                            bmp.UnlockBits(bmpData);
                        }

                        // NOTE(review): the page is sized with one point per pixel; the TIFF
                        // resolution tags are not consulted.
                        PdfPage pagePdf = pdf.AddPage();
                        pagePdf.Width = XUnit.FromPoint(width);
                        pagePdf.Height = XUnit.FromPoint(height);

                        using (XGraphics gfx = XGraphics.FromPdfPage(pagePdf))
                        {
                            // The bitmap goes through a temporary PNG so XImage can load it from disk.
                            string tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".png");
                            try
                            {
                                bmp.Save(tempPath, System.Drawing.Imaging.ImageFormat.Png);
                                using (XImage img = XImage.FromFile(tempPath))
                                {
                                    gfx.DrawImage(img, 0, 0, width, height);
                                }
                            }
                            finally
                            {
                                // Remove the temporary file even when saving or drawing failed.
                                File.Delete(tempPath);
                            }
                        }

                        // Perform OCR to extract text
                        string extractedText = PerformOcr(bmp, ocrLanguage);

                        // Overlay OCR text onto the PDF page
                        OverlayTextOntoPdfPage(pdf, pagePdf, extractedText);
                    }
                }
            }
        }

        pdf.Save(outputPdfPath);
    }
}
/// <summary>
/// Concatenates the pages of all <paramref name="pdfFiles"/>, in order, into one PDF
/// saved at <paramref name="outputPdfPath"/>.
/// </summary>
/// <param name="pdfFiles">Paths of the PDF files to merge.</param>
/// <param name="outputPdfPath">Path of the merged PDF to write.</param>
private void MergePdfs(string[] pdfFiles, string outputPdfPath)
{
    // Source documents stay open until the merged file has been saved and are
    // disposed afterwards, whether or not the merge succeeded.
    var openedInputs = new List<PdfDocument>();
    try
    {
        using (PdfDocument outputPdf = new PdfDocument())
        {
            foreach (var pdfFile in pdfFiles)
            {
                PdfDocument inputPdf = PdfReader.Open(pdfFile, PdfDocumentOpenMode.Import);
                openedInputs.Add(inputPdf);
                foreach (PdfPage page in inputPdf.Pages)
                {
                    outputPdf.AddPage(page);
                }
            }

            outputPdf.Save(outputPdfPath);
        }
    }
    finally
    {
        foreach (PdfDocument inputPdf in openedInputs)
        {
            inputPdf.Dispose();
        }
    }
}
/// <summary>
/// Runs Tesseract over <paramref name="image"/> and returns the recognized text.
/// </summary>
/// <param name="image">Bitmap of the page to recognize.</param>
/// <param name="language">Language string passed to the Tesseract engine.</param>
/// <returns>The text Tesseract extracted from the image.</returns>
private string PerformOcr(System.Drawing.Bitmap image, string language)
{
    // Trained data is loaded from the "tessdata" folder relative to the working directory.
    using (var ocrEngine = new TesseractEngine(@"./tessdata", language, EngineMode.Default))
    using (var ocrPage = ocrEngine.Process(image))
    {
        string recognizedText = ocrPage.GetText();
        return recognizedText;
    }
}
/// <summary>
/// Placeholder for writing OCR text onto a PDF page. The body is currently empty,
/// so calling it has no effect and the supplied text is not added to the document.
/// </summary>
/// <param name="pdf">Document that owns <paramref name="page"/>.</param>
/// <param name="page">Page the text is meant to be overlaid on.</param>
/// <param name="text">Text to overlay.</param>
private void OverlayTextOntoPdfPage(PdfDocument pdf, PdfPage page, string text)
{
// Implement text overlay logic here
}
/// <summary>
/// Converts every input file to JPEG twice, at 300 dpi into "jpg300" and at 150 dpi
/// into "jpg150" under the temp directory, working in parallel on background threads
/// while the semaphore is held.
/// </summary>
/// <param name="strFiles">Paths of the source files to convert.</param>
/// <returns>A task that completes when both conversion passes have finished.</returns>
private async Task prepConvertToTempOutdir(string[] strFiles)
{
    // Acquire outside the try block: if the wait itself fails, the finally block
    // must not release a semaphore that was never taken.
    await semaphore.WaitAsync();
    try
    {
        string tempJpg300Dir = Path.Combine(tempDir, "jpg300");
        string tempJpg150Dir = Path.Combine(tempDir, "jpg150");
        Directory.CreateDirectory(tempJpg300Dir);
        Directory.CreateDirectory(tempJpg150Dir);

        // Each file is converted twice, so the bar needs two steps per file.
        toolStripProgressBar1.Maximum = strFiles.Length * 2;
        toolStripProgressBar1.Value = 0;
        toolStripStatusLabel1.Text = "Converting files to jpg.";

        // Advances the bar by one step on the UI thread, never past Maximum.
        Action reportProgress = () =>
        {
            Action step = () =>
            {
                if (toolStripProgressBar1.Value < toolStripProgressBar1.Maximum)
                {
                    toolStripProgressBar1.Value++;
                }
            };

            if (toolStripProgressBar1.Control.InvokeRequired)
            {
                toolStripProgressBar1.Control.Invoke(step);
            }
            else
            {
                step();
            }
        };

        // One parallel pass over all files for a given target folder and resolution.
        // The output path is a per-iteration local: a variable shared between the
        // workers could be overwritten by another thread before convertToJpeg reads it.
        Action<string, int> convertBatch = (targetDir, dpi) =>
        {
            Parallel.ForEach(strFiles, inputFile =>
            {
                string outputFile = Path.Combine(targetDir, Path.GetFileNameWithoutExtension(inputFile) + ".jpg");
                convertToJpeg(inputFile, outputFile, 85, dpi);
                reportProgress();
            });
        };

        // The 300 dpi pass completes before the 150 dpi pass starts.
        await Task.Run(() => { convertBatch(tempJpg300Dir, 300); });
        await Task.Run(() => { convertBatch(tempJpg150Dir, 150); });
    }
    finally
    {
        semaphore.Release(); // Release semaphore
    }
}
/// <summary>
/// Advances the status-strip progress bar by one step, marshalling to the UI thread
/// when called from another thread. The value is never raised above the bar's maximum.
/// </summary>
private void UpdateProgressBar()
{
    if (toolStripProgressBar1.Control.InvokeRequired)
    {
        // Re-enter on the UI thread; the nested call takes the branch below.
        toolStripProgressBar1.Control.Invoke((MethodInvoker)delegate
        {
            UpdateProgressBar();
        });
        return;
    }

    // Setting Value above Maximum throws ArgumentOutOfRangeException, so stop at the end.
    if (toolStripProgressBar1.Value < toolStripProgressBar1.Maximum)
    {
        toolStripProgressBar1.Value++;
    }
}
/// <summary>
/// Click handler for the "convert to PDF" button: works out the OCR language selection,
/// converts the TIFF files in the work-out directory into "output.pdf" under the temp
/// "output" folder, then enables the "make PDF" button. Any failure is shown in a message box.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private async void btnConvertToPDF_Click(object sender, EventArgs e)
{
    try
    {
        string outputDir = Path.Combine(tempDir, "output");
        Directory.CreateDirectory(outputDir);

        // Determine OCR language
        string selectedLanguage = "eng";
        if (checkLanguage.Checked && cmbLanguage.SelectedItem != null)
        {
            dynamic lang = cmbLanguage.SelectedItem;
            selectedLanguage = lang.Value;
        }
        else if (!string.IsNullOrWhiteSpace(txtLanguages.Text))
        {
            string[] langArray = txtLanguages.Text.Split(new string[] { "; " }, StringSplitOptions.RemoveEmptyEntries);
            string combinedLanguages = string.Join("+",
                trainLanguage.Where(kv => langArray.Contains(kv.Key)).Select(kv => kv.Value));

            // Keep the "eng" default when none of the entered names maps to a known
            // language; otherwise an empty language string would be passed on.
            if (!string.IsNullOrEmpty(combinedLanguages))
            {
                selectedLanguage = combinedLanguages;
            }
        }

        var tiffConverter = new TiffToPdfConverter();
        await tiffConverter.ConvertTiffToPdfAWithOcrAsync(
            workOutDir,
            Path.Combine(outputDir, "output.pdf"),
            selectedLanguage,
            toolStripProgressBar1,
            toolStripStatusLabel1,
            this);

        btnMakePDF.Enabled = true;
        toolStripStatusLabel1.Text = "Conversion complete!";
    }
    catch (Exception ex)
    {
        // async void handler: an exception escaping here would be unhandled, so report it to the user.
        MessageBox.Show($"Error during conversion: {ex.Message}", "Conversion Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
private void btnCalcPageCount_Click(object sender, EventArgs e) private void btnCalcPageCount_Click(object sender, EventArgs e)
{ {
try try

View File

@@ -197,6 +197,7 @@
<Compile Include="SettingsForm.Designer.cs"> <Compile Include="SettingsForm.Designer.cs">
<DependentUpon>SettingsForm.cs</DependentUpon> <DependentUpon>SettingsForm.cs</DependentUpon>
</Compile> </Compile>
<Compile Include="SystemFontResolver.cs" />
<Compile Include="TiffToPdfConverter.cs" /> <Compile Include="TiffToPdfConverter.cs" />
<EmbeddedResource Include="LanguagesForm.resx"> <EmbeddedResource Include="LanguagesForm.resx">
<DependentUpon>LanguagesForm.cs</DependentUpon> <DependentUpon>LanguagesForm.cs</DependentUpon>
@@ -253,6 +254,7 @@
<ItemGroup> <ItemGroup>
<Content Include="chicken_lips.ico" /> <Content Include="chicken_lips.ico" />
</ItemGroup> </ItemGroup>
<ItemGroup />
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild"> <Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup> <PropertyGroup>

View File

@@ -1,4 +1,5 @@
using System; using PdfSharp.Fonts;
using System;
using System.Windows.Forms; using System.Windows.Forms;
namespace PDFWorkflowManager namespace PDFWorkflowManager
@@ -14,6 +15,12 @@ namespace PDFWorkflowManager
Application.EnableVisualStyles(); Application.EnableVisualStyles();
Application.SetCompatibleTextRenderingDefault(false); Application.SetCompatibleTextRenderingDefault(false);
Application.Run(new MainForm()); Application.Run(new MainForm());
GlobalFontSettings.FontResolver = new SystemFontResolver();
Application.EnableVisualStyles();
Application.SetCompatibleTextRenderingDefault(false);
Application.Run(new MainForm());
} }
} }
} }

Binary file not shown.

Binary file not shown.