update
This commit is contained in:
@@ -1,9 +1,6 @@
|
||||
using BitMiracle.LibTiff.Classic;
|
||||
using ImageMagick;
|
||||
using ImageMagick;
|
||||
using Microsoft.WindowsAPICodePack.Dialogs;
|
||||
using PdfSharp.Drawing;
|
||||
using PdfSharp.Pdf;
|
||||
using PdfSharp.Pdf.IO;
|
||||
using PdfSharp.Fonts;
|
||||
using RCEU_PDFWorkflowManager;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
@@ -15,9 +12,7 @@ using System.Linq;
|
||||
using System.Reflection;
|
||||
using System.Security.Cryptography;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using System.Windows.Forms;
|
||||
using Tesseract;
|
||||
using ImageFormat = System.Drawing.Imaging.ImageFormat;
|
||||
|
||||
|
||||
@@ -175,6 +170,8 @@ namespace PDFWorkflowManager
|
||||
cmbBanner.SelectedIndex = Properties.Settings.Default.Banner;
|
||||
txtPDFAuthor.Text = Properties.Settings.Default.PDFMetaAuthor;
|
||||
radioSortNormal.Checked = true;
|
||||
GlobalFontSettings.FontResolver = new SystemFontResolver();
|
||||
|
||||
// cmbLanguage Populate
|
||||
#region cmbLanguage
|
||||
|
||||
@@ -702,286 +699,48 @@ namespace PDFWorkflowManager
|
||||
return null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Converts the given TIFF files into "FinalDocument.pdf" inside <paramref name="outputDir"/>
/// using <c>TiffToPdfConverter.ConvertTiffToPdfA</c> on a worker thread.
/// </summary>
/// <param name="strFiles">Paths of the TIFF files handed to the converter.</param>
/// <param name="outputDir">Directory that receives "FinalDocument.pdf".</param>
/// <param name="selectedLanguage">Language string passed through to the converter.</param>
/// <returns>A task that completes once the converter call has returned.</returns>
private async Task convertToPdf(string[] strFiles, string outputDir, string selectedLanguage)
{
    string outputPdf = Path.Combine(outputDir, "FinalDocument.pdf");

    // The progress bar maximum is the number of TIFF directories (pages) summed over
    // all inputs; a file that LibTiff cannot open contributes nothing to the total.
    int pageTotal = 0;
    for (int index = 0; index < strFiles.Length; index++)
    {
        using (var tiff = BitMiracle.LibTiff.Classic.Tiff.Open(strFiles[index], "r"))
        {
            if (tiff == null)
            {
                continue;
            }

            pageTotal += tiff.NumberOfDirectories();
        }
    }

    toolStripProgressBar1.Maximum = pageTotal;
    toolStripProgressBar1.Value = 0;
    toolStripStatusLabel1.Text = "Converting files to PDF...";

    // Single place that advances the bar; must run on the thread owning the control.
    MethodInvoker advance = delegate
    {
        toolStripProgressBar1.Value++;
    };

    var converter = new TiffToPdfConverter();
    await Task.Run(() =>
    {
        // The callback handed to the converter bumps the bar once per invocation,
        // marshalling to the UI thread when called from the worker.
        converter.ConvertTiffToPdfA(strFiles, outputPdf, selectedLanguage, () =>
        {
            if (!toolStripProgressBar1.Control.InvokeRequired)
            {
                advance();
                return;
            }

            toolStripProgressBar1.Control.Invoke(advance);
        });
    });

    toolStripStatusLabel1.Text = "Conversion complete!";
}
|
||||
|
||||
|
||||
|
||||
/// <summary>
/// Renders every page of every TIFF in <paramref name="tiffFiles"/> into a new PDF document,
/// runs OCR on each page bitmap, and saves the result to <paramref name="outputPdfPath"/>.
/// </summary>
/// <param name="tiffFiles">Paths of the (possibly multi-page) TIFF files, processed in order.</param>
/// <param name="outputPdfPath">Path the finished PDF is written to.</param>
/// <param name="ocrLanguage">Language string passed to <c>PerformOcr</c>.</param>
/// <exception cref="IOException">A TIFF file cannot be opened or one of its pages cannot be decoded.</exception>
/// <remarks>
/// Each PDF page is sized with the pixel dimensions interpreted as points (1 pixel = 1 point).
/// The OCR text is handed to <c>OverlayTextOntoPdfPage</c>.
/// </remarks>
private void ConvertTiffToPdfAWithOcr(string[] tiffFiles, string outputPdfPath, string ocrLanguage)
{
    using (PdfDocument pdf = new PdfDocument())
    {
        pdf.Info.Title = "Converted TIFF to PDF/A";
        pdf.Info.Creator = "RCEU_PDFWorkflowManager";

        foreach (string tiffFile in tiffFiles)
        {
            using (Tiff image = Tiff.Open(tiffFile, "r"))
            {
                // Tiff.Open signals failure by returning null rather than throwing.
                if (image == null)
                {
                    throw new IOException("Could not open TIFF file: " + tiffFile);
                }

                int pageCount = image.NumberOfDirectories();
                for (int page = 0; page < pageCount; page++)
                {
                    image.SetDirectory((short)page);

                    int width = image.GetField(TiffTag.IMAGEWIDTH)[0].ToInt();
                    int height = image.GetField(TiffTag.IMAGELENGTH)[0].ToInt();

                    // 32-bit pixels; checked so an absurd image size fails loudly instead of wrapping.
                    int[] raster = new int[checked(height * width)];

                    // Request top-left origin so rows match System.Drawing's top-down layout
                    // (plain ReadRGBAImage fills the raster bottom-up).
                    // Orientation is fully qualified: WinForms and Tesseract declare the same name.
                    if (!image.ReadRGBAImageOriented(width, height, raster, BitMiracle.LibTiff.Classic.Orientation.TOPLEFT))
                    {
                        throw new IOException("Could not decode page " + page + " of TIFF file: " + tiffFile);
                    }

                    // LibTiff packs pixels as ABGR; Format32bppArgb expects ARGB.
                    // Swap the red and blue channels in place, leaving alpha and green untouched.
                    for (int i = 0; i < raster.Length; i++)
                    {
                        int abgr = raster[i];
                        raster[i] = (abgr & unchecked((int)0xFF00FF00))
                                    | ((abgr & 0x000000FF) << 16)
                                    | ((abgr >> 16) & 0x000000FF);
                    }

                    using (var bmp = new System.Drawing.Bitmap(width, height, System.Drawing.Imaging.PixelFormat.Format32bppArgb))
                    {
                        var bmpData = bmp.LockBits(
                            new System.Drawing.Rectangle(0, 0, width, height),
                            System.Drawing.Imaging.ImageLockMode.WriteOnly,
                            bmp.PixelFormat);
                        try
                        {
                            System.Runtime.InteropServices.Marshal.Copy(raster, 0, bmpData.Scan0, raster.Length);
                        }
                        finally
                        {
                            bmp.UnlockBits(bmpData);
                        }

                        PdfPage pagePdf = pdf.AddPage();
                        pagePdf.Width = XUnit.FromPoint(width);
                        pagePdf.Height = XUnit.FromPoint(height);

                        using (XGraphics gfx = XGraphics.FromPdfPage(pagePdf))
                        {
                            // The bitmap is round-tripped through a temporary PNG so XImage can load it.
                            string tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".png");
                            try
                            {
                                bmp.Save(tempPath, System.Drawing.Imaging.ImageFormat.Png);

                                using (XImage img = XImage.FromFile(tempPath))
                                {
                                    gfx.DrawImage(img, 0, 0, width, height);
                                }
                            }
                            finally
                            {
                                // Always clean up, even when saving or drawing fails.
                                File.Delete(tempPath);
                            }
                        }

                        // Perform OCR to extract text
                        string extractedText = PerformOcr(bmp, ocrLanguage);

                        // Overlay OCR text onto the PDF page
                        OverlayTextOntoPdfPage(pdf, pagePdf, extractedText);
                    }
                }
            }
        }

        pdf.Save(outputPdfPath);
    }
}
|
||||
|
||||
/// <summary>
/// Concatenates the pages of several PDF files, in the order given, into one new PDF.
/// </summary>
/// <param name="pdfFiles">Paths of the PDF files to merge; each is opened in import mode.</param>
/// <param name="outputPdfPath">Path the merged PDF is saved to.</param>
private void MergePdfs(string[] pdfFiles, string outputPdfPath)
{
    var merged = new PdfDocument();

    for (int fileIndex = 0; fileIndex < pdfFiles.Length; fileIndex++)
    {
        PdfDocument source = PdfReader.Open(pdfFiles[fileIndex], PdfDocumentOpenMode.Import);

        // Append every page of the current source to the end of the merged document.
        foreach (PdfPage sourcePage in source.Pages)
        {
            merged.AddPage(sourcePage);
        }
    }

    merged.Save(outputPdfPath);
}
|
||||
|
||||
|
||||
/// <summary>
/// Runs Tesseract over a bitmap and returns the recognised text.
/// </summary>
/// <param name="image">Bitmap to recognise.</param>
/// <param name="language">Language argument for the Tesseract engine.</param>
/// <returns>The text reported by Tesseract for the processed image.</returns>
/// <remarks>A new engine is created from the relative "./tessdata" folder on every call.</remarks>
private string PerformOcr(System.Drawing.Bitmap image, string language)
{
    using (var ocrEngine = new TesseractEngine(@"./tessdata", language, EngineMode.Default))
    using (var recognised = ocrEngine.Process(image))
    {
        string text = recognised.GetText();
        return text;
    }
}
|
||||
|
||||
/// <summary>
/// Placeholder for drawing OCR text onto a PDF page. The body is currently empty,
/// so calling this method has no effect and all three arguments are ignored.
/// </summary>
/// <param name="pdf">Document that owns <paramref name="page"/> (unused for now).</param>
/// <param name="page">Page the text is meant to be overlaid on (unused for now).</param>
/// <param name="text">Text to overlay (unused for now).</param>
private void OverlayTextOntoPdfPage(PdfDocument pdf, PdfPage page, string text)
|
||||
{
|
||||
// TODO: implement the text overlay logic here; until then this method is a no-op.
|
||||
}
|
||||
|
||||
/// <summary>
/// Converts every input file to JPEG twice, at 300 dpi into "jpg300" and at 150 dpi into
/// "jpg150" under <c>tempDir</c>, running the conversions in parallel on worker threads.
/// </summary>
/// <param name="strFiles">Paths of the source files to convert.</param>
/// <returns>A task that completes when both conversion passes have finished.</returns>
/// <remarks>
/// The method holds <c>semaphore</c> for its whole duration. A failure in any conversion
/// surfaces to the awaiting caller as the exception thrown by <c>Parallel.ForEach</c>.
/// </remarks>
private async Task prepConvertToTempOutdir(string[] strFiles)
{
    // Acquire before entering try: if the wait itself fails, Release must not run.
    await semaphore.WaitAsync();
    try
    {
        string tempJpg300Dir = Path.Combine(tempDir, "jpg300");
        string tempJpg150Dir = Path.Combine(tempDir, "jpg150");
        Directory.CreateDirectory(tempJpg300Dir);
        Directory.CreateDirectory(tempJpg150Dir);

        // Two conversions per input file (300 dpi and 150 dpi).
        toolStripProgressBar1.Maximum = strFiles.Length * 2;
        toolStripProgressBar1.Value = 0;
        toolStripStatusLabel1.Text = "Converting files to jpg.";

        // One parallel pass over all inputs for a given target directory and resolution.
        // The output path is a per-iteration local: a shared variable would be overwritten
        // by concurrent iterations and could send a conversion to the wrong file.
        // The lambda is synchronous on purpose: an async lambda here would be async-void,
        // and its exceptions would bypass Parallel.ForEach and the awaiting caller.
        Action<string, int> convertAll = (targetDir, dpi) =>
            Parallel.ForEach(strFiles, inputFile =>
            {
                string outputFile = Path.Combine(targetDir, Path.GetFileNameWithoutExtension(inputFile) + ".jpg");
                convertToJpeg(inputFile, outputFile, 85, dpi);
            });

        // NOTE: the progress bar is configured above but not advanced during the passes.
        await Task.Run(() => convertAll(tempJpg300Dir, 300));
        await Task.Run(() => convertAll(tempJpg150Dir, 150));
    }
    finally
    {
        semaphore.Release();
    }
}
|
||||
|
||||
/// <summary>
/// Advances the status-strip progress bar by one step, marshalling the update to the
/// thread that owns the control when called from another thread.
/// </summary>
private void UpdateProgressBar()
{
    MethodInvoker step = delegate
    {
        toolStripProgressBar1.Value++;
    };

    // Already on the owning thread: update directly.
    if (!toolStripProgressBar1.Control.InvokeRequired)
    {
        step();
        return;
    }

    toolStripProgressBar1.Control.Invoke(step);
}
|
||||
/// <summary>
/// Click handler for the "Convert to PDF" button: determines the OCR language, converts the
/// TIFF files in <c>workOutDir</c> to "output.pdf" under the temp "output" directory via
/// <c>TiffToPdfConverter.ConvertTiffToPdfAWithOcrAsync</c>, then updates the page count,
/// enables the "Make PDF" button and sets the status text.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
/// <remarks>
/// Any exception raised during the conversion is reported to the user in a message box;
/// nothing is rethrown, because this is an async-void event handler.
/// </remarks>
private async void btnConvertToPDF_Click(object sender, EventArgs e)
{
    try
    {
        // Nothing to convert: tell the user instead of starting an empty conversion.
        if (Directory.GetFiles(workOutDir, "*.tif").Length == 0)
        {
            MessageBox.Show("No supported files in Work\\out directory!", "No supported files found!", MessageBoxButtons.OK, MessageBoxIcon.Error);
            return;
        }

        string outputDir = Path.Combine(tempDir, "output");
        Directory.CreateDirectory(outputDir);

        // Determine OCR language: the combo-box selection wins when its check box is ticked;
        // otherwise the "; "-separated names in the text box are mapped through trainLanguage
        // and joined with "+". The default is "eng".
        string selectedLanguage = "eng";
        if (checkLanguage.Checked && cmbLanguage.SelectedItem != null)
        {
            dynamic lang = cmbLanguage.SelectedItem;
            selectedLanguage = lang.Value;
        }
        else if (!string.IsNullOrWhiteSpace(txtLanguages.Text))
        {
            string[] langArray = txtLanguages.Text.Split(new string[] { "; " }, StringSplitOptions.RemoveEmptyEntries);
            string joined = string.Join("+",
                trainLanguage.Where(kv => langArray.Contains(kv.Key)).Select(kv => kv.Value));

            // Keep the default when none of the entered names matched a known language,
            // rather than passing an empty language string on.
            if (!string.IsNullOrEmpty(joined))
            {
                selectedLanguage = joined;
            }
        }

        var tiffConverter = new TiffToPdfConverter();
        await tiffConverter.ConvertTiffToPdfAWithOcrAsync(
            workOutDir,
            Path.Combine(outputDir, "output.pdf"),
            selectedLanguage,
            toolStripProgressBar1,
            toolStripStatusLabel1,
            this);

        txtPageCount.Text = Directory.GetFiles(workOutDir, "*.tif").Length.ToString();
        btnMakePDF.Enabled = true;
        toolStripStatusLabel1.Text = "Conversion complete!";
    }
    catch (Exception ex)
    {
        MessageBox.Show($"Error during conversion: {ex.Message}", "Conversion Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
|
||||
|
||||
|
||||
private void btnCalcPageCount_Click(object sender, EventArgs e)
|
||||
{
|
||||
try
|
||||
|
@@ -197,6 +197,7 @@
|
||||
<Compile Include="SettingsForm.Designer.cs">
|
||||
<DependentUpon>SettingsForm.cs</DependentUpon>
|
||||
</Compile>
|
||||
<Compile Include="SystemFontResolver.cs" />
|
||||
<Compile Include="TiffToPdfConverter.cs" />
|
||||
<EmbeddedResource Include="LanguagesForm.resx">
|
||||
<DependentUpon>LanguagesForm.cs</DependentUpon>
|
||||
@@ -253,6 +254,7 @@
|
||||
<ItemGroup>
|
||||
<Content Include="chicken_lips.ico" />
|
||||
</ItemGroup>
|
||||
<ItemGroup />
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
|
||||
<PropertyGroup>
|
||||
|
@@ -1,4 +1,5 @@
|
||||
using System;
|
||||
using PdfSharp.Fonts;
|
||||
using System;
|
||||
using System.Windows.Forms;
|
||||
|
||||
namespace PDFWorkflowManager
|
||||
@@ -14,6 +15,12 @@ namespace PDFWorkflowManager
|
||||
Application.EnableVisualStyles();
|
||||
Application.SetCompatibleTextRenderingDefault(false);
|
||||
Application.Run(new MainForm());
|
||||
|
||||
GlobalFontSettings.FontResolver = new SystemFontResolver();
|
||||
|
||||
Application.EnableVisualStyles();
|
||||
Application.SetCompatibleTextRenderingDefault(false);
|
||||
Application.Run(new MainForm());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
BIN
PDFWorkflowManager/PDFWorkflowManager/arial.ttf
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/arial.ttf
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/sRGB.icc
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/sRGB.icc
Normal file
Binary file not shown.
Reference in New Issue
Block a user