Improved support for magazines

Additional output of a tablet/150 dpi version; fixed the parallel run.

FIXME: the parallel task doesn't always run smoothly; force a single-threaded run afterwards.
This commit is contained in:
tomse 2024-03-04 23:45:14 +01:00
parent a314d4d1f8
commit 01387b9e4c
2 changed files with 216 additions and 182 deletions

View File

@ -145,6 +145,8 @@ namespace PDFWorkflowManager
// Path where banner pages are located
private string strExeFilePath = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
private SemaphoreSlim semaphore = new SemaphoreSlim(1);
string bannerPage = "";
private string workDir = Properties.Settings.Default.WorkDir;
@ -153,6 +155,9 @@ namespace PDFWorkflowManager
private string tempDir = Properties.Settings.Default.TempDir;
private string strPostProcessor = Properties.Settings.Default.PostProcessor;
private string strExtension = "tif";
private string bannerPageA5 = "banner_a5.pdf";
public MainForm()
{
@ -234,7 +239,8 @@ namespace PDFWorkflowManager
btnMakePDF.Enabled = true;
//public string txtPostProcessor.text = "";
}
}
private void btnProjectDir_Click(object sender, EventArgs e)
@ -550,7 +556,7 @@ namespace PDFWorkflowManager
Application.Exit();
}
private string prepCopyToTempOutdir(int sourceFileCount, string[] strFiles, string sourceDir)
private string prepCopyToTempOutdir(int sourceFileCount, string[] strFiles)
{
string tempSortDir = Path.Combine(tempDir, "sort");
Directory.CreateDirectory(tempSortDir);
@ -602,23 +608,40 @@ namespace PDFWorkflowManager
return tempSortDir;
}
static void ConvertToJpeg(string sourceFileName, string destinationFileName, int compressionLevel, int dpi = 300)
static void convertToJpeg(string sourceFileName, string destinationFileName, int compressionLevel, int dpi = 300)
{
try
{
// Load the source image
using (Image sourceImage = Image.FromFile(sourceFileName))
{
// Set encoding parameters for JPEG
EncoderParameters encoderParameters = new EncoderParameters(1);
encoderParameters.Param[0] = new EncoderParameter(Encoder.Quality, compressionLevel);
// Calculate new dimensions based on DPI
float scaleFactor = dpi / sourceImage.HorizontalResolution;
int newWidth = (int)(sourceImage.Width * scaleFactor);
int newHeight = (int)(sourceImage.Height * scaleFactor);
// Get the JPEG codec
ImageCodecInfo jpegCodec = GetEncoderInfo("image/jpeg");
using (var newImage = new Bitmap(newWidth, newHeight))
{
newImage.SetResolution(dpi, dpi);
// Save the image in JPEG format with specified compression level
sourceImage.Save(destinationFileName, jpegCodec, encoderParameters);
sourceImage.Dispose();
// Draw the source image onto the new bitmap with the new dimensions
using (Graphics graphics = Graphics.FromImage(newImage))
{
graphics.CompositingQuality = System.Drawing.Drawing2D.CompositingQuality.HighQuality;
graphics.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
graphics.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;
graphics.DrawImage(sourceImage, 0, 0, newWidth, newHeight);
}
// Set encoding parameters for JPEG
EncoderParameters encoderParameters = new EncoderParameters(1);
encoderParameters.Param[0] = new EncoderParameter(Encoder.Quality, compressionLevel);
// Get the JPEG codec
ImageCodecInfo jpegCodec = GetEncoderInfo(ImageFormat.Jpeg);
// Save the image in JPEG format with specified compression level
newImage.Save(destinationFileName, jpegCodec, encoderParameters);
}
}
}
catch (Exception ex)
@ -626,107 +649,157 @@ namespace PDFWorkflowManager
}
}
static void ConvertTiffTo150DpiJpg(string inputTiffPath, string outputJpgPath)
static ImageCodecInfo GetEncoderInfo(ImageFormat format)
{
using (var tiffImage = Image.FromFile(inputTiffPath))
foreach (ImageCodecInfo codec in ImageCodecInfo.GetImageDecoders())
{
var newWidth = (int)(tiffImage.Width * 150 / tiffImage.HorizontalResolution);
var newHeight = (int)(tiffImage.Height * 150 / tiffImage.VerticalResolution);
using (var newImage = new Bitmap(newWidth, newHeight))
{
newImage.SetResolution(150, 150);
using (var graphics = Graphics.FromImage(newImage))
{
graphics.SmoothingMode = SmoothingMode.HighQuality;
graphics.InterpolationMode = InterpolationMode.HighQualityBicubic;
graphics.DrawImage(tiffImage, 0, 0, newWidth, newHeight);
}
// Set JPEG compression level
var encoderParameters = new EncoderParameters(1);
var encoderParameter = new EncoderParameter(Encoder.Quality, 85L); // Set compression level here (0-100)
encoderParameters.Param[0] = encoderParameter;
// Save as JPEG with 150 DPI and specified compression level
var jpegCodecInfo = GetEncoderInfo(ImageFormat.Jpeg);
newImage.Save(outputJpgPath, jpegCodecInfo, encoderParameters);
}
}
}
static ImageCodecInfo GetEncoderInfo(string mimeType)
{
// Get image codecs for all image formats
ImageCodecInfo[] codecs = ImageCodecInfo.GetImageEncoders();
// Find the correct image codec
foreach (ImageCodecInfo codec in codecs)
{
if (codec.MimeType == mimeType)
if (codec.FormatID == format.Guid)
{
return codec;
}
}
// If no appropriate codec found, return null
return null;
}
private string prepConvertToTempOutdir(int sourceFileCount, string[] strFiles, string sourceDir)
private async Task convertToPdf(string[] strFiles, string outputDir, string selectedLanguage)
{
string tempSortDir = Path.Combine(tempDir, "sort");
Directory.CreateDirectory(tempSortDir);
int downCount = sourceFileCount;
int upCount = 1;
if (radioSortMagazine.Checked == true)
{
for (int i = 0; i < sourceFileCount;)
{
ConvertToJpeg(strFiles[i], Path.Combine(tempSortDir, downCount.ToString().PadLeft(4, '0') + ".jpg"), 85);
i++;
downCount--;
ConvertToJpeg(strFiles[i], Path.Combine(tempSortDir, upCount.ToString().PadLeft(4, '0') + ".jpg"), 85);
i++;
upCount++;
ConvertToJpeg(strFiles[i], Path.Combine(tempSortDir, upCount.ToString().PadLeft(4, '0') + ".jpg"), 85);
i++;
upCount++;
ConvertToJpeg(strFiles[i], Path.Combine(tempSortDir, downCount.ToString().PadLeft(4, '0') + ".jpg"), 85);
i++;
downCount--;
}
// fixme why sleep?
Thread.Sleep(1000);
sourceDir = tempSortDir;
}
else if (radioSortFlatBed.Checked == true)
{
for (int i = 1; i < sourceFileCount;)
{
ConvertToJpeg(strFiles[i], Path.Combine(tempSortDir, upCount.ToString().PadLeft(4, '0') + ".jpg"), 85);
i++;
upCount++;
}
ConvertToJpeg(strFiles[0], Path.Combine(tempSortDir, upCount.ToString().PadLeft(4, '0') + ".jpg"), 85);
ProcessStartInfo startInfo = new ProcessStartInfo();
startInfo.UseShellExecute = false;
startInfo.RedirectStandardOutput = true;
startInfo.CreateNoWindow = true;
// fixme why sleep?
Thread.Sleep(1000);
}
else if (radioSortNormal.Checked == true)
// TODO make this configurable
startInfo.FileName = Properties.Settings.Default.TesserAct;
string outputFile = "";
toolStripProgressBar1.Maximum = strFiles.Count();
toolStripProgressBar1.Value = 0;
toolStripStatusLabel1.Text = "Converting files to pdf.";
Task convertpdf = Task.Run(() =>
{
for (int i = 0; i < sourceFileCount;)
Parallel.ForEach(strFiles, inputFile =>
{
ConvertToJpeg(strFiles[i], Path.Combine(tempSortDir, upCount.ToString().PadLeft(4, '0') + ".jpg"), 85);
i++;
upCount++;
outputFile = Path.GetFileNameWithoutExtension(inputFile);
startInfo.Arguments = "\"" + inputFile + "\"" + " " + "\"" + Path.Combine(outputDir, outputFile) + "\"" + " -l " + selectedLanguage + " pdf txt";
using (var process = Process.Start(startInfo))
{
process.WaitForExit();
}
if (toolStripProgressBar1.Control.InvokeRequired)
{
toolStripProgressBar1.Control.Invoke((MethodInvoker)delegate
{
toolStripProgressBar1.Value++;
});
}
else
{
toolStripProgressBar1.Value++;
}
});
});
await Task.WhenAll(convertpdf);
Thread.Sleep(1000);
// Parallel doesn't always return full result, we pick up the missing and process them again single threaded
/*
if (Directory.GetFiles(sourceDir, "*." + strExtension).Length != outFiles.Length)
{
string[] arrayInFiles = new string[inFiles.Length];
string[] arrayOutFiles = new string[outFiles.Length];
for (int i = 0; i < inFiles.Length; i++)
{
arrayInFiles[i] = Path.GetFileNameWithoutExtension(inFiles[i]);
}
for (int i = 0; i < outFiles.Length; i++)
{
arrayOutFiles[i] = Path.GetFileNameWithoutExtension(outFiles[i]);
}
string[] difference = arrayInFiles.Except(arrayOutFiles).ToArray();
foreach (string item in difference)
{
startInfo.Arguments = "\"" + Path.Combine(sourceDir, item + "." + strExtension) + "\"" + " " + "\"" + Path.Combine(outputDir, item) + "\"" + " -l " + selectedLanguage + " pdf txt";
using (var process = Process.Start(startInfo))
{
process.WaitForExit();
}
}
if (Directory.GetFiles(sourceDir, "*." + strExtension).Length != Directory.GetFiles(outputDir, "*.pdf").Length)
{
MessageBox.Show("Not all files were converted to PDF");
}
}
return tempSortDir;
*/
}
/// <summary>
/// Renders every input image as a JPEG twice: a 300 dpi copy into the
/// "jpg300" sub-directory of <c>tempDir</c> and a 150 dpi copy into the
/// "jpg150" sub-directory. Both copies keep the input's base file name.
/// </summary>
/// <param name="strFiles">Paths of the source images to convert.</param>
/// <returns>A task that completes once both conversion passes have finished.</returns>
/// <remarks>
/// Calls are serialized through <c>semaphore</c>, so overlapping invocations
/// wait for each other instead of writing to the same directories at once.
/// </remarks>
private async Task prepConvertToTempOutdir(string[] strFiles)
{
    // Acquire outside the try block: if the wait itself fails, the finally
    // block must not release a semaphore that was never taken.
    await semaphore.WaitAsync();
    try
    {
        string tempJpg300Dir = Path.Combine(tempDir, "jpg300");
        string tempJpg150Dir = Path.Combine(tempDir, "jpg150");
        Directory.CreateDirectory(tempJpg300Dir);
        Directory.CreateDirectory(tempJpg150Dir);

        // One progress step per produced file; every input yields two outputs.
        toolStripProgressBar1.Maximum = strFiles.Length * 2;
        toolStripProgressBar1.Value = 0;
        toolStripStatusLabel1.Text = "Converting files to jpg.";

        // 300 dpi pass.
        await Task.Run(() =>
        {
            Parallel.ForEach(strFiles, inputFile =>
            {
                // The target path is local to the iteration. A variable shared
                // between iterations can be overwritten by another worker
                // before convertToJpeg reads it.
                string outputFile = Path.Combine(tempJpg300Dir, Path.GetFileNameWithoutExtension(inputFile) + ".jpg");
                convertToJpeg(inputFile, outputFile, 85, 300);
                UpdateProgressBar();
            });
        });

        // 150 dpi pass.
        await Task.Run(() =>
        {
            Parallel.ForEach(strFiles, inputFile =>
            {
                string outputFile = Path.Combine(tempJpg150Dir, Path.GetFileNameWithoutExtension(inputFile) + ".jpg");
                convertToJpeg(inputFile, outputFile, 85, 150);
                UpdateProgressBar();
            });
        });
    }
    finally
    {
        semaphore.Release();
    }
}
/// <summary>
/// Advances <c>toolStripProgressBar1</c> by one step. When the hosted control
/// reports that an invoke is required, the increment is dispatched through
/// the control's <c>Invoke</c>; otherwise it is applied directly.
/// </summary>
private void UpdateProgressBar()
{
    MethodInvoker advance = delegate
    {
        toolStripProgressBar1.Value++;
    };

    if (!toolStripProgressBar1.Control.InvokeRequired)
    {
        advance();
        return;
    }

    toolStripProgressBar1.Control.Invoke(advance);
}
private async void btnConvertToPDF_Click(object sender, EventArgs e)
{
try
@ -754,99 +827,42 @@ namespace PDFWorkflowManager
);
}
// TODO - check for magazine - finish this function
sourceDir = prepCopyToTempOutdir(sourceFileCount, strFiles);
strFiles = Directory.GetFiles(sourceDir, "*." + strExtension);
string jpg150dir = "";
if (chkMagazines.Checked == true)
{
sourceDir = prepConvertToTempOutdir(sourceFileCount, strFiles, sourceDir);
await prepConvertToTempOutdir(strFiles);
sourceDir = Path.Combine(tempDir, "jpg300");
jpg150dir = Path.Combine(tempDir, "jpg150");
// convert to jpg function
strExtension = "jpg";
}
else
{
sourceDir = prepCopyToTempOutdir(sourceFileCount, strFiles, sourceDir);
strExtension = "tif";
}
strFiles = Directory.GetFiles(sourceDir, "*." + strExtension);
await convertToPdf(strFiles, outputDir, selectedLanguage);
ProcessStartInfo startInfo = new ProcessStartInfo();
startInfo.UseShellExecute = false;
startInfo.RedirectStandardOutput = true;
startInfo.CreateNoWindow = true;
// TODO make this configurable
startInfo.FileName = Properties.Settings.Default.TesserAct;
string outputFile = "";
toolStripProgressBar1.Maximum = sourceFileCount;
toolStripProgressBar1.Value = 0;
toolStripStatusLabel1.Text = "Converting files to pdf.";
await Task.Run(() =>
try
{
Parallel.ForEach(strFiles, inputFile =>
string[] strFiles150 = Directory.GetFiles(jpg150dir, "*." + strExtension);
if (strFiles.Count() == strFiles150.Count())
{
outputFile = Path.GetFileNameWithoutExtension(inputFile);
startInfo.Arguments = "\"" + inputFile + "\"" + " " + "\"" + Path.Combine(outputDir, outputFile) + "\"" + " -l " + selectedLanguage + " pdf txt";
using (var process = Process.Start(startInfo))
{
process.WaitForExit();
}
if (toolStripProgressBar1.Control.InvokeRequired)
{
toolStripProgressBar1.Control.Invoke((MethodInvoker)delegate
{
toolStripProgressBar1.Value++;
});
}
else
{
toolStripProgressBar1.Value++;
}
});
});
strFiles150 = Directory.GetFiles(jpg150dir, "*." + strExtension);
await convertToPdf(strFiles150, jpg150dir, selectedLanguage);
}
}
catch { }
Thread.Sleep(1000);
string[] inFiles = Directory.GetFiles(sourceDir, "*." + strExtension);
string[] outFiles = Directory.GetFiles(outputDir, "*.pdf");
txtPageCount.Text = inFiles.Length.ToString();
btnMakePDF.Enabled = true;
// Parallel doesn't always return full result, we pick up the missing and process them again single threaded
if (Directory.GetFiles(sourceDir, "*." + strExtension).Length != outFiles.Length)
{
string[] arrayInFiles = new string[inFiles.Length];
string[] arrayOutFiles = new string[outFiles.Length];
for (int i = 0; i < inFiles.Length; i++)
{
arrayInFiles[i] = Path.GetFileNameWithoutExtension(inFiles[i]);
}
for (int i = 0; i < outFiles.Length; i++)
{
arrayOutFiles[i] = Path.GetFileNameWithoutExtension(outFiles[i]);
}
string[] difference = arrayInFiles.Except(arrayOutFiles).ToArray();
foreach (string item in difference)
{
startInfo.Arguments = "\"" + Path.Combine(sourceDir, item + "." + strExtension) + "\"" + " " + "\"" + Path.Combine(outputDir, item) + "\"" + " -l " + selectedLanguage + " pdf txt";
using (var process = Process.Start(startInfo))
{
process.WaitForExit();
}
}
if (Directory.GetFiles(sourceDir, "*." + strExtension).Length != Directory.GetFiles(outputDir, "*.pdf").Length)
{
MessageBox.Show("Not all files were converted to PDF");
}
}
btnMakePDF.Enabled = true;
toolStripStatusLabel1.Text = "Converting files to pdf. Done!";
@ -884,6 +900,7 @@ namespace PDFWorkflowManager
toolStripProgressBar1.Value = 0;
string outputDir = Path.Combine(tempDir, "output");
string tempFile = Path.Combine(tempDir, "_" + ".pdf");
string tempFile150 = Path.Combine(tempDir, "_150" + ".pdf");
// PDF 1.5 -> 1.7 + Metadata (gswin64c)
if (!Directory.Exists(tempDir))
@ -918,16 +935,29 @@ namespace PDFWorkflowManager
process.WaitForExit();
}
if (Directory.Exists(Path.Combine(tempDir, "jpg150")))
{
string bannera5 = Path.Combine(strExeFilePath, bannerPageA5);
startInfo.FileName = Properties.Settings.Default.PDFtk;
startInfo.Arguments = "\"" + Path.Combine(Path.Combine(tempDir, "jpg150", "*.pdf")) + "\" \"" + bannera5 + "\" cat output " + "\"" + tempFile150 + "\"";
using (var process = Process.Start(startInfo))
{
process.WaitForExit();
}
}
// Create Thumbnail
string[] outFiles = Directory.GetFiles(Path.Combine(tempDir, "sort"), "*." + strExtension);
string[] outFiles = Directory.GetFiles(Path.Combine(tempDir, "sort"), "*.tif");
createThumbNail(outFiles[0]);
textToOcrFile(outputDir);
generatePDFMetadata();
updatePDFMetaData(tempFile);
if (File.Exists(tempFile150))
{
updatePDFMetaData(tempFile150, "150");
}
toolStripStatusLabel1.Text = "Creating final pdf. Done.";
toolStripProgressBar1.Value = 1;
@ -940,8 +970,6 @@ namespace PDFWorkflowManager
File.WriteAllText(Path.Combine(txtProjectDir.Text, txtFileName.Text + ".pdf.md5"), md5string);
File.WriteAllText(Path.Combine(txtProjectDir.Text, txtFileName.Text + ".pdf.sha1"), sha1string);
}
catch
{
@ -951,14 +979,20 @@ namespace PDFWorkflowManager
File.Delete(Path.Combine(tempDir, "pdfmetadata.txt"));
}
private void updatePDFMetaData(string tempFile)
private void updatePDFMetaData(string tempFile, string modifier = "")
{
string filename = txtFileName.Text;
if (modifier != "")
{
filename = txtFileName.Text.Replace(cmbResolution.Text, modifier);
}
ProcessStartInfo updatePDF = new ProcessStartInfo();
updatePDF.UseShellExecute = false;
updatePDF.RedirectStandardOutput = true;
updatePDF.CreateNoWindow = true;
updatePDF.FileName = Properties.Settings.Default.GhostScript;
updatePDF.Arguments = "-dBATCH -dNOPAUSE -dAutoRotatePages=/None -sDEVICE=pdfwrite -sOutputFile=\"" + Path.Combine(txtProjectDir.Text, txtFileName.Text + ".pdf") + "\" \"" + tempFile + "\" \"" + Path.Combine(tempDir, "pdfmetadata.txt") + "\"";
updatePDF.Arguments = "-dBATCH -dNOPAUSE -dAutoRotatePages=/None -sDEVICE=pdfwrite -sOutputFile=\"" + Path.Combine(txtProjectDir.Text, filename + ".pdf") + "\" \"" + tempFile + "\" \"" + Path.Combine(tempDir, "pdfmetadata.txt") + "\"";
//updatePDF.Arguments = "-dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile=\"" + Path.Combine(txtProjectDir.Text, txtFileName.Text + ".pdf") + "\" \"" + tempFile + "\" -c\" " + generatePDFMetadata() + "\"";
using (var process = Process.Start(updatePDF))
{
@ -1119,7 +1153,7 @@ namespace PDFWorkflowManager
private void radioBannerA4_CheckedChanged(object sender, EventArgs e)
{
bannerPage = " \"" + Path.Combine(strExeFilePath, "banner_a4.pdf") + "\" ";
bannerPage = " \"" + Path.Combine(strExeFilePath, "banner_a4.pdf") + "\" ";
}
private void radioBannerA5_CheckedChanged(object sender, EventArgs e)
@ -1134,7 +1168,7 @@ namespace PDFWorkflowManager
private void radioSortNormal_CheckedChanged(object sender, EventArgs e)
{
sortNormal = true;
//sortNormal = true;
string[] strFiles = Directory.GetFiles(workOutDir, "*.tif");
if (File.Exists(strFiles[0]))
@ -1145,7 +1179,7 @@ namespace PDFWorkflowManager
private void radioSortMagazine_CheckedChanged(object sender, EventArgs e)
{
sortNormal = false;
//sortNormal = false;
string[] strFiles = Directory.GetFiles(workOutDir, "*.tif");
@ -1157,7 +1191,7 @@ namespace PDFWorkflowManager
private void radioSortFlatBed_CheckedChanged(object sender, EventArgs e)
{
sortNormal = false;
//sortNormal = false;
string[] strFiles = Directory.GetFiles(workOutDir, "*.tif");
@ -1260,7 +1294,7 @@ namespace PDFWorkflowManager
private void aboutToolStripMenuItem_Click(object sender, EventArgs e)
{
MessageBox.Show("PDF Workflow Manager v0.9.1 \r\n\r\nCopyright (c) 2023-2024 https://retro-commodore.eu", "Version", MessageBoxButtons.OK, MessageBoxIcon.Information);
MessageBox.Show("PDF Workflow Manager v0.9.2 \r\n\r\nCopyright (c) 2023-2024 https://retro-commodore.eu", "Version", MessageBoxButtons.OK, MessageBoxIcon.Information);
}
private void button1_Click(object sender, EventArgs e)