update
This commit is contained in:
BIN
.vs/slnx.sqlite
Normal file
BIN
.vs/slnx.sqlite
Normal file
Binary file not shown.
10
.vs/tasks.vs.json
Normal file
10
.vs/tasks.vs.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"version": "0.2.1",
|
||||||
|
"tasks": [
|
||||||
|
{
|
||||||
|
"taskLabel": "task-sRGB",
|
||||||
|
"appliesTo": "PDFWorkflowManager/PDFWorkflowManager/sRGB.icc",
|
||||||
|
"type": "launch"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
18
PDFWorkflowManager/PDFWorkflowManager/SystemFontResolver.cs
Normal file
18
PDFWorkflowManager/PDFWorkflowManager/SystemFontResolver.cs
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
using PdfSharp.Fonts;
|
||||||
|
using System.IO;
|
||||||
|
|
||||||
|
/// <summary>
/// PDFsharp font resolver that answers every typeface request with the
/// regular Arial face read from the system fonts directory.
/// </summary>
public class SystemFontResolver : IFontResolver
{
    // Face name handed back to PDFsharp from ResolveTypeface.
    private const string ArialFaceName = "Arial#";

    // File name of the regular Arial face inside the system fonts directory.
    private const string ArialFileName = "arial.ttf";

    /// <summary>
    /// Maps any requested font family to Arial.
    /// </summary>
    /// <param name="familyName">Requested family name; ignored, Arial is always used.</param>
    /// <param name="isBold">Whether a bold face was requested.</param>
    /// <param name="isItalic">Whether an italic face was requested.</param>
    /// <returns>Resolver info naming the Arial face, flagged for style simulation when needed.</returns>
    public FontResolverInfo ResolveTypeface(string familyName, bool isBold, bool isItalic)
    {
        // Only the regular face file is ever loaded, so ask PDFsharp to
        // simulate bold/italic rather than silently dropping the style.
        return new FontResolverInfo(ArialFaceName, isBold, isItalic);
    }

    /// <summary>
    /// Returns the raw bytes of the Arial font file.
    /// </summary>
    /// <param name="faceName">Face name previously returned by <see cref="ResolveTypeface"/>; ignored, Arial is always served.</param>
    /// <returns>The contents of <c>arial.ttf</c> from the system fonts directory.</returns>
    /// <exception cref="FileNotFoundException">The Arial font file does not exist at the expected location.</exception>
    public byte[] GetFont(string faceName)
    {
        // Use system fonts directory (the returned path may be empty if the
        // platform does not provide one, which the existence check below catches).
        string fontsDirectory = System.Environment.GetFolderPath(System.Environment.SpecialFolder.Fonts);
        string fontPath = Path.Combine(fontsDirectory, ArialFileName);

        if (!File.Exists(fontPath))
        {
            throw new FileNotFoundException($"Arial font file not found: {fontPath}", fontPath);
        }

        return File.ReadAllBytes(fontPath);
    }
}
|
@@ -1,146 +1,173 @@
|
|||||||
using System;
|
using System;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Drawing;
|
using System.Drawing;
|
||||||
using BitMiracle.LibTiff.Classic;
|
using System.Threading.Tasks;
|
||||||
|
using System.Windows.Forms;
|
||||||
using PdfSharp.Pdf;
|
using PdfSharp.Pdf;
|
||||||
using PdfSharp.Drawing;
|
using PdfSharp.Drawing;
|
||||||
using Tesseract;
|
using Tesseract;
|
||||||
using System.Windows.Forms;
|
using ImageMagick;
|
||||||
using System.Linq;
|
|
||||||
|
|
||||||
namespace RCEU_PDFWorkflowManager
|
namespace RCEU_PDFWorkflowManager
|
||||||
{
|
{
|
||||||
public class TiffToPdfConverter
|
public class TiffToPdfConverter
|
||||||
{
|
{
|
||||||
public void ConvertTiffToPdfAWithOcr(
|
/// <summary>
|
||||||
|
/// Maximum number of threads to use for page processing
|
||||||
|
/// </summary>
|
||||||
|
public int MaxDegreeOfParallelism { get; set; } = Environment.ProcessorCount;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Converts all TIFF files in a directory to a searchable PDF.
|
||||||
|
/// </summary>
|
||||||
|
public async Task ConvertTiffToPdfWithOcrAsync(
|
||||||
string workOutDir,
|
string workOutDir,
|
||||||
string outputPdfPath,
|
string outputPdfFileName, // e.g., "output.pdf"
|
||||||
string ocrLanguage,
|
string selectedLanguage,
|
||||||
ToolStripProgressBar progressBar,
|
ToolStripProgressBar progressBar,
|
||||||
ToolStripStatusLabel statusLabel)
|
ToolStripStatusLabel statusLabel,
|
||||||
|
Form mainForm,
|
||||||
|
string bannerPdfPath = null) // optional banner PDF
|
||||||
{
|
{
|
||||||
string[] tiffFiles = Directory.GetFiles(workOutDir, "*.tif");
|
string[] tiffFiles = Directory.GetFiles(workOutDir, "*.tif");
|
||||||
|
|
||||||
|
if (tiffFiles.Length == 0)
|
||||||
|
throw new FileNotFoundException("No TIFF files found in the directory.");
|
||||||
|
|
||||||
// Count total pages for progress bar
|
// Count total pages for progress bar
|
||||||
int totalPages = tiffFiles.Sum(file => Tiff.Open(file, "r")?.NumberOfDirectories() ?? 0);
|
int totalPages = 0;
|
||||||
progressBar.Maximum = totalPages;
|
foreach (var file in tiffFiles)
|
||||||
progressBar.Value = 0;
|
using (var collection = new MagickImageCollection(file))
|
||||||
statusLabel.Text = "Converting TIFFs to PDF...";
|
totalPages += collection.Count;
|
||||||
|
|
||||||
PdfDocument pdf = new PdfDocument();
|
PdfDocument pdf = new PdfDocument();
|
||||||
pdf.Info.Title = "Converted TIFF to PDF/A";
|
pdf.Info.Title = "Converted TIFF to PDF/A";
|
||||||
pdf.Info.Creator = "RCEU_PDFWorkflowManager";
|
pdf.Info.Creator = "RCEU_PDFWorkflowManager";
|
||||||
|
|
||||||
foreach (var tiffFile in tiffFiles)
|
mainForm.Invoke((MethodInvoker)(() =>
|
||||||
{
|
{
|
||||||
using (Tiff image = Tiff.Open(tiffFile, "r"))
|
progressBar.Maximum = totalPages;
|
||||||
|
progressBar.Value = 0;
|
||||||
|
statusLabel.Text = "Converting TIFFs to PDF...";
|
||||||
|
}));
|
||||||
|
|
||||||
|
bool success = true;
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
await Task.Run(() =>
|
||||||
{
|
{
|
||||||
if (image == null) continue;
|
foreach (var tiffFile in tiffFiles)
|
||||||
|
|
||||||
int pageCount = image.NumberOfDirectories();
|
|
||||||
for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
|
|
||||||
{
|
{
|
||||||
image.SetDirectory((short)pageIndex);
|
using (var collection = new MagickImageCollection(tiffFile))
|
||||||
|
|
||||||
int width = image.GetField(TiffTag.IMAGEWIDTH)[0].ToInt();
|
|
||||||
int height = image.GetField(TiffTag.IMAGELENGTH)[0].ToInt();
|
|
||||||
|
|
||||||
int[] raster = new int[width * height];
|
|
||||||
image.ReadRGBAImage(width, height, raster);
|
|
||||||
|
|
||||||
using (var bmp = new Bitmap(width, height, System.Drawing.Imaging.PixelFormat.Format32bppArgb))
|
|
||||||
{
|
{
|
||||||
var bmpData = bmp.LockBits(
|
Parallel.ForEach(collection, new ParallelOptions { MaxDegreeOfParallelism = MaxDegreeOfParallelism }, magickImage =>
|
||||||
new Rectangle(0, 0, width, height),
|
|
||||||
System.Drawing.Imaging.ImageLockMode.WriteOnly,
|
|
||||||
bmp.PixelFormat);
|
|
||||||
|
|
||||||
System.Runtime.InteropServices.Marshal.Copy(raster, 0, bmpData.Scan0, raster.Length);
|
|
||||||
bmp.UnlockBits(bmpData);
|
|
||||||
|
|
||||||
PdfPage pagePdf = pdf.AddPage();
|
|
||||||
pagePdf.Width = XUnit.FromPoint(width);
|
|
||||||
pagePdf.Height = XUnit.FromPoint(height);
|
|
||||||
|
|
||||||
using (XGraphics gfx = XGraphics.FromPdfPage(pagePdf))
|
|
||||||
{
|
{
|
||||||
// Save temp PNG
|
using (Bitmap bmp = magickImage.ToBitmap())
|
||||||
string tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".png");
|
|
||||||
bmp.Save(tempPath, System.Drawing.Imaging.ImageFormat.Png);
|
|
||||||
|
|
||||||
using (XImage ximg = XImage.FromFile(tempPath))
|
|
||||||
{
|
{
|
||||||
gfx.DrawImage(ximg, 0, 0, width, height);
|
string extractedText = "";
|
||||||
|
try
|
||||||
|
{
|
||||||
|
extractedText = PerformOcr(bmp, selectedLanguage);
|
||||||
|
}
|
||||||
|
catch
|
||||||
|
{
|
||||||
|
// If OCR fails, continue but mark success false
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
lock (pdf)
|
||||||
|
{
|
||||||
|
PdfPage page = pdf.AddPage();
|
||||||
|
page.Width = XUnit.FromPoint(bmp.Width).Point;
|
||||||
|
page.Height = XUnit.FromPoint(bmp.Height).Point;
|
||||||
|
|
||||||
|
using (var gfx = XGraphics.FromPdfPage(page))
|
||||||
|
using (var ms = new MemoryStream())
|
||||||
|
{
|
||||||
|
bmp.Save(ms, System.Drawing.Imaging.ImageFormat.Png);
|
||||||
|
ms.Position = 0;
|
||||||
|
using (var ximg = XImage.FromStream(ms))
|
||||||
|
gfx.DrawImage(ximg,
|
||||||
|
XUnit.FromPoint(0),
|
||||||
|
XUnit.FromPoint(0),
|
||||||
|
XUnit.FromPoint(bmp.Width),
|
||||||
|
XUnit.FromPoint(bmp.Height));
|
||||||
|
}
|
||||||
|
|
||||||
|
OverlayTextOntoPdfPage(page, extractedText);
|
||||||
|
}
|
||||||
|
|
||||||
|
mainForm.Invoke((MethodInvoker)(() =>
|
||||||
|
{
|
||||||
|
progressBar.Value++;
|
||||||
|
statusLabel.Text = $"Processing page {progressBar.Value} of {progressBar.Maximum}";
|
||||||
|
}));
|
||||||
}
|
}
|
||||||
|
});
|
||||||
File.Delete(tempPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
// OCR and overlay text
|
|
||||||
string extractedText = PerformOcr(tiffFile, pageIndex, ocrLanguage);
|
|
||||||
OverlayTextOntoPdfPage(pdf, pagePdf, extractedText);
|
|
||||||
|
|
||||||
// Update progress bar safely
|
|
||||||
if (progressBar.InvokeRequired)
|
|
||||||
{
|
|
||||||
progressBar.Invoke((MethodInvoker)delegate
|
|
||||||
{
|
|
||||||
progressBar.Value++;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
progressBar.Value++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf.Save(outputPdfPath);
|
// Add banner page if selected
|
||||||
statusLabel.Text = "Conversion complete!";
|
if (!string.IsNullOrEmpty(bannerPdfPath) && File.Exists(bannerPdfPath))
|
||||||
}
|
|
||||||
|
|
||||||
// OCR method
|
|
||||||
private string PerformOcr(string tiffFile, int pageIndex, string language)
|
|
||||||
{
|
|
||||||
using (Tiff tiff = Tiff.Open(tiffFile, "r"))
|
|
||||||
{
|
|
||||||
tiff.SetDirectory((short)pageIndex);
|
|
||||||
int width = tiff.GetField(TiffTag.IMAGEWIDTH)[0].ToInt();
|
|
||||||
int height = tiff.GetField(TiffTag.IMAGELENGTH)[0].ToInt();
|
|
||||||
|
|
||||||
int[] raster = new int[width * height];
|
|
||||||
tiff.ReadRGBAImage(width, height, raster);
|
|
||||||
|
|
||||||
using (Bitmap bmp = new Bitmap(width, height, System.Drawing.Imaging.PixelFormat.Format32bppArgb))
|
|
||||||
{
|
|
||||||
for (int y = 0; y < height; y++)
|
|
||||||
for (int x = 0; x < width; x++)
|
|
||||||
{
|
|
||||||
int rgba = raster[y * width + x];
|
|
||||||
int r = rgba & 0xFF;
|
|
||||||
int g = (rgba >> 8) & 0xFF;
|
|
||||||
int b = (rgba >> 16) & 0xFF;
|
|
||||||
bmp.SetPixel(x, height - y - 1, Color.FromArgb(255, r, g, b));
|
|
||||||
}
|
|
||||||
|
|
||||||
using (var engine = new TesseractEngine(@"./tessdata", language, EngineMode.Default))
|
|
||||||
using (var page = engine.Process(bmp))
|
|
||||||
{
|
{
|
||||||
return page.GetText();
|
lock (pdf)
|
||||||
|
{
|
||||||
|
using (PdfDocument bannerDoc = PdfSharp.Pdf.IO.PdfReader.Open(bannerPdfPath, PdfSharp.Pdf.IO.PdfDocumentOpenMode.Import))
|
||||||
|
{
|
||||||
|
foreach (PdfPage bannerPage in bannerDoc.Pages)
|
||||||
|
{
|
||||||
|
pdf.AddPage(bannerPage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Only save if all pages processed successfully
|
||||||
|
if (success)
|
||||||
|
{
|
||||||
|
string outputPdfPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, outputPdfFileName);
|
||||||
|
pdf.Save(outputPdfPath);
|
||||||
|
mainForm.Invoke((MethodInvoker)(() => statusLabel.Text = $"PDF saved: {outputPdfPath}"));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mainForm.Invoke((MethodInvoker)(() => statusLabel.Text = "Conversion incomplete, PDF not saved."));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
mainForm.Invoke((MethodInvoker)(() => statusLabel.Text = $"Error during conversion: {ex.Message}"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Overlay OCR text (invisible but searchable)
|
private string PerformOcr(Bitmap bmp, string language)
|
||||||
private void OverlayTextOntoPdfPage(PdfDocument pdf, PdfPage page, string text)
|
|
||||||
{
|
{
|
||||||
using (XGraphics gfx = XGraphics.FromPdfPage(page, XGraphicsPdfPageOptions.Prepend))
|
string tessDataPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "tessdata");
|
||||||
|
|
||||||
|
if (!Directory.Exists(tessDataPath))
|
||||||
|
throw new DirectoryNotFoundException($"Tessdata folder not found: {tessDataPath}");
|
||||||
|
|
||||||
|
using (var engine = new TesseractEngine(tessDataPath, language, EngineMode.Default))
|
||||||
|
using (var page = engine.Process(bmp))
|
||||||
{
|
{
|
||||||
XFont font = new XFont("Arial", 10); // regular font
|
return page.GetText();
|
||||||
gfx.DrawString(text, font, XBrushes.Transparent, new XRect(0, 0, page.Width, page.Height));
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void OverlayTextOntoPdfPage(PdfPage page, string text)
|
||||||
|
{
|
||||||
|
using (var gfx = XGraphics.FromPdfPage(page, XGraphicsPdfPageOptions.Prepend))
|
||||||
|
{
|
||||||
|
XFont font = new XFont("Arial", 10);
|
||||||
|
gfx.DrawString(text, font, XBrushes.Transparent,
|
||||||
|
new XRect(
|
||||||
|
XUnit.FromPoint(0),
|
||||||
|
XUnit.FromPoint(0),
|
||||||
|
XUnit.FromPoint(page.Width),
|
||||||
|
XUnit.FromPoint(page.Height)),
|
||||||
|
XStringFormats.TopLeft);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ScrollView.jar
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ScrollView.jar
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/afr.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/afr.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/amh.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/amh.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ara.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ara.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/asm.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/asm.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/aze.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/aze.traineddata
Normal file
Binary file not shown.
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/bel.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/bel.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ben.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ben.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/bod.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/bod.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/bos.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/bos.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/bre.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/bre.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/bul.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/bul.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/cat.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/cat.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ceb.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ceb.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ces.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ces.traineddata
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/chr.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/chr.traineddata
Normal file
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
tessedit_create_alto 1
|
@@ -0,0 +1,7 @@
|
|||||||
|
tessedit_ambigs_training 1
|
||||||
|
load_freq_dawg 0
|
||||||
|
load_punc_dawg 0
|
||||||
|
load_system_dawg 0
|
||||||
|
load_number_dawg 0
|
||||||
|
ambigs_debug_level 3
|
||||||
|
load_fixed_length_dawgs 0
|
@@ -0,0 +1 @@
|
|||||||
|
tessedit_zero_rejection T
|
@@ -0,0 +1,5 @@
|
|||||||
|
load_bigram_dawg True
|
||||||
|
tessedit_enable_bigram_correction True
|
||||||
|
tessedit_bigram_debug 3
|
||||||
|
save_raw_choices True
|
||||||
|
save_alt_choices True
|
@@ -0,0 +1,12 @@
|
|||||||
|
disable_character_fragments T
|
||||||
|
file_type .bl
|
||||||
|
textord_fast_pitch_test T
|
||||||
|
tessedit_zero_rejection T
|
||||||
|
tessedit_minimal_rejection F
|
||||||
|
tessedit_write_rep_codes F
|
||||||
|
edges_children_fix F
|
||||||
|
edges_childarea 0.65
|
||||||
|
edges_boxarea 0.9
|
||||||
|
tessedit_resegment_from_boxes T
|
||||||
|
tessedit_train_from_boxes T
|
||||||
|
textord_no_rejects T
|
@@ -0,0 +1,13 @@
|
|||||||
|
file_type .bl
|
||||||
|
#tessedit_use_nn F
|
||||||
|
textord_fast_pitch_test T
|
||||||
|
tessedit_zero_rejection T
|
||||||
|
tessedit_minimal_rejection F
|
||||||
|
tessedit_write_rep_codes F
|
||||||
|
edges_children_fix F
|
||||||
|
edges_childarea 0.65
|
||||||
|
edges_boxarea 0.9
|
||||||
|
tessedit_resegment_from_boxes T
|
||||||
|
tessedit_train_from_boxes T
|
||||||
|
#textord_repeat_extraction F
|
||||||
|
textord_no_rejects T
|
@@ -0,0 +1 @@
|
|||||||
|
tessedit_char_whitelist 0123456789-.
|
@@ -0,0 +1 @@
|
|||||||
|
tessedit_write_images T
|
@@ -0,0 +1,2 @@
|
|||||||
|
tessedit_create_hocr 1
|
||||||
|
hocr_font_info 0
|
@@ -0,0 +1,2 @@
|
|||||||
|
interactive_display_mode T
|
||||||
|
tessedit_display_outwords T
|
@@ -0,0 +1,4 @@
|
|||||||
|
textord_skewsmooth_offset 8
|
||||||
|
textord_skewsmooth_offset2 8
|
||||||
|
textord_merge_desc 0.5
|
||||||
|
textord_no_rejects 1
|
@@ -0,0 +1,2 @@
|
|||||||
|
tessedit_resegment_from_line_boxes 1
|
||||||
|
tessedit_make_boxes_from_boxes 1
|
@@ -0,0 +1 @@
|
|||||||
|
debug_file tesseract.log
|
@@ -0,0 +1,11 @@
|
|||||||
|
file_type .bl
|
||||||
|
textord_fast_pitch_test T
|
||||||
|
tessedit_zero_rejection T
|
||||||
|
tessedit_minimal_rejection F
|
||||||
|
tessedit_write_rep_codes F
|
||||||
|
edges_children_fix F
|
||||||
|
edges_childarea 0.65
|
||||||
|
edges_boxarea 0.9
|
||||||
|
tessedit_train_line_recognizer T
|
||||||
|
textord_no_rejects T
|
||||||
|
tessedit_init_config_only T
|
@@ -0,0 +1 @@
|
|||||||
|
tessedit_create_lstmbox 1
|
@@ -0,0 +1,4 @@
|
|||||||
|
stopper_debug_level 1
|
||||||
|
classify_debug_level 1
|
||||||
|
segsearch_debug_level 1
|
||||||
|
language_model_debug_level 3
|
@@ -0,0 +1 @@
|
|||||||
|
tessedit_create_boxfile 1
|
@@ -0,0 +1,3 @@
|
|||||||
|
tessedit_create_page_xml 1
|
||||||
|
# page_xml_polygon 1
|
||||||
|
# page_xml_level 0
|
@@ -0,0 +1 @@
|
|||||||
|
tessedit_create_pdf 1
|
@@ -0,0 +1 @@
|
|||||||
|
debug_file /dev/null
|
@@ -0,0 +1,2 @@
|
|||||||
|
tessedit_resegment_from_boxes 1
|
||||||
|
tessedit_make_boxes_from_boxes 1
|
@@ -0,0 +1,12 @@
|
|||||||
|
textord_show_blobs 0
|
||||||
|
textord_debug_tabfind 3
|
||||||
|
textord_tabfind_show_partitions 1
|
||||||
|
textord_tabfind_show_initial_partitions 1
|
||||||
|
textord_tabfind_show_columns 1
|
||||||
|
textord_tabfind_show_blocks 1
|
||||||
|
textord_tabfind_show_initialtabs 1
|
||||||
|
textord_tabfind_show_finaltabs 1
|
||||||
|
textord_tabfind_show_strokewidths 1
|
||||||
|
textord_tabfind_show_vlines 0
|
||||||
|
textord_tabfind_show_images 1
|
||||||
|
tessedit_dump_pageseg_images 0
|
@@ -0,0 +1 @@
|
|||||||
|
tessedit_create_tsv 1
|
@@ -0,0 +1,3 @@
|
|||||||
|
# This config file should be used with other config files which create renderers.
|
||||||
|
# usage example: tesseract eurotext.tif eurotext txt hocr pdf
|
||||||
|
tessedit_create_txt 1
|
@@ -0,0 +1,2 @@
|
|||||||
|
tessedit_write_unlv 1
|
||||||
|
unlv_tilde_crunching T
|
@@ -0,0 +1 @@
|
|||||||
|
tessedit_create_wordstrbox 1
|
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/cos.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/cos.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/cym.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/cym.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/dan.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/dan.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/deu.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/deu.traineddata
Normal file
Binary file not shown.
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/div.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/div.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/dzo.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/dzo.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ell.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ell.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/eng.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/eng.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/enm.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/enm.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/epo.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/epo.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/equ.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/equ.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/est.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/est.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/eus.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/eus.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fao.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fao.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fas.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fas.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fil.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fil.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fin.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fin.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fra.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fra.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/frm.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/frm.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fry.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/fry.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/gla.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/gla.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/gle.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/gle.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/glg.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/glg.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/grc.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/grc.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/guj.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/guj.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/hat.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/hat.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/heb.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/heb.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/hin.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/hin.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/hrv.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/hrv.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/hun.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/hun.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/hye.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/hye.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/iku.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/iku.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ind.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ind.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/isl.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/isl.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ita.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/ita.traineddata
Normal file
Binary file not shown.
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/jav.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/jav.traineddata
Normal file
Binary file not shown.
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/jpn.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/jpn.traineddata
Normal file
Binary file not shown.
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kan.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kan.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kat.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kat.traineddata
Normal file
Binary file not shown.
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kaz.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kaz.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/khm.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/khm.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kir.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kir.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kmr.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kmr.traineddata
Normal file
Binary file not shown.
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kor.traineddata
Normal file
BIN
PDFWorkflowManager/PDFWorkflowManager/tessdata/kor.traineddata
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user