diff --git a/.vs/slnx.sqlite b/.vs/slnx.sqlite new file mode 100644 index 0000000..f788c57 Binary files /dev/null and b/.vs/slnx.sqlite differ diff --git a/.vs/tasks.vs.json b/.vs/tasks.vs.json new file mode 100644 index 0000000..0aa845f --- /dev/null +++ b/.vs/tasks.vs.json @@ -0,0 +1,10 @@ +{ + "version": "0.2.1", + "tasks": [ + { + "taskLabel": "task-sRGB", + "appliesTo": "PDFWorkflowManager/PDFWorkflowManager/sRGB.icc", + "type": "launch" + } + ] +} \ No newline at end of file diff --git a/PDFWorkflowManager/PDFWorkflowManager/SystemFontResolver.cs b/PDFWorkflowManager/PDFWorkflowManager/SystemFontResolver.cs new file mode 100644 index 0000000..715960f --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/SystemFontResolver.cs @@ -0,0 +1,18 @@ +using PdfSharp.Fonts; +using System.IO; + +public class SystemFontResolver : IFontResolver +{ + public FontResolverInfo ResolveTypeface(string familyName, bool isBold, bool isItalic) + { + // Map any requested font to Arial + return new FontResolverInfo("Arial#"); + } + + public byte[] GetFont(string faceName) + { + // Use system fonts directory + string fontPath = Path.Combine(System.Environment.GetFolderPath(System.Environment.SpecialFolder.Fonts), "arial.ttf"); + return File.ReadAllBytes(fontPath); + } +} diff --git a/PDFWorkflowManager/PDFWorkflowManager/TiffToPdfConverter.cs b/PDFWorkflowManager/PDFWorkflowManager/TiffToPdfConverter.cs index d2d0c45..228e166 100644 --- a/PDFWorkflowManager/PDFWorkflowManager/TiffToPdfConverter.cs +++ b/PDFWorkflowManager/PDFWorkflowManager/TiffToPdfConverter.cs @@ -1,146 +1,173 @@ using System; using System.IO; using System.Drawing; -using BitMiracle.LibTiff.Classic; +using System.Threading.Tasks; +using System.Windows.Forms; using PdfSharp.Pdf; using PdfSharp.Drawing; using Tesseract; -using System.Windows.Forms; -using System.Linq; +using ImageMagick; namespace RCEU_PDFWorkflowManager { public class TiffToPdfConverter { - public void ConvertTiffToPdfAWithOcr( + /// + /// Maximum number of threads to use for page processing + /// + public int MaxDegreeOfParallelism { get; set; } = Environment.ProcessorCount; + + /// + /// Converts all TIFF files in a directory to a searchable PDF. + /// + public async Task ConvertTiffToPdfWithOcrAsync( string workOutDir, - string outputPdfPath, - string ocrLanguage, + string outputPdfFileName, // e.g., "output.pdf" + string selectedLanguage, ToolStripProgressBar progressBar, - ToolStripStatusLabel statusLabel) + ToolStripStatusLabel statusLabel, + Form mainForm, + string bannerPdfPath = null) // optional banner PDF { string[] tiffFiles = Directory.GetFiles(workOutDir, "*.tif"); + if (tiffFiles.Length == 0) + throw new FileNotFoundException("No TIFF files found in the directory."); + // Count total pages for progress bar - int totalPages = tiffFiles.Sum(file => Tiff.Open(file, "r")?.NumberOfDirectories() ?? 0); - progressBar.Maximum = totalPages; - progressBar.Value = 0; - statusLabel.Text = "Converting TIFFs to PDF..."; + int totalPages = 0; + foreach (var file in tiffFiles) + using (var collection = new MagickImageCollection(file)) + totalPages += collection.Count; PdfDocument pdf = new PdfDocument(); pdf.Info.Title = "Converted TIFF to PDF/A"; pdf.Info.Creator = "RCEU_PDFWorkflowManager"; - foreach (var tiffFile in tiffFiles) + mainForm.Invoke((MethodInvoker)(() => { - using (Tiff image = Tiff.Open(tiffFile, "r")) + progressBar.Maximum = totalPages; + progressBar.Value = 0; + statusLabel.Text = "Converting TIFFs to PDF..."; + })); + + bool success = true; + + try + { + await Task.Run(() => { - if (image == null) continue; - - int pageCount = image.NumberOfDirectories(); - for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) + foreach (var tiffFile in tiffFiles) { - image.SetDirectory((short)pageIndex); - - int width = image.GetField(TiffTag.IMAGEWIDTH)[0].ToInt(); - int height = image.GetField(TiffTag.IMAGELENGTH)[0].ToInt(); - - int[] raster = new int[width * height]; - image.ReadRGBAImage(width, height, raster); - - using (var bmp = new Bitmap(width, height, System.Drawing.Imaging.PixelFormat.Format32bppArgb)) + using (var collection = new MagickImageCollection(tiffFile)) { - var bmpData = bmp.LockBits( - new Rectangle(0, 0, width, height), - System.Drawing.Imaging.ImageLockMode.WriteOnly, - bmp.PixelFormat); - - System.Runtime.InteropServices.Marshal.Copy(raster, 0, bmpData.Scan0, raster.Length); - bmp.UnlockBits(bmpData); - - PdfPage pagePdf = pdf.AddPage(); - pagePdf.Width = XUnit.FromPoint(width); - pagePdf.Height = XUnit.FromPoint(height); - - using (XGraphics gfx = XGraphics.FromPdfPage(pagePdf)) + Parallel.ForEach(collection, new ParallelOptions { MaxDegreeOfParallelism = MaxDegreeOfParallelism }, magickImage => { - // Save temp PNG - string tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".png"); - bmp.Save(tempPath, System.Drawing.Imaging.ImageFormat.Png); - - using (XImage ximg = XImage.FromFile(tempPath)) + using (Bitmap bmp = magickImage.ToBitmap()) { - gfx.DrawImage(ximg, 0, 0, width, height); + string extractedText = ""; + try + { + extractedText = PerformOcr(bmp, selectedLanguage); + } + catch + { + // If OCR fails, continue but mark success false + success = false; + } + + lock (pdf) + { + PdfPage page = pdf.AddPage(); + page.Width = XUnit.FromPoint(bmp.Width).Point; + page.Height = XUnit.FromPoint(bmp.Height).Point; + + using (var gfx = XGraphics.FromPdfPage(page)) + using (var ms = new MemoryStream()) + { + bmp.Save(ms, System.Drawing.Imaging.ImageFormat.Png); + ms.Position = 0; + using (var ximg = XImage.FromStream(ms)) + gfx.DrawImage(ximg, + XUnit.FromPoint(0), + XUnit.FromPoint(0), + XUnit.FromPoint(bmp.Width), + XUnit.FromPoint(bmp.Height)); + } + + OverlayTextOntoPdfPage(page, extractedText); + } + + mainForm.Invoke((MethodInvoker)(() => + { + progressBar.Value++; + statusLabel.Text = $"Processing page {progressBar.Value} of {progressBar.Maximum}"; + })); } - - File.Delete(tempPath); - } - - // OCR and overlay text - string extractedText = PerformOcr(tiffFile, pageIndex, ocrLanguage); - OverlayTextOntoPdfPage(pdf, pagePdf, extractedText); - - // Update progress bar safely - if (progressBar.InvokeRequired) - { - progressBar.Invoke((MethodInvoker)delegate - { - progressBar.Value++; - }); - } - else - { - progressBar.Value++; - } + }); } } - } - } - pdf.Save(outputPdfPath); - statusLabel.Text = "Conversion complete!"; - } - - // OCR method - private string PerformOcr(string tiffFile, int pageIndex, string language) - { - using (Tiff tiff = Tiff.Open(tiffFile, "r")) - { - tiff.SetDirectory((short)pageIndex); - int width = tiff.GetField(TiffTag.IMAGEWIDTH)[0].ToInt(); - int height = tiff.GetField(TiffTag.IMAGELENGTH)[0].ToInt(); - - int[] raster = new int[width * height]; - tiff.ReadRGBAImage(width, height, raster); - - using (Bitmap bmp = new Bitmap(width, height, System.Drawing.Imaging.PixelFormat.Format32bppArgb)) - { - for (int y = 0; y < height; y++) - for (int x = 0; x < width; x++) - { - int rgba = raster[y * width + x]; - int r = rgba & 0xFF; - int g = (rgba >> 8) & 0xFF; - int b = (rgba >> 16) & 0xFF; - bmp.SetPixel(x, height - y - 1, Color.FromArgb(255, r, g, b)); - } - - using (var engine = new TesseractEngine(@"./tessdata", language, EngineMode.Default)) - using (var page = engine.Process(bmp)) + // Add banner page if selected + if (!string.IsNullOrEmpty(bannerPdfPath) && File.Exists(bannerPdfPath)) { - return page.GetText(); + lock (pdf) + { + using (PdfDocument bannerDoc = PdfSharp.Pdf.IO.PdfReader.Open(bannerPdfPath, PdfSharp.Pdf.IO.PdfDocumentOpenMode.Import)) + { + foreach (PdfPage bannerPage in bannerDoc.Pages) + { + pdf.AddPage(bannerPage); + } + } + } } + }); + + // Only save if all pages processed successfully + if (success) + { + string outputPdfPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, outputPdfFileName); + pdf.Save(outputPdfPath); + mainForm.Invoke((MethodInvoker)(() => statusLabel.Text = $"PDF saved: {outputPdfPath}")); } + else + { + mainForm.Invoke((MethodInvoker)(() => statusLabel.Text = "Conversion incomplete, PDF not saved.")); + } + } + catch (Exception ex) + { + mainForm.Invoke((MethodInvoker)(() => statusLabel.Text = $"Error during conversion: {ex.Message}")); } } - // Overlay OCR text (invisible but searchable) - private void OverlayTextOntoPdfPage(PdfDocument pdf, PdfPage page, string text) + private string PerformOcr(Bitmap bmp, string language) { - using (XGraphics gfx = XGraphics.FromPdfPage(page, XGraphicsPdfPageOptions.Prepend)) + string tessDataPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "tessdata"); + + if (!Directory.Exists(tessDataPath)) + throw new DirectoryNotFoundException($"Tessdata folder not found: {tessDataPath}"); + + using (var engine = new TesseractEngine(tessDataPath, language, EngineMode.Default)) + using (var page = engine.Process(bmp)) { - XFont font = new XFont("Arial", 10); // regular font - gfx.DrawString(text, font, XBrushes.Transparent, new XRect(0, 0, page.Width, page.Height)); + return page.GetText(); + } + } + + private void OverlayTextOntoPdfPage(PdfPage page, string text) + { + using (var gfx = XGraphics.FromPdfPage(page, XGraphicsPdfPageOptions.Prepend)) + { + XFont font = new XFont("Arial", 10); + gfx.DrawString(text, font, XBrushes.Transparent, + new XRect( + XUnit.FromPoint(0), + XUnit.FromPoint(0), + XUnit.FromPoint(page.Width), + XUnit.FromPoint(page.Height)), + XStringFormats.TopLeft); } } } diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ScrollView.jar b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ScrollView.jar new file mode 100644 index 0000000..f0fc519 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ScrollView.jar differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/afr.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/afr.traineddata new file mode 100644 index 0000000..09e69cf Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/afr.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/amh.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/amh.traineddata new file mode 100644 index 0000000..7a302b2 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/amh.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ara.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ara.traineddata new file mode 100644 index 0000000..c8d129c Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ara.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/asm.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/asm.traineddata new file mode 100644 index 0000000..086035c Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/asm.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/aze.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/aze.traineddata new file mode 100644 index 0000000..27bc73e Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/aze.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/aze_cyrl.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/aze_cyrl.traineddata new file mode 100644 index 0000000..e2b1b95 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/aze_cyrl.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/bel.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/bel.traineddata new file mode 100644 index 0000000..65d83da Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/bel.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ben.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ben.traineddata new file mode 100644 index 0000000..7e9054d Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ben.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/bod.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/bod.traineddata new file mode 100644 index 0000000..7bb5029 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/bod.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/bos.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/bos.traineddata new file mode 100644 index 0000000..95bed5c Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/bos.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/bre.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/bre.traineddata new file mode 100644 index 0000000..c016913 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/bre.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/bul.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/bul.traineddata new file mode 100644 index 0000000..1e65588 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/bul.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/cat.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/cat.traineddata new file mode 100644 index 0000000..3fcd350 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/cat.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ceb.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ceb.traineddata new file mode 100644 index 0000000..5a1b969 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ceb.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ces.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ces.traineddata new file mode 100644 index 0000000..dd6fc79 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ces.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_sim.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_sim.traineddata new file mode 100644 index 0000000..388bac2 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_sim.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_sim_vert.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_sim_vert.traineddata new file mode 100644 index 0000000..f9898e9 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_sim_vert.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_tra.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_tra.traineddata new file mode 100644 index 0000000..1955cd8 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_tra.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_tra_vert.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_tra_vert.traineddata new file mode 100644 index 0000000..94bc5db Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/chi_tra_vert.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/chr.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/chr.traineddata new file mode 100644 index 0000000..e0799a2 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/chr.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/alto b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/alto new file mode 100644 index 0000000..0dd12a7 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/alto @@ -0,0 +1 @@ +tessedit_create_alto 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/ambigs.train b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/ambigs.train new file mode 100644 index 0000000..23035a1 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/ambigs.train @@ -0,0 +1,7 @@ +tessedit_ambigs_training 1 +load_freq_dawg 0 +load_punc_dawg 0 +load_system_dawg 0 +load_number_dawg 0 +ambigs_debug_level 3 +load_fixed_length_dawgs 0 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/api_config b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/api_config new file mode 100644 index 0000000..5cd6ec0 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/api_config @@ -0,0 +1 @@ +tessedit_zero_rejection T diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/bigram b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/bigram new file mode 100644 index 0000000..5d6c2d0 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/bigram @@ -0,0 +1,5 @@ +load_bigram_dawg True +tessedit_enable_bigram_correction True +tessedit_bigram_debug 3 +save_raw_choices True +save_alt_choices True diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/box.train b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/box.train new file mode 100644 index 0000000..d39f268 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/box.train @@ -0,0 +1,12 @@ +disable_character_fragments T +file_type .bl +textord_fast_pitch_test T +tessedit_zero_rejection T +tessedit_minimal_rejection F +tessedit_write_rep_codes F +edges_children_fix F +edges_childarea 0.65 +edges_boxarea 0.9 +tessedit_resegment_from_boxes T +tessedit_train_from_boxes T +textord_no_rejects T diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/box.train.stderr b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/box.train.stderr new file mode 100644 index 0000000..82754e9 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/box.train.stderr @@ -0,0 +1,13 @@ +file_type .bl +#tessedit_use_nn F +textord_fast_pitch_test T +tessedit_zero_rejection T +tessedit_minimal_rejection F +tessedit_write_rep_codes F +edges_children_fix F +edges_childarea 0.65 +edges_boxarea 0.9 +tessedit_resegment_from_boxes T +tessedit_train_from_boxes T +#textord_repeat_extraction F +textord_no_rejects T diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/digits b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/digits new file mode 100644 index 0000000..6a329f8 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/digits @@ -0,0 +1 @@ +tessedit_char_whitelist 0123456789-. diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/get.images b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/get.images new file mode 100644 index 0000000..7d00b61 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/get.images @@ -0,0 +1 @@ +tessedit_write_images T diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/hocr b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/hocr new file mode 100644 index 0000000..5ab372e --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/hocr @@ -0,0 +1,2 @@ +tessedit_create_hocr 1 +hocr_font_info 0 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/inter b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/inter new file mode 100644 index 0000000..252f1a1 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/inter @@ -0,0 +1,2 @@ +interactive_display_mode T +tessedit_display_outwords T diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/kannada b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/kannada new file mode 100644 index 0000000..c6ac105 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/kannada @@ -0,0 +1,4 @@ +textord_skewsmooth_offset 8 +textord_skewsmooth_offset2 8 +textord_merge_desc 0.5 +textord_no_rejects 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/linebox b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/linebox new file mode 100644 index 0000000..bd9c114 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/linebox @@ -0,0 +1,2 @@ +tessedit_resegment_from_line_boxes 1 +tessedit_make_boxes_from_boxes 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/logfile b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/logfile new file mode 100644 index 0000000..a160f9b --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/logfile @@ -0,0 +1 @@ +debug_file tesseract.log diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/lstm.train b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/lstm.train new file mode 100644 index 0000000..5ff3772 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/lstm.train @@ -0,0 +1,11 @@ +file_type .bl +textord_fast_pitch_test T +tessedit_zero_rejection T +tessedit_minimal_rejection F +tessedit_write_rep_codes F +edges_children_fix F +edges_childarea 0.65 +edges_boxarea 0.9 +tessedit_train_line_recognizer T +textord_no_rejects T +tessedit_init_config_only T diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/lstmbox b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/lstmbox new file mode 100644 index 0000000..a6f2ced --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/lstmbox @@ -0,0 +1 @@ +tessedit_create_lstmbox 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/lstmdebug b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/lstmdebug new file mode 100644 index 0000000..3fa3dee --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/lstmdebug @@ -0,0 +1,4 @@ +stopper_debug_level 1 +classify_debug_level 1 +segsearch_debug_level 1 +language_model_debug_level 3 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/makebox b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/makebox new file mode 100644 index 0000000..3d90ac2 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/makebox @@ -0,0 +1 @@ +tessedit_create_boxfile 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/page b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/page new file mode 100644 index 0000000..9928884 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/page @@ -0,0 +1,3 @@ +tessedit_create_page_xml 1 +# page_xml_polygon 1 +# page_xml_level 0 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/pdf b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/pdf new file mode 100644 index 0000000..59645d7 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/pdf @@ -0,0 +1 @@ +tessedit_create_pdf 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/quiet b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/quiet new file mode 100644 index 0000000..35b59a9 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/quiet @@ -0,0 +1 @@ +debug_file /dev/null diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/rebox b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/rebox new file mode 100644 index 0000000..f8342b4 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/rebox @@ -0,0 +1,2 @@ +tessedit_resegment_from_boxes 1 +tessedit_make_boxes_from_boxes 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/strokewidth b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/strokewidth new file mode 100644 index 0000000..e95b592 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/strokewidth @@ -0,0 +1,12 @@ +textord_show_blobs 0 +textord_debug_tabfind 3 +textord_tabfind_show_partitions 1 +textord_tabfind_show_initial_partitions 1 +textord_tabfind_show_columns 1 +textord_tabfind_show_blocks 1 +textord_tabfind_show_initialtabs 1 +textord_tabfind_show_finaltabs 1 +textord_tabfind_show_strokewidths 1 +textord_tabfind_show_vlines 0 +textord_tabfind_show_images 1 +tessedit_dump_pageseg_images 0 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/tsv b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/tsv new file mode 100644 index 0000000..dc52478 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/tsv @@ -0,0 +1 @@ +tessedit_create_tsv 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/txt b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/txt new file mode 100644 index 0000000..a0cc952 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/txt @@ -0,0 +1,3 @@ +# This config file should be used with other config files which create renderers. +# usage example: tesseract eurotext.tif eurotext txt hocr pdf +tessedit_create_txt 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/unlv b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/unlv new file mode 100644 index 0000000..d2e22f5 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/unlv @@ -0,0 +1,2 @@ +tessedit_write_unlv 1 +unlv_tilde_crunching T diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/wordstrbox b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/wordstrbox new file mode 100644 index 0000000..38cd41c --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/configs/wordstrbox @@ -0,0 +1 @@ +tessedit_create_wordstrbox 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/cos.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/cos.traineddata new file mode 100644 index 0000000..3105b7f Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/cos.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/cym.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/cym.traineddata new file mode 100644 index 0000000..900cf17 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/cym.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/dan.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/dan.traineddata new file mode 100644 index 0000000..85501b5 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/dan.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/deu.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/deu.traineddata new file mode 100644 index 0000000..97ed7b2 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/deu.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/deu_latf.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/deu_latf.traineddata new file mode 100644 index 0000000..11a6410 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/deu_latf.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/div.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/div.traineddata new file mode 100644 index 0000000..53b46df Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/div.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/dzo.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/dzo.traineddata new file mode 100644 index 0000000..3c31175 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/dzo.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ell.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ell.traineddata new file mode 100644 index 0000000..ed98ae1 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ell.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/eng.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/eng.traineddata new file mode 100644 index 0000000..bbef467 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/eng.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/enm.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/enm.traineddata new file mode 100644 index 0000000..e49b5b8 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/enm.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/epo.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/epo.traineddata new file mode 100644 index 0000000..235fc67 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/epo.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/equ.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/equ.traineddata new file mode 100644 index 0000000..27b4703 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/equ.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/est.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/est.traineddata new file mode 100644 index 0000000..a2b52c3 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/est.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/eus.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/eus.traineddata new file mode 100644 index 0000000..6247f97 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/eus.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/fao.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fao.traineddata new file mode 100644 index 0000000..1b845e7 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fao.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/fas.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fas.traineddata new file mode 100644 index 0000000..1a17ee0 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fas.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/fil.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fil.traineddata new file mode 100644 index 0000000..785f80d Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fil.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/fin.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fin.traineddata new file mode 100644 index 0000000..73f9240 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fin.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/fra.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fra.traineddata new file mode 100644 index 0000000..d9e2b21 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fra.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/frm.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/frm.traineddata new file mode 100644 index 0000000..ed06039 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/frm.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/fry.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fry.traineddata new file mode 100644 index 0000000..17199d2 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/fry.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/gla.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/gla.traineddata new file mode 100644 index 0000000..e8bae7f Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/gla.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/gle.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/gle.traineddata new file mode 100644 index 0000000..a7abc7b Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/gle.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/glg.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/glg.traineddata new file mode 100644 index 0000000..7bc00b3 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/glg.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/grc.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/grc.traineddata new file mode 100644 index 0000000..e949900 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/grc.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/guj.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/guj.traineddata new file mode 100644 index 0000000..151b9aa Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/guj.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/hat.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/hat.traineddata new file mode 100644 index 0000000..30d3bae Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/hat.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/heb.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/heb.traineddata new file mode 100644 index 0000000..7356caf Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/heb.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/hin.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/hin.traineddata new file mode 100644 index 0000000..a8f0aae Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/hin.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/hrv.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/hrv.traineddata new file mode 100644 index 0000000..6dd4f86 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/hrv.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/hun.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/hun.traineddata new file mode 100644 index 0000000..5d24fa8 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/hun.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/hye.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/hye.traineddata new file mode 100644 index 0000000..ec2e2f3 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/hye.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/iku.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/iku.traineddata new file mode 100644 index 0000000..6eab87f Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/iku.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ind.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ind.traineddata new file mode 100644 index 0000000..3013f50 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ind.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/isl.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/isl.traineddata new file mode 100644 index 0000000..20d3bdf Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/isl.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ita.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ita.traineddata new file mode 100644 index 0000000..edbffbe Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ita.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ita_old.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ita_old.traineddata new file mode 100644 index 0000000..c8a2c3c Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ita_old.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/jav.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/jav.traineddata new file mode 100644 index 0000000..3155b81 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/jav.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/jaxb-api-2.3.1.jar b/PDFWorkflowManager/PDFWorkflowManager/tessdata/jaxb-api-2.3.1.jar new file mode 100644 index 0000000..4565865 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/jaxb-api-2.3.1.jar differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/jpn.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/jpn.traineddata new file mode 100644 index 0000000..c4178f8 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/jpn.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/jpn_vert.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/jpn_vert.traineddata new file mode 100644 index 0000000..43f38de Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/jpn_vert.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/kan.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kan.traineddata new file mode 100644 index 0000000..78252ba Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kan.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/kat.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kat.traineddata new file mode 100644 index 0000000..1a3ae11 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kat.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/kat_old.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kat_old.traineddata new file mode 100644 index 0000000..f4ae5ab Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kat_old.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/kaz.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kaz.traineddata new file mode 100644 index 0000000..f889e2b Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kaz.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/khm.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/khm.traineddata new file mode 100644 index 0000000..d466dc2 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/khm.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/kir.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kir.traineddata new file mode 100644 index 0000000..3e04f29 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kir.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/kmr.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kmr.traineddata new file mode 100644 index 0000000..d1afefb Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kmr.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/kor.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kor.traineddata new file mode 100644 index 0000000..60986d4 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/kor.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/lao.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/lao.traineddata new file mode 100644 index 0000000..10bd41a Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/lao.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/lat.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/lat.traineddata new file mode 100644 index 0000000..30dab6a Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/lat.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/lav.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/lav.traineddata new file mode 100644 index 0000000..71d925c Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/lav.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/lit.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/lit.traineddata new file mode 100644 index 0000000..af9e2b3 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/lit.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ltz.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ltz.traineddata new file mode 100644 index 0000000..c1ac5b9 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ltz.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/mal.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mal.traineddata new file mode 100644 index 0000000..70e96f0 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mal.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/mar.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mar.traineddata new file mode 100644 index 0000000..79138fa Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mar.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/mkd.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mkd.traineddata new file mode 100644 index 0000000..fb0b245 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mkd.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/mlt.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mlt.traineddata new file mode 100644 index 0000000..4242f9c Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mlt.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/mon.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mon.traineddata new file mode 100644 index 0000000..2ef212c Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mon.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/mri.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mri.traineddata new file mode 100644 index 0000000..eab6b1a Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mri.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/msa.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/msa.traineddata new file mode 100644 index 0000000..f0e2b8b Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/msa.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/mya.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mya.traineddata new file mode 100644 index 0000000..cf28ddc Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/mya.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/nep.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/nep.traineddata new file mode 100644 index 0000000..948e365 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/nep.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/nld.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/nld.traineddata new file mode 100644 index 0000000..53ce95a Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/nld.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/nor.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/nor.traineddata new file mode 100644 index 0000000..9209df2 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/nor.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/oci.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/oci.traineddata new file mode 100644 index 0000000..4017d44 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/oci.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ori.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ori.traineddata new file mode 100644 index 0000000..ae0f9ed Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ori.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/osd.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/osd.traineddata new file mode 100644 index 0000000..527457c Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/osd.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/pan.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/pan.traineddata new file mode 100644 index 0000000..d4f0053 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/pan.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/pdf.ttf b/PDFWorkflowManager/PDFWorkflowManager/tessdata/pdf.ttf new file mode 100644 index 0000000..d1472b2 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/pdf.ttf differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/piccolo2d-core-3.0.1.jar b/PDFWorkflowManager/PDFWorkflowManager/tessdata/piccolo2d-core-3.0.1.jar new file mode 100644 index 0000000..df84ed5 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/piccolo2d-core-3.0.1.jar differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/piccolo2d-extras-3.0.1.jar b/PDFWorkflowManager/PDFWorkflowManager/tessdata/piccolo2d-extras-3.0.1.jar new file mode 100644 index 0000000..daf51c0 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/piccolo2d-extras-3.0.1.jar differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/pol.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/pol.traineddata new file mode 100644 index 0000000..2382f56 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/pol.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/por.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/por.traineddata new file mode 100644 index 0000000..e9f373e Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/por.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/pus.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/pus.traineddata new file mode 100644 index 0000000..220cdf9 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/pus.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/que.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/que.traineddata new file mode 100644 index 0000000..d80c1f3 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/que.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ron.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ron.traineddata new file mode 100644 index 0000000..04cd2bc Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ron.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/rus.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/rus.traineddata new file mode 100644 index 0000000..b146cb2 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/rus.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/san.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/san.traineddata new file mode 100644 index 0000000..3df043f Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/san.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/sin.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/sin.traineddata new file mode 100644 index 0000000..d0c8584 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/sin.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/slk.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/slk.traineddata new file mode 100644 index 0000000..ca99e3e Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/slk.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/slv.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/slv.traineddata new file mode 100644 index 0000000..835954e Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/slv.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/snd.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/snd.traineddata new file mode 100644 index 0000000..1ef6548 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/snd.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/spa.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/spa.traineddata new file mode 100644 index 0000000..72e901f Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/spa.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/spa_old.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/spa_old.traineddata new file mode 100644 index 0000000..42b281f Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/spa_old.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/sqi.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/sqi.traineddata new file mode 100644 index 0000000..9624fbf Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/sqi.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/srp.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/srp.traineddata new file mode 100644 index 0000000..530fb4b Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/srp.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/srp_latn.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/srp_latn.traineddata new file mode 100644 index 0000000..5f81e59 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/srp_latn.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/sun.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/sun.traineddata new file mode 100644 index 0000000..8948914 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/sun.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/swa.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/swa.traineddata new file mode 100644 index 0000000..633d73b Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/swa.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/swe.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/swe.traineddata new file mode 100644 index 0000000..684e1fa Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/swe.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/syr.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/syr.traineddata new file mode 100644 index 0000000..462aefc Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/syr.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tam.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tam.traineddata new file mode 100644 index 0000000..fa1793d Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tam.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tat.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tat.traineddata new file mode 100644 index 0000000..9485ca2 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tat.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tel.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tel.traineddata new file mode 100644 index 0000000..ee8a33b Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tel.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/batch b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/batch new file mode 100644 index 0000000..a681e4a --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/batch @@ -0,0 +1 @@ +# No content needed as all defaults are correct. diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/batch.nochop b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/batch.nochop new file mode 100644 index 0000000..ebaab94 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/batch.nochop @@ -0,0 +1,2 @@ +chop_enable 0 +wordrec_enable_assoc 0 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/matdemo b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/matdemo new file mode 100644 index 0000000..c34567b --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/matdemo @@ -0,0 +1,7 @@ +################################################# +# Adaptive Matcher Using PreAdapted Templates +################################################# + +classify_enable_adaptive_debugger 1 +matcher_debug_flags 6 +matcher_debug_level 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/msdemo b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/msdemo new file mode 100644 index 0000000..9c1184a --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/msdemo @@ -0,0 +1,12 @@ +################################################# +# Adaptive Matcher Using PreAdapted Templates +################################################# + +classify_enable_adaptive_debugger 1 +matcher_debug_flags 6 +matcher_debug_level 1 + +wordrec_display_splits 0 +wordrec_display_all_blobs 1 +wordrec_display_segmentations 2 +classify_debug_level 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/nobatch b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/nobatch new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/nobatch @@ -0,0 +1 @@ + diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/segdemo b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/segdemo new file mode 100644 index 0000000..eaff69f --- /dev/null +++ b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tessconfigs/segdemo @@ -0,0 +1,9 @@ +################################################# +# Adaptive Matcher Using PreAdapted Templates +################################################# + +wordrec_display_splits 0 +wordrec_display_all_blobs 1 +wordrec_display_segmentations 2 +classify_debug_level 1 +stopper_debug_level 1 diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tgk.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tgk.traineddata new file mode 100644 index 0000000..0447667 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tgk.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tha.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tha.traineddata new file mode 100644 index 0000000..ea28de3 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tha.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tir.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tir.traineddata new file mode 100644 index 0000000..55e62eb Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tir.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ton.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ton.traineddata new file mode 100644 index 0000000..9eff0fd Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ton.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/tur.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tur.traineddata new file mode 100644 index 0000000..1f0b331 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/tur.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/uig.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/uig.traineddata new file mode 100644 index 0000000..36124d6 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/uig.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/ukr.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ukr.traineddata new file mode 100644 index 0000000..d02cf90 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/ukr.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/urd.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/urd.traineddata new file mode 100644 index 0000000..715a159 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/urd.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/uzb.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/uzb.traineddata new file mode 100644 index 0000000..109abc7 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/uzb.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/uzb_cyrl.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/uzb_cyrl.traineddata new file mode 100644 index 0000000..c2ff8b9 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/uzb_cyrl.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/vie.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/vie.traineddata new file mode 100644 index 0000000..e54a591 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/vie.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/yid.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/yid.traineddata new file mode 100644 index 0000000..6349588 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/yid.traineddata differ diff --git a/PDFWorkflowManager/PDFWorkflowManager/tessdata/yor.traineddata b/PDFWorkflowManager/PDFWorkflowManager/tessdata/yor.traineddata new file mode 100644 index 0000000..3e0cd58 Binary files /dev/null and b/PDFWorkflowManager/PDFWorkflowManager/tessdata/yor.traineddata differ