diff --git a/ScanScripts/Windows/0 - Special Make manual.cmd b/ScanScripts/Windows/0 - Special Make manual.cmd
new file mode 100644
index 0000000..ec29ed2
--- /dev/null
+++ b/ScanScripts/Windows/0 - Special Make manual.cmd
@@ -0,0 +1,58 @@
+REM @echo off
+REM Builds manual.pdf from the TIFF pages in work\out: optional paging fix,
+REM thumbnail of the first page, one PDF per page, merge, then metadata.
+set source=D:\Retro-Work
+
+setlocal EnableDelayedExpansion
+set folder=%cd%
+cd work\out
+
+choice /m "Fix paging "
+if %errorlevel%==1 (
+copy D:\Retro-Work\correct_pages.php .\ /y
+php -f C:\Retro-Work\Tools\correct_pages.php
+cd newpages
+)
+
+choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
+if %errorlevel%==1 (
+set page=brought_to_you_by.pdf
+) else if %errorlevel%==2 (
+set page=brought_to_you_by_[TABLET].pdf
+)
+
+if NOT EXIST output mkdir output
+set thumb=1
+
+for %%i in (*.tif) DO (
+if !thumb!==1 (
+start convert %%i -resize 240x310^^! -quality 80%% "!folder!\thumb.jpg"
+)
+set thumb=2
+start convert %%i output\%%~ni.pdf
+)
+echo "Wait for all opened cmd windows to close and press a key"
+pause
+
+cd output
+REM for %%i in (*.jpg) DO (
+REM convert %%i %%~ni.pdf
+REM )
+
+REM del *.jpg /q
+REM Append the last page only when one was chosen. With N the page variable
+REM is undefined and the bare source folder would be handed to pdftk as a PDF.
+set lastpage=
+if defined page set lastpage="%source%\%page%"
+pdftk *.pdf %lastpage% cat output "%folder%\_manual.pdf"
+pdftk "%folder%\_manual.pdf" update_info %source%\metadata.txt output "%folder%\manual.pdf"
+del *.pdf /q
+REM del ..\*.* /q
+del "%folder%\_manual.pdf" /q
+
+cd %folder%
+start manual.pdf
+pause
+
+rem requires OCR tool
+rem call "3 - PdfToText.cmd"
\ No newline at end of file
diff --git a/ScanScripts/Windows/0 - simplex_scanned_sort.cmd b/ScanScripts/Windows/0 - simplex_scanned_sort.cmd
new file mode 100644
index 0000000..7b97f50
--- /dev/null
+++ b/ScanScripts/Windows/0 - simplex_scanned_sort.cmd
@@ -0,0 +1,2 @@
+php.exe -f ..\simplex_scanned_sort.php
+pause
\ No newline at end of file
diff --git a/ScanScripts/Windows/03-hash_em.cmd b/ScanScripts/Windows/03-hash_em.cmd
new file mode 100644
index 0000000..74a9edc
--- /dev/null
+++ b/ScanScripts/Windows/03-hash_em.cmd
@@ -0,0 +1,29 @@
+@echo off
+REM Writes a .md5 and a .sha1 file next to every PDF, OCR text and ZIP in this folder.
+
+REM setlocal EnableDelayedExpansion
+REM for %%i in (*_recognized.pdf) DO (
+REM set file=%%i
+REM set output=!file:recognized=ocr!
+REM pdftk %%i update_info "D:\Retro-Work\metadata.txt" output !output!
+REM del %%i
+REM )
+REM endlocal
+
+
+
+
+for %%i in (*.pdf) DO (
+
+md5sum "%%i" > "%%i.md5"
+sha1sum "%%i" > "%%i.sha1"
+)
+for %%i in (*.ocr.txt) DO (
+md5sum "%%i" > "%%i.md5"
+sha1sum "%%i" > "%%i.sha1"
+)
+
+for %%i in (*.zip) DO (
+md5sum "%%i" > "%%i.md5"
+sha1sum "%%i" > "%%i.sha1"
+)
diff --git a/ScanScripts/Windows/04-simplex_scanned_sort.cmd b/ScanScripts/Windows/04-simplex_scanned_sort.cmd
new file mode 100644
index 0000000..15dd610
--- /dev/null
+++ b/ScanScripts/Windows/04-simplex_scanned_sort.cmd
@@ -0,0 +1,2 @@
+php.exe -f simplex_scanned_sort.php
+pause
\ No newline at end of file
diff --git a/ScanScripts/Windows/1 - Indexed using ref image.cmd b/ScanScripts/Windows/1 - Indexed using ref image.cmd
new file mode 100644
index 0000000..843963c
--- /dev/null
+++ b/ScanScripts/Windows/1 - Indexed using ref image.cmd
@@ -0,0 +1,4 @@
+mkdir indexed
+for %%i in (*.tif) DO (
+convert %%i +dither -remap D:\Retro-Work\8col.png indexed\%%~ni.tif
+)
\ No newline at end of file
diff --git a/ScanScripts/Windows/1 - Indexed using ref image_old.cmd b/ScanScripts/Windows/1 - Indexed using ref image_old.cmd
new file mode 100644
index 0000000..0f1428f
--- /dev/null
+++ b/ScanScripts/Windows/1 - Indexed using ref image_old.cmd
@@ -0,0 +1,4 @@
+mkdir indexed
+for %%i in (*.tif) DO (
+convert %%i -separate -threshold 75%% -combine -remap "..\..\..\..\commodore_3_colour_index_ref.tif" indexed\%%i
+)
\ No newline at end of file
diff --git a/ScanScripts/Windows/1 - Indexed.cmd b/ScanScripts/Windows/1 - Indexed.cmd
new file mode 100644
index 0000000..8c60d6e
--- /dev/null
+++ b/ScanScripts/Windows/1 - Indexed.cmd
@@ -0,0 +1,14 @@
+@echo off
+REM Reduces every TIFF in this folder to the requested number of colours, into pages\.
+set nr=
+set /p nr="Amount of colors: "
+
+REM Default to 256 when nothing or a value below 1 was entered (IF takes no DO keyword).
+if not defined nr set nr=256
+if %nr% LSS 1 set nr=256
+
+
+mkdir pages
+for %%i in (*.tif) DO (
+convert %%i -colors %nr% pages\%%i
+)
\ No newline at end of file
diff --git 
a/ScanScripts/Windows/1 - Make booklet.cmd b/ScanScripts/Windows/1 - Make booklet.cmd
new file mode 100644
index 0000000..db13c95
--- /dev/null
+++ b/ScanScripts/Windows/1 - Make booklet.cmd
@@ -0,0 +1,40 @@
+@echo off
+REM Runs make_booklet.php in work\out, then merges the TIFFs found in the
+REM odd and even subfolders into _odd.pdf and _even.pdf.
+set source=D:\Retro-Work
+
+
+setlocal EnableDelayedExpansion
+set folder=%cd%
+cd work\out
+
+copy %source%\make_booklet.php .\ /y
+php -f make_booklet.php
+
+cd odd
+
+if NOT EXIST output mkdir output
+
+for %%i in (*.tif) DO (
+echo Converting %%i
+convert %%i output\%%~ni.pdf
+)
+
+cd output
+pdftk *.pdf cat output "%folder%\_odd.pdf"
+rem del *.pdf /q
+
+cd %folder%\work\out\even
+
+if NOT EXIST output mkdir output
+
+for %%i in (*.tif) DO (
+echo Converting %%i
+convert %%i output\%%~ni.pdf
+)
+
+cd output
+pdftk *.pdf cat output "%folder%\_even.pdf"
+
+
+pause
diff --git a/ScanScripts/Windows/1 - grayscale.cmd b/ScanScripts/Windows/1 - grayscale.cmd
new file mode 100644
index 0000000..b3a0e7d
--- /dev/null
+++ b/ScanScripts/Windows/1 - grayscale.cmd
@@ -0,0 +1,8 @@
+mkdir pages
+for %%i in (*.tif) DO (
+convert %%i -grayscale rec709luma pages\%%i
+)
+
+for %%i in (*.png) DO (
+convert %%i -grayscale rec709luma pages\%%i
+)
\ No newline at end of file
diff --git a/ScanScripts/Windows/1 - remove_edges.cmd b/ScanScripts/Windows/1 - remove_edges.cmd
new file mode 100644
index 0000000..d29c069
--- /dev/null
+++ b/ScanScripts/Windows/1 - remove_edges.cmd
@@ -0,0 +1,7 @@
+@echo off
+rem start_x, start_y end_x, end_y (full cords)
+mkdir work
+
+for %%i in (*.tif) DO (
+convert %%i -fill white -draw "rectangle 3278,0 3758,4922" work\%%i
+)
\ No newline at end of file
diff --git a/ScanScripts/Windows/1 - thumbnail.cmd b/ScanScripts/Windows/1 - thumbnail.cmd
new file mode 100644
index 0000000..db0d9a5
--- /dev/null
+++ b/ScanScripts/Windows/1 - thumbnail.cmd
@@ -0,0 +1,3 @@
+for %%i in (*.pdf) DO (
+convert -thumbnail x310 -background white -alpha remove "%%i[0]" "%%~ni.jpg"
+)
\ No newline at end of file
diff --git a/ScanScripts/Windows/2 - Make 
EPP.cmd b/ScanScripts/Windows/2 - Make EPP.cmd
new file mode 100644
index 0000000..bbc4b72
--- /dev/null
+++ b/ScanScripts/Windows/2 - Make EPP.cmd
@@ -0,0 +1,52 @@
+@echo off
+REM Builds manual.pdf from the TIFF pages in work\out; the thumbnail is 240 px wide.
+set source=D:\Retro-Work
+
+
+setlocal EnableDelayedExpansion
+set folder=%cd%
+cd work\out
+
+choice /m "Fix paging "
+if %errorlevel%==1 (
+copy D:\Retro-Work\correct_pages.php .\ /y
+php -f correct_pages.php
+cd newpages
+)
+
+choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
+if %errorlevel%==1 (
+set page=brought_to_you_by.pdf
+) else if %errorlevel%==2 (
+set page=brought_to_you_by_[TABLET].pdf
+)
+
+if NOT EXIST output mkdir output
+set thumb=1
+
+for %%i in (*.tif) DO (
+if !thumb!==1 (
+convert %%i -resize 240 -quality 80%% "!folder!\thumb.jpg"
+)
+set thumb=2
+convert %%i output\%%~ni.pdf
+)
+
+
+cd output
+REM for %%i in (*.jpg) DO (
+REM convert %%i %%~ni.pdf
+REM )
+
+REM del *.jpg /q
+REM Append the last page only when one was chosen. With N the page variable
+REM is undefined and the bare source folder would be handed to pdftk as a PDF.
+set lastpage=
+if defined page set lastpage="%source%\%page%"
+pdftk *.pdf %lastpage% cat output "%folder%\_manual.pdf"
+pdftk "%folder%\_manual.pdf" update_info %source%\metadata.txt output "%folder%\manual.pdf"
+del *.pdf /q
+REM del ..\*.* /q
+del "%folder%\_manual.pdf" /q
+
+pause
diff --git a/ScanScripts/Windows/2 - Make manual-ImageMagick.cmd b/ScanScripts/Windows/2 - Make manual-ImageMagick.cmd
new file mode 100644
index 0000000..224b305
--- /dev/null
+++ b/ScanScripts/Windows/2 - Make manual-ImageMagick.cmd
@@ -0,0 +1,50 @@
+@echo off
+REM Builds manual.pdf from the TIFF pages in work\out using ImageMagick convert.
+
+setlocal EnableDelayedExpansion
+set folder=%cd%
+cd work\out
+
+choice /m "Fix paging "
+if %errorlevel%==1 (
+copy D:\Retro-Work\correct_pages.php .\ /y
+php -f correct_pages.php
+cd newpages
+)
+
+choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
+if %errorlevel%==1 (
+set page=brought_to_you_by.pdf
+) else if %errorlevel%==2 (
+set page=brought_to_you_by_[TABLET].pdf
+)
+
+if NOT EXIST output mkdir output
+set thumb=1
+
+for %%i in (*.tif) DO (
+if !thumb!==1 (
+convert %%i -resize 240x310^^! -quality 80%% "!folder!\thumb.jpg"
+)
+set thumb=2
+convert %%i output\%%~ni.pdf
+)
+
+
+cd output
+REM for %%i in (*.jpg) DO (
+REM convert %%i %%~ni.pdf
+REM )
+
+REM del *.jpg /q
+REM Append the last page only when one was chosen. With N the page variable
+REM is undefined and the bare source folder would be handed to pdftk as a PDF.
+set lastpage=
+if defined page set lastpage="D:\Retro-Work\%page%"
+pdftk *.pdf %lastpage% cat output "%folder%\_manual.pdf"
+pdftk "%folder%\_manual.pdf" update_info D:\Retro-Work\metadata.txt output "%folder%\manual.pdf"
+del *.pdf /q
+REM del ..\*.* /q
+del "%folder%\_manual.pdf" /q
+
+pause
diff --git a/ScanScripts/Windows/2 - Make manual-IrfanView.cmd b/ScanScripts/Windows/2 - Make manual-IrfanView.cmd
new file mode 100644
index 0000000..417dd6e
--- /dev/null
+++ b/ScanScripts/Windows/2 - Make manual-IrfanView.cmd
@@ -0,0 +1,51 @@
+@echo off
+REM Builds manual.pdf from the TIFF pages in work\out; IrfanView does the TIFF to PDF step.
+set source=D:\Retro-Work
+
+
+setlocal EnableDelayedExpansion
+set folder=%cd%
+cd work\out
+
+choice /m "Fix paging "
+if %errorlevel%==1 (
+copy D:\Retro-Work\correct_pages.php .\ /y
+php -f correct_pages.php
+cd newpages
+)
+
+choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
+if %errorlevel%==1 (
+set page=brought_to_you_by.pdf
+) else if %errorlevel%==2 (
+set page=brought_to_you_by_[TABLET].pdf
+)
+
+if NOT EXIST output mkdir output
+set thumb=1
+
+for %%i in (*.tif) DO (
+if !thumb!==1 (
+convert %%i -resize 240x310^^! -quality 80%% "!folder!\thumb.jpg"
+)
+set thumb=2
+)
+"c:\Program Files (x86)\IrfanView\i_view32.exe" .\*.tif /convert=.\output\*.pdf
+
+cd output
+REM for %%i in (*.jpg) DO (
+REM convert %%i %%~ni.pdf
+REM )
+
+REM del *.jpg /q
+REM Append the last page only when one was chosen. With N the page variable
+REM is undefined and the bare source folder would be handed to pdftk as a PDF.
+set lastpage=
+if defined page set lastpage="%source%\%page%"
+pdftk *.pdf %lastpage% cat output "%folder%\_manual.pdf"
+pdftk "%folder%\_manual.pdf" update_info %source%\metadata.txt output "%folder%\manual.pdf"
+del *.pdf /q
+REM del ..\*.* /q
+del "%folder%\_manual.pdf" /q
+
+pause
diff --git a/ScanScripts/Windows/2 - Make manual_v1.cmd b/ScanScripts/Windows/2 - Make manual_v1.cmd
new file mode 100644
index 0000000..ca4ad82
--- /dev/null
+++ b/ScanScripts/Windows/2 - Make manual_v1.cmd
@@ -0,0 +1,51 @@
+@echo off
+set source=G:\Retro-Work
+
+
+
+setlocal EnableDelayedExpansion
+set folder=%cd%
+cd work\out
+
+choice /m "Fix paging "
+if %errorlevel%==1 (
+copy %source%\correct_pages.php .\ /y
+php -f correct_pages.php
+cd newpages
+)
+
+choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
+if %errorlevel%==1 (
+set page=brought_to_you_by.pdf
+) else if %errorlevel%==2 (
+set page=brought_to_you_by_[TABLET].pdf
+)
+
+if NOT EXIST output mkdir output
+set thumb=1
+
+for %%i in (*.tif) DO (
+if !thumb!==1 (
+magick convert %%i -resize 240x310^^! 
-quality 80%% "!folder!\thumb.jpg"
+)
+set thumb=2
+start magick convert %%i output\%%~ni.pdf
+)
+pause
+
+cd output
+REM With N no last page was chosen (page is undefined): append nothing then.
+set lastpage=
+if defined page set lastpage="%source%\%page%"
+
+REM del *.jpg /q
+pdftk *.pdf %lastpage% cat output "%folder%\_manual.pdf"
+pdftk "%folder%\_manual.pdf" update_info %source%\metadata.txt output "%folder%\manual.pdf"
+del *.pdf /q
+REM del ..\*.* /q
+del "%folder%\_manual.pdf" /q
+
+cd %folder%
+start manual.pdf
+pause
+call "3 - PdfToText.cmd"
\ No newline at end of file
diff --git a/ScanScripts/Windows/2 - Make manual_v2.cmd b/ScanScripts/Windows/2 - Make manual_v2.cmd
new file mode 100644
index 0000000..2435c6e
--- /dev/null
+++ b/ScanScripts/Windows/2 - Make manual_v2.cmd
@@ -0,0 +1,97 @@
+@echo off
+REM V2.0 Updated 2023-02-04 Tomse
+REM Moved to using Tesseract OCR
+
+set source=G:\Retro-Work
+
+setlocal EnableDelayedExpansion
+set folder=%cd%
+cd work\out
+
+choice /m "Fix paging "
+if %errorlevel%==1 (
+copy %source%\correct_pages.php .\ /y
+php -f correct_pages.php
+cd newpages
+)
+
+choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
+if %errorlevel%==1 (
+set page=brought_to_you_by.pdf
+) else if %errorlevel%==2 (
+set page=brought_to_you_by_[TABLET].pdf
+) else if %errorlevel%==3 (
+set "page="
+)
+
+choice /C DEG /m "(D)anish, (E)nglish, (G)erman "
+if %errorlevel%==1 (
+set lang=dan
+) else if %errorlevel%==2 (
+set lang=eng
+) else if %errorlevel%==3 (
+set lang=deu
+)
+
+
+
+
+if NOT EXIST output mkdir output
+set thumb=1
+
+for %%i in (*.tif) DO (
+if !thumb!==1 (
+magick convert %%i -resize 240x310^^! -quality 80%% "!folder!\thumb.jpg"
+)
+set thumb=2
+start tesseract %%i output\%%~ni -l %lang% pdf
+)
+echo Wait till all other screens are gone before continuing
+pause
+
+cd output
+REM Append the last page only when one was chosen. With N the page variable
+REM is undefined and the bare source folder would be handed to pdftk as a PDF.
+set lastpage=
+if defined page set lastpage="%source%\%page%"
+pdftk *.pdf %lastpage% cat output "%folder%\manual.pdf"
+
+cd %folder%
+if not exist temp mkdir temp
+
+REM PDF timestamp; the substring offsets assume a dd-mm-yyyy DATE layout.
+REM TIME pads hours below 10 with a space, which has to become a zero.
+set stamp=%DATE:~6,4%%DATE:~3,2%%DATE:~0,2%%TIME:~0,2%%TIME:~3,2%%TIME:~6,2%
+set stamp=%stamp: =0%
+
+echo [ /Title () > temp\pdfmarks.txt
+echo /Author (Scanned and Processed by Tomse @ http://retro-commodore.eu) >> temp\pdfmarks.txt
+echo /Subject () >> temp\pdfmarks.txt
+echo /Keywords (RCEU, Commodore) >> temp\pdfmarks.txt
+echo /ModDate (D:%stamp%) >> temp\pdfmarks.txt
+echo /CreationDate (D:%stamp%) >> temp\pdfmarks.txt
+echo /Creator (retro-commodore.eu) >> temp\pdfmarks.txt
+echo /Producer (retro-commodore.eu) >> temp\pdfmarks.txt
+echo /DOCINFO pdfmark >> temp\pdfmarks.txt
+
+
+
+if exist manual.pdf pdftotext manual.pdf manual2.ocr.txt
+
+if exist manual.pdf gswin64c -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile=manual2.pdf "manual.pdf" temp\pdfmarks.txt
+if exist mag_recognized.pdf gswin64c -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile="[300dpi].pdf" "mag_recognized.pdf" temp\pdfmarks.txt
+if exist _tablet_recognized.pdf gswin64c -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile="[150dpi][ocr].pdf" "_tablet_recognized.pdf" temp\pdfmarks.txt
+if exist audiomedia_recognized.pdf gswin64c -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile="Privat_Computer_199x_-_xx.pdf" "audiomedia_recognized.pdf" temp\pdfmarks.txt
+cls
+
+start manual2.pdf
+
+choice /m "Delete old files?"
+
+if %ERRORLEVEL% EQU 1 goto delete
+goto end
+
+:delete
+del manual.pdf
+
+:end
\ No newline at end of file
diff --git a/ScanScripts/Windows/2 - Make single page.cmd b/ScanScripts/Windows/2 - Make single page.cmd
new file mode 100644
index 0000000..ce72063
--- /dev/null
+++ b/ScanScripts/Windows/2 - Make single page.cmd
@@ -0,0 +1,32 @@
+@echo off
+
+setlocal EnableDelayedExpansion
+set folder=%cd%
+cd work\out
+
+
+choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
+if %errorlevel%==1 (
+set page=brought_to_you_by.pdf
+) else if %errorlevel%==2 (
+set page=brought_to_you_by_[TABLET].pdf
+)
+
+if NOT EXIST output mkdir output
+REM With N no last page was chosen (page is undefined): append nothing then.
+set lastpage=
+if defined page set lastpage="D:\Retro-Work\%page%"
+
+for %%i in (*.tif) DO (
+convert %%i -resize 240x310^^! -quality 80%% "!folder!\%%~ni.jpg"
+convert %%i "output\%%~ni.pdf"
+pdftk "output\%%~ni.pdf" %lastpage% cat output "output\_temp.pdf"
+pdftk "output\_temp.pdf" update_info D:\Retro-Work\metadata2.txt output "%folder%\%%~ni.pdf"
+del output\_temp.pdf /q
+
+echo url: %%~ni.pdf > "%folder%\%%~ni.txt"
+echo title: >> "%folder%\%%~ni.txt"
+echo lang: English >> "%folder%\%%~ni.txt"
+
+)
+pause
diff --git a/ScanScripts/Windows/2 - Make vi_og_vic magazine.cmd b/ScanScripts/Windows/2 - Make vi_og_vic magazine.cmd
new file mode 100644
index 0000000..9ffa4c1
--- /dev/null
+++ b/ScanScripts/Windows/2 - Make vi_og_vic magazine.cmd
@@ -0,0 +1,58 @@
+@echo off
+set source=D:\Retro-Work
+
+setlocal EnableDelayedExpansion
+set folder=%cd%
+cd work\out
+choice /m "Fix paging "
+if %errorlevel%==1 (
+copy D:\Retro-Work\correct_pages.php .\ /y
+php -f correct_pages.php
+cd newpages
+)
+
+choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
+if %errorlevel%==1 (
+set page=brought_to_you_by.pdf
+) else if %errorlevel%==2 (
+set page=brought_to_you_by_[TABLET].pdf
+)
+
+if NOT EXIST output mkdir output
+set thumb=1
+
+if NOT EXIST tablet mkdir tablet
+
+
+for %%i in (*.tif) DO (
+if !thumb!==1 (
+convert %%i -resize 240x310^^! 
-quality 75%% "!folder!\thumb.jpg"
+)
+set thumb=2
+convert -units pixelsperinch %%i -resample 150 tablet\%%~ni.jpg
+)
+
+rem 300dpi mag: one PDF per TIFF, merged with the large last page into mag.pdf
+
+for %%i in (*.tif) DO (
+convert %%i output\%%~ni.pdf
+)
+cd output
+del *.jpg /q
+pdftk *.pdf %source%\brought_to_you_by.pdf cat output "!folder!\_mag.pdf"
+pdftk "!folder!\_mag.pdf" update_info %source%\metadata.txt output "!folder!\mag.pdf"
+del *.pdf /q
+del "!folder!\_mag.pdf"
+cd ..
+
+
+rem Tablet: the 150 dpi JPEGs become PDFs, merged with the tablet last page into _tablet.pdf
+cd tablet
+for %%i in (*.jpg) DO (
+convert %%i %%~ni.pdf
+)
+del *.jpg /q
+pdftk *.pdf %source%\brought_to_you_by_[TABLET].pdf cat output _tablet.pdf
+pdftk _tablet.pdf update_info %source%\metadata.txt output "!folder!\_tablet.pdf"
+del *.pdf /q
+pause
diff --git a/ScanScripts/Windows/2 - resize_to_tablet.cmd b/ScanScripts/Windows/2 - resize_to_tablet.cmd
new file mode 100644
index 0000000..6781409
--- /dev/null
+++ b/ScanScripts/Windows/2 - resize_to_tablet.cmd
@@ -0,0 +1,5 @@
+@echo off
+mkdir tablet
+for %%i in (*.tif) DO (
+convert %%i -resize 1024x4096 -quality 85%% tablet\%%~ni.jpg
+)
\ No newline at end of file
diff --git a/ScanScripts/Windows/3 - PdfToText_Martin.cmd b/ScanScripts/Windows/3 - PdfToText_Martin.cmd
new file mode 100644
index 0000000..8174845
--- /dev/null
+++ b/ScanScripts/Windows/3 - PdfToText_Martin.cmd
@@ -0,0 +1,5 @@
+@echo off
+
+
+pdftotext manual_recognized.pdf manual2.ocr.txt
+pdftk "manual_recognized.pdf" update_info D:\Retro-Work\metadata_martin.txt output "manual2.pdf"
\ No newline at end of file
diff --git a/ScanScripts/Windows/Zip-folders.cmd b/ScanScripts/Windows/Zip-folders.cmd
new file mode 100644
index 0000000..7bd89c4
--- /dev/null
+++ b/ScanScripts/Windows/Zip-folders.cmd
@@ -0,0 +1,5 @@
+@echo off
+for /D %%i in (*) DO (
+"c:\Program Files\7-Zip\7z.exe" a -r -tzip "%%i.zip" "%%i"
+)
+pause
\ No newline at end of file
diff --git a/ScanScripts/Windows/jpgsplit.php b/ScanScripts/Windows/jpgsplit.php
new file mode 100644
index 0000000..6613e48
--- /dev/null
+++ 
b/ScanScripts/Windows/jpgsplit.php @@ -0,0 +1,282 @@ +$v) + { + $leadzeros = $offset; + if ($k % 2 == 0) + { + $odd = false; + } + else + { + $odd = true; + } + + // leading zeros for pages prior the numbered page 1 + if ($offset > $k) + { + $zeros = ""; + + if ($odd === false) + { + for ($i = 0; $leadzeros-$k > $i; $leadzeros--) + { + $zeros = $zeros . $zero; + } + $rightfilename = "page_0" . $zeros . "1"; + $leftfilename = "page_". str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + else + { + for ($i = 0; $leadzeros-$k > $i; $leadzeros--) + { + $zeros = $zeros . $zero; + } + $leftfilename = "page_0" . $zeros . "1"; + $rightfilename = "page_". str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + } + else + { + + if ($odd === true) + { + // Right page = high, highpage - $offset + // left page = low, $k + lowpage + $leftfilename = "page_". str_pad($lowpage, strlen($count), 0, STR_PAD_LEFT); + $rightfilename = "page_". str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + else + { + $rightfilename = "page_". str_pad($lowpage, strlen($count), 0, STR_PAD_LEFT); + $leftfilename = "page_". str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + $lowpage++; + } + print "Processing $k => $v \n"; + print "Saving $rightfilename \n"; + print "Saving $leftfilename \n"; + print "\n"; + + + $highpage--; + + + + $thumb = new Imagick(); + $thumb->readImage($v); + $thumb->setImageFormat('jpeg'); + $thumb->setImagecompression(imagick::COMPRESSION_JPEG); + $thumb->setImageCompressionQuality(90); + + // Copy image so we can crop left and right sides + $left = new Imagick(); + $left->readImage($v); + $left->setImageFormat('jpeg'); + $left->setImagecompression(imagick::COMPRESSION_JPEG); + $left->setImageCompressionQuality(90); + + + // Get geometry of image + $geometry = $thumb->getImageGeometry(); + $x = $geometry['width']; + $y = $geometry['height']; + $cropsize = round($x / 2); + + $geox = $cropsize-1; + + //print ($cropsize . " " . $y . " " . $geox . " " . 
$x ."\n"); + // Cropping left side + $left->setImagePage(0,0,0,0); + $left->cropImage($cropsize, $y, 0, 0); + + // Cropping right side + $thumb->setImagePage(0,0,0,0); + $thumb->cropImage($cropsize, $y, $geox, 0); + + + if ($k == 0) + { + $max = ($count * 2) - 1; + //$rightfilename = "page_". str_pad($pagenum, strlen($count), 0, STR_PAD_LEFT); + //$leftfilename = "page_". str_pad($max, strlen($count), 0, STR_PAD_LEFT); + $thumb->writeImage($output . $rightfilename . ".jpg"); + $left->writeImage($output . $leftfilename . ".jpg"); + $thumb->resizeImage(240, 1024, imagick::FILTER_LANCZOS, 1, true); + $thumb->writeImage($output . "_thumb.jpg"); + $thumb->resizeImage(64, 1024, imagick::FILTER_LANCZOS, 1, true); + $thumb->writeImage($output . "_thumb64.jpg"); + + } + else + { + //$leftpage = ($i * 2) - 1; + //$rightpage = ($i * 2); + //$rightfilename = "page_". str_pad($rightpage, strlen($count), 0, STR_PAD_LEFT); + //$leftfilename = "page_". str_pad($leftpage, strlen($count), 0, STR_PAD_LEFT); + $thumb->writeImage($output . $rightfilename . ".jpg"); + $left->writeImage($output . $leftfilename . ".jpg"); + } + + $thumb->clear(); + $thumb->destroy(); + $left->clear(); + $left->destroy(); + } + } + else + { + $zero = "0"; + $files = glob($input . "*.[jJ][pP][gG]"); + $count = count($files); + // $pagenum = 0; + $lowpage = 1; + + $highpage = $lowpage + 1; + foreach($files as $k=>$v) + { + $leadzeros = $offset; + if ($k % 2 == 0) + { + $odd = false; + } + else + { + $odd = true; + } + + // leading zeros for pages prior the numbered page 1 + if ($offset > $k) + { + $zeros = ""; + + for ($i = 0; $leadzeros-$k > $i; $leadzeros--) + { + $zeros = $zeros . $zero; + } + $leftfilename = "page_0" . $zeros . "1"; + $rightfilename = "page_". str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + else + { + $leftfilename = "page_". str_pad($lowpage, strlen($count), 0, STR_PAD_LEFT); + $rightfilename = "page_". 
str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + $lowpage += 2; + $highpage += 2; + print "Processing $k => $v \n"; + print "Saving $rightfilename \n"; + print "Saving $leftfilename \n"; + print "\n"; + $thumb = new Imagick(); + $thumb->readImage($v); + $thumb->setImageFormat('jpeg'); + $thumb->setImagecompression(imagick::COMPRESSION_JPEG); + $thumb->setImageCompressionQuality(90); + + // Copy image so we can crop left and right sides + $left = new Imagick(); + $left->readImage($v); + $left->setImageFormat('jpeg'); + $left->setImagecompression(imagick::COMPRESSION_JPEG); + $left->setImageCompressionQuality(90); + + + // Get geometry of image + $geometry = $thumb->getImageGeometry(); + $x = $geometry['width']; + $y = $geometry['height']; + $cropsize = round($x / 2); + + $geox = $cropsize-1; + + //print ($cropsize . " " . $y . " " . $geox . " " . $x ."\n"); + // Cropping left side + $left->setImagePage(0,0,0,0); + $left->cropImage($cropsize, $y, 0, 0); + + // Cropping right side + $thumb->setImagePage(0,0,0,0); + $thumb->cropImage($cropsize, $y, $geox, 0); + + + if ($k == 0) + { + $max = ($count * 2) - 1; + //$rightfilename = "page_". str_pad($pagenum, strlen($count), 0, STR_PAD_LEFT); + //$leftfilename = "page_". str_pad($max, strlen($count), 0, STR_PAD_LEFT); + $thumb->writeImage($output . $rightfilename . ".jpg"); + $left->writeImage($output . $leftfilename . ".jpg"); + $thumb->resizeImage(240, 1024, imagick::FILTER_LANCZOS, 1, true); + $thumb->writeImage($output . "_thumb.jpg"); + $thumb->resizeImage(64, 1024, imagick::FILTER_LANCZOS, 1, true); + $thumb->writeImage($output . "_thumb64.jpg"); + + } + else + { + //$leftpage = ($i * 2) - 1; + //$rightpage = ($i * 2); + //$rightfilename = "page_". str_pad($rightpage, strlen($count), 0, STR_PAD_LEFT); + //$leftfilename = "page_". str_pad($leftpage, strlen($count), 0, STR_PAD_LEFT); + $thumb->writeImage($output . $rightfilename . ".jpg"); + $left->writeImage($output . $leftfilename . 
".jpg"); + } + + $thumb->clear(); + $thumb->destroy(); + $left->clear(); + $left->destroy(); + } + + } +} +?> diff --git a/ScanScripts/Windows/make_manual.txt b/ScanScripts/Windows/make_manual.txt new file mode 100644 index 0000000..306906c --- /dev/null +++ b/ScanScripts/Windows/make_manual.txt @@ -0,0 +1,17 @@ +@echo off + +setlocal EnableDelayedExpansion +set folder=%cd% +cd work\out + +mkdir output + +for %%i in (*.tif) DO ( +convert %%i output\%%~ni.pdf +) + +pdftk *.pdf cat output "%folder%\_manual.pdf" +del *.pdf /q +del "%folder%\_manual.pdf" /q + +pause diff --git a/ScanScripts/Windows/metadata.txt b/ScanScripts/Windows/metadata.txt new file mode 100644 index 0000000..23a6d2b --- /dev/null +++ b/ScanScripts/Windows/metadata.txt @@ -0,0 +1,3 @@ +InfoBegin +InfoKey: Author +InfoValue: Scanned and Processed by Tomse @ http://retro-commodore.eu diff --git a/ScanScripts/Windows/metadata2.txt b/ScanScripts/Windows/metadata2.txt new file mode 100644 index 0000000..0ecba6b --- /dev/null +++ b/ScanScripts/Windows/metadata2.txt @@ -0,0 +1,3 @@ +InfoBegin +InfoKey: Author +InfoValue: Scanned and Processed by Tomse @ http://retro-commodore.eu, thanks to Thomas Roenne Viborg \ No newline at end of file diff --git a/ScanScripts/Windows/metadata_martin.txt b/ScanScripts/Windows/metadata_martin.txt new file mode 100644 index 0000000..ea5e379 --- /dev/null +++ b/ScanScripts/Windows/metadata_martin.txt @@ -0,0 +1,3 @@ +InfoBegin +InfoKey: Author +InfoValue: Processed by demolition/Laxity @ http://retro-commodore.eu diff --git a/ScanScripts/Windows/ocr.cmd b/ScanScripts/Windows/ocr.cmd new file mode 100644 index 0000000..462500c --- /dev/null +++ b/ScanScripts/Windows/ocr.cmd @@ -0,0 +1,29 @@ +#!/bin/sh +START=$(date +%s) + +SOURCE=$1 +LANG=$2 +PAGES=`/usr/local/libexec/xpdf/pdfinfo $SOURCE | grep -i pages | awk '{print $2}'` # set to the number of pages in the PDF +#SOURCE=pamphlet-low.pdf # set to the file name of the PDF +OUTPUT=$SOURCE +RESOLUTION=600 # set to the 
resolution the scanner used (the higher, the better)
+
+#xpdf-pdfinfo pamphlet-low.pdf | grep Pages: | awk '{print $2}' | tail -n 1
+
+touch $OUTPUT.txt
+for i in `seq 1 $PAGES`; do
+ convert -density $RESOLUTION -depth 8 $SOURCE\[$(($i - 1 ))\] page$i.png
+# tesseract page$i.tif >> $OUTPUT
+ tesseract page$i.png $OUTPUT$i -l $2
+ rm page$i.png
+ cat $OUTPUT$i.txt >> $OUTPUT.txt
+ rm $OUTPUT$i.txt
+done
+
+
+
+
+END=$(date +%s)
+DIFF=$(echo "$END - $START" | bc)
+echo $DIFF
+
diff --git a/ScanScripts/Windows/ocr.sh b/ScanScripts/Windows/ocr.sh
new file mode 100644
index 0000000..462500c
--- /dev/null
+++ b/ScanScripts/Windows/ocr.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+# OCR every page of a PDF with tesseract and collect the text in <pdf>.txt.
+# Usage: ocr.sh <source.pdf> <tesseract-language>; prints the elapsed seconds.
+START=$(date +%s)
+
+SOURCE=$1
+# Deliberately not named LANG: assigning LANG would overwrite the locale
+# variable that the convert and tesseract child processes inherit.
+OCR_LANG=$2
+PAGES=`/usr/local/libexec/xpdf/pdfinfo "$SOURCE" | awk '/^Pages:/ {print $2}'` # set to the number of pages in the PDF
+#SOURCE=pamphlet-low.pdf # set to the file name of the PDF
+OUTPUT=$SOURCE
+RESOLUTION=600 # set to the resolution the scanner used (the higher, the better)
+
+#xpdf-pdfinfo pamphlet-low.pdf | grep Pages: | awk '{print $2}' | tail -n 1
+
+touch "$OUTPUT.txt"
+for i in `seq 1 $PAGES`; do
+    # ImageMagick page selectors are zero-based, hence i - 1.
+    convert -density $RESOLUTION -depth 8 "${SOURCE}[$(($i - 1))]" "page$i.png"
+    tesseract "page$i.png" "$OUTPUT$i" -l "$OCR_LANG"
+    rm "page$i.png"
+    cat "$OUTPUT$i.txt" >> "$OUTPUT.txt"
+    rm "$OUTPUT$i.txt"
+done
+
+
+
+
+END=$(date +%s)
+DIFF=$(echo "$END - $START" | bc)
+echo $DIFF
+
diff --git a/ScanScripts/Windows/png_to_1bit.cmd b/ScanScripts/Windows/png_to_1bit.cmd
new file mode 100644
index 0000000..ea68d90
--- /dev/null
+++ b/ScanScripts/Windows/png_to_1bit.cmd
@@ -0,0 +1,7 @@
+@echo off
+mkdir pages
+
+for %%i in (*.png) DO (
+convert %%i -colors 2 -threshold 75%% +repage pages\%%i
+
+)
\ No newline at end of file
diff --git a/ScanScripts/Windows/pngsplit.php b/ScanScripts/Windows/pngsplit.php
new file mode 100644
index 0000000..8686100
--- /dev/null
+++ b/ScanScripts/Windows/pngsplit.php
@@ -0,0 +1,282 @@
+$v)
+ {
+ $leadzeros = $offset;
+ if 
($k % 2 == 0) + { + $odd = false; + } + else + { + $odd = true; + } + + // leading zeros for pages prior the numbered page 1 + if ($offset > $k) + { + $zeros = ""; + + if ($odd === false) + { + for ($i = 0; $leadzeros-$k > $i; $leadzeros--) + { + $zeros = $zeros . $zero; + } + $rightfilename = "page_0" . $zeros . "1"; + $leftfilename = "page_". str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + else + { + for ($i = 0; $leadzeros-$k > $i; $leadzeros--) + { + $zeros = $zeros . $zero; + } + $leftfilename = "page_0" . $zeros . "1"; + $rightfilename = "page_". str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + } + else + { + + if ($odd === true) + { + // Right page = high, highpage - $offset + // left page = low, $k + lowpage + $leftfilename = "page_". str_pad($lowpage, strlen($count), 0, STR_PAD_LEFT); + $rightfilename = "page_". str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + else + { + $rightfilename = "page_". str_pad($lowpage, strlen($count), 0, STR_PAD_LEFT); + $leftfilename = "page_". str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + $lowpage++; + } + print "Processing $k => $v \n"; + print "Saving $rightfilename \n"; + print "Saving $leftfilename \n"; + print "\n"; + + + $highpage--; + + + + $thumb = new Imagick(); + $thumb->readImage($v); + $thumb->setImageFormat('png'); +// $thumb->setImagecompression(imagick::COMPRESSION_JPEG); + // $thumb->setImageCompressionQuality(90); + + // Copy image so we can crop left and right sides + $left = new Imagick(); + $left->readImage($v); + $left->setImageFormat('png'); +// $left->setImagecompression(imagick::COMPRESSION_JPEG); + // $left->setImageCompressionQuality(90); + + + // Get geometry of image + $geometry = $thumb->getImageGeometry(); + $x = $geometry['width']; + $y = $geometry['height']; + $cropsize = round($x / 2); + + $geox = $cropsize-1; + + //print ($cropsize . " " . $y . " " . $geox . " " . 
$x ."\n"); + // Cropping left side + $left->setImagePage(0,0,0,0); + $left->cropImage($cropsize, $y, 0, 0); + + // Cropping right side + $thumb->setImagePage(0,0,0,0); + $thumb->cropImage($cropsize, $y, $geox, 0); + + + if ($k == 0) + { + $max = ($count * 2) - 1; + //$rightfilename = "page_". str_pad($pagenum, strlen($count), 0, STR_PAD_LEFT); + //$leftfilename = "page_". str_pad($max, strlen($count), 0, STR_PAD_LEFT); + $thumb->writeImage($output . $rightfilename . ".png"); + $left->writeImage($output . $leftfilename . ".png"); + $thumb->resizeImage(240, 1024, imagick::FILTER_LANCZOS, 1, true); + $thumb->writeImage($output . "_thumb.jpg"); + $thumb->resizeImage(64, 1024, imagick::FILTER_LANCZOS, 1, true); + $thumb->writeImage($output . "_thumb64.jpg"); + + } + else + { + //$leftpage = ($i * 2) - 1; + //$rightpage = ($i * 2); + //$rightfilename = "page_". str_pad($rightpage, strlen($count), 0, STR_PAD_LEFT); + //$leftfilename = "page_". str_pad($leftpage, strlen($count), 0, STR_PAD_LEFT); + $thumb->writeImage($output . $rightfilename . ".png"); + $left->writeImage($output . $leftfilename . ".png"); + } + + $thumb->clear(); + $thumb->destroy(); + $left->clear(); + $left->destroy(); + } + } + else + { + $zero = "0"; + $files = glob($input . "*.[pP][nN][gG]"); + $count = count($files); + // $pagenum = 0; + $lowpage = 1; + + $highpage = $lowpage + 1; + foreach($files as $k=>$v) + { + $leadzeros = $offset; + if ($k % 2 == 0) + { + $odd = false; + } + else + { + $odd = true; + } + + // leading zeros for pages prior the numbered page 1 + if ($offset > $k) + { + $zeros = ""; + + for ($i = 0; $leadzeros-$k > $i; $leadzeros--) + { + $zeros = $zeros . $zero; + } + $leftfilename = "page_0" . $zeros . "1"; + $rightfilename = "page_". str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + else + { + $leftfilename = "page_". str_pad($lowpage, strlen($count), 0, STR_PAD_LEFT); + $rightfilename = "page_". 
str_pad($highpage, strlen($count), 0, STR_PAD_LEFT); + } + $lowpage += 2; + $highpage += 2; + print "Processing $k => $v \n"; + print "Saving $rightfilename \n"; + print "Saving $leftfilename \n"; + print "\n"; + $thumb = new Imagick(); + $thumb->readImage($v); + $thumb->setImageFormat('png'); +// $thumb->setImagecompression(imagick::COMPRESSION_JPEG); + // $thumb->setImageCompressionQuality(90); + + // Copy image so we can crop left and right sides + $left = new Imagick(); + $left->readImage($v); + $left->setImageFormat('png'); +// $left->setImagecompression(imagick::COMPRESSION_JPEG); + // $left->setImageCompressionQuality(90); + + + // Get geometry of image + $geometry = $thumb->getImageGeometry(); + $x = $geometry['width']; + $y = $geometry['height']; + $cropsize = round($x / 2); + + $geox = $cropsize-1; + + //print ($cropsize . " " . $y . " " . $geox . " " . $x ."\n"); + // Cropping left side + $left->setImagePage(0,0,0,0); + $left->cropImage($cropsize, $y, 0, 0); + + // Cropping right side + $thumb->setImagePage(0,0,0,0); + $thumb->cropImage($cropsize, $y, $geox, 0); + + + if ($k == 0) + { + $max = ($count * 2) - 1; + //$rightfilename = "page_". str_pad($pagenum, strlen($count), 0, STR_PAD_LEFT); + //$leftfilename = "page_". str_pad($max, strlen($count), 0, STR_PAD_LEFT); + $thumb->writeImage($output . $rightfilename . ".png"); + $left->writeImage($output . $leftfilename . ".png"); + $thumb->resizeImage(240, 1024, imagick::FILTER_LANCZOS, 1, true); + $thumb->writeImage($output . "_thumb.jpg"); + $thumb->resizeImage(64, 1024, imagick::FILTER_LANCZOS, 1, true); + $thumb->writeImage($output . "_thumb64.jpg"); + + } + else + { + //$leftpage = ($i * 2) - 1; + //$rightpage = ($i * 2); + //$rightfilename = "page_". str_pad($rightpage, strlen($count), 0, STR_PAD_LEFT); + //$leftfilename = "page_". str_pad($leftpage, strlen($count), 0, STR_PAD_LEFT); + $thumb->writeImage($output . $rightfilename . ".png"); + $left->writeImage($output . $leftfilename . 
".png"); + } + + $thumb->clear(); + $thumb->destroy(); + $left->clear(); + $left->destroy(); + } + + } +} +?> diff --git a/ScanScripts/Windows/runme.cmd b/ScanScripts/Windows/runme.cmd new file mode 100644 index 0000000..52c6ff6 --- /dev/null +++ b/ScanScripts/Windows/runme.cmd @@ -0,0 +1,2 @@ +php.exe -f correct_pages.php +pause \ No newline at end of file diff --git a/ScanScripts/Windows/simplex_scanned_sort.php b/ScanScripts/Windows/simplex_scanned_sort.php new file mode 100644 index 0000000..1b336c1 --- /dev/null +++ b/ScanScripts/Windows/simplex_scanned_sort.php @@ -0,0 +1,40 @@ +