more scripts added

This commit is contained in:
tomse 2023-12-21 01:48:37 +01:00
parent 220437e651
commit c9b25e6672
34 changed files with 1299 additions and 0 deletions

View File

@ -0,0 +1,52 @@
REM @echo off
REM Builds manual.pdf from the TIFF pages in work\out.
REM Every page is converted by its own convert process opened with start,
REM so the conversions run side by side and the script has to wait for
REM the user before the pages are merged with pdftk.
set source=D:\Retro-Work
setlocal EnableDelayedExpansion
set folder=%cd%
cd work\out
choice /m "Fix paging "
if %errorlevel%==1 (
    copy D:\Retro-Work\correct_pages.php .\ /y
    REM NOTE review: the copy above is not the file that is run here - confirm which one is wanted
    php -f C:\Retro-Work\Tools\correct_pages.php
    cd newpages
)
REM Start without a last page so that answering N appends nothing.
set "page="
choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
if %errorlevel%==1 (
    set page=brought_to_you_by.pdf
) else if %errorlevel%==2 (
    set page=brought_to_you_by_[TABLET].pdf
)
if NOT EXIST output mkdir output
REM thumb is 1 only for the first page, which becomes thumb.jpg
set thumb=1
for %%i in (*.tif) DO (
    if !thumb!==1 (
        start convert "%%i" -resize 240x310^^! -quality 80%% "!folder!\thumb.jpg"
    )
    set thumb=2
    start convert "%%i" "output\%%~ni.pdf"
)
echo "Wait for all opened cmd windows to close and press a key"
pause
cd output
REM for %%i in (*.jpg) DO (
REM convert %%i %%~ni.pdf
REM )
REM del *.jpg /q
REM Merge the page PDFs and add the last page only when one was chosen.
if defined page (
    pdftk *.pdf "%source%\%page%" cat output "%folder%\_manual.pdf"
) else (
    pdftk *.pdf cat output "%folder%\_manual.pdf"
)
pdftk "%folder%\_manual.pdf" update_info %source%\metadata.txt output "%folder%\manual.pdf"
del *.pdf /q
REM del ..\*.* /q
del "%folder%\_manual.pdf" /q
cd /d "%folder%"
start manual.pdf
pause
rem requires OCR tool
rem call "3 - PdfToText.cmd"

View File

@ -0,0 +1,2 @@
REM Runs simplex_scanned_sort.php from the parent folder with the PHP CLI,
REM then waits for a key press so the output stays visible.
php.exe -f ..\simplex_scanned_sort.php
pause

View File

@ -0,0 +1,28 @@
@echo off
REM Writes an .md5 and a .sha1 checksum file next to every PDF, OCR text
REM file and ZIP archive in the current folder.
REM Disabled step kept for reference:
REM setlocal EnableDelayedExpansion
REM for %%i in (*_recognized.pdf) DO (
REM set file=%%i
REM set output=!file:recognized=ocr!
REM pdftk %%i update_info "D:\Retro-Work\metadata.txt" output !output!
REM del %%i
REM )
REM endlocal
REM One loop covers all three patterns, in the same order as before.
REM Names are quoted so files with spaces are handled.
for %%i in (*.pdf *.ocr.txt *.zip) DO (
    md5sum "%%i" > "%%i.md5"
    sha1sum "%%i" > "%%i.sha1"
)

View File

@ -0,0 +1,2 @@
REM Runs simplex_scanned_sort.php from the current folder with the PHP CLI,
REM then waits for a key press so the output stays visible.
php.exe -f simplex_scanned_sort.php
pause

View File

@ -0,0 +1,4 @@
REM Remaps every TIFF in the current folder to the palette image 8col.png
REM without dithering and writes the result into the indexed folder.
REM The folder is only created when missing, and names are quoted so that
REM files with spaces work.
if not exist indexed mkdir indexed
for %%i in (*.tif) DO (
    convert "%%i" +dither -remap D:\Retro-Work\8col.png "indexed\%%~ni.tif"
)

View File

@ -0,0 +1,4 @@
REM Thresholds each channel of every TIFF at 75 percent, recombines the
REM channels and remaps the result to commodore_3_colour_index_ref.tif.
REM Output goes into the indexed folder under the same file name.
REM The folder is only created when missing, and names are quoted so that
REM files with spaces work.
if not exist indexed mkdir indexed
for %%i in (*.tif) DO (
    convert "%%i" -separate -threshold 75%% -combine -remap "..\..\..\..\commodore_3_colour_index_ref.tif" "indexed\%%i"
)

View File

@ -0,0 +1,10 @@
@echo off
REM Reduces every TIFF in the current folder to the number of colours the
REM user enters and writes the result into the pages folder.
REM Clear nr first so an empty answer is detected instead of reusing an
REM inherited value.
set "nr="
set /p nr="Amount of colors: "
REM Fall back to 256 colours on an empty answer or a value below 1.
REM The IF command takes no DO keyword, so the old fallback never ran.
if not defined nr set nr=256
if %nr% LSS 1 set nr=256
if not exist pages mkdir pages
for %%i in (*.tif) DO (
    convert "%%i" -colors %nr% "pages\%%i"
)

View File

@ -0,0 +1,40 @@
@echo off
REM Runs make_booklet.php in work\out, then turns the TIFF pages found in
REM the odd and even subfolders into _odd.pdf and _even.pdf in the folder
REM the script was started from.
set source=D:\Retro-Work
setlocal EnableDelayedExpansion
set folder=%cd%
cd work\out
copy %source%\make_booklet.php .\ /y
php -f make_booklet.php

REM Odd pages. The stray closing parentheses left over from removed IF
REM blocks are gone.
cd odd
if NOT EXIST output mkdir output
for %%i in (*.tif) DO (
    echo Converting %%i
    convert "%%i" "output\%%~ni.pdf"
)
cd output
pdftk *.pdf cat output "%folder%\_odd.pdf"
rem del *.pdf /q

REM Even pages. The path is quoted and /d is used so a folder on another
REM drive or with spaces still works.
cd /d "%folder%\work\out\even"
if NOT EXIST output mkdir output
for %%i in (*.tif) DO (
    echo Converting %%i
    convert "%%i" "output\%%~ni.pdf"
)
cd output
pdftk *.pdf cat output "%folder%\_even.pdf"
pause

View File

@ -0,0 +1,8 @@
REM Converts every TIFF and PNG in the current folder to grayscale using
REM the rec709luma method and writes it into the pages folder under the
REM same file name.
REM The folder is only created when missing, and names are quoted so that
REM files with spaces work.
if not exist pages mkdir pages
for %%i in (*.tif *.png) DO (
    convert "%%i" -grayscale rec709luma "pages\%%i"
)

View File

@ -0,0 +1,7 @@
@echo off
REM Paints a white rectangle over a fixed area of every TIFF in the
REM current folder and writes the result into the work folder.
rem start_x, start_y end_x, end_y (full cords)
REM The folder is only created when missing, and names are quoted so that
REM files with spaces work.
if not exist work mkdir work
for %%i in (*.tif) DO (
    convert "%%i" -fill white -draw "rectangle 3278,0 3758,4922" "work\%%i"
)

View File

@ -0,0 +1,3 @@
REM Renders page one of each PDF in this folder as a JPEG thumbnail
REM 310 pixels high, flattened onto a white background.
for %%k in (*.pdf) do convert -thumbnail x310 -background white -alpha remove "%%k[0]" "%%~nk.jpg"

View File

@ -0,0 +1,47 @@
@echo off
REM Builds manual.pdf from the TIFF pages in work\out, converting one page
REM after the other. The first page also becomes thumb.jpg, 240 wide.
set source=D:\Retro-Work
setlocal EnableDelayedExpansion
set folder=%cd%
cd work\out
choice /m "Fix paging "
if %errorlevel%==1 (
    copy D:\Retro-Work\correct_pages.php .\ /y
    php -f correct_pages.php
    cd newpages
)
REM Start without a last page so that answering N appends nothing.
set "page="
choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
if %errorlevel%==1 (
    set page=brought_to_you_by.pdf
) else if %errorlevel%==2 (
    set page=brought_to_you_by_[TABLET].pdf
)
if NOT EXIST output mkdir output
REM thumb is 1 only for the first page
set thumb=1
for %%i in (*.tif) DO (
    if !thumb!==1 (
        convert "%%i" -resize 240 -quality 80%% "!folder!\thumb.jpg"
    )
    set thumb=2
    convert "%%i" "output\%%~ni.pdf"
)
cd output
REM for %%i in (*.jpg) DO (
REM convert %%i %%~ni.pdf
REM )
REM del *.jpg /q
REM Merge the page PDFs and add the last page only when one was chosen.
if defined page (
    pdftk *.pdf "%source%\%page%" cat output "%folder%\_manual.pdf"
) else (
    pdftk *.pdf cat output "%folder%\_manual.pdf"
)
pdftk "%folder%\_manual.pdf" update_info %source%\metadata.txt output "%folder%\manual.pdf"
del *.pdf /q
REM del ..\*.* /q
del "%folder%\_manual.pdf" /q
pause

View File

@ -0,0 +1,45 @@
@echo off
REM Builds manual.pdf from the TIFF pages in work\out, converting one page
REM after the other. The first page also becomes thumb.jpg.
REM All helper files are read from D:\Retro-Work.
setlocal EnableDelayedExpansion
set folder=%cd%
cd work\out
choice /m "Fix paging "
if %errorlevel%==1 (
    copy D:\Retro-Work\correct_pages.php .\ /y
    php -f correct_pages.php
    cd newpages
)
REM Start without a last page so that answering N appends nothing.
set "page="
choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
if %errorlevel%==1 (
    set page=brought_to_you_by.pdf
) else if %errorlevel%==2 (
    set page=brought_to_you_by_[TABLET].pdf
)
if NOT EXIST output mkdir output
REM thumb is 1 only for the first page
set thumb=1
for %%i in (*.tif) DO (
    if !thumb!==1 (
        convert "%%i" -resize 240x310^^! -quality 80%% "!folder!\thumb.jpg"
    )
    set thumb=2
    convert "%%i" "output\%%~ni.pdf"
)
cd output
REM for %%i in (*.jpg) DO (
REM convert %%i %%~ni.pdf
REM )
REM del *.jpg /q
REM Merge the page PDFs and add the last page only when one was chosen.
if defined page (
    pdftk *.pdf "D:\Retro-Work\%page%" cat output "%folder%\_manual.pdf"
) else (
    pdftk *.pdf cat output "%folder%\_manual.pdf"
)
pdftk "%folder%\_manual.pdf" update_info D:\Retro-Work\metadata.txt output "%folder%\manual.pdf"
del *.pdf /q
REM del ..\*.* /q
del "%folder%\_manual.pdf" /q
pause

View File

@ -0,0 +1,46 @@
@echo off
REM Builds manual.pdf from the TIFF pages in work\out.
REM ImageMagick only makes thumb.jpg from the first page. The page PDFs
REM are produced by one IrfanView batch conversion.
set source=D:\Retro-Work
setlocal EnableDelayedExpansion
set folder=%cd%
cd work\out
choice /m "Fix paging "
if %errorlevel%==1 (
    copy D:\Retro-Work\correct_pages.php .\ /y
    php -f correct_pages.php
    cd newpages
)
REM Start without a last page so that answering N appends nothing.
set "page="
choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
if %errorlevel%==1 (
    set page=brought_to_you_by.pdf
) else if %errorlevel%==2 (
    set page=brought_to_you_by_[TABLET].pdf
)
if NOT EXIST output mkdir output
REM thumb is 1 only for the first page
set thumb=1
for %%i in (*.tif) DO (
    if !thumb!==1 (
        convert "%%i" -resize 240x310^^! -quality 80%% "!folder!\thumb.jpg"
    )
    set thumb=2
)
"c:\Program Files (x86)\IrfanView\i_view32.exe" .\*.tif /convert=.\output\*.pdf
cd output
REM for %%i in (*.jpg) DO (
REM convert %%i %%~ni.pdf
REM )
REM del *.jpg /q
REM Merge the page PDFs and add the last page only when one was chosen.
if defined page (
    pdftk *.pdf "%source%\%page%" cat output "%folder%\_manual.pdf"
) else (
    pdftk *.pdf cat output "%folder%\_manual.pdf"
)
pdftk "%folder%\_manual.pdf" update_info %source%\metadata.txt output "%folder%\manual.pdf"
del *.pdf /q
REM del ..\*.* /q
del "%folder%\_manual.pdf" /q
pause

View File

@ -0,0 +1,51 @@
@echo off
REM Builds manual.pdf from the TIFF pages in work\out using magick convert.
REM Page PDFs are converted in separate windows opened with start, so the
REM script waits for a key press before merging. Afterwards it opens the
REM result and calls 3 - PdfToText.cmd.
set source=G:\Retro-Work
setlocal EnableDelayedExpansion
set folder=%cd%
cd work\out
choice /m "Fix paging "
if %errorlevel%==1 (
    copy %source%\correct_pages.php .\ /y
    php -f correct_pages.php
    cd newpages
)
REM Start without a last page so that answering N appends nothing.
set "page="
choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
if %errorlevel%==1 (
    set page=brought_to_you_by.pdf
) else if %errorlevel%==2 (
    set page=brought_to_you_by_[TABLET].pdf
)
if NOT EXIST output mkdir output
REM thumb is 1 only for the first page
set thumb=1
for %%i in (*.tif) DO (
    if !thumb!==1 (
        magick convert "%%i" -resize 240x310^^! -quality 80%% "!folder!\thumb.jpg"
    )
    set thumb=2
    start magick convert "%%i" "output\%%~ni.pdf"
)
echo Wait for all opened cmd windows to close and press a key
pause
cd output
REM for %%i in (*.jpg) DO (
REM convert %%i %%~ni.pdf
REM )
REM del *.jpg /q
REM Merge the page PDFs and add the last page only when one was chosen.
if defined page (
    pdftk *.pdf "%source%\%page%" cat output "%folder%\_manual.pdf"
) else (
    pdftk *.pdf cat output "%folder%\_manual.pdf"
)
pdftk "%folder%\_manual.pdf" update_info %source%\metadata.txt output "%folder%\manual.pdf"
del *.pdf /q
REM del ..\*.* /q
del "%folder%\_manual.pdf" /q
cd /d "%folder%"
start manual.pdf
pause
call "3 - PdfToText.cmd"

View File

@ -0,0 +1,88 @@
@echo off
REM V2.0 Updated 2023-02-04 Tomse
REM Moved to using Tesseract OCR
REM Builds a searchable manual from the TIFF pages in work\out:
REM   1. tesseract turns every page into a PDF, one window per page
REM   2. pdftk merges the pages, plus an optional last page, into manual.pdf
REM   3. pdftotext extracts the text and Ghostscript stamps the document
REM      info from temp\pdfmarks.txt into manual2.pdf
set source=G:\Retro-Work
setlocal EnableDelayedExpansion
set folder=%cd%
cd work\out
choice /m "Fix paging "
if %errorlevel%==1 (
    copy %source%\correct_pages.php .\ /y
    php -f correct_pages.php
    cd newpages
)
REM Start without a last page so that answering N appends nothing.
set "page="
choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
if %errorlevel%==1 (
    set page=brought_to_you_by.pdf
) else if %errorlevel%==2 (
    set page=brought_to_you_by_[TABLET].pdf
)
choice /C DEG /m "(D)anish, (E)nglish, (G)erman "
if %errorlevel%==1 (
    set lang=dan
) else if %errorlevel%==2 (
    set lang=eng
) else if %errorlevel%==3 (
    set lang=deu
)
if NOT EXIST output mkdir output
REM thumb is 1 only for the first page
set thumb=1
for %%i in (*.tif) DO (
    if !thumb!==1 (
        magick convert "%%i" -resize 240x310^^! -quality 80%% "!folder!\thumb.jpg"
    )
    set thumb=2
    start tesseract "%%i" "output\%%~ni" -l %lang% pdf
)
echo Wait till all other screens are gone before continuing
pause
cd output
REM Merge the page PDFs and add the last page only when one was chosen.
if defined page (
    pdftk *.pdf "%source%\%page%" cat output "%folder%\manual.pdf"
) else (
    pdftk *.pdf cat output "%folder%\manual.pdf"
)
cd /d "%folder%"
if not exist temp mkdir temp
REM Timestamp for the PDF info as year month day hour minute second.
REM Assumes DATE is shown as dd-mm-yyyy - TODO confirm on other locales.
REM TIME has a leading space before 10 o'clock, so spaces become zeros.
set "stamp=%DATE:~6,4%%DATE:~3,2%%DATE:~0,2%%TIME:~0,2%%TIME:~3,2%%TIME:~6,2%"
set "stamp=%stamp: =0%"
echo [ /Title () > temp\pdfmarks.txt
echo /Author (Scanned and Processed by Tomse @ http://retro-commodore.eu) >> temp\pdfmarks.txt
echo /Subject () >> temp\pdfmarks.txt
echo /Keywords (RCEU, Commodore) >> temp\pdfmarks.txt
echo /ModDate (D:%stamp%) >> temp\pdfmarks.txt
echo /CreationDate (D:%stamp%) >> temp\pdfmarks.txt
echo /Creator (retro-commodore.eu) >> temp\pdfmarks.txt
echo /Producer (retro-commodore.eu) >> temp\pdfmarks.txt
echo /DOCINFO pdfmark >> temp\pdfmarks.txt
if exist manual.pdf pdftotext manual.pdf manual2.ocr.txt
if exist manual.pdf gswin64c -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile=manual2.pdf "manual.pdf" temp\pdfmarks.txt
if exist mag_recognized.pdf gswin64c -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile="[300dpi].pdf" "mag_recognized.pdf" temp\pdfmarks.txt
if exist _tablet_recognized.pdf gswin64c -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile="[150dpi][ocr].pdf" "_tablet_recognized.pdf" temp\pdfmarks.txt
if exist audiomedia_recognized.pdf gswin64c -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile="Privat_Computer_199x_-_xx.pdf" "audiomedia_recognized.pdf" temp\pdfmarks.txt
cls
start manual2.pdf
REM Answering Y removes the intermediate manual.pdf and keeps manual2.pdf.
choice /m "Delete old files?"
if %ERRORLEVEL% EQU 1 del manual.pdf

View File

@ -0,0 +1,30 @@
@echo off
REM Turns every TIFF in work\out into its own PDF in the start folder,
REM together with a JPEG preview and a small text file describing it.
setlocal EnableDelayedExpansion
set folder=%cd%
cd work\out
REM Start without a last page so that answering N appends nothing.
set "page="
choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
if %errorlevel%==1 (
    set page=brought_to_you_by.pdf
) else if %errorlevel%==2 (
    set page=brought_to_you_by_[TABLET].pdf
)
if NOT EXIST output mkdir output
for %%i in (*.tif) DO (
    convert "%%i" -resize 240x310^^! -quality 80%% "!folder!\%%~ni.jpg"
    convert "%%i" "output\%%~ni.pdf"
    if defined page (
        pdftk "output\%%~ni.pdf" "D:\Retro-Work\%page%" cat output "output\_temp.pdf"
    ) else (
        pdftk "output\%%~ni.pdf" cat output "output\_temp.pdf"
    )
    pdftk "output\_temp.pdf" update_info D:\Retro-Work\metadata2.txt output "%folder%\%%~ni.pdf"
    del output\_temp.pdf /q
    echo url: %%~ni.pdf > "%folder%\%%~ni.txt"
    echo title: >> "%folder%\%%~ni.txt"
    echo lang: English >> "%folder%\%%~ni.txt"
)
pause

View File

@ -0,0 +1,58 @@
@echo off
REM Builds two PDFs from the TIFF pages in work\out:
REM   mag.pdf      full resolution pages plus brought_to_you_by.pdf
REM   _tablet.pdf  pages resampled to 150 pixels per inch plus the TABLET last page
REM Both are written to the folder the script was started from, and
REM thumb.jpg is made from the first page.
set source=D:\Retro-Work
setlocal EnableDelayedExpansion
set folder=%cd%
cd work\out
choice /m "Fix paging "
if %errorlevel%==1 (
copy D:\Retro-Work\correct_pages.php .\ /y
php -f correct_pages.php
cd newpages
)
REM NOTE review: the answer below is stored in page but nothing further
REM down reads it. Both PDFs always get a fixed last page - confirm intent.
choice /C LSN /m "Use large (L), small (S) or no (N) lastpage "
if %errorlevel%==1 (
set page=brought_to_you_by.pdf
) else if %errorlevel%==2 (
set page=brought_to_you_by_[TABLET].pdf
)
if NOT EXIST output mkdir output
REM thumb is 1 only for the first page, which becomes thumb.jpg
set thumb=1
if NOT EXIST tablet mkdir tablet
REM First pass: thumbnail of the first page and a 150 ppi JPEG of every page
for %%i in (*.tif) DO (
if !thumb!==1 (
convert %%i -resize 240x310^^! -quality 75%% "!folder!\thumb.jpg"
)
set thumb=2
convert -units pixelsperinch %%i -resample 150 tablet\%%~ni.jpg
)
rem 300dpi mag
REM Second pass: one PDF per page at the original resolution
for %%i in (*.tif) DO (
convert %%i output\%%~ni.pdf
)
cd output
del *.jpg /q
REM Merge into _mag.pdf, stamp the metadata into mag.pdf, then clean up
pdftk *.pdf %source%\brought_to_you_by.pdf cat output "!folder!\_mag.pdf"
pdftk "!folder!\_mag.pdf" update_info %source%\metadata.txt output "!folder!\mag.pdf"
del *.pdf /q
del "!folder!\_mag.pdf"
cd ..
rem Tablet
REM Same steps for the resampled JPEG pages. The merged file is first
REM written inside the tablet folder and removed again by the last del.
cd tablet
for %%i in (*.jpg) DO (
convert %%i %%~ni.pdf
)
del *.jpg /q
pdftk *.pdf %source%\brought_to_you_by_[TABLET].pdf cat output _tablet.pdf
pdftk _tablet.pdf update_info %source%\metadata.txt output "!folder!\_tablet.pdf"
del *.pdf /q
pause

View File

@ -0,0 +1,5 @@
@echo off
REM Writes a JPEG copy of every TIFF in the current folder into the tablet
REM folder, resized to fit within 1024x4096 at quality 85.
REM The folder is only created when missing, and names are quoted so that
REM files with spaces work.
if not exist tablet mkdir tablet
for %%i in (*.tif) DO (
    convert "%%i" -resize 1024x4096 -quality 85%% "tablet\%%~ni.jpg"
)

View File

@ -0,0 +1,5 @@
@echo off
REM Extracts the text of manual_recognized.pdf into manual2.ocr.txt, then
REM writes manual2.pdf with the document info from metadata_martin.txt.
pdftotext manual_recognized.pdf manual2.ocr.txt
pdftk "manual_recognized.pdf" update_info D:\Retro-Work\metadata_martin.txt output "manual2.pdf"

View File

@ -0,0 +1,5 @@
@echo off
REM Packs each subfolder of the current folder into a ZIP archive of the
REM same name using 7-Zip, then waits for a key press.
for /D %%k in (*) do "c:\Program Files\7-Zip\7z.exe" a -r -tzip "%%k.zip" "%%k"
pause

View File

@ -0,0 +1,282 @@
<?php
/* This file is meant to run from the CLI/Shell
 * It splits scanned double-page JPEG images down the middle and saves the
 * left and right halves as individual pages.
 * It creates 240px + 64px wide thumbs of the first page right side
 * Author Carsten Jensen aka Tomse @ http://awesome.commodore.me
 * Copyright (C) 2013 Carsten Jensen
 * This is distributed under GNU GPLv2
 */
/* Requirements:
 * php (cli/shell version) / or as apache site
 * imagemagick module
 */
/* Usage:
 * Run the file from the browser (apache setup)
 * or from cli : php -f split
 * Pages and thumb files (_thumb.jpg, _thumb64.jpg) are written to $output
 */

// Edit the path here where your files to be split are stored.
$input = "./input/";
// Edit the path here where your result files will be saved.
$output = "./output/";
// Counting from 0 where 0 is the front page, to the first "page 1"
$offset = 0;
// Sequential pages = true or if clip has been removed and it's the paper page
$sequential = false;
// Replace false with true to make the script work
$work = true;

// code is here.. nothing more to modify
// -------------------------------------

/**
 * Works out the output base names for the left and right half of every scan.
 *
 * In booklet mode ($sequential false) each scan pairs the next low page
 * number with the next high page number, and the low page alternates between
 * the right half (even scan index) and the left half (odd scan index).
 * In sequential mode every scan holds two consecutive pages, left then right.
 * Scans with an index below $offset get the special low name
 * "page_0" + zeros + "1" instead of a running page number.
 *
 * Numbers are zero-padded to the width of the highest page number
 * (2 * $count), so the names sort correctly for any number of scans.
 *
 * @param int  $count      number of input scans
 * @param int  $offset     number of leading scans before the numbered page 1
 * @param bool $sequential true for consecutive pages, false for booklet order
 * @return array one array('left' => name, 'right' => name) per scan index
 */
function split_page_names($count, $offset, $sequential)
{
    $width = strlen((string) ($count * 2));
    $names = array();
    $lowpage = 1;
    $highpage = $sequential ? 2 : ($count * 2) - $offset;

    for ($k = 0; $k < $count; $k++) {
        $odd = ($k % 2 != 0);
        $high = "page_" . str_pad((string) $highpage, $width, "0", STR_PAD_LEFT);
        if ($offset > $k) {
            // leading zeros for pages prior the numbered page 1
            $low = "page_0" . str_repeat("0", $offset - $k) . "1";
        } else {
            $low = "page_" . str_pad((string) $lowpage, $width, "0", STR_PAD_LEFT);
        }

        if ($sequential || $odd) {
            $names[] = array('left' => $low, 'right' => $high);
        } else {
            $names[] = array('left' => $high, 'right' => $low);
        }

        if ($sequential) {
            $lowpage += 2;
            $highpage += 2;
        } else {
            // the low counter only advances once the numbered pages have started
            if ($offset <= $k) {
                $lowpage++;
            }
            $highpage--;
        }
    }
    return $names;
}

/**
 * Splits one scan into a left and a right JPEG page.
 *
 * @param string $file       path of the scan to split
 * @param string $outdir     output directory, including the trailing slash
 * @param string $leftName   base name (no extension) for the left half
 * @param string $rightName  base name (no extension) for the right half
 * @param bool   $makeThumbs also write _thumb.jpg and _thumb64.jpg from the right half
 * @return void
 */
function split_spread($file, $outdir, $leftName, $rightName, $makeThumbs)
{
    // The image is read twice so each half can be cropped independently.
    $right = new Imagick();
    $right->readImage($file);
    $right->setImageFormat('jpeg');
    $right->setImageCompression(Imagick::COMPRESSION_JPEG);
    $right->setImageCompressionQuality(90);

    $left = new Imagick();
    $left->readImage($file);
    $left->setImageFormat('jpeg');
    $left->setImageCompression(Imagick::COMPRESSION_JPEG);
    $left->setImageCompressionQuality(90);

    $geometry = $right->getImageGeometry();
    $x = $geometry['width'];
    $y = $geometry['height'];
    $cropsize = (int) round($x / 2);
    // The right half starts where it ends exactly on the last column. The
    // former "$cropsize - 1" dropped the last pixel column on even widths.
    $geox = $x - $cropsize;

    // Cropping left side
    $left->setImagePage(0, 0, 0, 0);
    $left->cropImage($cropsize, $y, 0, 0);
    // Cropping right side
    $right->setImagePage(0, 0, 0, 0);
    $right->cropImage($cropsize, $y, $geox, 0);

    $right->writeImage($outdir . $rightName . ".jpg");
    $left->writeImage($outdir . $leftName . ".jpg");

    if ($makeThumbs) {
        // bestfit resize: at most 240 (then 64) pixels wide, at most 1024 high
        $right->resizeImage(240, 1024, Imagick::FILTER_LANCZOS, 1, true);
        $right->writeImage($outdir . "_thumb.jpg");
        $right->resizeImage(64, 1024, Imagick::FILTER_LANCZOS, 1, true);
        $right->writeImage($outdir . "_thumb64.jpg");
    }

    $right->clear();
    $right->destroy();
    $left->clear();
    $left->destroy();
}

if ($work == true)
{
    $files = glob($input . "*.[jJ][pP][gG]");
    if ($files === false)
    {
        // glob() reports an error with false; treat it as "nothing to do"
        $files = array();
    }
    $names = split_page_names(count($files), $offset, $sequential);

    foreach ($files as $k => $v)
    {
        $leftfilename = $names[$k]['left'];
        $rightfilename = $names[$k]['right'];

        print "Processing $k => $v \n";
        print "Saving $rightfilename \n";
        print "Saving $leftfilename \n";
        print "\n";

        // thumbnails are only made from the first scan
        split_spread($v, $output, $leftfilename, $rightfilename, $k == 0);
    }
}
?>

View File

@ -0,0 +1,17 @@
@echo off
REM Converts every TIFF in work\out to a PDF page and merges the pages
REM into _manual.pdf in the folder the script was started from.
setlocal EnableDelayedExpansion
set folder=%cd%
cd work\out
if not exist output mkdir output
for %%i in (*.tif) DO (
    convert "%%i" "output\%%~ni.pdf"
)
REM The page PDFs live in output, so the merge has to run there.
cd output
pdftk *.pdf cat output "%folder%\_manual.pdf"
REM Remove only the per page PDFs. The merged _manual.pdf is the result
REM of this script and is no longer deleted.
del *.pdf /q
pause

View File

@ -0,0 +1,3 @@
InfoBegin
InfoKey: Author
InfoValue: Scanned and Processed by Tomse @ http://retro-commodore.eu

View File

@ -0,0 +1,3 @@
InfoBegin
InfoKey: Author
InfoValue: Scanned and Processed by Tomse @ http://retro-commodore.eu, thanks to Thomas Roenne Viborg

View File

@ -0,0 +1,3 @@
InfoBegin
InfoKey: Author
InfoValue: Processed by demolition/Laxity @ http://retro-commodore.eu

View File

@ -0,0 +1,29 @@
#!/bin/sh
# OCR every page of a PDF with tesseract and collect the text in <pdf>.txt.
# Usage: <this script> file.pdf tesseract-language
# Prints the elapsed time in seconds when done.
if [ "$#" -lt 2 ]; then
    echo "usage: $0 file.pdf tesseract-language" >&2
    exit 1
fi
START=$(date +%s)
SOURCE=$1
# The language is kept in OCR_LANG, not LANG: LANG is the locale variable, and
# overwriting it hands an invalid locale to every command started below when
# it is exported (it normally is).
OCR_LANG=$2
# Number of pages, taken from the "Pages:" line of pdfinfo.
PAGES=$(/usr/local/libexec/xpdf/pdfinfo "$SOURCE" | awk '/^Pages:/ {print $2; exit}')
if [ -z "$PAGES" ]; then
    echo "could not read the page count of $SOURCE" >&2
    exit 1
fi
OUTPUT=$SOURCE
RESOLUTION=600 # set to the resolution the scanner used (the higher, the better)
touch "$OUTPUT.txt"
i=1
while [ "$i" -le "$PAGES" ]; do
    # convert numbers PDF pages from 0, hence i - 1
    convert -density "$RESOLUTION" -depth 8 "${SOURCE}[$((i - 1))]" "page$i.png"
    # tesseract writes its result to "$OUTPUT$i.txt"
    tesseract "page$i.png" "$OUTPUT$i" -l "$OCR_LANG"
    rm "page$i.png"
    cat "$OUTPUT$i.txt" >> "$OUTPUT.txt"
    rm "$OUTPUT$i.txt"
    i=$((i + 1))
done
END=$(date +%s)
echo $((END - START))

View File

@ -0,0 +1,29 @@
#!/bin/sh
# OCR every page of a PDF with tesseract and collect the text in <pdf>.txt.
# Usage: <this script> file.pdf tesseract-language
# Prints the elapsed time in seconds when done.
if [ "$#" -lt 2 ]; then
    echo "usage: $0 file.pdf tesseract-language" >&2
    exit 1
fi
START=$(date +%s)
SOURCE=$1
# The language is kept in OCR_LANG, not LANG: LANG is the locale variable, and
# overwriting it hands an invalid locale to every command started below when
# it is exported (it normally is).
OCR_LANG=$2
# Number of pages, taken from the "Pages:" line of pdfinfo.
PAGES=$(/usr/local/libexec/xpdf/pdfinfo "$SOURCE" | awk '/^Pages:/ {print $2; exit}')
if [ -z "$PAGES" ]; then
    echo "could not read the page count of $SOURCE" >&2
    exit 1
fi
OUTPUT=$SOURCE
RESOLUTION=600 # set to the resolution the scanner used (the higher, the better)
touch "$OUTPUT.txt"
i=1
while [ "$i" -le "$PAGES" ]; do
    # convert numbers PDF pages from 0, hence i - 1
    convert -density "$RESOLUTION" -depth 8 "${SOURCE}[$((i - 1))]" "page$i.png"
    # tesseract writes its result to "$OUTPUT$i.txt"
    tesseract "page$i.png" "$OUTPUT$i" -l "$OCR_LANG"
    rm "page$i.png"
    cat "$OUTPUT$i.txt" >> "$OUTPUT.txt"
    rm "$OUTPUT$i.txt"
    i=$((i + 1))
done
END=$(date +%s)
echo $((END - START))

View File

@ -0,0 +1,7 @@
@echo off
REM Reduces every PNG in the current folder to two colours with a
REM 75 percent threshold, resets the page geometry and writes the result
REM into the pages folder under the same file name.
REM The folder is only created when missing, and names are quoted so that
REM files with spaces work.
if not exist pages mkdir pages
for %%i in (*.png) DO (
    convert "%%i" -colors 2 -threshold 75%% +repage "pages\%%i"
)

View File

@ -0,0 +1,282 @@
<?php
/* This file is meant to run from the CLI/Shell
 * It splits scanned double-page PNG images down the middle and saves the
 * left and right halves as individual pages.
 * It creates 240px + 64px wide thumbs of the first page right side
 * Author Carsten Jensen aka Tomse @ http://awesome.commodore.me
 * Copyright (C) 2013 Carsten Jensen
 * This is distributed under GNU GPLv2
 */
/* Requirements:
 * php (cli/shell version) / or as apache site
 * imagemagick module
 */
/* Usage:
 * Run the file from the browser (apache setup)
 * or from cli : php -f split
 * Pages and thumb files (_thumb.jpg, _thumb64.jpg) are written to $output
 */

// Edit the path here where your files to be split are stored.
$input = "./input/";
// Edit the path here where your result files will be saved.
$output = "./output/";
// Counting from 0 where 0 is the front page, to the first "page 1"
$offset = 0;
// Sequential pages = true or if clip has been removed and it's the paper page
$sequential = false;
// Replace false with true to make the script work
$work = true;

// code is here.. nothing more to modify
// -------------------------------------

/**
 * Works out the output base names for the left and right half of every scan.
 *
 * In booklet mode ($sequential false) each scan pairs the next low page
 * number with the next high page number, and the low page alternates between
 * the right half (even scan index) and the left half (odd scan index).
 * In sequential mode every scan holds two consecutive pages, left then right.
 * Scans with an index below $offset get the special low name
 * "page_0" + zeros + "1" instead of a running page number.
 *
 * Numbers are zero-padded to the width of the highest page number
 * (2 * $count), so the names sort correctly for any number of scans.
 *
 * @param int  $count      number of input scans
 * @param int  $offset     number of leading scans before the numbered page 1
 * @param bool $sequential true for consecutive pages, false for booklet order
 * @return array one array('left' => name, 'right' => name) per scan index
 */
function split_page_names($count, $offset, $sequential)
{
    $width = strlen((string) ($count * 2));
    $names = array();
    $lowpage = 1;
    $highpage = $sequential ? 2 : ($count * 2) - $offset;

    for ($k = 0; $k < $count; $k++) {
        $odd = ($k % 2 != 0);
        $high = "page_" . str_pad((string) $highpage, $width, "0", STR_PAD_LEFT);
        if ($offset > $k) {
            // leading zeros for pages prior the numbered page 1
            $low = "page_0" . str_repeat("0", $offset - $k) . "1";
        } else {
            $low = "page_" . str_pad((string) $lowpage, $width, "0", STR_PAD_LEFT);
        }

        if ($sequential || $odd) {
            $names[] = array('left' => $low, 'right' => $high);
        } else {
            $names[] = array('left' => $high, 'right' => $low);
        }

        if ($sequential) {
            $lowpage += 2;
            $highpage += 2;
        } else {
            // the low counter only advances once the numbered pages have started
            if ($offset <= $k) {
                $lowpage++;
            }
            $highpage--;
        }
    }
    return $names;
}

/**
 * Splits one scan into a left and a right PNG page.
 *
 * @param string $file       path of the scan to split
 * @param string $outdir     output directory, including the trailing slash
 * @param string $leftName   base name (no extension) for the left half
 * @param string $rightName  base name (no extension) for the right half
 * @param bool   $makeThumbs also write _thumb.jpg and _thumb64.jpg from the right half
 * @return void
 */
function split_spread($file, $outdir, $leftName, $rightName, $makeThumbs)
{
    // The image is read twice so each half can be cropped independently.
    // No compression settings are applied for PNG output.
    $right = new Imagick();
    $right->readImage($file);
    $right->setImageFormat('png');

    $left = new Imagick();
    $left->readImage($file);
    $left->setImageFormat('png');

    $geometry = $right->getImageGeometry();
    $x = $geometry['width'];
    $y = $geometry['height'];
    $cropsize = (int) round($x / 2);
    // The right half starts where it ends exactly on the last column. The
    // former "$cropsize - 1" dropped the last pixel column on even widths.
    $geox = $x - $cropsize;

    // Cropping left side
    $left->setImagePage(0, 0, 0, 0);
    $left->cropImage($cropsize, $y, 0, 0);
    // Cropping right side
    $right->setImagePage(0, 0, 0, 0);
    $right->cropImage($cropsize, $y, $geox, 0);

    $right->writeImage($outdir . $rightName . ".png");
    $left->writeImage($outdir . $leftName . ".png");

    if ($makeThumbs) {
        // bestfit resize: at most 240 (then 64) pixels wide, at most 1024 high
        // NOTE(review): the image format is still 'png' here while the file
        // names end in .jpg - confirm which encoding writeImage produces.
        $right->resizeImage(240, 1024, Imagick::FILTER_LANCZOS, 1, true);
        $right->writeImage($outdir . "_thumb.jpg");
        $right->resizeImage(64, 1024, Imagick::FILTER_LANCZOS, 1, true);
        $right->writeImage($outdir . "_thumb64.jpg");
    }

    $right->clear();
    $right->destroy();
    $left->clear();
    $left->destroy();
}

if ($work == true)
{
    $files = glob($input . "*.[pP][nN][gG]");
    if ($files === false)
    {
        // glob() reports an error with false; treat it as "nothing to do"
        $files = array();
    }
    $names = split_page_names(count($files), $offset, $sequential);

    foreach ($files as $k => $v)
    {
        $leftfilename = $names[$k]['left'];
        $rightfilename = $names[$k]['right'];

        print "Processing $k => $v \n";
        print "Saving $rightfilename \n";
        print "Saving $leftfilename \n";
        print "\n";

        // thumbnails are only made from the first scan
        split_spread($v, $output, $leftfilename, $rightfilename, $k == 0);
    }
}
?>

View File

@ -0,0 +1,2 @@
REM Runs correct_pages.php from the current folder with the PHP CLI,
REM then waits for a key press so the output stays visible.
php.exe -f correct_pages.php
pause

View File

@ -0,0 +1,40 @@
<?php
// Sorts images which have first been scanned face up,
// then scanned face down using ADF simplex scanning on double
// sided pages.
//
// The first half of input/*.tif (the fronts) becomes pages 1, 3, 5, ...
// The second half (the backs, scanned in reverse order) becomes the even
// pages counting down from the last page.
// The files are copied to ./output/ as 0001.tif, 0002.tif, ...

/**
 * Returns the page number for every scan, in scan order.
 *
 * @param int $count total number of scans (fronts followed by backs)
 * @return array page numbers, index 0 belonging to the first scan
 */
function simplex_page_numbers($count)
{
    $pages = array();
    $split = $count / 2;
    $counter = 1;
    $pagenum = 1;
    // fronts: odd page numbers counting up
    while ($counter <= $split)
    {
        $pages[] = $pagenum;
        $counter++;
        $pagenum += 2;
    }
    // step back from the next odd number to the last (even) page
    $pagenum--;
    // backs: even page numbers counting down
    while ($counter <= $count)
    {
        $pages[] = $pagenum;
        $counter++;
        $pagenum -= 2;
    }
    return $pages;
}

$files = glob("input/*.tif");
if ($files === false)
{
    $files = array();
}
$count = count($files);
$outdir = "./output/";

if ($count == 0)
{
    echo "No .tif files found in input/\r\n";
}
elseif ($count % 2 != 0)
{
    // With an odd number of scans fronts and backs cannot be paired up, and
    // the numbering would end on a page 0.
    echo "Expected an even number of scans (fronts + backs), found " . $count . "\r\n";
}
else
{
    if (!is_dir($outdir))
    {
        mkdir($outdir);
    }
    foreach (simplex_page_numbers($count) as $index => $pagenum)
    {
        $target = $outdir . str_pad((string) $pagenum, 4, "0", STR_PAD_LEFT) . '.tif';
        echo ($index + 1) . " " . $files[$index] . " " . $pagenum . "\r\n";
        if (!copy($files[$index], $target))
        {
            echo "Failed to copy " . $files[$index] . " to " . $target . "\r\n";
        }
    }
}

View File

@ -0,0 +1,31 @@
url: .pdf
title:
lang: English
type:
company:
text: .ocr.txt
author:
isbn:
issn: 0283-3115
date:
pagecount:
resolution: 600
contributor: Rave/Triad
# true/false
photocopy: false
#Overall Quality 1-5
quality: 4
#Replace when possible true/false
replace: false
#ocr: _ocr.pdf
#url
scener: https://csdb.dk/scener/?id=857
# Commodore Part number
partno:
# time spent in minutes
timespent:

31
ScanScripts/_template.txt Normal file
View File

@ -0,0 +1,31 @@
url: .pdf
title:
lang: English
type:
company:
text: .ocr.txt
author:
isbn:
date:
pagecount:
resolution: 600
contributor:
# true/false
photocopy: false
#Overall Quality 1-5
quality: 4
#Replace when possible true/false
replace: false
#url
scener:
# Commodore Part number
partno:
# time spent in minutes
timespent:
# who has done the post working, write name or nick here
postprocessor: