:: (view source code of pages2txt.bat as plain text)
@ECHO OFF
:: Extract preview.jpg from a .pages file with 7-Zip, run Tesseract OCR on
:: it, and save the recognized text next to the input file, using the input
:: file's name with a .txt extension.
:: Arguments: 1 = path of the .pages file, 2 = optional Tesseract language code
:: Exit code: 0 on success, 1 on any failure or when the help text is shown
:: NOTE(review): the arrow character preceding each [ in the ECHO, CHOICE and
:: SET /P texts presumably stands for the ESC character that starts an ANSI
:: color sequence - confirm against the original file.

:: Check command line
IF "%~1"=="" GOTO Syntax
IF NOT "%~3"=="" GOTO Syntax
ECHO.%* | FIND "?" >NUL && GOTO Syntax
IF /I NOT "%~x1"==".pages" GOTO Syntax
IF NOT EXIST "%~1" (
	ECHO ←[1;31mFile not found: "%~1"←[0m
	GOTO Syntax
)

:: Resolve the input and output paths while the caller's directory is still
:: the current one; a relative path expanded after the PUSHD below would be
:: resolved against the temporary directory instead.
SETLOCAL
SET "InputFile=%~f1"
SET "OutputBase=%~dpn1"

:: Make %TEMP% the working directory
PUSHD "%TEMP%"

:: Check if files already exist
IF EXIST preview.jpg (
	ECHO ←[1;33mFile preview.jpg already exists.
	REM The answer defaults to No after 10 seconds; errorlevel 1 means Yes
	CHOICE.EXE /D N /T 10 /M "Do you want to delete it?←[0;30m"
	IF ERRORLEVEL 1 IF NOT ERRORLEVEL 2 (
		ECHO ←[0m
		DEL preview.jpg
	) ELSE (
		ECHO ←[1;33mPlease move or rename preview.jpg and try again.←[0m
		POPD
		EXIT /B 1
	)
)
IF EXIST "%OutputBase%.txt" (
	ECHO ←[1;33mFile "%~n1.txt" already exists.
	CHOICE.EXE /D N /T 10 /M "Do you want to delete it?←[0;30m"
	IF ERRORLEVEL 1 IF NOT ERRORLEVEL 2 (
		ECHO ←[0m
		DEL "%OutputBase%.txt"
	) ELSE (
		ECHO ←[1;33mPlease move or rename "%~n1.txt" and try again.←[0m
		POPD
		EXIT /B 1
	)
)

:: Extract preview.jpg from .pages file, using any 7z.exe found in a
:: Program Files subdirectory whose name starts with 7
FOR /F "tokens=*" %%A IN ('DIR /AD /B "%ProgramFiles%\7*"') DO (
	FOR /F "tokens=*" %%B IN ('DIR /B /S "%ProgramFiles%\%%~A\7z.exe"') DO (
		"%%~B" e "%InputFile%" preview.jpg
	)
)
:: A missing preview.jpg is reported as a missing 7-Zip installation
IF NOT EXIST preview.jpg (
	ECHO ←[1;33mThis batch file requires 7zip, available at←[0m
	ECHO ←[1mhttps://7-zip.org/←[1;33m
	CHOICE /D N /T 10 /M "Do you want to download it?←[0;30m"
	IF ERRORLEVEL 1 IF NOT ERRORLEVEL 2 (
		START "" https://7-zip.org/
	)
	ECHO ←[0m
	POPD
	EXIT /B 1
)

:: Perform OCR on extracted preview.jpg and save the text with the same
:: name as the specified input file and a .txt extension
FOR /F "tokens=*" %%A IN ('DIR /AD /B "%ProgramFiles%\tesseract*"') DO (
	REM Check if language code is specified, and if it is valid
	IF NOT "%~2"=="" (
		IF NOT EXIST "%ProgramFiles%\%%~A\tessdata\%~2.*data*" (
			ECHO ←[1;31mUnsupported Tesseract language code: "%~2"
			REM List the installed codes only if more than one data file is found
			FOR /F %%B IN ('DIR /B "%ProgramFiles%\%%~A\tessdata\???.*data*" ^| FIND.EXE /C "data"') DO (
				IF %%B GTR 1 (
					ECHO ←[0mUse one of the following language codes:
					FOR %%C IN ("%ProgramFiles%\%%~A\tessdata\???.*data*") DO (
						IF /I NOT "%%~nC"=="osd" (
							SET /P "=←[1;32m%%~nC←[0m, " < NUL
						)
					)
					SET /P "=or omit the language code to use the default (←[1;32meng←[0m)" < NUL
				)
			)
			ECHO ←[0m
			REM Do not leave the temporary image behind on failure
			DEL preview.jpg
			POPD
			EXIT /B 1
		)
	)
	FOR /F "tokens=*" %%B IN ('DIR /B /S "%ProgramFiles%\%%~A\tesseract.exe"') DO (
		IF "%~2"=="" (
			"%%~B" preview.jpg "%OutputBase%" -l eng
		) ELSE (
			"%%~B" preview.jpg "%OutputBase%" -l %~2
		)
	)
)
:: A missing output file is reported as a missing Tesseract installation
IF NOT EXIST "%OutputBase%.txt" (
	ECHO ←[1;33mThis batch file requires Tesseract OCR, available at←[0m
	ECHO ←[1mhttps://github.com/UB-Mannheim/tesseract/wiki←[1;33m
	CHOICE /D N /T 10 /M "Do you want to download it?←[0;30m"
	IF ERRORLEVEL 1 IF NOT ERRORLEVEL 2 (
		START "" https://github.com/UB-Mannheim/tesseract/wiki
	)
	ECHO ←[0m
	REM Do not leave the temporary image behind on failure
	DEL preview.jpg
	POPD
	EXIT /B 1
)
ECHO ←[1;32mExtracted text successfully saved as "%OutputBase%.txt"←[0m

:: Delete temporary file
DEL preview.jpg

:: Open extracted text in Word, if available
IF EXIST "%ProgramFiles%\Microsoft Office\" (
	FOR /F "tokens=*" %%A IN ('DIR /B /S "%ProgramFiles%\Microsoft Office\winword.exe"') DO (
		START "" "%%~A" /t "%OutputBase%.txt"
	)
)

:: Restore working directory
POPD

:: Done
EXIT /B 0

:: Help routine: jumped to by GOTO Syntax when the command line is invalid
:: or the input file is missing. Prints the usage text and exits with
:: return code 1.
:Syntax
ECHO.
ECHO %~nx0, Version 1.00
ECHO Use OCR to extract text from a *.pages document.
ECHO.
ECHO Usage: ←[1;33m%~nx0 file.pages [ languagecode ]←[0m
ECHO.
ECHO Where: ←[1;33mfile.pages←[0m *.pages file from which text is to be extracted
ECHO ←[1;33mlanguagecode←[0m optional Tesseract 3 letter language code (default: eng)
ECHO.
ECHO Notes: This program requires 7-zip as well as Tesseract OCR.
ECHO The extracted text will be saved as plain text in the .pages file's
ECHO parent folder, using the specified file's name, and .txt extension.
ECHO If the specified file name contains multiple dots, the output file
ECHO name will be truncated at the first dot. If the output file already
ECHO exists, you will be prompted to delete it or abort.
ECHO If an invalid language code is specified, the batch file will abort
ECHO after showing a list of available language codes.
ECHO A temporary file preview.jpg will be created. If it already exists,
ECHO you will be prompted to delete it or abort.
ECHO If MS Word is available, the extracted text will be opened in Word.
ECHO The batch file's return code ("Errorlevel") will equal 0 if the
ECHO specified file was successfully converted, otherwise it will equal 1.
ECHO.
ECHO Written by Rob van der Woude
ECHO https://www.robvanderwoude.com
EXIT /B 1
:: page last modified: 2025-10-11; loaded in 0.0091 seconds