Skip to content

Commit b9db922

Browse files
Add: PDF to DOCX conversion function
1 parent a3e4272 commit b9db922

3 files changed

Lines changed: 93 additions & 0 deletions

File tree

convertTodocx.ps1

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
<#
.SYNOPSIS
Converts a PDF to DOCX by opening it in Microsoft Word and saving it in the
default DOCX format.

.PARAMETER inputPath
Path of the PDF file to convert. The script exits with code 1 if it does not
point to an existing file.

.PARAMETER outputPath
Path the DOCX file is written to.

.PARAMETER keepActive
The literal string 'true' leaves the Word instance running after the
conversion; any other value makes the script hide Word and quit it at the end.

.NOTES
Exit code: 0 on success, 1 when the input is missing or the conversion fails.
#>
param (
    [string]$inputPath,
    [string]$outputPath,
    [string]$keepActive
)

# Arguments arrive as strings from the command line; only the exact text
# 'true' (case-insensitive, per -eq on strings) enables keep-active mode.
$keepActiveBoolean = $keepActive -eq 'true'

# Check if the input file exists.
# -LiteralPath stops characters such as [ and ] in a file name from being
# treated as wildcards; the empty-string guard avoids a parameter-binding
# error from Test-Path; -PathType Leaf rejects directories.
if ([string]::IsNullOrEmpty($inputPath) -or -not (Test-Path -LiteralPath $inputPath -PathType Leaf)) {
    Write-Error "Input file does not exist: $inputPath"
    exit 1
}

# Load the Word interop assembly
Add-Type -AssemblyName "Microsoft.Office.Interop.Word"

# Create a new Word application instance
$word = New-Object -ComObject Word.Application

# Set visibility of the Word application
if (-not $keepActiveBoolean) {
    $word.Visible = $false
}

$exitCode = 0
$document = $null

try {
    # Open the PDF document
    $document = $word.Documents.Open($inputPath)

    # Save the document as DOCX
    $document.SaveAs([ref]$outputPath, [ref]16) # 16 represents the default DOCX format

    # Close the document
    $document.Close()
    $document = $null

    Write-Host "Conversion completed: $inputPath to $outputPath"
} catch {
    Write-Error "An error occurred during conversion: $_"
    # Write-Error alone leaves the process exit code at 0; record the failure
    # so the calling process can detect it.
    $exitCode = 1
} finally {
    # If the document is still open (the save or close failed), close it
    # without saving so it is not left behind. 0 = do not save changes.
    if ($null -ne $document) {
        try {
            $document.Close([ref]0)
        } catch {
            # Best effort only: the original failure has already been reported.
        }
    }

    # Quit the Word application if keepActive is false
    if (-not $keepActiveBoolean) {
        $word.Quit()
    }
}

exit $exitCode

convertTodocx.sh

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/bin/bash
#
# Convert a PDF to DOCX: extract the text with pdftotext (Poppler), then
# build the DOCX from that text with pandoc.
#
# Usage: convertTodocx.sh <input.pdf> [output.docx] [keepActive]
#   $1  input PDF path (required; must be an existing regular file)
#   $2  output DOCX path; when empty, the input path with a trailing ".pdf"
#       replaced by ".docx"
#   $3  keepActive; read but not used by this script
#
# Exit status: 0 on success, 1 on any failure. Diagnostics go to stderr.

inputFilePath=$1
outputFilePath=$2
# shellcheck disable=SC2034  # accepted as the third argument, unused below
keepActive=$3

# Check if input file exists
if [[ ! -f "$inputFilePath" ]]; then
  echo "Input file does not exist: $inputFilePath" >&2
  exit 1
fi

# Work inside a private temporary directory. "$(mktemp).txt" would create one
# file that is never removed and then write to a different, unreserved name.
if ! tempDir=$(mktemp -d); then
  echo "Failed to create temporary directory" >&2
  exit 1
fi
# Remove the scratch directory on every exit path, including failures.
trap 'rm -rf -- "$tempDir"' EXIT

# Keep the .txt suffix so pandoc sees the same file extension as before.
tempTextFilePath="$tempDir/extracted.txt"

# Extract text from PDF using pdftotext (requires Poppler installed)
if ! pdftotext -layout "$inputFilePath" "$tempTextFilePath" \
  || [[ ! -f "$tempTextFilePath" ]]; then
  echo "Failed to extract text from PDF: $inputFilePath" >&2
  exit 1
fi

# Convert extracted text to DOCX using pandoc (requires pandoc installed)
if [[ -z "$outputFilePath" ]]; then
  outputFilePath="${inputFilePath%.pdf}.docx"
fi

# Report success only when pandoc actually succeeded.
if ! pandoc "$tempTextFilePath" -o "$outputFilePath"; then
  echo "Failed to convert extracted text to DOCX: $outputFilePath" >&2
  exit 1
fi

echo "Conversion complete: $outputFilePath"

index.js

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,22 @@ function windows(inputPath, outputPath, keepActive) {
2222

2323
execSync(command);
2424
}
25+
26+
// PDF to DOCX conversion for Windows (runs convertTodocx.ps1 through PowerShell)
27+
/**
 * Convert a PDF to DOCX on Windows by running convertTodocx.ps1 with PowerShell.
 *
 * @param {string} inputPath - Path to the source PDF. When falsy, an error is
 *   logged and the function returns without doing anything.
 * @param {string} [outputPath] - Destination DOCX path. When omitted, the
 *   resolved input path with its extension replaced by `.docx` is used.
 * @param {boolean} [keepActive=false] - Forwarded to the script as the string
 *   'true' or 'false'.
 * @returns {void}
 * @throws {Error} Whatever `execFileSync` throws, e.g. when PowerShell cannot
 *   be started or exits with a non-zero status.
 */
function windowsPdfToDocx(inputPath, outputPath, keepActive = false) {
  if (!inputPath) {
    console.error('Input path is not provided.');
    return;
  }

  // Required here so this function does not depend on which child_process
  // members are destructured at the top of the file.
  const { execFileSync } = require('child_process');

  const scriptPath = path.resolve(__dirname, 'convertTodocx.ps1');
  const inputFilePath = path.resolve(inputPath);

  // path.resolve() throws a TypeError on undefined, so derive a default
  // output path next to the input file when none is given.
  const inputParts = path.parse(inputFilePath);
  const outputFilePath = outputPath
    ? path.resolve(outputPath)
    : path.join(inputParts.dir, `${inputParts.name}.docx`);

  // Pass arguments as an array: no shell is involved, so paths containing
  // quotes or shell metacharacters cannot break or alter the command.
  execFileSync('powershell', [
    '-File',
    scriptPath,
    inputFilePath,
    outputFilePath,
    keepActive ? 'true' : 'false',
  ]);
}
2541

2642

2743
/**
@@ -111,6 +127,7 @@ module.exports = {
111127
convert,
112128
resolvePaths,
113129
windows,
130+
windowsPdfToDocx,
114131
macos,
115132
packageVersion,
116133
};

0 commit comments

Comments
 (0)