Skip to content

Commit 6b5901e

Browse files
committed
Make the tests portable:
This patch addresses two problems with tests - Various tests rely on Dir.glob ordering. This is not reliable; this patch introduces assert_directory_contains to avoid Dir.glob ordering inconsistencies. - test_ocr_extraction relies on exact text match from tesseract. However, this differs with each version of tesseract. This patch instead checks that all required txt files exist and that they have reasonable size.
1 parent 18447cd commit 6b5901e

7 files changed

Lines changed: 17 additions & 122 deletions

File tree

test/fixtures/corrosion/corrosion_1.txt

Lines changed: 0 additions & 26 deletions
This file was deleted.

test/fixtures/corrosion/corrosion_2.txt

Lines changed: 0 additions & 41 deletions
This file was deleted.

test/fixtures/corrosion/corrosion_3.txt

Lines changed: 0 additions & 29 deletions
This file was deleted.

test/fixtures/corrosion/corrosion_4.txt

Lines changed: 0 additions & 18 deletions
This file was deleted.

test/test_helper.rb

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,13 @@ def teardown
1414
clear_output
1515
end
1616

17-
end
17+
def assert_directory_contains(dir, files)
18+
files_in_directory = Dir["#{dir}/*"]
19+
if files.kind_of?(Array)
20+
assert files_in_directory.length == files.length, "Expected directory to contain exactly #{files.length} files"
21+
else
22+
files = [files]
23+
end
24+
files.each { |f| assert files_in_directory.include?(File.join(dir, f)), "Expected directory #{dir} to contain file #{f}, but it contains #{files_in_directory.inspect}" }
25+
end
26+
end

test/unit/test_extract_images.rb

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ class ExtractImagesTest < Test::Unit::TestCase
44

55
def test_basic_image_extraction
66
Docsplit.extract_images('test/fixtures/obama_arts.pdf', :format => :gif, :size => "250x", :output => OUTPUT)
7-
assert Dir["#{OUTPUT}/*"] == ['test/output/obama_arts_1.gif', 'test/output/obama_arts_2.gif']
7+
assert_directory_contains(OUTPUT, ['obama_arts_1.gif', 'obama_arts_2.gif'])
88
end
99

1010
def test_image_formatting
@@ -36,14 +36,15 @@ def test_password_protected_extraction
3636

3737
def test_repeated_extraction_in_the_same_directory
3838
Docsplit.extract_images('test/fixtures/obama_arts.pdf', :format => :gif, :size => "250x", :output => OUTPUT)
39-
assert Dir["#{OUTPUT}/*"] == ['test/output/obama_arts_1.gif', 'test/output/obama_arts_2.gif']
39+
assert_directory_contains(OUTPUT, ['obama_arts_1.gif', 'obama_arts_2.gif'])
4040
Docsplit.extract_images('test/fixtures/obama_arts.pdf', :format => :gif, :size => "250x", :output => OUTPUT)
41-
assert Dir["#{OUTPUT}/*"] == ['test/output/obama_arts_1.gif', 'test/output/obama_arts_2.gif']
41+
assert_directory_contains(OUTPUT, ['obama_arts_1.gif', 'obama_arts_2.gif'])
4242
end
4343

4444
def test_name_escaping_while_extracting_images
4545
Docsplit.extract_images('test/fixtures/PDF file with spaces \'single\' and "double quotes".pdf', :format => :gif, :size => "250x", :output => OUTPUT)
46-
assert Dir["#{OUTPUT}/*"] == ['test/output/PDF file with spaces \'single\' and "double quotes"_1.gif', 'test/output/PDF file with spaces \'single\' and "double quotes"_2.gif']
46+
assert_directory_contains(OUTPUT, ['PDF file with spaces \'single\' and "double quotes"_1.gif',
47+
'PDF file with spaces \'single\' and "double quotes"_1.gif'])
4748
end
4849

4950
end

test/unit/test_extract_text.rb

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,10 @@ def test_unicode_extraction
3030

3131
def test_ocr_extraction
3232
Docsplit.extract_text('test/fixtures/corrosion.pdf', :pages => 'all', :output => OUTPUT)
33-
assert Dir["#{OUTPUT}/*.txt"].length == 4
3433
4.times do |i|
3534
file = "corrosion_#{i + 1}.txt"
36-
# File.open("test/fixtures/corrosion/#{file}", "w+") {|f| f.write(File.read("#{OUTPUT}/#{file}")) }
37-
assert File.read("#{OUTPUT}/#{file}") == File.read("test/fixtures/corrosion/#{file}")
35+
assert_directory_contains(OUTPUT, file)
36+
assert File.read(File.join(OUTPUT, file)).size > 1, "Expected that file with extracted text should have reasonable size"
3837
end
3938
end
4039

0 commit comments

Comments
 (0)