ai-forever
diff --git a/‎DPF/dataloaders/images/raw_dataset.py‎
Lines changed: 10 additions & 1 deletion b/‎DPF/dataloaders/images/raw_dataset.py‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎DPF/dataloaders/images/shards_dataset.py‎
Lines changed: 10 additions & 1 deletion b/‎DPF/dataloaders/images/shards_dataset.py‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎DPF/dataloaders/images/universal_dataloader.py‎
Lines changed: 3 additions & 0 deletions b/‎DPF/dataloaders/images/universal_dataloader.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎DPF/filters/images/ocr_filter.py‎
Lines changed: 148 additions & 0 deletions b/‎DPF/filters/images/ocr_filter.py‎
Lines changed: 148 additions & 0 deletions
diff --git a/‎DPF/filters/images/ocr_model/__init__.py‎ b/‎DPF/filters/images/ocr_model/__init__.py‎
diff --git a/‎DPF/filters/images/ocr_model/dataset.py‎
Lines changed: 100 additions & 0 deletions b/‎DPF/filters/images/ocr_model/dataset.py‎
Lines changed: 100 additions & 0 deletions
@@ -17,6 +17,7 @@ def __init__(
         df: pd.DataFrame,
         cols_to_return: Optional[List[str]] = None,
         preprocess_f=default_preprocess,
+        return_none_on_error: bool = False
     ):
         super(RawDataset).__init__()
         if cols_to_return is None:
@@ -25,6 +26,7 @@ def __init__(
         self.columns = ["image_path"] + cols_to_return
         self.data_to_iterate = df[self.columns].values
         self.preprocess_f = preprocess_f
+        self.return_none_on_error = return_none_on_error
 
     def __len__(self):
         return len(self.data_to_iterate)
@@ -34,5 +36,12 @@ def __getitem__(self, idx):
             self.columns[c]: item for c, item in enumerate(self.data_to_iterate[idx])
         }
         image_path = data["image_path"]
-        image_bytes = self.filesystem.read_file(image_path, binary=True).getvalue()
+        if self.return_none_on_error:
+            try:
+                image_bytes = self.filesystem.read_file(image_path, binary=True).getvalue()
+            except Exception as err:
+                img_bytes = None
+        else:
+            image_bytes = self.filesystem.read_file(image_path, binary=True).getvalue()
+
         return self.preprocess_f(image_bytes, data)
@@ -21,6 +21,7 @@ def __init__(
         df: pd.DataFrame,
         cols_to_return: Optional[List[str]] = None,
         preprocess_f=default_preprocess,
+        return_none_on_error: bool = False
     ):
         super(ShardsDataset).__init__()
         if cols_to_return is None:
@@ -32,6 +33,7 @@ def __init__(
         )
         self.total_samples = len(df)
         self.preprocess_f = preprocess_f
+        self.return_none_on_error = return_none_on_error
 
     def __len__(self):
         return self.total_samples
@@ -49,6 +51,13 @@ def __iter__(self):
             for data in data_all:
                 data = {self.columns[i]: item for i, item in enumerate(data)}
                 filename = os.path.basename(data["image_path"])
-                img_bytes = tar.extractfile(filename).read()
+                if self.return_none_on_error:
+                    try:
+                        img_bytes = tar.extractfile(filename).read()
+                    except Exception as err:
+                        img_bytes = None
+                else:
+                    img_bytes = tar.extractfile(filename).read()
+                    
                 yield self.preprocess_f(img_bytes, data)
             tar.close()
@@ -24,6 +24,7 @@ def __init__(
         df,
         cols_to_return=None,
         preprocess_f=default_preprocess,
+        return_none_on_error: bool = False,
         **dataloader_kwargs,
     ):
         if cols_to_return is None:
@@ -36,6 +37,7 @@ def __init__(
         ), "Unknown data format in dataloader"
         self.cols_to_return = cols_to_return
         self.preprocess_f = preprocess_f
+        self.return_none_on_error = return_none_on_error
         self.dataloader_kwargs = dataloader_kwargs
         self.len = None
 
@@ -47,6 +49,7 @@ def test(self):
                 self.df[self.df["data_format"] == data_format],
                 self.cols_to_return,
                 self.preprocess_f,
+                self.return_none_on_error
             )
             print(f'"{data_format}" dataset created')
             dataloader = DataLoader(dataset, **self.dataloader_kwargs)
 
@@ -0,0 +1,148 @@
+from typing import Optional
+import os
+import torch
+from torch import nn
+import numpy as np
+import json
+
+try:
+    from torch.utils.data.dataloader import default_collate
+except ImportError:
+    from torch.utils.data import default_collate
+from torchvision import models, transforms
+from huggingface_hub import hf_hub_url, cached_download
+
+from DPF.filters.utils import FP16Module, identical_collate_fn
+from DPF.utils import read_image_rgb_from_bytes
+from .img_filter import ImageFilter
+
+from .ocr_model.utils import AttnLabelConverter
+from .ocr_model.dataset import AlignCollate
+from .ocr_model.model import Model
+
+
+class Options:
+    """Empty attribute container; OCRFilter fills it with model settings."""
+    pass
+
+
+class OCRFilter(ImageFilter):
+
+    def __init__(
+        self,
+        weights_path: str,
+        model_name: Optional[str] = None,
+        device: str = "cuda:0",
+        workers: int = 16,
+        pad: int = 5,
+        pbar: bool = True,
+    ):
+        super().__init__(pbar)
+
+        self.num_workers = workers
+        self.batch_size = 1
+        self.device = device
+
+        self.weights_path = weights_path
+        self.model_name = model_name or os.path.basename(self.weights_path).split('.')[0]
+        # load model
+        self.opt = Options()
+        self.opt.workers = 4
+        self.opt.batch_size = 192
+        self.opt.batch_max_length = 32
+        self.opt.imgH = 32
+        self.opt.imgW = 100
+        self.opt.rgb = False
+        self.opt.character = '0123456789abcdefghijklmnopqrstuvwxyz'
+        self.opt.sensitive = False
+        self.opt.PAD = False
+        self.opt.Transformation = "TPS"
+        self.opt.FeatureExtraction = "ResNet"
+        self.opt.SequenceModeling = "BiLSTM"
+        self.opt.Prediction = "Attn"
+        self.opt.num_fiducial = 20
+        self.opt.input_channel = 1
+        self.opt.output_channel = 512
+        self.opt.hidden_size = 256
+        
+        self.converter = AttnLabelConverter(self.opt.character)
+        self.opt.num_class = len(self.converter.character)
+        
+        self.model = Model(self.opt)
+        weights = torch.load(self.weights_path)
+        keys = list(weights.keys())
+        for key in keys:
+            weights[key.lstrip('module.')] = weights[key]
+            weights.pop(key)
+
+        self.model.load_state_dict(weights)
+        self.model.to(self.device)
+        self.model.eval()
+        
+        self.AlignCollate = AlignCollate(imgH=self.opt.imgH, imgW=self.opt.imgW, keep_ratio_with_pad=self.opt.PAD)
+        #
+        self.text_box_col = "text_boxes"
+        self.ocr_col = f"OCR_{self.model_name}"
+        
+        self.schema = ["image_path", self.ocr_col]
+        self.dataloader_kwargs = {
+            "num_workers": self.num_workers,
+            "batch_size": self.batch_size,
+            "preprocess_f": self.preprocess,
+            "collate_fn": lambda x: x,
+            "drop_last": False,
+            "cols_to_return": [self.text_box_col],
+        }
+
+    def preprocess(self, img_bytes: bytes, data: dict):
+        image_path = data["image_path"]
+        boxes = json.loads(data[self.text_box_col])
+        pil_img = read_image_rgb_from_bytes(img_bytes).convert('L')
+        return image_path, pil_img, boxes
+
+    def process_batch(self, batch) -> dict:
+        df_batch_labels = self._generate_dict_from_schema()
+        image_path, pil_img, boxes = batch[0]
+        w, h = pil_img.size
+        
+        input_data = []
+        for box in boxes:
+            left = max(box[0][0], 0)
+            upper = max(box[0][1], 0)
+            right = min(box[1][0], w)
+            lower = min(box[1][1], h)
+            if upper > lower:
+                upper, lower = lower, upper
+            if left > right:
+                left, right = right, left
+                
+            crop = pil_img.crop(
+                (left, upper, right, lower)
+            )
+            input_data.append((crop, ''))
+            
+        if len(input_data) == 0:
+            df_batch_labels[self.ocr_col].append("[]")
+            df_batch_labels["image_path"].append(image_path)
+            return df_batch_labels
+        
+        data_preproc = self.AlignCollate(input_data)
+        image_tensors = data_preproc[0]
+        
+        batch_size = image_tensors.size(0)
+        image = image_tensors.to(self.device)
+        length_for_pred = torch.IntTensor([self.opt.batch_max_length] * batch_size).to(self.device)
+        text_for_pred = torch.LongTensor(batch_size, self.opt.batch_max_length + 1).fill_(0).to(self.device)
+
+        preds = self.model(image, text_for_pred, is_train=False)
+        _, preds_index = preds.max(2)
+        preds_str = self.converter.decode(preds_index, length_for_pred)
+        preds_str = [s.replace('[s]', '') for s in preds_str]
+        
+        res = []
+        for box, prediction in zip(boxes, preds_str):
+            res.append((box, prediction))
+            
+        df_batch_labels[self.ocr_col].append(json.dumps(res))
+        df_batch_labels["image_path"].append(image_path)
+
+        return df_batch_labels
@@ -0,0 +1,100 @@
+import os
+import sys
+import re
+import six
+import math
+import lmdb
+import torch
+
+from natsort import natsorted
+from PIL import Image
+import numpy as np
+from torch.utils.data import Dataset, ConcatDataset, Subset
+from torch._utils import _accumulate
+import torchvision.transforms as transforms
+
+
+class ResizeNormalize(object):
+
+    def __init__(self, size, interpolation=Image.BICUBIC):
+        self.size = size
+        self.interpolation = interpolation
+        self.toTensor = transforms.ToTensor()
+
+    def __call__(self, img):
+        img = img.resize(self.size, self.interpolation)
+        img = self.toTensor(img)
+        img.sub_(0.5).div_(0.5)
+        return img
+
+
+class NormalizePAD(object):
+
+    def __init__(self, max_size, PAD_type='right'):
+        self.toTensor = transforms.ToTensor()
+        self.max_size = max_size
+        self.max_width_half = math.floor(max_size[2] / 2)
+        self.PAD_type = PAD_type
+
+    def __call__(self, img):
+        img = self.toTensor(img)
+        img.sub_(0.5).div_(0.5)
+        c, h, w = img.size()
+        Pad_img = torch.FloatTensor(*self.max_size).fill_(0)
+        Pad_img[:, :, :w] = img  # right pad
+        if self.max_size[2] != w:  # add border Pad
+            Pad_img[:, :, w:] = img[:, :, w - 1].unsqueeze(2).expand(c, h, self.max_size[2] - w)
+
+        return Pad_img
+
+
+class AlignCollate(object):
+
+    def __init__(self, imgH=32, imgW=100, keep_ratio_with_pad=False):
+        self.imgH = imgH
+        self.imgW = imgW
+        self.keep_ratio_with_pad = keep_ratio_with_pad
+
+    def __call__(self, batch):
+        batch = filter(lambda x: x is not None, batch)
+        images, labels = zip(*batch)
+
+        if self.keep_ratio_with_pad:  # same concept with 'Rosetta' paper
+            resized_max_w = self.imgW
+            input_channel = 3 if images[0].mode == 'RGB' else 1
+            transform = NormalizePAD((input_channel, self.imgH, resized_max_w))
+
+            resized_images = []
+            for image in images:
+                w, h = image.size
+                ratio = w / float(h)
+                if math.ceil(self.imgH * ratio) > self.imgW:
+                    resized_w = self.imgW
+                else:
+                    resized_w = math.ceil(self.imgH * ratio)
+
+                resized_image = image.resize((resized_w, self.imgH), Image.BICUBIC)
+                resized_images.append(transform(resized_image))
+                # resized_image.save('./image_test/%d_test.jpg' % w)
+
+            image_tensors = torch.cat([t.unsqueeze(0) for t in resized_images], 0)
+
+        else:
+            transform = ResizeNormalize((self.imgW, self.imgH))
+            image_tensors = [transform(image) for image in images]
+            image_tensors = torch.cat([t.unsqueeze(0) for t in image_tensors], 0)
+
+        return image_tensors, labels
+
+
+def tensor2im(image_tensor, imtype=np.uint8):
+    image_numpy = image_tensor.cpu().float().numpy()
+    if image_numpy.shape[0] == 1:
+        image_numpy = np.tile(image_numpy, (3, 1, 1))
+    image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
+    return image_numpy.astype(imtype)
+
+
+def save_image(image_numpy, image_path):
+    image_pil = Image.fromarray(image_numpy)
+    image_pil.save(image_path)