Add Layout Modeling

lolipopshock · lolipopshock · commit 688d10f829c0 · 2020-06-23T16:33:22.000-04:00
* Add Detectron2LayoutModel
* Add tests for Detectron2LayoutModel
* Update setup requirements
diff --git a/docs/api_doc/models.rst b/docs/api_doc/models.rst
@@ -0,0 +1,8 @@
+Deep Learning Layout Model
+================================
+
+
+.. autoclass:: layoutparser.models.Detectron2LayoutModel
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/index.rst b/docs/index.rst
@@ -20,6 +20,7 @@ Welcome to Layout Parser's documentation!
    api_doc/elements
    api_doc/ocr
    api_doc/visualization
+   api_doc/models
 
 Indices and tables
 ==================
diff --git a/setup.py b/setup.py
@@ -22,7 +22,11 @@
         "numpy", 
         "opencv-python",
         "pandas",
-        "pillow"
+        "pillow",
+        "pyyaml>=5.1",
+        "torch==1.4",
+        "torchvision==0.5",
+        "detectron2 @ git+https://github.com/facebookresearch/detectron2.git@v0.1.3#egg=detectron2"
       ],
       extras_require={
         "GCV": ['google-cloud-vision'], 
diff --git a/src/layoutparser/models.py b/src/layoutparser/models.py
@@ -0,0 +1,91 @@
+from abc import ABC, abstractmethod
+import os
+import torch
+from detectron2.config import get_cfg
+from detectron2.engine import DefaultPredictor
+from .elements import *
+
+
+class BaseLayoutModel(ABC):
+    """Abstract interface that every layout detection model implements."""
+
+    @abstractmethod
+    def detect(self, image):
+        """Detect the layout of ``image``; subclasses must override this."""
+
+
+class Detectron2LayoutModel(BaseLayoutModel):
+    """Layout detection model backed by a Detectron2 ``DefaultPredictor``.
+
+    Args:
+        config_name: Path of the Detectron2 config file merged into the
+            default config.
+        model_path: Optional value for ``cfg.MODEL.WEIGHTS``; when ``None``
+            the value coming from the merged config is kept.
+        label_map: Optional mapping indexed by the predicted class id that
+            yields the block type; when ``None`` the raw class id is used.
+        extra_config: Optional list passed to ``cfg.merge_from_list`` after
+            the config file is merged; ``None`` means no extra options.
+    """
+
+    def __init__(self, config_name,
+                       model_path = None,
+                       label_map  = None,
+                       extra_config = None):
+
+        # A None sentinel replaces the former mutable ``[]`` default so no
+        # list object is shared between calls.
+        if extra_config is None:
+            extra_config = []
+
+        cfg = get_cfg()
+        cfg.merge_from_file(config_name)
+        cfg.merge_from_list(extra_config)
+
+        if model_path is not None:
+            cfg.MODEL.WEIGHTS = model_path
+        # Assigned after both merges, so it overrides any configured device.
+        cfg.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.cfg = cfg
+
+        self.label_map = label_map
+        self._create_model()
+
+    def gather_output(self, outputs):
+        """Convert raw predictor ``outputs`` into a ``Layout`` of ``TextBlock``.
+
+        Reads ``scores``, ``pred_boxes`` and ``pred_classes`` from
+        ``outputs['instances']`` after moving it to the CPU.
+        """
+
+        instance_pred = outputs['instances'].to("cpu")
+
+        layout = Layout()
+        scores = instance_pred.scores.tolist()
+        boxes  = instance_pred.pred_boxes.tensor.tolist()
+        labels = instance_pred.pred_classes.tolist()
+
+        for score, box, label in zip(scores, boxes, labels):
+            x_1, y_1, x_2, y_2 = box
+
+            if self.label_map is not None:
+                label = self.label_map[label]
+
+            cur_block = TextBlock(
+                    Rectangle(x_1, y_1, x_2, y_2),
+                    type=label,
+                    score=score)
+            layout.append(cur_block)
+
+        return layout
+
+    def _create_model(self):
+        """Build the predictor from ``self.cfg`` and store it on ``self.model``."""
+        self.model = DefaultPredictor(self.cfg)
+
+    def detect(self, image):
+        """Run the predictor on ``image`` and return the detected ``Layout``."""
+
+        outputs = self.model(image)
+        layout  = self.gather_output(outputs)
+        return layout
diff --git a/tests/source/config.yml b/tests/source/config.yml
diff --git a/tests/test_model.py b/tests/test_model.py