mindee · ianardee · Jun 22, 2026 · Jun 22, 2026
diff --git a/mindee/image/__init__.py b/mindee/image/__init__.py
@@ -1,3 +0,0 @@
-from mindee.image.image_compressor import compress_image
-
-__all__ = ["compress_image"]

diff --git a/mindee/image/extracted_image.py b/mindee/image/extracted_image.py
@@ -1,14 +1,12 @@
 from __future__ import annotations
 
-import io
 from pathlib import Path
-from typing import Any
+from typing import Any, BinaryIO
 
 from mindee.dependencies.checkers import PILLOW_AVAILABLE
 from mindee.dependencies.decorators import requires_pillow
 from mindee.error.mindee_error import MindeeError
-from mindee.input.file_input import FileInput
-from mindee.input.local_input_source import LocalInputSource
+from mindee.input.bytes_input import BytesInput
 from mindee.logger import logger
 
 if PILLOW_AVAILABLE:
@@ -21,6 +19,7 @@
 class ExtractedImage:
     """Generic class for image extraction."""
 
+    buffer: BinaryIO
     _page_id: int
     """Id of the page the image was extracted from."""
     _element_id: int
@@ -29,70 +28,76 @@ class ExtractedImage:
     """Name of the file the image was extracted from."""
 
     def __init__(
-        self, input_source: LocalInputSource, page_id: int, element_id: int
+        self,
+        img_byte_stream: BinaryIO,
+        orig_filename: str,
+        orig_extension: str,
+        page_id: int,
+        element_id: int,
     ) -> None:
         """
         Initialize the ExtractedImage with a buffer and an internal file name.
 
-        :param input_source: Local source for input.
+        :param img_byte_stream: Seekable binary stream holding the image bytes.
+            It is stored as-is (not copied) and rewound to its start.
+        :param orig_filename: Name of the file the image was extracted from.
+        :param orig_extension: Extension of the original file, with or without
+            a leading dot. An extension ending in "pdf" is replaced by "jpg".
         :param page_id: ID of the page the element was found on.
         :param element_id: ID of the element in a page.
         """
-        self.buffer = io.BytesIO(input_source.file_object.read())
-        self.buffer.name = input_source.filename
-        self.filename = input_source.filename
-        if input_source.is_pdf():
+        self.buffer = img_byte_stream
+        self.filename = orig_filename
+
+        if orig_extension.lower().endswith("pdf"):
             extension = "jpg"
         else:
-            extension = Path(input_source.filename).resolve().suffix
+            # Drop any leading dot: one is already added when building the name.
+            extension = orig_extension.lower().lstrip(".")
         self.buffer.seek(0)
         pg_number = str(page_id).zfill(3)
         elem_number = str(element_id).zfill(3)
         self.internal_file_name = (
-            f"{input_source.filename}_page{pg_number}-{elem_number}.{extension}"
+            f"{orig_filename}_page{pg_number}-{elem_number}.{extension}"
         )
         self._page_id = page_id
         self._element_id = 0 if element_id is None else element_id
 
     @requires_pillow
-    def save_to_file(self, output_path: Path | str, file_format: str | None = None):
+    def save_to_file(self, output_path: Path | str):
         """
         Saves the document to a file.
 
-        :param output_path: Path to save the file to.
-        :param file_format: Optional PIL-compatible format for the file. Inferred from file extension if not provided.
+        :param output_path: Path to save the file to; format is inferred from its suffix.
         :raises MindeeError: If an invalid path or filename is provided.
         """
         try:
             resolved_path = Path(output_path).resolve()
-            if not file_format and len(resolved_path.suffix) < 1:
+            if not resolved_path.suffix:  # Pillow picks the format from the suffix
                 raise ValueError("Invalid file format.")
             self.buffer.seek(0)
             image = Image.open(self.buffer)
-            if file_format:
-                image.save(resolved_path, format=file_format)
-            else:
-                image.save(resolved_path)
+            image.save(resolved_path)
             logger.info("File saved successfully to '%s'.", resolved_path)
         except TypeError as e:
             raise MindeeError("Invalid path/filename provided.") from e
         except Exception as e:
             print(e)
             raise MindeeError(f"Could not save file {Path(output_path).name}.") from e
 
-    def as_input_source(self) -> FileInput:
+    def as_input_source(self) -> BytesInput:
         """
-        Return the file as a Mindee-compatible BufferInput source.
+        Return the file as a Mindee-compatible BytesInput source.
 
-        :returns: A BufferInput source.
+        :returns: A BytesInput source named after the internal file name.
         """
         self.buffer.seek(0)
-        return FileInput(self.buffer)
+        return BytesInput(self.buffer.read(), self.internal_file_name)
 
     @property
     def page_id(self):
         """
-        ID of the page the receipt was found on.
+        ID of the page the image was found on.
 
         :return: A valid page ID.
         """

diff --git a/mindee/image/extracted_images.py b/mindee/image/extracted_images.py
@@ -0,0 +1,5 @@
+from mindee.image.extracted_image import ExtractedImage
+
+
+class ExtractedImages(list[ExtractedImage]):
+    """List of extracted images, each item being an ``ExtractedImage``."""
diff --git a/mindee/image/image_extractor.py b/mindee/image/image_extractor.py
@@ -10,7 +10,6 @@
 from mindee.geometry.point import Point
 from mindee.geometry.polygon import Polygon, get_min_max_x, get_min_max_y
 from mindee.image.extracted_image import ExtractedImage
-from mindee.input.bytes_input import BytesInput
 from mindee.input.local_input_source import LocalInputSource
 
 if PYPDFIUM2_AVAILABLE:
@@ -66,7 +65,7 @@ def extract_image_from_polygon(
     width: float,
     height: float,
     file_format: str,
-) -> bytes:
+) -> BinaryIO:
     """
     Crops the image from the given polygon.
 
@@ -91,7 +90,7 @@ def extract_image_from_polygon(
 
 
 @requires_pillow
-def save_image_to_buffer(image: Image.Image, file_format: str) -> bytes:
+def save_image_to_buffer(image: Image.Image, file_format: str) -> BinaryIO:
     """
     Saves an image as a buffer.
 
@@ -102,7 +101,7 @@ def save_image_to_buffer(image: Image.Image, file_format: str) -> bytes:
     buffer = io.BytesIO()
     image.save(buffer, format=file_format)
     buffer.seek(0)
-    return buffer.read()
+    return buffer
 
 
 @requires_pillow
@@ -159,10 +158,9 @@ def extract_multiple_images_from_source(
         )
         extracted_elements.append(
             ExtractedImage(
-                BytesInput(
-                    image_data,
-                    f"{input_source.filename}_page{page_id + 1}-{element_id}.{file_extension}",
-                ),
+                image_data,
+                input_source.filename,
+                file_extension,
                 page_id,
                 element_id,
             )

diff --git a/mindee/input/local_input_source.py b/mindee/input/local_input_source.py
@@ -10,7 +10,7 @@
 from mindee.dependencies.checkers import PYPDFIUM2_AVAILABLE
 from mindee.error.mimetype_error import MimeTypeError
 from mindee.error.mindee_error import MindeeError, MindeeSourceError
-from mindee.image import compress_image
+from mindee.image.image_compressor import compress_image
 from mindee.input.page_options import KEEP_ONLY, REMOVE, PageOptions
 from mindee.logger import logger
 from mindee.pdf.pdf_compressor import compress_pdf

diff --git a/mindee/pdf/extracted_pdf.py b/mindee/pdf/extracted_pdf.py
@@ -18,18 +18,18 @@
 class ExtractedPDF:
     """An extracted sub-Pdf."""
 
-    pdf_bytes: BinaryIO
+    buffer: BinaryIO
     filename: str
 
-    def __init__(self, pdf_bytes: BinaryIO, filename: str):
-        self.pdf_bytes = pdf_bytes
+    def __init__(self, pdf_byte_stream: BinaryIO, filename: str) -> None:
+        self.buffer = pdf_byte_stream  # the stream is stored as-is, not copied
         self.filename = filename
 
     @requires_pypdfium2
     def get_page_count(self) -> int:
         """Get the number of pages in the PDF file."""
         try:
-            pdf = pdfium.PdfDocument(self.pdf_bytes)
+            pdf = pdfium.PdfDocument(self.buffer)
             return len(pdf)
         except Exception as e:
             raise MindeeError(
@@ -50,11 +50,11 @@ def save_to_file(self, output_path: Path | str):
             raise MindeeError("Invalid save path provided {}.")
         if out_path.suffix.lower() != "pdf":
             out_path = out_path.parent / (out_path.stem + "." + "pdf")
-        self.pdf_bytes.seek(0)
+        self.buffer.seek(0)
         with open(out_path, "wb") as out_file:
-            out_file.write(self.pdf_bytes.read())
+            out_file.write(self.buffer.read())
 
     def as_input_source(self) -> BytesInput:
         """Returns the current PDF object as a usable BytesInput source."""
-        self.pdf_bytes.seek(0)
-        return BytesInput(self.pdf_bytes.read(), self.filename)
+        self.buffer.seek(0)  # rewind so read() returns the stream from its start
+        return BytesInput(self.buffer.read(), self.filename)
diff --git a/mindee/pdf/extracted_pdfs.py b/mindee/pdf/extracted_pdfs.py
@@ -0,0 +1,5 @@
+from mindee.pdf.extracted_pdf import ExtractedPDF
+
+
+class ExtractedPDFs(list[ExtractedPDF]):
+    """List of extracted PDFs, each item being an ``ExtractedPDF``."""
diff --git a/mindee/v2/file_operations/crop.py b/mindee/v2/file_operations/crop.py
@@ -1,9 +1,9 @@
 from mindee.error import MindeeError
 from mindee.geometry import Point, Polygon
 from mindee.image.extracted_image import ExtractedImage
+from mindee.image.extracted_images import ExtractedImages
 from mindee.image.image_extractor import extract_multiple_images_from_source
 from mindee.input.local_input_source import LocalInputSource
-from mindee.v2.file_operations.crop_files import CropFiles
 from mindee.v2.parsing.inference.field import FieldLocation
 from mindee.v2.product.crop.crop_item import CropItem
 
@@ -25,7 +25,7 @@ def extract_single_crop(
 
 def extract_multiple_crops(
     input_source: LocalInputSource, crops: list[CropItem]
-) -> CropFiles:
+) -> ExtractedImages:
     """
     Extracts individual receipts from multi-receipts documents.
 
@@ -49,4 +49,4 @@ def extract_multiple_crops(
                 polygon,
             )
         )
-    return CropFiles(images)
+    return ExtractedImages(images)
diff --git a/mindee/v2/file_operations/crop_files.py b/mindee/v2/file_operations/crop_files.py
diff --git a/mindee/v2/file_operations/split.py b/mindee/v2/file_operations/split.py
@@ -1,8 +1,8 @@
 from mindee.error import MindeeError
 from mindee.input.local_input_source import LocalInputSource
 from mindee.pdf.extracted_pdf import ExtractedPDF
+from mindee.pdf.extracted_pdfs import ExtractedPDFs
 from mindee.pdf.pdf_extractor import PDFExtractor
-from mindee.v2.file_operations.split_files import SplitFiles
 
 
 def extract_single_split(
@@ -21,7 +21,7 @@ def extract_single_split(
 def extract_multiple_splits(
     input_source: LocalInputSource,
     splits: list[list[int]],
-) -> SplitFiles:
+) -> ExtractedPDFs:
     """
     Extracts splits as complete PDFs from the document.
 
@@ -35,4 +35,4 @@ def extract_multiple_splits(
         page_groups.append(list(range(split[0], split[1] + 1)))
     if len(splits) < 1:
         raise MindeeError("No indexes provided.")
-    return SplitFiles(pdf_extractor.extract_sub_documents(page_groups))
+    return ExtractedPDFs(pdf_extractor.extract_sub_documents(page_groups))
diff --git a/mindee/v2/file_operations/split_files.py b/mindee/v2/file_operations/split_files.py
diff --git a/mindee/v2/product/crop/crop_result.py b/mindee/v2/product/crop/crop_result.py
@@ -1,7 +1,7 @@
+from mindee.image.extracted_images import ExtractedImages
 from mindee.input.local_input_source import LocalInputSource
 from mindee.parsing.common.string_dict import StringDict
 from mindee.v2.file_operations.crop import extract_multiple_crops
-from mindee.v2.file_operations.crop_files import CropFiles
 from mindee.v2.product.crop.crop_item import CropItem
 
 
@@ -20,7 +20,9 @@ def __str__(self) -> str:
         out_str = f"Crops\n====={crops}"
         return out_str
 
-    def extract_from_input_source(self, input_source: LocalInputSource) -> CropFiles:
+    def extract_from_input_source(
+        self, input_source: LocalInputSource
+    ) -> ExtractedImages:
         """
         Apply all the crops to a file and return a single extracted PDF.
 

diff --git a/mindee/v2/product/split/split_result.py b/mindee/v2/product/split/split_result.py
@@ -1,7 +1,7 @@
 from mindee.input.local_input_source import LocalInputSource
 from mindee.parsing.common.string_dict import StringDict
+from mindee.pdf.extracted_pdfs import ExtractedPDFs
 from mindee.v2.file_operations.split import extract_multiple_splits
-from mindee.v2.file_operations.split_files import SplitFiles
 from mindee.v2.product.split.split_range import SplitRange
 
 
@@ -20,7 +20,9 @@ def __str__(self) -> str:
         out_str = f"Splits\n======{splits}"
         return out_str
 
-    def extract_from_input_source(self, input_source: LocalInputSource) -> SplitFiles:
+    def extract_from_input_source(
+        self, input_source: LocalInputSource
+    ) -> ExtractedPDFs:
         """
         Apply all the crops to a file and return a single extracted PDF.
 

diff --git a/tests/input/test_compression.py b/tests/input/test_compression.py
@@ -6,7 +6,7 @@
 
 import pytest
 
-from mindee.image import compress_image
+from mindee.image.image_compressor import compress_image
 from mindee.input import PathInput
 from mindee.pdf.pdf_compressor import compress_pdf
 from mindee.pdf.pdf_utils import extract_text_from_pdf

diff --git a/tests/v1/extraction/test_invoice_splitter_auto_extraction.py b/tests/v1/extraction/test_invoice_splitter_auto_extraction.py
@@ -53,9 +53,7 @@ def test_pdf_should_extract_invoices_strict():
     )
     for i, extracted_pdf in enumerate(extracted_base_pdfs):
         assert extracted_pdf.filename == extracted_pdfs_strict[i].filename
-        assert (
-            extracted_pdf.pdf_bytes.read() == extracted_pdfs_strict[i].pdf_bytes.read()
-        )
+        assert extracted_pdf.buffer.read() == extracted_pdfs_strict[i].buffer.read()
 
     assert len(extracted_pdfs_not_strict) == 2
     assert extracted_pdfs_not_strict[0].filename == "default_sample_001-001.pdf"
Original file line number	Diff line number	Diff line change
		@@ -1,3 +0,0 @@
		from mindee.image.image_compressor import compress_image

		__all__ = ["compress_image"]