Source code for stringalign.visualize

from __future__ import annotations

import base64
import html
import io
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:  # pragma: no cover
    import PIL.Image

    from stringalign.align import AlignmentTuple



[docs]
class HtmlString(str):
    """String subclass marking its content as HTML.

    Jupyter notebooks look for a `_repr_html_` method when displaying objects, so instances are rendered as HTML
    instead of being shown as plain text.
    """

    def __repr__(self) -> str:
        """Return the plain string representation wrapped in ``HtmlString(...)``."""
        plain_repr = str.__repr__(self)
        return f"HtmlString({plain_repr})"

    def _repr_html_(self) -> str:
        """Return the HTML to display, which is the string itself."""
        return self




[docs]
def compress_css(css: str) -> str:
    """Collapse every run of whitespace in a CSS string into a single space.

    Newlines, tabs and repeated spaces are replaced, and leading and trailing whitespace is dropped. This is a
    lightweight alternative to proper minification and does not attempt anything more thorough.

    Parameters
    ----------
    css
        CSS-content to compress

    Returns
    -------
    compressed_css : str
        The compressed CSS.
    """
    # Splitting without a separator drops all whitespace, including at the ends of the string.
    tokens = css.split()
    return " ".join(tokens)




[docs]
def create_alignment_stylesheet() -> str:
    """Get the css used for styling the alignment operation visualisation.

    The stylesheet is read from ``assets/stylesheet.css`` next to this module and passed through
    :func:`compress_css`.

    Returns
    -------
    str
        String containing the alignment operation visualisation CSS.
    """
    stylesheet_path = Path(__file__).with_name("assets") / "stylesheet.css"
    # Decode explicitly instead of relying on the platform's locale encoding, so the result is the same on every
    # system. The bundled stylesheet is assumed to be UTF-8 encoded.
    return compress_css(stylesheet_path.read_text(encoding="utf-8"))



def _create_alignment_html(
    alignment: AlignmentTuple, reference_label: str, predicted_label: str, space_tokens: bool
) -> str:
    alignment_html = ['<div class="alignment">']

    alignment_html.append('<div class="alignment-labels">')
    alignment_html.append(f'<span class="reference label">{html.escape(reference_label)}</span>')
    alignment_html.append(f'<span class="predicted label">{html.escape(predicted_label)}</span>')
    alignment_html.append("</div>")
    if space_tokens:
        extra_class = " spaced"
    else:
        extra_class = ""
    for operation in alignment:
        reference, predicted = operation.to_html()

        alignment_chunk_html = f"<div class='alignment-chunk{extra_class}'>"
        alignment_chunk_html += f"{reference} {predicted}"
        alignment_chunk_html += "</div>"

        alignment_html.append(alignment_chunk_html)

    alignment_html.append("</div>")
    return "".join(alignment_html)



[docs]
def create_alignment_html(
    alignment: AlignmentTuple,
    reference_label: str = "Reference:",
    predicted_label: str = "Predicted:",
    stylesheet: str | None = None,
    space_alignment_ops: bool = False,
) -> HtmlString:
    """Render an alignment as HTML, preceded by a ``<style>`` tag holding its CSS.

    See :ref:`visualize_example` for an example.

    Parameters
    ----------
    alignment
        The alignment data to visualize.
    reference_label
        The label for the reference text.
    predicted_label
        The label for the predicted text.
    stylesheet
        Optional CSS stylesheet to apply. If None, a default stylesheet is used. For no styling, pass an empty
        string, in which case no ``<style>`` tag is emitted at all.
    space_alignment_ops
        If this is True, then there will be a small space between each alignment operation.

    Returns
    -------
    HtmlString
        An HTML string representing the alignment with embedded styles.
    """
    css = create_alignment_stylesheet() if stylesheet is None else stylesheet
    # An empty stylesheet means "no styling", so the style tag is left out entirely.
    style_tag = f"<style>{css}</style>" if css else ""

    markup = _create_alignment_html(
        alignment=alignment,
        reference_label=reference_label,
        predicted_label=predicted_label,
        space_tokens=space_alignment_ops,
    )
    return HtmlString(style_tag + markup)




[docs]
def base64_encode_image(image: PIL.Image.Image) -> bytes:
    """Convert a PIL image into a base64-encoded JPEG image.

    JPEG can store neither an alpha channel nor a palette, so images in the ``RGBA``, ``LA``, ``P`` and ``PA``
    modes are converted to ``RGB`` before they are serialized. Any transparency is discarded by that conversion.

    Parameters
    ----------
    image
        Image to serialize

    Returns
    -------
    bytes
        Base64 encoded JPEG image
    """
    # Saving these modes directly as JPEG fails ("cannot write mode ... as JPEG"), which is common for images
    # loaded from PNG files. `convert` returns a new image, so the caller's image is left untouched.
    if image.mode in ("RGBA", "LA", "P", "PA"):
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue())




[docs]
def create_html_image(
    image: PIL.Image.Image | Path | str, width: int = 500, alt: str | None = None
) -> HtmlString:
    """Create an HTML image tag with the image embedded as base64-encoded data, e.g. for Jupyter notebooks.

    Parameters
    ----------
    image
        Either a PIL image, which is serialized as JPEG, or the path to an image file. The bytes of an image file
        are embedded unchanged and its media type is derived from the (case-insensitive) file extension.
    width
        The width of the image tag
    alt : optional
        The alt text of the image tag. It is HTML-escaped before it is inserted into the tag. If None, the tag
        gets no alt attribute.

    Returns
    -------
    HtmlString
        A string with an image tag containing the base64-encoded image.

    Raises
    ------
    TypeError
        If ``image`` is not a path and serializing it with :func:`base64_encode_image` fails.
    """
    if alt is None:
        alt_attribute = ""
    else:
        # Escape the text (including quotes) so it cannot terminate the attribute or inject markup.
        alt_attribute = f'alt="{html.escape(str(alt))}"'

    if isinstance(image, (Path, str)):
        # File extensions that differ from the subtype of their registered ``image/*`` media type.
        # E.g. JPG is not a valid MIME type, so a .jpg file must be declared as jpeg.
        media_subtypes = {"jpg": "jpeg", "svg": "svg+xml", "tif": "tiff"}

        image_path = Path(image)
        extension = image_path.suffix.removeprefix(".").lower()
        file_type = media_subtypes.get(extension, extension)
        b64_img = base64.b64encode(image_path.read_bytes())
    else:
        try:
            b64_img = base64_encode_image(image)
        except Exception as e:
            # Every failure is reported as a TypeError; this is kept so existing callers see the same exception.
            raise TypeError(f"Image must be PIL.Image.Image, Path or string, not {type(image)}") from e
        else:
            file_type = "jpeg"

    encoded_image = b64_img.decode("ascii")
    return HtmlString(f'<img src="data:image/{file_type};base64, {encoded_image}" width="{width}px" {alt_attribute}/>')