Source code for stringalign.visualize
from __future__ import annotations
import base64
import html
import io
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING: # pragma: no cover
import PIL.Image
from stringalign.align import AlignmentTuple
[docs]
class HtmlString(str):
    """Plain ``str`` subclass marking its content as HTML.

    Jupyter looks for a ``_repr_html_`` method when displaying an object, so
    instances are rendered as HTML instead of being shown as quoted text.
    """

    def __repr__(self) -> str:
        """Return the ordinary string repr wrapped in ``HtmlString(...)``."""
        plain_repr = super().__repr__()
        return "HtmlString(" + plain_repr + ")"

    def _repr_html_(self) -> str:
        """Return the HTML to display, which is the string itself."""
        return self
[docs]
def compress_css(css: str) -> str:
    """Collapse every run of whitespace in a CSS string into one space.

    Newlines, tabs and repeated spaces are all replaced, and leading and
    trailing whitespace is removed. This is a lightweight stand-in for
    minification, not a full minifier.

    Parameters
    ----------
    css
        CSS-content to compress

    Returns
    -------
    compressed_css : str
        The compressed CSS.
    """
    # str.split() without a separator splits on whitespace runs and drops
    # empty pieces, so re-joining with a single space normalises everything.
    tokens = css.split()
    return " ".join(tokens)
[docs]
def create_alignment_stylesheet() -> str:
    """Get the css used for styling the alignment operation visualisation.

    The stylesheet is read from ``assets/stylesheet.css`` next to this module
    and passed through :func:`compress_css`.

    Returns
    -------
    str
        String containing the alignment operation visualisation CSS.
    """
    stylesheet_path = Path(__file__).with_name("assets") / "stylesheet.css"
    # Decode explicitly instead of relying on the platform's locale encoding,
    # so the result is the same on every OS.
    # NOTE(review): assumes the bundled stylesheet is stored as UTF-8 - confirm.
    raw_css = stylesheet_path.read_text(encoding="utf-8")
    return compress_css(raw_css)
def _create_alignment_html(
    alignment: AlignmentTuple, reference_label: str, predicted_label: str, space_tokens: bool
) -> str:
    """Build the HTML fragment for an alignment, without any ``<style>`` tag.

    The fragment is one ``<div class="alignment">`` containing a label row
    followed by one ``alignment-chunk`` div per alignment operation.

    Parameters
    ----------
    alignment
        Iterable of operations; each must provide ``to_html()`` returning a
        ``(reference, predicted)`` pair of HTML snippets.
    reference_label
        Label for the reference row. It is HTML-escaped before insertion.
    predicted_label
        Label for the predicted row. It is HTML-escaped before insertion.
    space_tokens
        If True, every chunk additionally gets the ``spaced`` CSS class.

    Returns
    -------
    str
        The assembled HTML fragment.
    """
    chunk_class = "alignment-chunk spaced" if space_tokens else "alignment-chunk"

    # Labels are escaped here; the snippets returned by ``to_html()`` are
    # inserted verbatim.
    parts = [
        '<div class="alignment">',
        '<div class="alignment-labels">',
        f'<span class="reference label">{html.escape(reference_label)}</span>',
        f'<span class="predicted label">{html.escape(predicted_label)}</span>',
        "</div>",
    ]
    for operation in alignment:
        reference, predicted = operation.to_html()
        parts.append(f"<div class='{chunk_class}'>{reference} {predicted}</div>")
    parts.append("</div>")
    return "".join(parts)
[docs]
def create_alignment_html(
    alignment: AlignmentTuple,
    reference_label: str = "Reference:",
    predicted_label: str = "Predicted:",
    stylesheet: str | None = None,
    space_alignment_ops: bool = False,
) -> HtmlString:
    """Create an HTML representation of the alignment with embedded CSS styles.

    See :ref:`visualize_example` for an example.

    Parameters
    ----------
    alignment
        The alignment data to visualize.
    reference_label
        The label for the reference text.
    predicted_label
        The label for the predicted text.
    stylesheet
        Optional CSS stylesheet to apply. If None, a default stylesheet is used.
        For no styling, pass an empty string.
    space_alignment_ops
        If this is True, then there will be a small space between each alignment operation.

    Returns
    -------
    HtmlString
        An HTML string representing the alignment with embedded styles.
    """
    # None selects the bundled stylesheet; any other value is used as given.
    css = create_alignment_stylesheet() if stylesheet is None else stylesheet
    # An empty stylesheet produces no <style> element at all.
    style_tag = f"<style>{css}</style>" if css else ""

    body = _create_alignment_html(
        alignment=alignment,
        reference_label=reference_label,
        predicted_label=predicted_label,
        space_tokens=space_alignment_ops,
    )
    return HtmlString(style_tag + body)
[docs]
def base64_encode_image(image: PIL.Image.Image) -> bytes:
    """Convert a PIL image into a base64-encoded JPEG image.

    Images whose mode cannot be written as JPEG (for example ``RGBA``, ``LA``
    or ``P``) are converted to ``RGB`` first, which discards any alpha channel.

    Parameters
    ----------
    image
        Image to serialize

    Returns
    -------
    bytes
        Base64 encoded JPEG image
    """
    # Pillow's JPEG writer raises OSError for any mode outside this set, so
    # such images are converted instead of failing.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue())
[docs]
def create_html_image(
    image: PIL.Image.Image | Path | str, width: int = 500, alt: str | None = None
) -> HtmlString:
    """Convert an image into a HTML image tag with base64-encoded data to e.g. embed in Jupyter notebooks.

    Parameters
    ----------
    image
        The image to convert into an HTML image tag with base64 encoded data.
        A ``Path`` or ``str`` is read from disk and embedded unchanged, with
        the media type derived from the file suffix. Anything else is passed
        to ``base64_encode_image`` and embedded as JPEG.
    width
        The width of the image tag
    alt : optional
        The alt text of the image tag. It is HTML-escaped before insertion.

    Returns
    -------
    HtmlString
        A string with an image tag containing the base64-encoded image.

    Raises
    ------
    TypeError
        If ``image`` is not a path and encoding it as a JPEG image fails.
    """
    # Escape the alt text so quotes or angle brackets cannot break out of the
    # attribute; str() keeps non-string values working as before.
    alt_attribute = "" if alt is None else f'alt="{html.escape(str(alt))}"'

    if isinstance(image, (Path, str)):
        image_path = Path(image)
        # Lower-case the suffix so e.g. ".JPG" is normalised like ".jpg".
        file_type = image_path.suffix.removeprefix(".").lower()
        # Some suffixes differ from the media subtype: "jpg" is not a valid
        # subtype, and SVG must be served as "svg+xml".
        file_type = {"jpg": "jpeg", "svg": "svg+xml"}.get(file_type, file_type)
        b64_img = base64.b64encode(image_path.read_bytes())
    else:
        try:
            b64_img = base64_encode_image(image)
        except Exception as e:
            # PIL is only imported for type checking, so the image type cannot
            # be tested directly; any encoding failure is reported as a bad type.
            raise TypeError(f"Image must be PIL.Image.Image, Path or string, not {type(image)}") from e
        file_type = "jpeg"

    return HtmlString(
        f'<img src="data:image/{file_type};base64, {b64_img.decode("ascii")}" width="{width}px" {alt_attribute}/>'
    )