# Source code for ipyannotations.text.tagging

import ipywidgets as widgets
import traitlets

from ..base import LabellingWidgetMixin
from .._frontend import module_name, module_version


@widgets.register
class TextTaggerCore(widgets.DOMWidget):
    """A text tagging javascript widget.

    All public traits below are tagged ``sync=True`` so that they are
    mirrored between the Python kernel and the frontend view.
    """

    # properties to make sure the right frontend widget is found:
    _view_name = traitlets.Unicode("TextTaggerView").tag(sync=True)
    _model_name = traitlets.Unicode("TextTaggerModel").tag(sync=True)
    _model_module = traitlets.Unicode(module_name).tag(sync=True)
    _model_module_version = traitlets.Unicode(module_version).tag(sync=True)
    _view_module = traitlets.Unicode(module_name).tag(sync=True)
    _view_module_version = traitlets.Unicode(module_version).tag(sync=True)

    text = traitlets.Unicode("Lorem ipsum", help="The text to display.").tag(
        sync=True
    )
    # The entity classes a span can be assigned to.
    classes = traitlets.List(
        trait=traitlets.Unicode(), default_value=["MISC", "PER", "LOC", "ORG"]
    ).tag(sync=True)
    # The class that newly highlighted spans are tagged with.
    selected_class = traitlets.Unicode().tag(sync=True)
    snap_to_word_boundary = traitlets.Bool().tag(sync=True)
    # Each span is an (int, int, str) tuple; per the constructor docstring
    # these are the currently highlighted spans.
    entity_spans = traitlets.List(
        trait=traitlets.Tuple(
            traitlets.Int(), traitlets.Int(), traitlets.Unicode()
        )
    ).tag(sync=True)
    # Colour hex codes sent to the frontend.
    palette = traitlets.List(
        trait=traitlets.Unicode(),
        default_value=[
            "#8dd3c7",
            "#ffffb3",
            "#bebada",
            "#fb8072",
            "#80b1d3",
            "#fdb462",
            "#b3de69",
            "#fccde5",
            "#d9d9d9",
            "#bc80bd",
            "#ccebc5",
            "#ffed6f",
        ],
    ).tag(sync=True)

    def __init__(
        self,
        text="Lorem ipsum",
        classes=None,
        entity_spans=None,
        snap_to_word_boundary=True,
        **kwargs,
    ):
        """Create a text tagging "core" widget.

        This is a front-end widget that displays its 'text' attribute. When a
        sub-string is highlighted, it will snap to the word boundaries, and
        mark it as a span of type `widget.selected_class`. All spans that are
        highlighted are available under `widget.entity_spans`, which is also
        assignable.

        Select the type of entity to tag by clicking its corresponding button,
        or by using the hotkeys 1-0. Hotkeys are mapped to entities in the
        order in which they appear on screen.

        Parameters
        ----------
        text : str, optional
            The text to display in the frontend, by default "Lorem ipsum"
        classes : list, optional
            The possible classes to assign to a span. When omitted (or None)
            the trait default ["MISC", "PER", "LOC", "ORG"] is used.
        entity_spans : list, optional
            The currently highlighted spans. When omitted (or None) the
            widget starts with no spans.
        snap_to_word_boundary : bool, optional
            Whether to always snap to the word boundary, even when a
            word is only partially selected. By default True.
        **kwargs
            Forwarded unchanged to the parent widget constructor.
        """
        # Forward the list arguments only when the caller supplied them, so
        # that the trait defaults declared on the class apply otherwise and no
        # mutable default object lives in the signature.
        if classes is not None:
            kwargs["classes"] = classes
        if entity_spans is not None:
            kwargs["entity_spans"] = entity_spans
        super().__init__(
            text=text,
            snap_to_word_boundary=snap_to_word_boundary,
            **kwargs,
        )
        # Default the active class to the first available one; skip this when
        # there are no classes at all rather than failing with an IndexError.
        if not self.selected_class and self.classes:
            self.selected_class = self.classes[0]


class TextTagger(LabellingWidgetMixin, widgets.VBox):
    """A tagging widget to annotate tokens inside text."""

    # The annotated spans, each an (int, int, str) tuple. Linked in both
    # directions with the core widget's ``entity_spans`` trait.
    data = traitlets.List(
        trait=traitlets.Tuple(
            traitlets.Int(), traitlets.Int(), traitlets.Unicode()
        )
    )

    def __init__(
        self,
        classes=None,
        text="Lorem ipsum",
        data=None,
        button_width="5em",
        snap_to_word_boundary=True,
    ):
        """A tagging widget to annotate tokens inside text.

        Parameters
        ----------
        classes : list, optional
            The classes of entities to annotate, by default
            ["MISC", "PER", "LOC", "ORG"]
        text : str, optional
            The text to display, by default "Lorem ipsum"
        data : list, optional
            If you have entity annotations for this text already, by default
            no annotations.
        button_width : str, optional
            A valid HTML width string, by default "5em"
        snap_to_word_boundary : bool
            Whether or not the widget should expand selections to the word
            boundaries. For most languages, this should be left True, but some
            languages are based off single characters (e.g. traditional
            mandarin).
        """
        super().__init__()
        # The ``data`` observer reads this flag, so it has to exist before
        # ``data`` can change. It stays True during setup so that seeding the
        # initial annotations is not recorded as an undoable action.
        self.__undo_in_process = True

        if classes is None:
            classes = ["MISC", "PER", "LOC", "ORG"]
        if data is None:
            data = []

        self.text_widget = TextTaggerCore(
            text=text,
            classes=classes,
            entity_spans=data,
            snap_to_word_boundary=snap_to_word_boundary,
        )
        self.class_selector = widgets.ToggleButtons(
            options=classes,
            description="Class to tag:",
            style=widgets.ToggleButtonsStyle(button_width=button_width),
        )
        widgets.link(
            (self.class_selector, "value"),
            (self.text_widget, "selected_class"),
        )
        # link() copies the source value onto the target when it is created.
        # The core widget already holds the initial annotations, so it must be
        # the source; otherwise the empty default of ``data`` would overwrite
        # them. After creation the link syncs both directions.
        widgets.link((self.text_widget, "entity_spans"), (self, "data"))

        # undo_button, skip_button, submit_button and event_watcher are not
        # created in this class; presumably LabellingWidgetMixin provides them.
        self.children = (
            self.text_widget,
            self.class_selector,
            widgets.HBox(
                [self.undo_button, self.skip_button, self.submit_button],
                layout={
                    "align_items": "stretch",
                    "justify_content": "flex-end",
                    "flex_flow": "row wrap",
                },
            ),
            self.event_watcher,
        )
        self.__undo_in_process = False

    def display(self, text: str):
        """Display text to be tagged.

        Sets the text on the core widget, calls ``self.clear()`` and then
        empties the undo queue.

        Parameters
        ----------
        text : str
            The new text to show in the core widget.
        """
        self.text_widget.text = text
        # clear() is defined outside this class (presumably on the mixin).
        self.clear()
        self._undo_queue.clear()

    @traitlets.observe("data")
    def _append_undo_fn(self, proposal: dict):
        """Queue a callback that restores ``data`` to its previous value.

        Parameters
        ----------
        proposal : dict
            The traitlets change notification; only its "old" entry is used.
        """
        # Changes made by an undo (or during construction) must not create
        # new undo entries themselves.
        if self.__undo_in_process:
            return
        old_data = proposal["old"]

        def _undo_data_change():
            self.__undo_in_process = True
            try:
                self.data = old_data
            finally:
                # Always re-enable undo tracking, even if the assignment
                # is rejected.
                self.__undo_in_process = False

        self._undo_queue.append(_undo_data_change)

    def _handle_keystroke(self, event):
        """Select an entity class with the number keys.

        Keys "1" to "9" followed by "0" are paired, in order, with the
        options of the class selector; a matching key press selects that
        option. The event is first passed to the parent handler.

        Parameters
        ----------
        event : dict
            The keyboard event; its "key" entry is compared to the hotkeys.
        """
        super()._handle_keystroke(event)
        pressed = event.get("key")
        keys = [str(i) for i in range(1, 10)] + ["0"]
        for key, option in zip(keys, self.class_selector.options):
            if pressed == key:
                self.class_selector.value = option
                # Hotkeys are unique, so nothing further can match.
                break