Source code for pylelemmatize.util.color_str

from typing import List, Optional, IO, Tuple, Union
import numpy as np
from .banded_editdistance import banded_edit_path
from ..abstract_mapper import fast_str_to_numpy





def print_colored_text(seq: Union[str, np.array],
                       fg_rgb: Optional[Union[Tuple[int, int, int], List[Tuple[int, int, int]]]] = None, 
                       bg_rgb: Optional[Union[Tuple[int, int, int], List[Tuple[int, int, int]]]] = None,
                       file: Optional[IO[str]] = None) -> str:
    if fg_rgb is None:
        fg_rgb = (255, 255, 255)
    if bg_rgb is None:
        bg_rgb = (0, 0, 0)
    if isinstance(fg_rgb, Tuple) and len(fg_rgb) == 3 and all(isinstance(c, int) for c in fg_rgb):
        fg_rgb = [fg_rgb] * len(seq)
    else:
        assert len(fg_rgb) == len(seq), "fg_rgb length must match sequence length"

    if isinstance(bg_rgb, Tuple) and len(bg_rgb) == 3 and all(isinstance(c, int) for c in bg_rgb):
        bg_rgb = [bg_rgb] * len(seq)
    else:
        assert len(bg_rgb) == len(seq), "bg_rgb length must match sequence length"
    res = []
    for n, character in enumerate(seq):
        fg = fg_rgb[n]
        bg = bg_rgb[n]
        colored_char = f"\x1b[38;2;{fg[0]};{fg[1]};{fg[2]}m\x1b[48;2;{bg[0]};{bg[1]};{bg[2]}m{character}\x1b[0m"
        res.append(colored_char)
    res = ''.join(res)
    if file is not None:
        print(res, file=file)
    return res


def print_error_types(seq1: Union[str, np.array], seq2: Union[str, np.array], band: int = -1, 
                      correct_rgb: Tuple[Tuple[int, int, int], Tuple[int, int, int]] = ((0, 255, 0), (0, 0, 0)), 
                      substitution_rgb: Tuple[Tuple[int, int, int], Tuple[int, int, int]] = ((255, 0, 0), (0, 0, 0)),
                      insertion_rgb: Tuple[Tuple[int, int, int], Tuple[int, int, int]] = ((255, 221, 0), (64, 64, 64)),
                      deletion_rgb: Tuple[Tuple[int, int, int], Tuple[int, int, int]] = ((255, 0, 221), (64, 64, 64)), 
                      file: Optional[IO[str]] = None) -> str:
    """
    Visualize alignment errors between two sequences with color-coded output.

    This function computes the alignment between two sequences and prints the aligned text
    with color-coded error types. Matches, substitutions, insertions, and deletions are
    displayed with different foreground and background colors.

    Parameters
    ----------
    seq1 : Union[str, np.array]
        The first sequence (reference).
    seq2 : Union[str, np.array]
        The second sequence (hypothesis).
    band : int, optional
        The maximum allowed misalignment (band) for the dynamic programming alignment.
        If -1, the band is set to the maximum length of the two sequences. Default is -1.
    correct_rgb : Tuple[Tuple[int, int, int], Tuple[int, int, int]], optional
        RGB colors for matches. The first tuple is the foreground color, and the second
        tuple is the background color. Default is ((0, 255, 0), (0, 0, 0)).
    substitution_rgb : Tuple[Tuple[int, int, int], Tuple[int, int, int]], optional
        RGB colors for substitutions. The first tuple is the foreground color, and the second
        tuple is the background color. Default is ((255, 0, 0), (0, 0, 0)).
    insertion_rgb : Tuple[Tuple[int, int, int], Tuple[int, int, int]], optional
        RGB colors for insertions. The first tuple is the foreground color, and the second
        tuple is the background color. Default is ((255, 221, 0), (64, 64, 64)).
    deletion_rgb : Tuple[Tuple[int, int, int], Tuple[int, int, int]], optional
        RGB colors for deletions. The first tuple is the foreground color, and the second
        tuple is the background color. Default is ((255, 0, 221), (64, 64, 64)).
    file : Optional[IO[str]], optional
        A file-like object to which the output will be written. If None, the output is printed
        to the standard output. Default is None.

    Returns
    -------
    str
        The colorized string representation of the alignment.

    Raises
    ------
    RuntimeError
        If an invalid state is encountered during error type processing.

    Notes
    -----
    - This function uses ANSI escape codes for colorization, which may not be supported in all
      terminal environments.
    - The alignment is computed using a banded dynamic programming approach.

    Examples
    --------
    >>> print_error_types("hello", "h3llo", band=3)
    (Outputs colorized text to the terminal)
    """
    if isinstance(seq1, str):
        seq1_arr = fast_str_to_numpy(seq1)
    if isinstance(seq2, str):
        seq2_arr = fast_str_to_numpy(seq2)
    if band < 1:
        band = max(len(seq1_arr), len(seq2_arr))
    
    if len(seq1_arr) == 0 and len(seq2_arr) == 0:
        return print_colored_text("", file=file)
    if len(seq1_arr) == 0:
        fg_colors = [insertion_rgb[0]] * len(seq2_arr)
        bg_colors = [insertion_rgb[1]] * len(seq2_arr)
        return print_colored_text(seq2_arr, fg_rgb=fg_colors, bg_rgb=bg_colors, file=file)
    if len(seq2_arr) == 0:
        fg_colors = [deletion_rgb[0]] * len(seq1_arr)
        bg_colors = [deletion_rgb[1]] * len(seq1_arr)
        return print_colored_text(seq1_arr, fg_rgb=fg_colors, bg_rgb=bg_colors, file=file)
    
    paths, _, (is_match, is_ins, is_del, is_sub) = banded_edit_path(seq1_arr, seq2_arr, band=band)
    fg_colors = []
    bg_colors = []
    txt = []
    #print(f"seq1: {seq1}\nseq2: {seq2}\npaths:\n{paths}\n", file=sys.stderr)
    for n in range(len(paths)-1):
        if is_match[n]:
            fg, bg = correct_rgb
            fg_colors.append(fg)
            bg_colors.append(bg)
            txt.append(seq1[paths[n+1][0]])
        elif is_sub[n]:
            fg, bg = substitution_rgb
            fg_colors.append(fg)
            bg_colors.append(bg)
            txt.append(seq1[paths[n+1][0]])
        elif is_ins[n]:
            fg, bg = insertion_rgb
            fg_colors.append(fg)
            bg_colors.append(bg)
            txt.append(seq2[paths[n+1][1]])
        elif is_del[n]:
            fg, bg = deletion_rgb
            fg_colors.append(fg)
            bg_colors.append(bg)
            txt.append(seq1[paths[n+1][0]])
        else:
            raise RuntimeError("Invalid state in error type printing.")
    txt = ''.join(txt)
    #print(f"{txt} FG:{fg_colors} BG:{bg_colors} Is match{is_match}\n{paths}", file=sys.stderr)
    return print_colored_text(txt, fg_rgb=fg_colors, bg_rgb=bg_colors, file=file)