util

`genlm.eval.util`

`bootstrap_ci(values, metric, ci=0.95, n_bootstrap=10000)`

Calculate bootstrap confidence intervals for a given metric.

Parameters:

Name	Type	Description	Default
`values`	`array-like`	Array-like object containing the data points.	required
`metric`	`function`	Function that computes the statistic of interest (e.g., np.mean, np.median).	required
`ci`	`float`	Confidence interval level (between 0 and 1). Default is 0.95 for 95% CI.	`0.95`
`n_bootstrap`	`int`	Number of bootstrap samples to generate. Default is 10000.	`10000`

Returns:

Type	Description
`tuple`	(mean, lower, upper) where: - mean is the metric computed on the original data - lower is the lower bound of the confidence interval - upper is the upper bound of the confidence interval

Raises:

Type	Description
`ValueError`	If ci is not between 0 and 1 or if values is empty.

Source code in genlm/eval/util.py

def bootstrap_ci(values, metric, ci=0.95, n_bootstrap=10000):
    """Calculate bootstrap confidence intervals for a given metric.

    The data is resampled with replacement ``n_bootstrap`` times, the metric
    is evaluated on every resample, and the interval bounds are read off as
    percentiles of the resulting metric values.

    Args:
        values (array-like): Array-like object containing the data points.
            Lists, tuples and NumPy arrays are all accepted.
        metric (function): Function that computes the statistic of interest (e.g., np.mean, np.median).
        ci (float): Confidence interval level (between 0 and 1). Default is 0.95 for 95% CI.
        n_bootstrap (int): Number of bootstrap samples to generate. Default is 10000.

    Returns:
        (tuple): (mean, lower, upper) where:
            - mean is the metric computed on the original data
            - lower is the lower bound of the confidence interval
            - upper is the upper bound of the confidence interval

    Raises:
        ValueError: If ci is not between 0 and 1 or if values is empty.
    """
    if not 0 < ci < 1:
        raise ValueError("ci must be between 0 and 1")

    if values is None:
        raise ValueError("values must not be empty")

    # Convert *before* the emptiness check: the truth value of a NumPy array
    # with more than one element is ambiguous and raises, so `not values`
    # cannot be used on array inputs.
    values = np.asarray(values)
    if values.size == 0:
        raise ValueError("values must not be empty")

    # Point estimate on the original (un-resampled) data.
    mean = metric(values)

    sample_size = len(values)
    bootstrap_values = [
        metric(np.random.choice(values, size=sample_size, replace=True))
        for _ in range(n_bootstrap)
    ]

    # Symmetric percentile interval, e.g. 2.5 / 97.5 for ci=0.95.
    lower_percentile = (1 - ci) / 2 * 100
    upper_percentile = (1 + ci) / 2 * 100
    lower, upper = np.percentile(bootstrap_values, [lower_percentile, upper_percentile])
    return mean, lower, upper

`chat_template_messages(prompt, examples, user_message)`

Create a list of messages for chat template formatting.

Parameters:

Name	Type	Description	Default
`prompt`	`str`	System prompt to be used as the initial message.	required
`examples`	`list`	List of (example, response) tuples for few-shot learning.	required
`user_message`	`str`	The actual user query to be processed.	required

Returns:

Type	Description
`list`	List of dictionaries containing the formatted messages with roles and content for the chat template.

Source code in genlm/eval/util.py

def chat_template_messages(prompt, examples, user_message):
    """Create a list of messages for chat template formatting.

    Args:
        prompt (str): System prompt to be used as the initial message.
        examples (list): List of (example, response) tuples for few-shot learning.
        user_message (str): The actual user query to be processed.

    Returns:
        (list): List of dictionaries containing the formatted messages with roles
             and content for the chat template.
    """
    messages = [{"role": "system", "content": prompt}]
    for example, response in examples:
        messages.append({"role": "user", "content": example})
        messages.append({"role": "assistant", "content": response})
    messages.append({"role": "user", "content": user_message})
    return messages

`LRUCache`

Bases: ABC

A cache that evicts the least recently used item when the cache is full.

Source code in genlm/eval/util.py

class LRUCache(ABC):
    """A cache that evicts the least recently used item when the cache is full.

    Subclasses implement `create` to build the object for a key, and may
    override `cleanup` to release an object when it is evicted.
    """

    def __init__(self, cache_size=1):
        """Initialize the cache.

        Args:
            cache_size (int): The size of the cache. A value of 0 or less
                disables caching: every `get` call creates a fresh object.
        """
        self.cache_size = cache_size
        # Insertion order doubles as recency order: oldest entry first.
        self.cache = OrderedDict()

    @abstractmethod
    def create(self, key):
        """Create an object for a given key.

        Args:
            key (any): The key to create an object for.
        """
        pass  # pragma: no cover

    def cleanup(self, key, obj):
        """Cleanup an object for a given key.

        Called by `get` for each entry evicted from the cache. The default
        implementation does nothing.

        Args:
            key (any): The key to cleanup an object for.
            obj (any): The object to cleanup.
        """
        pass  # pragma: no cover

    def get(self, key):
        """Get an object for a given key.

        On a hit the entry is marked as most recently used. On a miss the
        object is created and stored, and the least recently used entry is
        evicted (and passed to `cleanup`) if the cache is over capacity.

        Args:
            key (any): The key to get an object for.

        Returns:
            (any): The object for the given key.
        """
        # Caching disabled: always build a fresh object and store nothing.
        if self.cache_size <= 0:
            return self.create(key)

        if key in self.cache:
            self.cache.move_to_end(key)
            return self.cache[key]

        obj = self.create(key)
        self.cache[key] = obj

        if len(self.cache) > self.cache_size:
            # Use separate names for the evicted pair so the freshly created
            # `obj` is not overwritten and is what gets returned.
            evicted_key, evicted_obj = self.cache.popitem(last=False)
            self.cleanup(evicted_key, evicted_obj)

        return obj

`__init__(cache_size=1)`

Initialize the cache.

Parameters:

Name	Type	Description	Default
`cache_size`	`int`	The size of the cache.	`1`

Source code in genlm/eval/util.py

def __init__(self, cache_size=1):
    """Set up an empty cache with the given capacity.

    Args:
        cache_size (int): The size of the cache.
    """
    # Ordered mapping so that entry order can track recency of use.
    self.cache = OrderedDict()
    self.cache_size = cache_size

`create(key)` `abstractmethod`

Create an object for a given key.

Parameters:

Name	Type	Description	Default
`key`	`any`	The key to create an object for.	required

Source code in genlm/eval/util.py

@abstractmethod
def create(self, key):
    """Build the object that belongs to a key.

    Abstract hook: concrete subclasses must provide the implementation.

    Args:
        key (any): The key to create an object for.
    """
    return None  # pragma: no cover

`cleanup(key, obj)`

Cleanup an object for a given key.

Parameters:

Name	Type	Description	Default
`key`	`any`	The key to cleanup an object for.	required
`obj`	`any`	The object to cleanup.	required

Source code in genlm/eval/util.py

def cleanup(self, key, obj):
    """Release an object associated with a key.

    The default implementation is a no-op; override it when objects hold
    resources that need to be released.

    Args:
        key (any): The key to cleanup an object for.
        obj (any): The object to cleanup.
    """
    return None  # pragma: no cover

`get(key)`

Get an object for a given key.

Parameters:

Name	Type	Description	Default
`key`	`any`	The key to get an object for.	required

Returns:

Type	Description
`any`	The object for the given key.

Source code in genlm/eval/util.py

def get(self, key):
    """Get an object for a given key.

    On a hit the entry is marked as most recently used. On a miss the
    object is created and stored, and the least recently used entry is
    evicted (and passed to `cleanup`) if the cache is over capacity.

    Args:
        key (any): The key to get an object for.

    Returns:
        (any): The object for the given key.
    """
    # Caching disabled: always build a fresh object and store nothing.
    if self.cache_size <= 0:
        return self.create(key)

    if key in self.cache:
        self.cache.move_to_end(key)
        return self.cache[key]

    obj = self.create(key)
    self.cache[key] = obj

    if len(self.cache) > self.cache_size:
        # Use separate names for the evicted pair so the freshly created
        # `obj` is not overwritten and is what gets returned.
        evicted_key, evicted_obj = self.cache.popitem(last=False)
        self.cleanup(evicted_key, evicted_obj)

    return obj

util

genlm.eval.util

bootstrap_ci(values, metric, ci=0.95, n_bootstrap=10000)

chat_template_messages(prompt, examples, user_message)

LRUCache

__init__(cache_size=1)

create(key) abstractmethod

cleanup(key, obj)

get(key)

`genlm.eval.util`

`bootstrap_ci(values, metric, ci=0.95, n_bootstrap=10000)`

`chat_template_messages(prompt, examples, user_message)`

`LRUCache`

`__init__(cache_size=1)`

`create(key)` `abstractmethod`

`cleanup(key, obj)`

`get(key)`