Skip to content

json_schema

genlm.eval.domains.json_schema

JSONSchemaBenchDataset

Bases: Dataset[JSONSchemaBenchInstance]

Dataset for JSON evaluation using the JSONSchemaBench dataset.

Source code in genlm/eval/domains/json_schema/dataset.py
class JSONSchemaBenchDataset(Dataset[JSONSchemaBenchInstance]):
    """Dataset for JSON evaluation using the JSONSchemaBench dataset."""

    def __init__(self, schemas, tasks, unique_ids):
        """Initialize the dataset with parsed JSON schemas and their metadata.

        Args:
            schemas (list[dict]): The parsed JSON schemas to evaluate.
            tasks (list[str]): The task name for each schema.
            unique_ids (list[int]): The unique id for each schema.
        """
        # The three lists are parallel; enforce equal lengths up front
        # (the original check omitted unique_ids).
        assert len(schemas) == len(tasks) == len(unique_ids)
        self.schemas = schemas
        self.tasks = tasks
        self.unique_ids = unique_ids

    def __len__(self):
        """Return the number of schema instances in the dataset."""
        return len(self.schemas)

    @classmethod
    def from_tasks(cls, tasks, split="val"):
        """Load tasks from a list of tasks in the JSONSchemaBench dataset.

        Args:
            tasks (list[Task]): The tasks to evaluate.
            split (str): The split to load. Defaults to "val".

        Returns:
            (JSONSchemaBenchDataset): Dataset initialized with schemas from the tasks.
        """
        all_schemas = []
        task_names = []
        all_unique_ids = []
        seen_ids = set()  # O(1) duplicate detection instead of O(n) list scans

        for task in tasks:
            dataset = load_dataset("epfl-dlab/JSONSchemaBench", task)[split]
            schemas = dataset["json_schema"]
            unique_ids = dataset["unique_id"]
            assert len(schemas) == len(unique_ids)
            for schema, unique_id in zip(schemas, unique_ids):
                # Unique ids must not repeat across tasks.
                assert unique_id not in seen_ids
                seen_ids.add(unique_id)
                all_unique_ids.append(unique_id)
                # Schemas are stored as JSON strings in the dataset; parse once here.
                all_schemas.append(json.loads(schema))
                task_names.append(task)

        return cls(all_schemas, task_names, all_unique_ids)

    def __iter__(self):
        """Iterate over JSON schemas.

        Returns:
            (Iterator[JSONSchemaBenchInstance]): Iterator over JSON schema instances.
        """
        for schema_id, schema in enumerate(self.schemas):
            yield JSONSchemaBenchInstance(
                json_schema=schema,
                instance_id=self.unique_ids[schema_id],
                task=self.tasks[schema_id],
            )

    @property
    def schema(self):
        """Get the schema class for this dataset.

        Returns:
            (type[JSONSchemaBenchInstance]): The Pydantic model class for JSON schema instances.
        """
        return JSONSchemaBenchInstance

__init__(schemas, tasks, unique_ids)

Initialize the dataset with parsed JSON schemas and their task metadata.

Parameters:

Name Type Description Default
schemas list[dict]

The JSON schemas to evaluate.

required
tasks list[str]

The task name for each schema.

required
unique_ids list[int]

The unique id for each schema.

required
Source code in genlm/eval/domains/json_schema/dataset.py
def __init__(self, schemas, tasks, unique_ids):
    """Initialize the dataset with a list of regex patterns.

    Args:
        schemas (list[str]): The JSON schemas to evaluate.
        tasks (list[str]): The task name for each schema.
        unique_ids (list[int]): The unique id for each schema.
    """
    assert len(schemas) == len(tasks)
    self.schemas = schemas
    self.tasks = tasks
    self.unique_ids = unique_ids

from_tasks(tasks, split='val') classmethod

Load tasks from a list of tasks in the JSONSchemaBench dataset.

Parameters:

Name Type Description Default
tasks list[Task]

The tasks to evaluate.

required
split str

The split to load. Defaults to "val".

'val'

Returns:

Type Description
JSONSchemaBenchDataset

Dataset initialized with schemas from the tasks.

Source code in genlm/eval/domains/json_schema/dataset.py
@classmethod
def from_tasks(cls, tasks, split="val"):
    """Load tasks from a list of tasks in the JSONSchemaBench dataset.

    Args:
        tasks (list[Task]): The tasks to evaluate.
        split (str): The split to load. Defaults to "val".

    Returns:
        (JSONSchemaBenchDataset): Dataset initialized with schemas from the tasks.
    """
    all_schemas = []
    task_names = []
    all_unique_ids = []
    seen_ids = set()  # O(1) duplicate detection instead of O(n) list scans

    for task in tasks:
        dataset = load_dataset("epfl-dlab/JSONSchemaBench", task)[split]
        schemas = dataset["json_schema"]
        unique_ids = dataset["unique_id"]
        assert len(schemas) == len(unique_ids)
        for schema, unique_id in zip(schemas, unique_ids):
            # Unique ids must not repeat across tasks.
            assert unique_id not in seen_ids
            seen_ids.add(unique_id)
            all_unique_ids.append(unique_id)
            # Schemas are stored as JSON strings in the dataset; parse once here.
            all_schemas.append(json.loads(schema))
            task_names.append(task)

    return cls(all_schemas, task_names, all_unique_ids)

__iter__()

Iterate over JSON schemas.

Returns:

Type Description
Iterator[JSONSchemaBenchInstance]

Iterator over JSON schema instances.

Source code in genlm/eval/domains/json_schema/dataset.py
def __iter__(self):
    """Yield one JSONSchemaBenchInstance per stored schema.

    Returns:
        (Iterator[JSONSchemaBenchInstance]): Iterator over JSON schema instances.
    """
    # Walk the parallel lists by position; indexing (rather than zip)
    # surfaces an IndexError if the lists ever disagree in length.
    for position in range(len(self.schemas)):
        yield JSONSchemaBenchInstance(
            json_schema=self.schemas[position],
            instance_id=self.unique_ids[position],
            task=self.tasks[position],
        )

schema property

Get the schema class for this dataset.

Returns:

Type Description
type[JSONSchemaBenchInstance]

The Pydantic model class for JSON schema instances.

JSONSchemaBenchInstance

Bases: Instance

Schema for JSON instance.

Source code in genlm/eval/domains/json_schema/dataset.py
class JSONSchemaBenchInstance(Instance):
    """Schema for JSON instance."""

    # Parsed JSON schema the model output must conform to.
    json_schema: dict
    # Dataset-assigned identifier for this schema.
    instance_id: Union[int, str]
    # Name of the JSONSchemaBench task this schema came from.
    task: str

    def __repr__(self):
        # Compact summary for logging: the schema plus its dataset id.
        return "schema: {} (id: {})".format(self.json_schema, self.instance_id)

JSONSchemaBenchEvaluator

Bases: Evaluator[JSONSchemaBenchInstance]

Evaluator for JSON schema.

Source code in genlm/eval/domains/json_schema/evaluator.py
class JSONSchemaBenchEvaluator(Evaluator[JSONSchemaBenchInstance]):
    """Evaluator for JSON schema."""

    def evaluate_sample(self, instance, response):
        """Evaluate if a response is valid against the JSON schema.

        Args:
            instance (JSONSchemaBenchInstance): The JSON schema instance being evaluated.
            response (str): The model's response text.

        Returns:
            (EvaluationResult): Evaluation result for whether the response is valid against the JSON schema.
        """
        # Unparseable text short-circuits with a zero score.
        try:
            parsed = json.loads(response)
        except json.JSONDecodeError:
            return EvaluationResult(score=0, desc="invalid json")

        # Parsed successfully; now check conformance to the schema.
        matches = validate_json_object(parsed, instance.json_schema)
        verdict = "valid" if matches else "json does not match schema"
        return EvaluationResult(score=int(matches), desc=verdict)

evaluate_sample(instance, response)

Evaluate if a response is valid against the JSON schema.

Parameters:

Name Type Description Default
instance JSONSchemaBenchInstance

The JSON schema instance being evaluated.

required
response str

The model's response text.

required

Returns:

Type Description
EvaluationResult

Evaluation result for whether the response is valid against the JSON schema.

Source code in genlm/eval/domains/json_schema/evaluator.py
def evaluate_sample(self, instance, response):
    """Evaluate if a response is valid against the JSON schema.

    Args:
        instance (JSONSchemaBenchInstance): The JSON schema instance being evaluated.
        response (str): The model's response text.

    Returns:
        (EvaluationResult): Evaluation result for whether the response is valid against the JSON schema.
    """
    # Unparseable text short-circuits with a zero score.
    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        return EvaluationResult(score=0, desc="invalid json")

    # Parsed successfully; now check conformance to the schema.
    matches = validate_json_object(parsed, instance.json_schema)
    verdict = "valid" if matches else "json does not match schema"
    return EvaluationResult(score=int(matches), desc=verdict)

default_prompt_formatter(tokenizer, instance, use_chat_format=True, system_prompt=DEFAULT_SYSTEM_PROMPT)

Default prompt formatter for JSON Schema.

Parameters:

Name Type Description Default
tokenizer Tokenizer

The tokenizer to use.

required
instance JSONSchemaInstance

The instance to format.

required
use_chat_format bool

Whether to use chat format.

True
system_prompt str

The system prompt to use.

DEFAULT_SYSTEM_PROMPT

Returns:

Type Description
list[int]

The prompt ids.

Source code in genlm/eval/domains/json_schema/__init__.py
def default_prompt_formatter(
    tokenizer,
    instance,
    use_chat_format=True,
    system_prompt=DEFAULT_SYSTEM_PROMPT,
):
    """Default prompt formatter for JSON Schema.

    Args:
        tokenizer (Tokenizer): The tokenizer to use.
        instance (JSONSchemaInstance): The instance to format.
        use_chat_format (bool): Whether to use chat format.
        system_prompt (str): The system prompt to use.

    Returns:
        (list[int]): The prompt ids.
    """
    # Only chat-style prompting is implemented for this domain.
    if not use_chat_format:
        raise NotImplementedError("JSON schema does not support non-chat format")

    # Build the few-shot conversation, then tokenize it with the
    # generation prompt appended so the model continues as the assistant.
    conversation = few_shots_messages_formatter(
        task=instance.task,
        schema=instance.json_schema,
        system_prompt=system_prompt,
    )
    return tokenizer.apply_chat_template(
        conversation=conversation,
        tokenize=True,
        add_generation_prompt=True,
    )