Description
Hello,
I am using the Llama 3.1 8B model and running inference through ExLlamaV2 with Formatron for structured output. It works well for Literal data types, but for string outputs and lists of strings it does not: the results come back blank.
Below are the two schema classes for which I am unable to get any output.
import json

from formatron.schemas.pydantic import ClassSchema
from pydantic import Field, conlist


class CallSummaryPromptFormat(ClassSchema):
    conversation_summary: str = Field(max_length=250 * 10)


class AreasOfImprovementPromptFormat(ClassSchema):
    suggestion_type: str = Field(max_length=8 * 10)
    explain_improvement_suggestion: str = Field(max_length=80 * 10)


# Use Case - Areas of Improvement
class AgentAreasOfImprovementPromptFormat(ClassSchema):
    agent_areas_of_improvement: conlist(AreasOfImprovementPromptFormat, max_length=5)
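For comparison, a schema built only from Literal fields does produce output on my side. A minimal sketch of that kind of schema (the class name, field name, and literal values here are made up for illustration):

from typing import Literal

from formatron.schemas.pydantic import ClassSchema


# Hypothetical example: with a Literal-typed field like this,
# constrained generation returns a value as expected.
class CallCategoryPromptFormat(ClassSchema):
    call_category: Literal["complaint", "query", "feedback"]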
Below is the code implementation:
from typing import Dict, List, Tuple

from exllamav2 import ExLlamaV2, ExLlamaV2Cache_Q4, ExLlamaV2Config, ExLlamaV2Tokenizer
from exllamav2.generator import ExLlamaV2DynamicGenerator, ExLlamaV2Sampler
from formatron.formatter import FormatterBuilder
from formatron.integrations.exllamav2 import create_formatter_filter
from lmformatenforcer.integrations.exllamav2 import build_token_enforcer_tokenizer_data


class MihupExllamaLLM:
    def __init__(self):
        model_dir = "/app/Llama-3.1-8B-Instruct-exl2"
        # model_dir = "/app/mlabonne_NeuralDaredevil-8B-abliterated-5_0bpw_exl2"
        config = ExLlamaV2Config(model_dir)
        # config.fasttensors = True
        self.model = ExLlamaV2(config)
        # ExLlamaV2Cache
        self.cache = ExLlamaV2Cache_Q4(self.model, max_seq_len=256 * 96, lazy=True)  # 32768 - 8200 MB # 24576 - 7900 MB
        self.model.load_autosplit(self.cache, progress=True)
        print("Loading tokenizer...")
        self.tokenizer = ExLlamaV2Tokenizer(config)
        # lm-format-enforcer helper (not used by the Formatron filters below)
        self.tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
        self.generator = ExLlamaV2DynamicGenerator(
            model=self.model,
            cache=self.cache,
            tokenizer=self.tokenizer,
        )
        self.gen_settings = ExLlamaV2Sampler.Settings(
            temperature=0.0,  # Set to 0 for deterministic output
            top_k=1,  # Only consider the most likely token
            top_p=1.0,  # No nucleus sampling
            token_repetition_penalty=1.0,  # No repetition penalty
        )
        self.generator.warmup()
    def run_mihup_llm_inference(self, call_transcript: str, prompt_tuples: List[Tuple]) -> List[Dict]:
        self.cache.reset()
        common_transcript = format_transcript_text(call_transcript)

        prompts = []
        filters = []
        use_case_ids = []
        for upper_tuple in prompt_tuples:
            use_case_id = upper_tuple[1]
            use_case_ids.append(use_case_id)

            p = upper_tuple[0]
            prompt_str = p[0]
            # print(f"use_case_id : {use_case_id}, prompt : {prompt_str}")
            prompt_question_combined = format_llama3_prompt(mihup_system_prompt, common_transcript + prompt_str)
            prompts.append(prompt_question_combined)

            filter_schema = p[2]
            formatter = FormatterBuilder()
            print("before appending", formatter)
            formatter.append_line(f"{formatter.json(filter_schema, capture_name='json')}")
            print("after appending", formatter)
            filters.append([
                create_formatter_filter(self.model, self.tokenizer, formatter),
            ])

        outputs = self.generator.generate(
            prompt=prompts,
            filters=filters,
            filter_prefer_eos=True,
            max_new_tokens=2048,
            add_bos=True,
            stop_conditions=[self.tokenizer.eos_token_id],
            gen_settings=self.gen_settings,
            completion_only=True,
            encode_special_tokens=True,
        )
        print("Output is", outputs)

        final_output = []
        use_case_ids_to_be_considered = []
        for i in range(len(outputs)):
            try:
                output_json = json.loads(outputs[i])
                final_output.append(output_json)
                use_case_ids_to_be_considered.append(use_case_ids[i])
            except ValueError:
                print("error: {0} , use_case_id :{1}".format(outputs[i], use_case_ids[i]))

        use_case_id_key = "use_case_id"
        for idx in range(len(final_output)):
            final_output[idx][use_case_id_key] = use_case_ids_to_be_considered[idx]

        return final_output
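For completeness, this is roughly how I call the class (a sketch; the transcript text, prompt strings, and use-case IDs are placeholders — each outer tuple is (prompt details, use_case_id), and the inner tuple carries the prompt text at index 0 and the schema class at index 2):

# Usage sketch; all values are placeholders.
llm = MihupExllamaLLM()

prompt_tuples = [
    # inner tuple: (prompt text, <unused here>, schema class); outer tuple: (inner tuple, use_case_id)
    (("Summarise the conversation.", None, CallSummaryPromptFormat), 101),
    (("List areas of improvement for the agent.", None, AgentAreasOfImprovementPromptFormat), 102),
]

results = llm.run_mihup_llm_inference(call_transcript="...", prompt_tuples=prompt_tuples)
print(results)  # expected: list of dicts with a "use_case_id" key added; the str fields come back blank, which is the issue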