Datatypes Not Supported for Structured Output · Issue #28 · Dan-wanna-M/formatron · GitHub
Datatypes Not Supported for Structured Output #28
Open
@UTSAV-44

Description


Hello,

I am using the Llama 3.1 8B model and running inference through ExLlamaV2 with Formatron for structured output. It works well for Literal data types, but for a string output and a list of strings it does not work: the results are blank.
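
For example, for a schema with a single str field I would expect JSON along the lines of the snippet below (the summary text is only an illustrative placeholder), but the generation comes back as an empty string:

{"conversation_summary": "Customer called to ask about their billing statement and requested a refund."}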

Sharing the two classes for which I am not able to get output:

import json
from formatron.schemas.pydantic import ClassSchema
from pydantic import Field, conlist

class CallSummaryPromptFormat(ClassSchema):
    conversation_summary: str = Field(max_length=250 * 10)

class AreasOfImprovementPromptFormat(ClassSchema):
    suggestion_type: str = Field(max_length=8 * 10)
    explain_improvement_suggestion: str = Field(max_length=80 * 10)

# Use Case - Areas of Improvement
class AgentAreasOfImprovementPromptFormat(ClassSchema):
    agent_areas_of_improvement: conlist(AreasOfImprovementPromptFormat, max_length=5)

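For comparison, a Literal-based schema of this shape does produce output correctly (the class and field names here are hypothetical, just to illustrate the case that works):

from typing import Literal

from formatron.schemas.pydantic import ClassSchema

# Hypothetical example of the Literal case that works correctly
class SentimentPromptFormat(ClassSchema):
    customer_sentiment: Literal["positive", "neutral", "negative"]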


Below I am sharing the code implementation:

import json
from typing import Dict, List, Tuple

from exllamav2 import ExLlamaV2, ExLlamaV2Cache_Q4, ExLlamaV2Config, ExLlamaV2Tokenizer
from exllamav2.generator import ExLlamaV2DynamicGenerator, ExLlamaV2Sampler
from formatron.formatter import FormatterBuilder
from formatron.integrations.exllamav2 import create_formatter_filter
from lmformatenforcer.integrations.exllamav2 import build_token_enforcer_tokenizer_data


class MihupExllamaLLM:
    def __init__(self):

        model_dir = "/app/Llama-3.1-8B-Instruct-exl2"
        # model_dir = "/app/mlabonne_NeuralDaredevil-8B-abliterated-5_0bpw_exl2"
        config = ExLlamaV2Config(model_dir)

        # config.fasttensors = True
        self.model = ExLlamaV2(config)

        # Q4-quantized KV cache; observed sizes: max_seq_len 32768 -> 8200 MB, 24576 -> 7900 MB
        self.cache = ExLlamaV2Cache_Q4(self.model, max_seq_len=256 * 96, lazy=True)
        self.model.load_autosplit(self.cache, progress=True)

        print("Loading tokenizer...")
        self.tokenizer = ExLlamaV2Tokenizer(config)
        self.tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)

        self.generator = ExLlamaV2DynamicGenerator(
            model=self.model,
            cache=self.cache,
            tokenizer=self.tokenizer,
        )

        self.gen_settings = ExLlamaV2Sampler.Settings(
            temperature=0.0,  # Set to 0 for deterministic output
            top_k=1,  # Only consider the most likely token
            top_p=1.0,  # No nucleus sampling
            token_repetition_penalty=1.0,  # No repetition penalty
        )

        self.generator.warmup()

    def run_mihup_llm_inference(self, call_transcript: str, prompt_tuples: List[Tuple]) -> List[Dict]:
        self.cache.reset()

        common_transcript = format_transcript_text(call_transcript)
        prompts = []
        filters = []
        use_case_ids = []
        for upper_tuple in prompt_tuples:
            use_case_id = upper_tuple[1]
            use_case_ids.append(use_case_id)
            p = upper_tuple[0]
            prompt_str = p[0]
            # print(f"use_case_id : {use_case_id}, prompt : {prompt_str}")
            prompt_question_combined = format_llama3_prompt(mihup_system_prompt, common_transcript + prompt_str)
            prompts.append(prompt_question_combined)
            filter_schema = p[2]

            formatter = FormatterBuilder()
            print("before appending", formatter)
            formatter.append_line(f"{formatter.json(filter_schema, capture_name='json')}")

            print("after  appending",formatter)

            filters.append([
                create_formatter_filter(self.model, self.tokenizer, formatter),
            ])

        outputs = self.generator.generate(
            prompt=prompts,
            filters=filters,
            filter_prefer_eos=True,
            max_new_tokens=2048,
            add_bos=True,
            stop_conditions=[self.tokenizer.eos_token_id],
            gen_settings=self.gen_settings,
            completion_only=True,
            encode_special_tokens=True,
        )
        print("Output is",outputs)
        final_output = []
        use_case_ids_to_be_considered = []
        for i in range(len(outputs)):
            try:
                output_json = json.loads(outputs[i])
                final_output.append(output_json)
                use_case_ids_to_be_considered.append(use_case_ids[i])
            except ValueError:
                print("failed to parse output: {0}, use_case_id: {1}".format(outputs[i], use_case_ids[i]))

        use_case_id_key = "use_case_id"
        for idx in range(len(final_output)):
            final_output[idx][use_case_id_key] = use_case_ids_to_be_considered[idx]

        return final_output
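
To make this easier to reproduce in isolation, here is a minimal sketch that runs a single schema through a single prompt, assuming the model, tokenizer, and generator are loaded as in __init__ above (the prompt text is a placeholder):

llm = MihupExllamaLLM()

formatter = FormatterBuilder()
formatter.append_line(f"{formatter.json(CallSummaryPromptFormat, capture_name='json')}")
str_filter = create_formatter_filter(llm.model, llm.tokenizer, formatter)

prompt = format_llama3_prompt(mihup_system_prompt, "Summarize the following call: ...")
output = llm.generator.generate(
    prompt=prompt,
    filters=[str_filter],
    filter_prefer_eos=True,
    max_new_tokens=512,
    add_bos=True,
    stop_conditions=[llm.tokenizer.eos_token_id],
    gen_settings=llm.gen_settings,
    completion_only=True,
    encode_special_tokens=True,
)
print(repr(output))  # expected: a JSON object with conversation_summary; observed: blank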
