8000 HTTPError 502 Bad Gateway for big number of strings · Issue #118 · DeepLcom/deepl-python · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content
HTTPError 502 Bad Gateway for big number of strings #118
Open
@CojocaruLiviuGabriel

Description

@CojocaruLiviuGabriel

When I use the API to translate a large array of strings (322, to be specific), after a while I receive an HTTPError 502 Bad Gateway error. I tried to split the input into smaller pieces, but I still get the same error.

image


def parse_xml_file(file_path):
    """Read an XML file and collect the text of each <source>/<translation> pair.

    Only <message> elements that are direct children of a <context> element
    directly under the root are inspected, and a message contributes a pair
    only when it has both a <source> and a <translation> child.

    Returns a 3-tuple ``(pairs, tree, root)`` where ``pairs`` is a list of
    ``(source_text, translation_text)`` tuples in document order (either text
    may be ``None`` for an empty element), ``tree`` is the parsed ElementTree
    and ``root`` is its root element.
    """
    document = ET.parse(file_path)
    top = document.getroot()

    # Flatten the context -> message nesting into a single stream of messages.
    messages = (
        msg
        for ctx in top.findall('context')
        for msg in ctx.findall('message')
    )

    pairs = []
    for msg in messages:
        src_node = msg.find('source')
        dst_node = msg.find('translation')
        # Messages missing either tag are skipped entirely.
        if src_node is None or dst_node is None:
            continue
        pairs.append((src_node.text, dst_node.text))

    return pairs, document, top

def translate_batch(texts, target_lang='ES'):
    """Translate each text in *texts* with one DeepL API request per text.

    Args:
        texts: Sequence of strings to translate.
        target_lang: Target language code sent as ``target_lang``.

    Returns:
        A list with exactly one entry per input text, in the same order.
        An entry is the translated string, or ``""`` when the request for
        that text ultimately failed (the error is printed, not raised).

    Transient failures (HTTP 429/5xx, connection errors, timeouts) are
    retried with exponential backoff before the text is given up on.
    """
    # Nothing to do: avoid building a payload or sleeping for empty input.
    if not texts:
        return []

    max_attempts = 4                      # 1 initial try + 3 retries
    request_timeout = 30                  # seconds; without it a stalled call never returns
    retry_statuses = {429, 500, 502, 503, 504}
    translations = []

    for text in texts:
        # Fresh payload per request instead of mutating one shared dict.
        payload = {
            'auth_key': API_KEY,
            'target_lang': target_lang,
            'formality': 'default',  # Adjust formality if needed
            'text': text,
        }
        translated = ""  # fallback keeps the output aligned with the input

        for attempt in range(1, max_attempts + 1):
            try:
                response = requests.post(
                    DEEPL_API_URL,
                    data=payload,
                    proxies=proxies,
                    timeout=request_timeout,
                )
                response.raise_for_status()  # Check for HTTP request errors
                translated = response.json()['translations'][0]['text']
                break
            except requests.exceptions.RequestException as e:
                status = getattr(e.response, 'status_code', None)
                transient = status in retry_statuses or isinstance(
                    e,
                    (requests.exceptions.ConnectionError,
                     requests.exceptions.Timeout),
                )
                if transient and attempt < max_attempts:
                    # Back off 2s, 4s, 8s before retrying a transient failure.
                    time.sleep(2 ** attempt)
                    continue
                print(f"Error during translation: {e}")
                break
            except (KeyError, IndexError, TypeError, ValueError) as e:
                # Unexpected response body: record the failure for this text
                # instead of aborting the remaining translations.
                print(f"Error during translation: {e}")
                break

        translations.append(translated)
        time.sleep(2)  # Add a delay to avoid overloading the API

    return translations

def translate_texts(texts, target_lang='ES'):
    """Translate *texts* chunk by chunk and return the results as one flat list.

    The input is sliced into consecutive chunks of at most ``BATCH_SIZE``
    items; each chunk is handed to ``translate_batch`` and the per-chunk
    results are concatenated in order.
    """
    return [
        translated
        for offset in range(0, len(texts), BATCH_SIZE)
        for translated in translate_batch(texts[offset:offset + BATCH_SIZE], target_lang)
    ]

def update_spanish_file(english_file, spanish_file):
    """Translate the English file's translations and store them in the Spanish file.

    Args:
        english_file: Path of the XML file whose <translation> texts are
            sent for translation.
        spanish_file: Path of the XML file that is updated in place. A
            <message> is updated when its <source> text equals the <source>
            text of a translated English message.

    Messages whose English translation is empty are not sent for
    translation, and messages whose translation came back empty (failed
    request) keep their existing Spanish text. Prints the elapsed time.
    """
    # Start timing
    start_time = time.time()

    # Step 1: Parse the English XML file and extract translations
    english_source_translation_pairs, _, _ = parse_xml_file(english_file)

    # Keep only the pairs that actually have text to translate. The SAME
    # filtered list is used both to build the request and to pair up the
    # results; zipping the results against the unfiltered list would shift
    # every translation after the first empty entry onto the wrong source.
    translatable_pairs = [pair for pair in english_source_translation_pairs if pair[1]]
    english_translations = [pair[1] for pair in translatable_pairs]

    # Step 2: Translate the English translations into Spanish
    translated_texts = translate_texts(english_translations, target_lang='ES')

    # Map source text -> Spanish text. Empty results are left out so a failed
    # request does not wipe an existing Spanish translation.
    translated_by_source = {
        english_source: translated_text
        for (english_source, _), translated_text in zip(translatable_pairs, translated_texts)
        if translated_text
    }

    # Step 3: Parse the Spanish XML file
    _, spanish_tree, spanish_root = parse_xml_file(spanish_file)

    # Step 4: Update the Spanish translation in the right <message> tags
    # (single pass over the tree with a dict lookup per message).
    for context in spanish_root.findall('context'):
        for message in context.findall('message'):
            source = message.find('source')
            translation = message.find('translation')

            if source is None or translation is None:
                continue
            if source.text in translated_by_source:
                translation.text = translated_by_source[source.text]

    # Step 5: Write the updated Spanish XML back to the file
    spanish_tree.write(spanish_file, encoding='utf-8', xml_declaration=True)
    write_xml_with_doctype(spanish_file, spanish_tree)

    # End timing
    end_time = time.time()

    # Calculate and print the execution time
    execution_time = end_time - start_time
    print(f"Execution time: {execution_time:.2f} seconds")

# Script entry point: translate the sample English file into the Spanish one.
if __name__ == "__main__":
    english_file, spanish_file = 'english_file.ts', 'spanish_file.ts'
    update_spanish_file(english_file, spanish_file)

Above is my code

Edit: I am using the free version for the moment, but I plan on upgrading to PRO. If I raise the sleep to be 2 seconds I receive code 504

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions

      0