Open
Description
When I use the API to translate a big array of strings ( 322 to be more specific ), after a while I receive HTTPError 502 Bad Gateway error. I tried to 'split' in smaller pieces, but the I still get the same error.
def parse_xml_file(file_path):
"""Parse the XML file and extract <source> and <translation> tag contents."""
tree = ET.parse(file_path)
root = tree.getroot()
source_translation_pairs = []
for context in root.findall('context'):
for message in context.findall('message'):
source = message.find('source')
translation = message.find('translation')
if source is not None and translation is not None:
source_text = source.text
translation_text = translation.text
source_translation_pairs.append((source_text, translation_text))
return source_translation_pairs, tree, root
def translate_batch(texts, target_lang='ES'):
"""Translate a batch of texts to the specified target language using the DeepL API."""
data = {
'auth_key': API_KEY,
'target_lang': target_lang,
'formality': 'default', # Adjust formality if needed
}
translations = []
for text in texts:
data['text'] = text
try:
response = requests.post(DEEPL_API_URL, data=data, proxies=proxies)
response.raise_for_status() # Check for HTTP request errors
# Extract translated text
translation_result = response.json()
translations.append(translation_result['translations'][0]['text'])
except requests.exceptions.RequestException as e:
print(f"Error during translation: {e}")
translations.append("") # Append empty string in case of failure
time.sleep(2) # Add a delay to avoid overloading the API
return translations
def translate_texts(texts, target_lang='ES'):
"""Translate a list of texts in batches to avoid overloading the API."""
translated_texts = []
for i in range(0, len(texts), BATCH_SIZE):
batch = texts[i:i + BATCH_SIZE]
translated_batch = translate_batch(batch, target_lang)
translated_texts.extend(translated_batch)
return translated_texts
def update_spanish_file(english_file, spanish_file):
"""Translate English text and update Spanish XML file."""
# Start timing
start_time = time.time()
# Step 1: Parse the English XML file and extract translations
english_source_translation_pairs, _, _ = parse_xml_file(english_file)
# Extract only English translations for translation via DeepL
english_translations = [pair[1] for pair in english_source_translation_pairs if pair[1]]
# Step 2: Translate the English translations into Spanish
translated_texts = translate_texts(english_translations, target_lang='ES')
# Step 3: Parse the Spanish XML file
_, spanish_tree, spanish_root = parse_xml_file(spanish_file)
# Step 4: Update the Spanish translation in the right <message> tags
for (english_source, _), translated_text in zip(english_source_translation_pairs, translated_texts):
for context in spanish_root.findall('context'):
for message in context.findall('message'):
source = message.find('source')
translation = message.find('translation')
if source is not None and translation is not None and source.text == english_source:
translation.text = translated_text
# Step 5: Write the updated Spanish XML back to the file
spanish_tree.write(spanish_file, encoding='utf-8', xml_declaration=True)
write_xml_with_doctype(spanish_file, spanish_tree)
# End timing
end_time = time.time()
# Calculate and print the execution time
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.2f} seconds")
# Example usage:
if __name__ == "__main__":
english_file = 'english_file.ts'
spanish_file = 'spanish_file.ts'
update_spanish_file(english_file, spanish_file)
Above is my code
Edit: I am using the free version for the moment, but I plan on upgrading to PRO. If I raise the sleep to be 2 seconds I receive code 504
Metadata
Metadata
Assignees
Labels
No labels