8000 Rolling importer fixes by mokimo · Pull Request #4413 · adobecom/milo · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Rolling importer fixes #4413

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/import/localPathsToImport.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Do not commit this file.
// Any paths you add here during a local import run are imported directly,
// so you can steer exactly which articles get imported
// and skip retrieving the logs altogether.
export default [
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compared to localPathsToImport, which actually, when set, would import those specific paths.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to have these notes as a comment in the file itself, so whoever runs the script knows which to use?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still not clear about the difference between the two files. I see that localPathsToImport is read from, while importingPaths is written to.

Copy link
Contributor Author
@mokimo mokimo Jun 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to add a more elaborate explanation... You're right, localPathsToImport is being read and we only import those articles. Handy for tracking down failures and debugging specific imports.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The file now named paths-that-are-being-currently-imported just shows whatever is being imported, based on either the logs or the local file.
It comes in handy when the logs contain articles that were 'previewed' but not published, so it avoids any confusion.

// '/tr/products/workfront/login'
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// do not commit this file
// this is a temporary file to debug the import process
// this is the exact list of articles that are being imported
48 changes: 35 additions & 13 deletions .github/workflows/import/poll-logs.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import fs from 'fs';
import importUrl from './index.js';
import LOCAL_DEBUG_ENTRIES from './LOCAL_DEBUG_ENTRIES.js';
import { getImsToken } from './daFetch.js';
import localPathsToImport from './localPathsToImport.js';

const { env, exit } = process;
const {
Expand Down Expand Up @@ -63,7 +64,7 @@ function getISOSinceXDaysAgo(days) {
}

function getWorkflowRunUrl() {
if (GITHUB_SERVER_URL && toRepo && GITHUB_RUN_ID) {
if (GITHUB_SERVER_URL && GITHUB_RUN_ID) {
return `${GITHUB_SERVER_URL}/adobecom/milo/actions/runs/${GITHUB_RUN_ID}`;
}
return null;
Expand All @@ -90,14 +91,14 @@ const slackNotification = (text) => {
* @param {string} siteName - The name of the site (e.g., 'da-bacom', 'bacom'). Used for the filename.
* @param {string} baseUrl - The base URL for the log endpoint (e.g., 'https://admin.hlx.page/log/adobecom/da-bacom/main').
*/
async function fetchLogsForSite(siteName, baseUrl, fromParam) {
async function fetchLogsForSite(siteName, baseUrl, fromParam, toParam) {
if (LOCAL_DEBUG_ENTRIES.length && USE_LOCAL_DEBUG_ENTRIES) {
console.log('Using local entries from LOCAL_DEBUG_ENTRIES.js');
return LOCAL_DEBUG_ENTRIES;
}

console.log(`Fetching logs for site: ${siteName} from ${baseUrl}...`);
const initialUrl = `${baseUrl}?from=${fromParam}`;
const initialUrl = `${baseUrl}?from=${fromParam}&to=${toParam}`;
const entries = [];
let totalFetched = 0;

Expand Down Expand Up @@ -202,31 +203,36 @@ async function getLivePaths(entries, logLink) {
}

const saveLivePaths = (livePaths) => {
fs.writeFileSync(".github/workflows/import/importingPaths.js", livePaths.join('\n'));
fs.writeFileSync(".github/workflows/import/paths-that-are-being-currently-imported.js", livePaths.join('\n'));
}

async function main() {
await getImsToken();
const entries = await fetchLogsForSite(
const TO_PARAM = process.env.LAST_RUN_ISO_TO || new Date().toISOString();
if (localPathsToImport.length) console.log("Importing paths from local environment");
const entries = localPathsToImport.length ? localPathsToImport : await fetchLogsForSite(
ROLLING_IMPORT_POLL_LOGS_FROM_REPO,
`https://admin.hlx.page/log/adobecom/${ROLLING_IMPORT_POLL_LOGS_FROM_REPO}`,
FROM_PARAM
FROM_PARAM,
TO_PARAM
);
const logLink = `Log Link: https://admin.hlx.page/log/adobecom/${ROLLING_IMPORT_POLL_LOGS_FROM_REPO}?from=${FROM_PARAM}`
const logLink = `Log Link: https://admin.hlx.page/log/adobecom/${ROLLING_IMPORT_POLL_LOGS_FROM_REPO}?from=${FROM_PARAM}&to=${TO_PARAM}`;
if(!entries?.length) {
console.log(`No entries found in the logs, exiting. ${logLink}`);
await slackNotification(`No entries found, exiting ${logLink}`);
return;
}
const livePaths = await getLivePaths(entries, logLink);
const livePaths = localPathsToImport.length ? localPathsToImport : await getLivePaths(entries, logLink);
const importedMedia = new Set();
let result = {
success: 0,
error: 0,
errorPaths: [],
initiallyFailingPaths: [],
successPaths: [],
errorPaths: []
};
if(LOCAL_RUN) saveLivePaths(livePaths)

for (const path of livePaths) {
queue.add(() =>
importUrl(path, importedMedia)
Expand All @@ -237,20 +243,36 @@ async function main() {
console.log(
`Progress: Success: ${result.success} | Error: ${result.error}`
);

})
.catch(() => {
.catch((e) => {
result.error++;
result.errorPaths.push(path);
result.initiallyFailingPaths.push(path);
if (result.error % 10 === 0)
console.log(
`Progress: Success: ${result.success} | Error: ${result.error}`
);

})
);
}

await queue.onIdle();

// Second pass: retry every path whose first import attempt failed.
// A retry that succeeds moves the path from the error tally to the success
// tally; a retry that fails again is recorded in result.errorPaths.
for (const erroredPath of result.initiallyFailingPaths) {
  console.log("Retrying erroring-path:" + erroredPath)
  queue.add(() => importUrl(erroredPath, importedMedia)
    .then(() => {
      result.success++;
      // Record the path that was actually retried. `path` belongs to the
      // first-pass loop and does not refer to the retried path here; using it
      // would either throw (sending a successful retry into the .catch below)
      // or record the wrong value.
      result.successPaths.push(erroredPath);
      result.error--;
    })
    .catch(() => {
      result.errorPaths.push(erroredPath);
    })
  )
}

await queue.onIdle();

if (!LOCAL_RUN) {
await slackNotification(
`Succcessful: ${result.success} paths | Failed: ${result.error} paths.`
Expand Down
Loading
0