tethys.backend/commands/validate_checksum.ts

121 lines
4.6 KiB
TypeScript
Raw Permalink Normal View History

2023-03-03 15:54:28 +00:00
import crypto from 'crypto';
import fs from 'fs/promises'; // Use fs/promises for async file operations
2024-03-14 19:25:27 +00:00
import logger from '@adonisjs/core/services/logger';
import { BaseCommand } from '@adonisjs/core/ace';
import { CommandOptions } from '@adonisjs/core/types/ace';
import dayjs from 'dayjs';
import TethysFile from '#models/file';
import AppConfig from '#models/appconfig';
// import db from '@adonisjs/lucid/services/db'; // Import the DB service
2023-03-03 15:54:28 +00:00
/**
 * Ace command that re-validates the MD5 checksum of every file belonging to a
 * published dataset.
 *
 * Files are fetched page by page, each file is hashed from disk, and the result
 * is compared with the `md5` entry of the file's preloaded `hashvalues`.
 * Matches are logged at info level, mismatches and I/O failures at error level.
 * After all pages are processed, the current Unix timestamp is stored in the
 * `AppConfig` row identified by appid `backgroundjob` / configkey `lastjob`.
 */
export default class ValidateChecksum extends BaseCommand {
    /**
     * Command name used to run the command
     */
    public static commandName = 'validate:checksum';

    /**
     * Command description displayed in the "help" output
     */
    public static description = 'Validate the stored MD5 checksums of all files that belong to published datasets';

    public static options: CommandOptions = {
        startApp: true,
        staysAlive: false,
    };

    /** Number of files fetched (and hashed concurrently) per page. */
    private chunkSize = 100;

    /**
     * Entry point: walks all files of published datasets page by page,
     * validates each page, then records the completion timestamp.
     */
    public async run() {
        for (let page = 1; ; page += 1) {
            const files = await TethysFile.query()
                .whereHas('dataset', (dQuery) => {
                    dQuery.where('server_state', 'published'); // Only get published datasets
                })
                .orderBy('document_id', 'asc')
                // Tiebreaker: several files can share one document_id, and
                // LIMIT/OFFSET paging over a non-unique sort key may skip or
                // repeat rows between pages.
                // NOTE(review): assumes the primary key column is `id` - confirm against the model.
                .orderBy('id', 'asc')
                .preload('hashvalues')
                .forPage(page, this.chunkSize);

            // An empty page means every file has been processed.
            if (files.length === 0) {
                break;
            }

            await this.processChunk(files);
        }

        // Persist the time of this run as a Unix timestamp (seconds).
        const timestamp = dayjs().unix();
        await AppConfig.updateOrCreate({ appid: 'backgroundjob', configkey: 'lastjob' }, { configvalue: timestamp });

        logger.info(`Updated last job timestamp to: ${timestamp}`);
        logger.info(`Cron job executed at: ${dayjs.unix(timestamp).format('YYYY-MM-DD HH:mm:ss')}`);
    }

    /**
     * Validates all files of one page concurrently.
     *
     * `fetchData` handles its own errors, so a single failing file does not
     * reject the whole batch.
     *
     * @param filesArray - Files of the current page, with `hashvalues` preloaded.
     */
    private async processChunk(filesArray: TethysFile[]) {
        await Promise.all(filesArray.map((file) => this.fetchData(file)));
    }

    /**
     * Recomputes the MD5 checksum of a single file and compares it with the
     * stored `md5` hash value, logging the outcome.
     *
     * Never rejects: any error (missing file, unreadable file, malformed hash
     * records) is caught and logged together with the file id.
     *
     * @param file - File record whose `hashvalues` relation has been preloaded.
     */
    private async fetchData(file: TethysFile): Promise<void> {
        try {
            // Map hash type -> stored value; a later entry of the same type wins.
            const hashValues: { [key: string]: string } = {};
            for (const h of file.hashvalues) {
                hashValues[h.type] = h.value;
            }

            const filePath = '/storage/app/public/' + file.pathName;
            const calculatedMd5FileHash = await this.checksumFile(filePath, 'md5');

            if (hashValues['md5'] === calculatedMd5FileHash) {
                logger.info(
                    `File id ${file.id} OK: stored md5 checksum: ${calculatedMd5FileHash}, same control md5 checksum: ${hashValues['md5']}`,
                );
            } else {
                // Also reached when no md5 hash value is stored for the file.
                logger.error(
                    `File id ${file.id}: stored md5 checksum: ${calculatedMd5FileHash}, control md5 checksum: ${hashValues['md5']}`,
                );
            }
        } catch (error: unknown) {
            // Thrown values are not guaranteed to be Error instances.
            const message = error instanceof Error ? error.message : String(error);
            logger.error(`File id ${file.id} error: ${message}`);
        }
    }

    /**
     * Computes the hex digest of a file's contents.
     *
     * The file is read in fixed-size chunks rather than loaded in one piece,
     * so memory use stays bounded when many files are hashed in parallel and
     * files beyond the `fs.readFile` size limit can still be processed.
     *
     * @param path - Path of the file to hash.
     * @param hashName - Digest algorithm name accepted by `crypto.createHash`.
     * @returns The digest as a lowercase hexadecimal string.
     * @throws If the algorithm is unsupported or the file cannot be opened or read.
     */
    private async checksumFile(path: string, hashName = 'md5'): Promise<string> {
        const hash = crypto.createHash(hashName);
        const handle = await fs.open(path, 'r');
        try {
            const buffer = Buffer.alloc(64 * 1024);
            for (;;) {
                // position = null: read from the current file position and advance it.
                const { bytesRead } = await handle.read(buffer, 0, buffer.length, null);
                if (bytesRead === 0) {
                    break; // End of file
                }
                hash.update(buffer.subarray(0, bytesRead));
            }
        } finally {
            // Always release the descriptor, even when reading fails.
            await handle.close();
        }
        return hash.digest('hex');
    }
}