import crypto from 'crypto';
import fs from 'fs/promises'; // Use fs/promises for async file operations
import logger from '@adonisjs/core/services/logger';
import { BaseCommand } from '@adonisjs/core/ace';
import { CommandOptions } from '@adonisjs/core/types/ace';
import dayjs from 'dayjs';
import TethysFile from '#models/file';
import AppConfig from '#models/appconfig';
// import db from '@adonisjs/lucid/services/db'; // Import the DB service

export default class ValidateChecksum extends BaseCommand {
  /**
   * Command name used to run the command
   */
  public static commandName = 'validate:checksum';

  /**
   * Command description displayed in the "help" output
   */
  public static description = 'Validate the stored MD5 checksums of all files belonging to published datasets';

  public static options: CommandOptions = {
    startApp: true,
    staysAlive: false,
  };

  private chunkSize = 100; // Chunk size for pagination

  public async run() {
    let page = 1; // Start with the first page
    let hasMoreFiles = true; // Flag to check if there are more files to process

    // Loop to process files in chunks
    while (hasMoreFiles) {
      // Query a chunk of published files from the database with pagination
      const files = await TethysFile.query()
        .whereHas('dataset', (dQuery) => {
          dQuery.where('server_state', 'published'); // Only get published datasets
        })
        .orderBy('document_id', 'asc') // Order by document ID
        .preload('hashvalues') // Preload the stored hash values
        .forPage(page, this.chunkSize); // Get files for the current page

      // Check if there are no more files to process
      if (files.length === 0) {
        hasMoreFiles = false; // No more files, exit the loop
        break;
      }

      // Process the current chunk of files
      await this.processChunk(files);

      // Move to the next page
      page += 1;
    }

    // Write the current timestamp into the database
    const timestamp = dayjs().unix(); // Get Unix timestamp

    // Update the timestamp in the appconfigs table
    // await db.from('appconfigs')
    //   .where('appid', 'backgroundjob')
    //   .where('configkey', 'lastjob')
    //   .update({ configvalue: timestamp });
    await AppConfig.updateOrCreate({ appid: 'backgroundjob', configkey: 'lastjob' }, { configvalue: timestamp });

    // Log the updated timestamp
    logger.info(`Updated last job timestamp to: ${timestamp}`);
    logger.info(`Cron job executed at: ${dayjs.unix(timestamp).format('YYYY-MM-DD HH:mm:ss')}`);
  }

  private async processChunk(filesArray: TethysFile[]) {
    // Validate all files in the chunk in parallel
    await Promise.all(
      filesArray.map((file) => this.fetchData(file)), // Check each file's checksum
    );
  }

  private async fetchData(file: TethysFile): Promise<void> {
    // Map the preloaded hash rows to a { type: value } lookup object
    const hashValues = file.hashvalues.reduce(
      (acc, h) => {
        acc[h.type] = h.value; // Map hash type to its value
        return acc;
      },
      {} as { [key: string]: string },
    );

    // Construct the file path
    const filePath = '/storage/app/public/' + file.pathName;

    try {
      // Calculate the MD5 checksum of the file on disk
      const calculatedMd5FileHash = await this.checksumFile(filePath, 'md5');

      // Compare the calculated hash with the stored hash
      if (hashValues['md5'] === calculatedMd5FileHash) {
        logger.info(
          `File id ${file.id} OK: stored md5 checksum: ${hashValues['md5']}, matching control md5 checksum: ${calculatedMd5FileHash}`,
        );
      } else {
        // Log an error if the checksums do not match
        logger.error(
          `File id ${file.id}: stored md5 checksum: ${hashValues['md5']}, control md5 checksum: ${calculatedMd5FileHash}`,
        );
      }
    } catch (error) {
      // Log any error encountered during processing (e.g. a missing or unreadable file)
      logger.error(`File id ${file.id} error: ${error.message}`);
    }
  }

  private async checksumFile(path: string, hashName = 'md5'): Promise<string> {
    const hash = crypto.createHash(hashName); // Create a hash object
    const data = await fs.readFile(path); // Read the whole file asynchronously
    hash.update(data); // Update the hash with the file contents
    return hash.digest('hex'); // Return the digest in hexadecimal format
  }
}
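
/*
 * A minimal streaming variant of `checksumFile` (a sketch, not part of the
 * original command): `fs.readFile` loads each file fully into memory, and with
 * chunks of up to 100 files hashed in parallel this can be costly for large
 * files. Hashing a read stream keeps memory usage bounded. Only Node's built-in
 * `fs` and `crypto` modules are assumed; the function name is hypothetical.
 */
// import { createReadStream } from 'fs';
//
// function checksumFileStreaming(path: string, hashName = 'md5'): Promise<string> {
//   return new Promise((resolve, reject) => {
//     const hash = crypto.createHash(hashName); // Create a hash object
//     const stream = createReadStream(path); // Stream the file instead of buffering it
//     stream.on('error', reject); // Propagate read errors (e.g. missing file)
//     stream.on('data', (chunk) => hash.update(chunk)); // Feed each chunk into the hash
//     stream.on('end', () => resolve(hash.digest('hex'))); // Resolve with the hex digest
//   });
// }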