2023-03-03 15:54:28 +00:00
|
|
|
import crypto from 'crypto';
|
2024-09-16 15:59:46 +00:00
|
|
|
import fs from 'fs/promises'; // Use fs/promises for async file operations
|
2024-03-14 19:25:27 +00:00
|
|
|
import logger from '@adonisjs/core/services/logger';
|
2024-09-16 15:59:46 +00:00
|
|
|
import { BaseCommand } from '@adonisjs/core/ace';
|
|
|
|
import { CommandOptions } from '@adonisjs/core/types/ace';
|
|
|
|
import dayjs from 'dayjs';
|
|
|
|
import TethysFile from '#models/file';
|
|
|
|
import AppConfig from '#models/appconfig';
|
|
|
|
// import db from '@adonisjs/lucid/services/db'; // Import the DB service
|
2023-03-03 15:54:28 +00:00
|
|
|
|
|
|
|
/**
 * Ace command that verifies the integrity of stored files.
 *
 * For every file that belongs to a dataset whose `server_state` is
 * `published`, the MD5 checksum of the file on disk is recomputed and compared
 * with the `md5` hash value stored for that file. The outcome of each
 * comparison is written to the application logger. After all files have been
 * visited, the current Unix timestamp is saved to the `AppConfig` entry
 * `backgroundjob` / `lastjob`.
 */
export default class ValidateChecksum extends BaseCommand {
    /**
     * Command name used to run the command
     */
    public static commandName = 'validate:checksum';

    /**
     * Command description displayed in the "help" output
     */
    public static description = '';

    /**
     * Command options: the application is started for this command
     * (`startApp`) and the command is not flagged as long-running
     * (`staysAlive`).
     */
    public static options: CommandOptions = {
        startApp: true,
        staysAlive: false,
    };

    /** Number of files fetched from the database (and checked in parallel) per page. */
    private readonly chunkSize = 100;

    /**
     * Entry point of the command: pages through all files of published
     * datasets, validates each page, then records the time of this run.
     */
    public async run() {
        // Walk the result set page by page until an empty page is returned.
        for (let page = 1; ; page += 1) {
            const files = await TethysFile.query()
                .whereHas('dataset', (dQuery) => {
                    dQuery.where('server_state', 'published'); // Only files of published datasets
                })
                .orderBy('document_id', 'asc')
                // `document_id` alone is not unique (a dataset can own several
                // files), so add the primary key as a tie-breaker. Without a
                // total order, rows may shift between pages and be skipped or
                // processed twice.
                .orderBy('id', 'asc')
                .preload('hashvalues') // Needed by fetchData() to look up the stored md5
                .forPage(page, this.chunkSize);

            if (files.length === 0) {
                break; // No more files to process
            }

            await this.processChunk(files);
        }

        // Persist the time of this run as a Unix timestamp (seconds).
        const timestamp = dayjs().unix();
        await AppConfig.updateOrCreate({ appid: 'backgroundjob', configkey: 'lastjob' }, { configvalue: timestamp });

        logger.info(`Updated last job timestamp to: ${timestamp}`);
        logger.info(`Cron job executed at: ${dayjs.unix(timestamp).format('YYYY-MM-DD HH:mm:ss')}`);
    }

    /**
     * Validates all files of one page in parallel.
     *
     * `fetchData` handles its own errors, so a single unreadable file does not
     * reject the whole batch.
     *
     * @param filesArray - Files of the current page, with `hashvalues` preloaded.
     */
    private async processChunk(filesArray: TethysFile[]) {
        await Promise.all(filesArray.map((file) => this.fetchData(file)));
    }

    /**
     * Recomputes the MD5 checksum of a single file and logs whether it matches
     * the stored `md5` hash value.
     *
     * Any error raised while hashing (for example a missing file) is logged and
     * swallowed so that the remaining files are still checked.
     *
     * @param file - File model with its `hashvalues` relation preloaded.
     */
    private async fetchData(file: TethysFile): Promise<void> {
        // Index the stored hashes by type; on duplicate types the last one wins.
        const hashValues: Record<string, string> = {};
        for (const h of file.hashvalues) {
            hashValues[h.type] = h.value;
        }

        // Construct the file path
        const filePath = '/storage/app/public/' + file.pathName;

        try {
            const calculatedMd5FileHash = await this.checksumFile(filePath, 'md5');

            if (hashValues['md5'] === calculatedMd5FileHash) {
                logger.info(
                    `File id ${file.id} OK: stored md5 checksum: ${calculatedMd5FileHash}, same control md5 checksum: ${hashValues['md5']}`,
                );
            } else {
                // Checksums differ (or no md5 hash value is stored for this file)
                logger.error(
                    `File id ${file.id}: stored md5 checksum: ${calculatedMd5FileHash}, control md5 checksum: ${hashValues['md5']}`,
                );
            }
        } catch (error: unknown) {
            // Narrow before reading `.message`: a thrown value is not guaranteed to be an Error.
            const reason = error instanceof Error ? error.message : String(error);
            logger.error(`File id ${file.id} error: ${reason}`);
        }
    }

    /**
     * Computes the hex digest of a file.
     *
     * The file is streamed through the hash instead of being read into memory
     * in one piece, so memory use stays bounded even for large files and when
     * many files are hashed concurrently.
     *
     * @param path - Path of the file to hash.
     * @param hashName - Algorithm name passed to `crypto.createHash` (default `md5`).
     * @returns The digest as a hexadecimal string.
     * @throws If the algorithm is not supported or the file cannot be opened or read.
     */
    private async checksumFile(path: string, hashName = 'md5'): Promise<string> {
        const hash = crypto.createHash(hashName);
        const handle = await fs.open(path, 'r');

        // The stream uses the default `autoClose` setting, so it closes the
        // file handle itself when it ends, errors, or is destroyed.
        for await (const chunk of handle.createReadStream()) {
            hash.update(chunk);
        }

        return hash.digest('hex');
    }
}
|