import { createWorker, PSM } from 'tesseract.js';

// The image should be resized before running OCR to reduce processing time.
/**
 * OCR helpers built on tesseract.js.
 *
 * Currently exposes a single check that looks for "load tender" style
 * phrases in the header strip of a document image.
 */
export class OCRService {
  /** Lower-case phrases whose presence in the header marks a load tender document. */
  private static readonly loadTenderKeyWords = ['loadtender', 'load tender', 'rate confirmation', 'load confirmation', 'rate agreement'];

  /** Width, in pixels, of the rectangle handed to the OCR engine. */
  public static readonly IMAGE_MAX_WIDTH = 1920;

  /** Height, in pixels, of the strip at the top of the image that is scanned. */
  private static readonly HEADER_HEIGHT = 500;

  /**
   * Runs OCR on the top strip of `file` and reports which load tender key
   * phrase was found there.
   *
   * The search stops at the first phrase (in `loadTenderKeyWords` order) that
   * occurs in the recognized text, so the result holds at most one element.
   *
   * This is a best-effort check: any failure while creating the worker or
   * recognizing the image is logged and reported as "nothing detected".
   *
   * @param file Image to inspect, in any form accepted by `worker.recognize`.
   * @returns An array containing the first matching key phrase (a string),
   *   or an empty array when nothing matched or OCR failed. Never rejects.
   */
  static async checkLoadTenderDocument(file: string | File | Blob): Promise<any[]> {
    try {
      const worker = await createWorker('eng', 1, {
        corePath: '../../assets/tesseract.js/tesseract-core-simd.wasm.js',
        workerPath: '../../assets/tesseract.js/worker.min.js',
        langPath: '../../assets/tesseract.js/data'
      });

      try {
        await worker.setParameters({
          // Automatic page segmentation without orientation/script detection (OSD).
          // NOTE(review): Tesseract documents mode 2 (AUTO_ONLY) as "no OSD, or OCR";
          // PSM.AUTO may be what was intended here - confirm.
          tessedit_pageseg_mode: PSM.AUTO_ONLY
        });

        // Detecting a load tender document only needs the keywords in the header,
        // so restrict recognition to a rectangle at the top to cut run time.
        const { data: { text } } = await worker.recognize(file, {
          rectangle: {
            top: 0,
            left: 0,
            width: OCRService.IMAGE_MAX_WIDTH,
            height: OCRService.HEADER_HEIGHT
          },
        });

        const headerText = text.toLowerCase();
        const firstMatch = OCRService.loadTenderKeyWords.find((key) => headerText.includes(key));
        return firstMatch === undefined ? [] : [firstMatch];
      } finally {
        // Always release the worker, otherwise every call leaks one.
        // A failed shutdown must not discard a result that was already computed.
        await worker.terminate().catch((error: unknown) => {
          console.warn('OCRService: failed to terminate OCR worker', error);
        });
      }
    } catch (error) {
      // Best-effort contract: callers get an empty result instead of a rejection.
      console.warn('OCRService: load tender detection failed', error);
      return [];
    }
  }
}