import { isBrowser } from '@websolutespa/bom-core';
import { marked } from 'marked';
import { LlmChunk } from 'src/types';
import { Emitter } from './emitter';

/**
 * Adds the vendor-prefixed (`webkit*`) Web Speech API members to the global
 * `Window` type so the classes in this file can fall back to them when the
 * unprefixed names are not available.
 */
declare global {
  interface Window {
    webkitSpeechSynthesis: SpeechSynthesis;
    // NOTE(review): the members below are declared with instance types, yet
    // `webkitSpeechRecognition` is invoked with `new` elsewhere in this file —
    // confirm whether constructor types were intended.
    webkitSpeechRecognition: SpeechRecognition;
    webkitSpeechGrammarList: SpeechGrammarList;
    webkitSpeechRecognitionEvent: SpeechRecognitionEvent;
  }
}

/**
 * Selects the text-to-speech engine: `'default'` and `'elevenlabs'` each map to
 * an engine class in this file, any other value leaves synthesis without an engine.
 */
export type SynthesisMode = ('none' | 'default' | 'elevenlabs') & string;
/**
 * Selects the speech-to-text engine: `'default'` maps to the engine class in
 * this file, any other value leaves recognition without an engine.
 */
export type RecognitionMode = ('none' | 'default') & string;

/**
 * Facade over the speech engines.
 *
 * Holds the active text-to-speech and speech-to-text implementations (chosen
 * through `synthesisMode` / `recognitionMode`) and forwards calls to them.
 * Transcripts produced by the recognition engine are re-emitted as `result`
 * events.
 */
export class Speech extends Emitter {

  reader_: SpeechReader | undefined;
  textToSpeech_: TextToSpeech | undefined;
  speechToText_: SpeechToText | undefined;
  synthesisMode_: SynthesisMode = 'none';
  recognitionMode_: RecognitionMode = 'none';
  /** Language handed to the engines (synthesis voice selection, recognition). */
  lang: string = 'it';
  /** Credentials forwarded to the ElevenLabs engine. */
  textToSpeechApiKey: string | undefined;
  textToSpeechVoiceId: string | undefined;
  /** When `false`, `speak()` is a no-op. */
  enabled: boolean = false;

  /** Lazily created reader bound to this instance. */
  get reader(): SpeechReader {
    if (!this.reader_) {
      this.reader_ = new SpeechReader(this);
    }
    return this.reader_;
  }

  /**
   * Switches the text-to-speech engine. The previous engine (if any) is
   * cancelled before the new one is created; setting the current mode again
   * does nothing.
   */
  set synthesisMode(synthesisMode: SynthesisMode) {
    if (this.synthesisMode_ === synthesisMode) {
      return;
    }
    this.synthesisMode_ = synthesisMode;
    if (this.textToSpeech_) {
      this.textToSpeech_.cancel();
    }
    switch (synthesisMode) {
      case 'default':
        this.textToSpeech_ = new DefaultTextToSpeech(this.lang);
        break;
      case 'elevenlabs':
        this.textToSpeech_ = new ElevenlabsTextToSpeech(this.textToSpeechApiKey, this.textToSpeechVoiceId);
        break;
      default:
        this.textToSpeech_ = undefined;
    }
  }
  get synthesisMode() {
    return this.synthesisMode_;
  }

  /**
   * Switches the speech-to-text engine. The previous engine (if any) is
   * stopped before the new one is created; setting the current mode again
   * does nothing.
   */
  set recognitionMode(recognitionMode: RecognitionMode) {
    if (this.recognitionMode_ === recognitionMode) {
      return;
    }
    this.recognitionMode_ = recognitionMode;
    if (this.speechToText_) {
      this.speechToText_.stop();
    }
    switch (recognitionMode) {
      case 'default':
        this.speechToText_ = new DefaultSpeechToText((transcript) => {
          this.emit('result', transcript);
        });
        break;
      default:
        this.speechToText_ = undefined;
    }
  }
  get recognitionMode() {
    return this.recognitionMode_;
  }

  /** `true` when a synthesis engine is selected and reports support. */
  get hasTextToSpeechSupport(): boolean {
    // Written as an explicit conjunction: the previous `a && b ? c : false`
    // form relied on operator precedence to mean the same thing.
    return this.synthesisMode !== 'none' &&
      this.textToSpeech_ !== undefined &&
      this.textToSpeech_.hasSupport;
  }
  /** `true` when a recognition engine is selected and reports support. */
  get hasSpeechToTextSupport(): boolean {
    return this.recognitionMode !== 'none' &&
      this.speechToText_ !== undefined &&
      this.speechToText_.hasSupport;
  }
  /** `true` only when both synthesis and recognition are supported. */
  get hasSupport(): boolean {
    return this.hasTextToSpeechSupport && this.hasSpeechToTextSupport;
  }

  constructor() {
    super();
  }

  log(...rest: (string | {} | [])[]) {
    console.log('Speech', ...rest);
  }

  /** Enables or disables speaking; disabling also cancels the current engine. */
  setEnabled(enabled: boolean) {
    this.enabled = enabled;
    if (!enabled && this.textToSpeech_) {
      this.textToSpeech_.cancel();
    }
  }

  /**
   * Speaks `text` through the active engine. Ignored when `text` is empty,
   * when speech is disabled or when no engine is selected.
   */
  speak(text: string) {
    if (!text || !this.enabled) {
      return;
    }
    const textToSpeech = this.textToSpeech_;
    if (!textToSpeech) {
      return;
    }
    this.syncTextToSpeech_(textToSpeech);
    textToSpeech.speak(text);
  }

  /** Cancels the active synthesis engine; errors are logged, never thrown. */
  cancel() {
    try {
      if (this.textToSpeech_) {
        this.textToSpeech_.cancel();
      }
    } catch (error) {
      console.log('Speech.cancel.error', error);
    }
  }

  /** Starts recognition in the current `lang`, if an engine is selected. */
  recognizeStart() {
    if (this.speechToText_) {
      this.speechToText_.start(this.lang);
    }
  }

  /** Stops recognition, if an engine is selected. */
  recognizeStop() {
    if (this.speechToText_) {
      this.speechToText_.stop();
    }
  }

  /**
   * Pushes the current `lang` and ElevenLabs credentials to the engine.
   *
   * The engine is built when the mode is set, so without this step values
   * assigned afterwards (a language switch, an API key loaded later) would
   * never reach it. Recognition already receives `lang` on every start.
   */
  private syncTextToSpeech_(textToSpeech: TextToSpeech) {
    textToSpeech.lang = this.lang;
    if (textToSpeech instanceof ElevenlabsTextToSpeech) {
      // Only overwrite with values that were actually provided here.
      if (this.textToSpeechApiKey !== undefined) {
        textToSpeech.apiKey = this.textToSpeechApiKey;
      }
      if (this.textToSpeechVoiceId !== undefined) {
        textToSpeech.voiceId = this.textToSpeechVoiceId;
      }
    }
  }
}

/** Contract implemented by the text-to-speech engines in this file. */
export interface TextToSpeech {
  /** Language code of the engine. */
  lang: string;
  /** Speaks the given text; the engines in this file queue it while busy. */
  speak: (text: string) => void
  /** Drops queued text and stops the current speech. */
  cancel: () => void
  /** Whether the engine reports itself as usable. */
  get hasSupport(): boolean;
}

/**
 * Text-to-speech engine backed by the browser `speechSynthesis` object.
 *
 * One utterance is handed to the browser at a time; text received while the
 * browser is busy is kept in an internal queue and spoken when the current
 * utterance ends or fails.
 */
export class DefaultTextToSpeech implements TextToSpeech {
  private chunks_: string[] = [];

  lang: string = 'it';
  speaking: boolean = false;

  /** @param lang language prefix used to pick a voice (e.g. `it`, `en-US`). */
  constructor(lang: string) {
    this.lang = lang;
  }

  get pitch(): number {
    return 1;
  }

  get rate(): number {
    return 1;
  }

  get volume(): number {
    return 1;
  }

  /** Available voices sorted by upper-cased name; empty when unsupported. */
  get voices(): SpeechSynthesisVoice[] {
    if (!this.hasSupport) {
      return [];
    }
    // Sort a copy so the array handed out by the browser is never mutated.
    return [...this.synthesis.getVoices()].sort((a, b) => {
      const aname = a.name.toUpperCase();
      const bname = b.name.toUpperCase();
      if (aname < bname) {
        return -1;
      }
      return aname > bname ? 1 : 0;
    });
  }

  /**
   * First voice whose language starts with `lang` and whose name does not
   * contain "natural"; falls back to an English voice with the same name
   * filter. `undefined` when neither exists.
   */
  get voice(): SpeechSynthesisVoice | undefined {
    const voices = this.voices;
    const findByLang = (lang: string) => {
      // Both sides are lower-cased: voice languages look like `it-IT`, and a
      // configured `it-IT` previously never matched the lower-cased `it-it`.
      const prefix = lang.toLowerCase();
      return voices.find((x) =>
        x.lang.toLowerCase().startsWith(prefix) &&
        !x.name.toLowerCase().includes('natural')
      );
    };
    return findByLang(this.lang) ?? findByLang('en');
  }

  get synthesis(): SpeechSynthesis {
    return window.speechSynthesis || window.webkitSpeechSynthesis;
  }

  get hasSupport() {
    return isBrowser && this.synthesis != null;
  }

  /**
   * Speaks `text`, or queues it when the browser is already speaking or has
   * an utterance waiting to start.
   */
  speak(text: string): void {
    if (!this.hasSupport) {
      return this.unsupported();
    }

    const synthesis = this.synthesis;
    // `pending` covers the window between `synthesis.speak()` and the moment
    // audio starts; checking only `speaking` let a second call cancel the
    // utterance that had just been submitted.
    if (synthesis.speaking || synthesis.pending) {
      this.chunks_.push(text);
      return;
    }

    console.log('DefaultTextToSpeech.speak', text);

    synthesis.cancel();

    const voice = this.voice;
    if (!voice) {
      console.warn('DefaultTextToSpeech.speak', 'no available voice found');
      return;
    }

    const utterance = new SpeechSynthesisUtterance(text);
    utterance.onend = () => {
      this.speakNext_();
    };
    utterance.onerror = (error) => {
      console.warn('DefaultTextToSpeech.speak.onerror', error);
      // Keep draining the queue: otherwise the remaining chunks stay behind
      // and are spoken after whatever text arrives next.
      this.speakNext_();
    };
    utterance.voice = voice;
    utterance.pitch = this.pitch;
    utterance.rate = this.rate;
    utterance.volume = this.volume;
    synthesis.speak(utterance);
  }

  /** Empties the queue and cancels any speaking or pending utterance. */
  cancel() {
    this.chunks_ = [];
    if (!this.hasSupport) {
      return;
    }
    const synthesis = this.synthesis;
    if (synthesis.speaking || synthesis.pending) {
      synthesis.cancel();
    }
  }

  unsupported() {
    if (isBrowser) {
      this.log('Speech Synthesis not supported on this device');
    }
  }

  log(...rest: (string | {} | [])[]) {
    console.log('DefaultTextToSpeech', ...rest);
  }

  /** Speaks the next queued chunk, if any, once the browser is idle. */
  private speakNext_(): void {
    const synthesis = this.synthesis;
    // A late event from a cancelled utterance must not reshuffle the queue
    // while a newer utterance is active.
    if (synthesis.speaking || synthesis.pending) {
      return;
    }
    const next = this.chunks_.shift();
    if (next) {
      this.speak(next);
    }
  }
}

/**
 * Text-to-speech engine that requests audio from the ElevenLabs HTTP API and
 * plays it through the Web Audio API.
 *
 * One request is processed at a time; text received while busy is queued and
 * spoken once the current request has finished playing or failed.
 */
export class ElevenlabsTextToSpeech implements TextToSpeech {
  private chunks_: string[] = [];
  private audioSource_: AudioBufferSourceNode | undefined;
  private audioContext_: AudioContext | undefined;
  private abortController_: AbortController | undefined;
  /** Incremented on every request and on cancel, to recognise stale requests. */
  private requestId_: number = 0;

  lang: string = 'it';
  speaking: boolean = false;
  apiKey: string | undefined;
  voiceId: string | undefined;

  get hasSupport() {
    return true;
  }

  constructor(apiKey?: string, voiceId?: string) {
    this.apiKey = apiKey;
    this.voiceId = voiceId;
  }

  /**
   * Speaks `text`, or queues it when a previous request is still in progress.
   * Logs an error and returns when `apiKey` or `voiceId` is missing.
   */
  speak(text: string): void {
    if (!this.apiKey) {
      console.error('ElevenlabsTextToSpeech.error: missing apiKey!');
      return;
    }

    if (!this.voiceId) {
      console.error('ElevenlabsTextToSpeech.error: missing voiceId!');
      return;
    }

    if (this.speaking) {
      this.chunks_.push(text);
      return;
    }

    // Obtained before flipping `speaking`, so a failure here cannot leave the
    // engine stuck in the busy state.
    const ctx = this.getAudioContext_();
    this.speaking = true;

    const requestId = ++this.requestId_;
    const isCurrent = () => requestId === this.requestId_;
    const abortController = new AbortController();
    this.abortController_ = abortController;

    const options: RequestInit = {
      method: 'POST',
      headers: {
        'xi-api-key': this.apiKey,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        voice_settings: {
          stability: 1,
          similarity_boost: 1,
        },
        text: text,
        model_id: 'eleven_multilingual_v2',
      }),
      signal: abortController.signal,
    };
    const url = `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}?output_format=mp3_22050_32`;
    fetch(url, options)
      .then(response => {
        // An error response carries no audio; fail here with the status
        // instead of with an opaque decoding error.
        if (!response.ok) {
          throw new Error(`ElevenlabsTextToSpeech: request failed with status ${response.status}`);
        }
        return response.arrayBuffer();
      })
      .then((arrayBuffer: ArrayBuffer) => ctx.decodeAudioData(arrayBuffer))
      .then((audioBuffer: AudioBuffer) => {
        // Cancelled while downloading/decoding: do not start playback.
        return isCurrent() ? this.play_(ctx, audioBuffer) : undefined;
      })
      .then(audioSource => {
        if (audioSource) {
          console.log('ElevenlabsTextToSpeech.speak.success', audioSource);
        }
      })
      .catch((error: unknown) => {
        // Failures of a cancelled request (e.g. the abort itself) are expected.
        if (isCurrent()) {
          console.error('ElevenlabsTextToSpeech.speak.error', error);
        }
      })
      .finally(() => {
        // A cancelled request must not touch state owned by a newer request.
        if (!isCurrent()) {
          return;
        }
        this.abortController_ = undefined;
        this.speaking = false;
        const next = this.chunks_.shift();
        if (next) {
          this.speak(next);
        }
      });
  }

  /**
   * Empties the queue, aborts the request in flight and stops playback.
   */
  cancel() {
    this.chunks_ = [];
    // Invalidates the request in flight, if any.
    this.requestId_++;
    if (this.abortController_) {
      this.abortController_.abort();
      this.abortController_ = undefined;
    }
    const audioSource = this.audioSource_;
    if (audioSource) {
      this.audioSource_ = undefined;
      try {
        audioSource.stop();
      } catch (error) {
        console.warn('ElevenlabsTextToSpeech.cancel.error', error);
      }
    }
    this.speaking = false;
  }

  /**
   * Returns the shared `AudioContext`, creating it on first use. A single
   * context is reused because one was previously created per call and never
   * closed.
   */
  private getAudioContext_(): AudioContext {
    if (!this.audioContext_ || this.audioContext_.state === 'closed') {
      this.audioContext_ = new AudioContext();
    }
    const ctx = this.audioContext_;
    if (ctx.state === 'suspended') {
      ctx.resume().catch((error: unknown) => {
        console.warn('ElevenlabsTextToSpeech.resume.error', error);
      });
    }
    return ctx;
  }

  /** Plays `audioBuffer`; resolves with the source node once playback ends. */
  private play_(ctx: AudioContext, audioBuffer: AudioBuffer): Promise<AudioBufferSourceNode> {
    return new Promise((resolve, reject) => {
      const audioSource = ctx.createBufferSource();
      // Only clear the field while it still points at this node; a newer
      // request may already have stored its own.
      const release = () => {
        if (this.audioSource_ === audioSource) {
          this.audioSource_ = undefined;
        }
      };
      audioSource.buffer = audioBuffer;
      audioSource.connect(ctx.destination);
      audioSource.addEventListener('ended', () => {
        release();
        resolve(audioSource);
      });
      audioSource.addEventListener('error', (error) => {
        release();
        reject(error);
      });
      this.audioSource_ = audioSource;
      audioSource.start(ctx.currentTime);
    });
  }
}

/** Contract implemented by the speech-to-text engine in this file. */
export interface SpeechToText {
  /** Starts recognition using the given language. */
  start: (lang: string) => void;
  /** Stops recognition. */
  stop: () => void;
  /** Whether the engine reports itself as usable. */
  get hasSupport(): boolean;
}

/**
 * Speech-to-text engine backed by the browser `SpeechRecognition` API (or its
 * `webkit` prefixed variant). Each recognition result is reduced to its most
 * confident transcript and handed to the callback given at construction.
 */
export class DefaultSpeechToText implements SpeechToText {

  lang: string = 'it';
  enabled: boolean = false;
  callback: (transcript: string) => void;

  private recognizer_: SpeechRecognition | undefined;
  private recognizerStarted_: boolean = false;
  /** `true` between `recognizer.start()` and the `start`/`end`/`error` event. */
  private recognizerStarting_: boolean = false;
  /** Handle of the delayed stop scheduled by `stop()`, if any. */
  private stopTimer_: ReturnType<typeof setTimeout> | undefined;

  /** @param callback receives the transcript of every recognition result. */
  constructor(callback: (transcript: string) => void) {
    this.callback = callback;
  }

  log(...rest: (string | {} | [])[]) {
    console.log('Speech', ...rest);
  }

  /**
   * Starts recognition in `lang`. Does nothing when recognition is already
   * running or starting. A stop that is still pending is withdrawn, so that
   * it cannot end the session the caller has just asked for.
   */
  start(lang: string) {
    if (!this.hasSupport) {
      return this.unsupported();
    }
    if (this.stopTimer_ !== undefined) {
      clearTimeout(this.stopTimer_);
      this.stopTimer_ = undefined;
    }
    // `recognizerStarted_` only flips on the `start` event; without the
    // "starting" flag a second call in that window would call
    // `recognizer.start()` again, which throws.
    if (this.recognizerStarted_ || this.recognizerStarting_) {
      return;
    }
    const recognizer = this.recognizer;
    recognizer.lang = lang;
    this.recognizerStarting_ = true;
    try {
      recognizer.start();
    } catch (error) {
      this.recognizerStarting_ = false;
      console.warn('DefaultSpeechToText.start.error', error);
    }
  }

  /**
   * Stops recognition one second later, provided it is running at that
   * moment. Repeated calls while a stop is pending keep the first deadline.
   */
  stop() {
    if (!this.hasSupport) {
      return this.unsupported();
    }
    if (this.stopTimer_ !== undefined) {
      return;
    }
    this.stopTimer_ = setTimeout(() => {
      this.stopTimer_ = undefined;
      if (this.recognizerStarted_) {
        this.recognizer.stop();
      }
    }, 1000);
  }

  /**
   * Picks the alternative with the highest confidence from the result at
   * `event.resultIndex` and passes its transcript to the callback. An empty
   * string is passed when there is no result or no alternative.
   */
  onResult(event: SpeechRecognitionEvent) {
    const result: SpeechRecognitionResult | undefined = event.results[event.resultIndex];
    const alternatives: SpeechRecognitionAlternative[] = result ? Array.from(result) : [];
    let best: SpeechRecognitionAlternative | undefined;
    for (const alternative of alternatives) {
      // Strict comparison keeps the earliest alternative on equal confidence.
      if (!best || alternative.confidence > best.confidence) {
        best = alternative;
      }
    }
    this.callback(best ? best.transcript : '');
  }

  unsupported() {
    if (isBrowser) {
      this.log('Speech Recognition not supported on this device');
    }
  }

  get recognitionEvent() {
    return window.SpeechRecognitionEvent || window.webkitSpeechRecognitionEvent;
  }

  get grammarList() {
    return window.SpeechGrammarList || window.webkitSpeechGrammarList;
  }

  get recognition() {
    return window.SpeechRecognition || window.webkitSpeechRecognition;
  }

  get hasSupport() {
    return isBrowser && this.recognition != null;
  }

  /**
   * The recognizer instance, created and configured on first access:
   * continuous, final results only, a single alternative. No grammar list is
   * attached.
   */
  get recognizer() {
    if (this.recognizer_) {
      return this.recognizer_;
    }
    const recognizer = new this.recognition();
    recognizer.lang = this.lang;
    recognizer.continuous = true;
    recognizer.interimResults = false;
    recognizer.maxAlternatives = 1;
    recognizer.addEventListener('start', () => {
      this.recognizerStarting_ = false;
      this.recognizerStarted_ = true;
    });
    recognizer.addEventListener('end', () => {
      this.recognizerStarting_ = false;
      this.recognizerStarted_ = false;
    });
    recognizer.addEventListener('error', () => {
      // A failed start must not leave the engine locked in "starting".
      this.recognizerStarting_ = false;
    });
    recognizer.addEventListener('result', (event: SpeechRecognitionEvent) => {
      this.onResult(event);
    });
    this.recognizer_ = recognizer;
    return recognizer;
  }
}

/**
 * Turns a stream of markdown fragments into spoken sentences.
 *
 * Fragments are buffered until one contains `:`, `.`, `?` or `!`; the text up
 * to that character is converted to plain text and passed to `Speech.speak`.
 * `end()` flushes whatever is left in the buffer.
 */
export class SpeechReader {
  private buffer_: string = '';
  private chunks: string[] = [];
  private speech: Speech;

  constructor(speech: Speech) {
    this.speech = speech;
  }

  /**
   * Extracts the plain text of `markdown` by concatenating its `text` tokens.
   * In the browser each token is additionally run through `DOMParser` and
   * read back as text.
   */
  markdownToChunk(markdown: string): string {
    let text = '';
    const parser = isBrowser ? new DOMParser() : undefined;
    marked.parse(markdown, {
      walkTokens: (token) => {
        if (token.type !== 'text') {
          return;
        }
        const { text: tokenText, tokens: children } = token as { text: string; tokens?: unknown[] };
        // A `text` token that has child tokens is a container (e.g. the
        // content of a list item); its children are visited on their own, so
        // counting it too would read the same words twice.
        if (Array.isArray(children) && children.length > 0) {
          return;
        }
        if (parser) {
          const document = parser.parseFromString(`<div>${tokenText}</div>`, 'text/html');
          // `?? ''` keeps a missing value from being appended as "undefined".
          text += (document.firstElementChild as HTMLElement | null)?.innerText ?? '';
        } else {
          text += tokenText;
        }
      },
    });
    return text;
  }

  /**
   * Adds a fragment. When it contains a sentence delimiter, everything
   * buffered up to and including the first delimiter is spoken and the
   * remainder of the fragment becomes the new buffer. Ignored while speech is
   * disabled.
   */
  add(chunk: string) {
    if (!this.speech.enabled) {
      return;
    }
    const matches = chunk.match(/(:|\.|\?|!)/);
    if (!matches || matches.index === undefined) {
      this.buffer_ += chunk;
      return;
    }
    const sentenceEnd = matches.index + 1;
    const text = this.buffer_ + chunk.substring(0, sentenceEnd);
    this.buffer_ = chunk.substring(sentenceEnd);
    const parsedText = this.markdownToChunk(text);
    // Blank results (e.g. a sentence inside a code fence) are skipped. They
    // used to stay in the queue forever, which stopped every later sentence
    // from being spoken until `end()`.
    if (parsedText.trim().length > 0) {
      this.chunks.push(parsedText);
    }
    if (this.chunks.length > 0) {
      this.speak();
    }
  }

  /** Speaks whatever is still buffered. Ignored while speech is disabled. */
  end() {
    if (!this.speech.enabled) {
      return;
    }
    const parsedText = this.markdownToChunk(this.buffer_);
    this.buffer_ = '';
    if (parsedText.trim().length > 0) {
      this.chunks.push(parsedText);
    }
    if (this.chunks.length > 0) {
      this.speak();
    }
  }

  /**
   * Joins the pending chunks, collapses whitespace runs to a single space and
   * hands the result to `Speech.speak`, clearing the pending chunks.
   */
  speak() {
    const text = this.chunks.join('').replace(/(\s+)/g, ' ');
    if (text) {
      this.chunks = [];
      this.speech.speak(text);
    }
  }
}

/**
 * Collects the textual parts of a chunk list.
 *
 * Plain strings are kept as they are; object chunks contribute their
 * `content` only when their `type` is `'string'`. Every other chunk is
 * skipped. Order is preserved.
 */
export function chunksToTexts(chunks: LlmChunk[]): string[] {
  const collected: string[] = [];
  for (const item of chunks) {
    if (typeof item === 'string') {
      collected.push(item);
      continue;
    }
    if (item.type === 'string') {
      collected.push(item.content);
    }
  }
  return collected;
}
