import React, { useCallback, useState } from 'react';
import { useDropzone } from 'react-dropzone';
import * as mammoth from 'mammoth';

const WordImporter = ({ onImport }) => {
    const [isLoading, setIsLoading] = useState(false);
    const [error, setError] = useState(null);

    const onDrop = useCallback(async (acceptedFiles) => {
        setError(null);
        const file = acceptedFiles[0];

        if (!file) {
            setError('No file selected');
            return;
        }

        setIsLoading(true);

        try {
            // Read file as ArrayBuffer
            const arrayBuffer = await new Promise((resolve, reject) => {
                const reader = new FileReader();
                reader.onload = () => resolve(reader.result);
                reader.onerror = reject;
                reader.readAsArrayBuffer(file);
            });

            // Convert to HTML
            const result = await mammoth.convertToHtml({ arrayBuffer });
            const cleanedHtml = cleanWordHtml(result.value);
            onImport(cleanedHtml);

        } catch (err) {
            console.error('Conversion error:', err);
            setError('Failed to process Word file. Please ensure it is a .docx file.');
        } finally {
            setIsLoading(false);
        }
    }, [onImport]);

    const { getRootProps, getInputProps, isDragActive } = useDropzone({
        onDrop,
        accept: {
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx']
        },
        multiple: false
    });

    return (
        <div {...getRootProps()} className={`dropzone ${isDragActive ? 'active' : ''}`}>
            <input {...getInputProps()} />
            {isLoading ? (
                <p>Processing Word document...</p>
            ) : isDragActive ? (
                <p>Drop the Word file here...</p>
            ) : (
                <p>Drag & drop a .docx file here, or click to select</p>
            )}
            {error && <p className="error">{error}</p>}
        </div>
    );
};

// Tags that only carry Word/Office metadata and never user-visible content.
const WORD_ONLY_TAGS = new Set([
    'o:p', 'v:rect', 'style', 'xml', 'meta', 'head', 'title'
]);

// Tag-name prefixes treated as Word-specific namespaces
// (covers w:wordDocument, m:math and any sibling elements).
const WORD_NAMESPACE_PREFIXES = ['w:', 'm:'];

// Attributes that survive cleaning; everything else is stripped. Inline
// formatting such as <strong>/<em>/<u> is expressed as elements, not
// attributes, and is therefore unaffected by this list.
const KEPT_ATTRIBUTES = new Set(['href', 'src', 'alt']);

// Kept attributes whose value is a URL and must be scheme-checked.
const URL_ATTRIBUTES = new Set(['href', 'src']);

// Elements that count as content even though they contain no text.
const NON_TEXT_CONTENT_SELECTOR = 'img, picture, svg, video, audio, canvas';

/**
 * Tells whether a URL attribute value uses a script-executing scheme.
 *
 * @param {string} url - Raw attribute value.
 * @returns {boolean} True for javascript: / vbscript: URLs.
 */
function hasScriptScheme(url) {
    // Whitespace and control characters are dropped before matching so that
    // values like "java\nscript:" cannot slip past the scheme test.
    const compact = url.replace(/[\u0000-\u0020]/g, '');
    return /^(?:javascript|vbscript):/i.test(compact);
}

/**
 * Cleans HTML converted from a Word document.
 *
 * Removes Word-specific elements, strips every attribute except href, src
 * and alt (dropping script-scheme URLs), deletes paragraphs/spans that have
 * neither text nor embedded media, and unwraps the remaining spans.
 *
 * @param {string} html - HTML markup to clean.
 * @returns {string} The cleaned HTML markup.
 */
function cleanWordHtml(html) {
    // Parse inside a separate document that has no browsing context, so
    // markup from an untrusted file cannot load resources or fire inline
    // event handlers (e.g. <img onerror>) while it is being cleaned.
    const inertDocument = document.implementation.createHTMLDocument('');
    const container = inertDocument.createElement('div');
    container.innerHTML = html;

    // Snapshot the live collection: elements are removed while iterating.
    Array.from(container.getElementsByTagName('*')).forEach(el => {
        // localName is lower-case for parsed HTML elements, whereas tagName
        // is upper-cased and would never match the lower-case names here.
        const name = el.localName;
        const isWordElement = WORD_ONLY_TAGS.has(name)
            || WORD_NAMESPACE_PREFIXES.some(prefix => name.startsWith(prefix));

        if (isWordElement) {
            el.remove();
            return;
        }

        Array.from(el.attributes).forEach(attr => {
            const isAllowed = KEPT_ATTRIBUTES.has(attr.name);
            const isUnsafeUrl = URL_ATTRIBUTES.has(attr.name) && hasScriptScheme(attr.value);
            if (!isAllowed || isUnsafeUrl) {
                el.removeAttribute(attr.name);
            }
        });
    });

    // Clean up empty paragraphs and spans
    container.querySelectorAll('p, span').forEach(el => {
        // trim() also strips non-breaking spaces, so "&nbsp;"-only elements
        // count as having no text.
        const hasText = el.textContent.trim() !== '';
        // Image-only paragraphs have no text but are real content.
        const hasMedia = el.querySelector(NON_TEXT_CONTENT_SELECTOR) !== null;

        if (!hasText && !hasMedia) {
            el.remove();
            return;
        }

        if (el.localName === 'span') {
            // Replace the span with its children, preserving their order.
            const parent = el.parentNode;
            while (el.firstChild) {
                parent.insertBefore(el.firstChild, el);
            }
            parent.removeChild(el);
        }
    });

    return container.innerHTML;
}

export default WordImporter;