#!/usr/bin/env python3
"""
Removes duplicate definitions ('prefixed-property', 'pseudo-class', 'pseudo-element', 'prefixed-function')
from other xml files if a definition with the same id exists in the TARGET file.
The TARGET file remains unchanged. Duplicates are deleted from all other files.

Usage:
  - Dry run (default):
      ./remove_duplicate_defs.py --file="manuallySupported/mozilla.xml"
  - Actually modify files
      ./remove_duplicate_defs.py --apply --file=manuallySupported/mozilla.xml

Notes:
  - Only direct children of the document element with the following tag names are processed:
      prefixed-property, pseudo-class, pseudo-element, prefixed-function
"""
import argparse
import os
import sys
import xml.etree.ElementTree as ET

# Sub-directory (relative to the base directory) that contains the target files.
TARGET_FILES_DIR_NAME = 'manuallySupported'

# Local (namespace-stripped) tag names of the definitions that are de-duplicated.
PROCESS_TAGS = {
    'prefixed-property',
    'pseudo-class',
    'pseudo-element',
    'prefixed-function',
}

# XML namespace used in these files: a prefix -> URI mapping, and the same URI
# wrapped in braces the way ElementTree prefixes qualified tag names.
NS = {'css': 'urn:schemas-jetbrains-com:css-xml'}
CSS_NS = '{%s}' % NS['css']

def _iter_definition_children(root: ET.Element):
    """Yield ``(element, local_tag)`` pairs for the definitions directly under *root*.

    Only direct children of *root* are examined; nested wrappers are not
    descended into. A child is yielded when its tag name, with any
    ``{namespace}`` prefix stripped, is one of ``PROCESS_TAGS``.
    """
    # Iterate over a snapshot so the caller may modify root's children safely.
    for child in tuple(root):
        tag = child.tag
        # Comments and processing instructions carry a non-string tag.
        if not isinstance(tag, str):
            continue
        # Drop everything up to and including the first '}' (the namespace part).
        _, closing_brace, remainder = tag.partition('}')
        local_name = remainder if closing_brace else tag
        if local_name in PROCESS_TAGS:
            yield child, local_name

def _gather_target_ids(target_path: str) -> dict[str, set[str]]:
    """Collect the ``id`` values of the definitions in the target file, grouped by tag.

    Returns a mapping from local tag name to the set of ids found for that
    tag; every tag in ``PROCESS_TAGS`` is present as a key, possibly with an
    empty set. If the file cannot be parsed, an error is printed to stderr
    and an empty dict is returned.
    """
    try:
        document_root = ET.parse(target_path).getroot()
    except Exception as exc:
        print(f"[ERROR] Failed to parse {target_path}: {exc}", file=sys.stderr)
        return {}

    collected: dict[str, set[str]] = {}
    for tag_name in PROCESS_TAGS:
        collected[tag_name] = set()

    for definition, tag_name in _iter_definition_children(document_root):
        definition_id = definition.get('id')
        # Definitions with a missing or empty id cannot be matched; ignore them.
        if not definition_id:
            continue
        collected.setdefault(tag_name, set()).add(definition_id)
    return collected

def _find_xml_files(dir_path: str, exclude_name: str) -> list[str]:
    """Return the names of the ``.xml`` files in *dir_path*, excluding *exclude_name*.

    Args:
        dir_path: Directory to list (not searched recursively).
        exclude_name: File name to leave out of the result.

    Returns:
        Bare file names (not full paths) in sorted order, or an empty list
        when *dir_path* is not a directory.
    """
    if not os.path.isdir(dir_path):
        return []
    # os.listdir() returns entries in arbitrary order; sort so that processing
    # order and log output are reproducible. Directories that merely have an
    # ".xml" suffix are skipped because they cannot be parsed as documents.
    return sorted(
        name
        for name in os.listdir(dir_path)
        if name.endswith('.xml')
        and name != exclude_name
        and os.path.isfile(os.path.join(dir_path, name))
    )

def _remove_duplicates_in_file(file_path: str, target_ids_by_tag: dict[str, set[str]], apply_changes: bool) -> int:
    """Report, and optionally delete, definitions in *file_path* that duplicate the target's.

    A direct child of the document element is a duplicate when its local tag
    name is a key of *target_ids_by_tag* and its ``id`` is in that tag's set.

    Args:
        file_path: Path of the XML file to inspect.
        target_ids_by_tag: Mapping of local tag name to the ids defined in the target file.
        apply_changes: When true, remove the duplicates and rewrite the file
            in place; otherwise only print what would be removed.

    Returns:
        The number of duplicates found (dry run) or removed (apply). Returns 0
        when the file cannot be read or parsed; a warning is printed to stderr.
    """
    try:
        # The default parser drops comments and processing instructions, so
        # rewriting the file would silently strip them. Keep them in the tree;
        # _iter_definition_children() skips such nodes (non-string tag).
        parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True, insert_pis=True))
        tree = ET.parse(file_path, parser=parser)
        root = tree.getroot()
    except Exception as e:
        # Best effort: an unreadable file must not abort the whole run, but the
        # user should know it was not checked.
        print(f"[WARN] Skipping {file_path}: {e}", file=sys.stderr)
        return 0

    to_remove = []
    for elem, local in _iter_definition_children(root):
        elem_id = elem.attrib.get('id')
        if elem_id and elem_id in target_ids_by_tag.get(local, set()):
            to_remove.append((elem, local, elem_id))

    if not to_remove:
        return 0

    print(f"[INFO] {os.path.basename(file_path)}: will remove {len(to_remove)} duplicate definition(s) also present in target file")
    for _, local, eid in to_remove:
        print(f"  - <{local} id=\"{eid}\"/>")

    if not apply_changes:
        return len(to_remove)

    for elem, _, _ in to_remove:
        root.remove(elem)

    # Serialize the CSS namespace as the default namespace (no "ns0:" prefixes).
    ET.register_namespace('', NS['css'])
    tree.write(file_path, encoding='utf-8', xml_declaration=True)
    print(f"[INFO] Updated {file_path}")
    return len(to_remove)


def remove_duplicates_from_files(base_directory: str, apply_changes: bool, target_file: str) -> int:
    """Remove from all other XML files the definitions that also exist in the target file.

    The target file is ``<base_directory>/<TARGET_FILES_DIR_NAME>/<target_file>``
    and is never modified. The XML files directly inside *base_directory* and
    inside the target directory are checked (no recursion).

    Args:
        base_directory: Directory that contains the XML files and the target sub-directory.
        apply_changes: When true, rewrite files; otherwise only report (dry run).
        target_file: File name of the target inside the target sub-directory.

    Returns:
        Process exit code: 0 on success (including "nothing to do"), 2 when the
        target file is missing or cannot be parsed.
    """
    target_dir = os.path.join(base_directory, TARGET_FILES_DIR_NAME)
    target_path = os.path.join(target_dir, target_file)
    if not os.path.exists(target_path):
        print(f"[ERROR] {target_path} not found", file=sys.stderr)
        return 2

    # Collect IDs from the TARGET file (to keep) and remove duplicates from all other files
    target_ids_by_tag = _gather_target_ids(target_path)
    if not target_ids_by_tag:
        # An empty mapping (no keys at all) means the target could not be parsed;
        # _gather_target_ids() has already reported the error. Do not present
        # that as a successful "nothing to do" run.
        return 2
    if not any(target_ids_by_tag.values()):
        print('[INFO] No definitions found in the target file; nothing to do.')
        return 0

    # Process other files in both directories.
    # NOTE(review): the target's file name is excluded in BOTH directories, so a
    # same-named file directly in base_directory is skipped too - confirm this is intended.
    search_dirs = [base_directory, target_dir]
    excluded_name = os.path.basename(target_file)
    total_removed = 0
    for d in search_dirs:
        for name in _find_xml_files(d, excluded_name):
            path = os.path.join(d, name)
            total_removed += _remove_duplicates_in_file(path, target_ids_by_tag, apply_changes)

    if total_removed == 0:
        print('[INFO] No duplicate definitions found in other files; no changes needed.')
    else:
        if not apply_changes:
            print('[INFO] Dry run mode. Re-run with --apply to modify the files.')
        print(f"[INFO] Total duplicates {'removed' if apply_changes else 'found'} across files: {total_removed}")
    return 0


def main():
    """Parse the command line, run the de-duplication and exit with its status code.

    Only the base name of ``--file`` is used; the file is looked up in the
    target sub-directory of the directory one level above this script.
    """
    arg_parser = argparse.ArgumentParser(
        description='Remove duplicate definitions from other XML files if they exist in the target file',
    )
    arg_parser.add_argument(
        '--apply',
        action='store_true',
        help='Apply changes (otherwise dry run)',
    )
    arg_parser.add_argument(
        '--file', '-f',
        dest='file',
        default='webkit.xml',
        help='Target XML file name (default: webkit.xml)',
    )
    options = arg_parser.parse_args()

    # The XML files live in the parent of the directory containing this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    base_dir = os.path.dirname(script_dir)
    target_name = os.path.basename(options.file)
    sys.exit(remove_duplicates_from_files(base_dir, options.apply, target_name))


# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
