#!/usr/bin/env python3 """Replace cell contents in Excel and CSV files.""" from __future__ import annotations import argparse import re import sys from pathlib import Path from typing import Any try: import pandas as pd except ImportError: pd = None SUPPORTED_SUFFIXES = {".xlsx", ".csv"} def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description="Replace cell content in Excel or CSV files." ) parser.add_argument("--input", required=True, help="Input Excel/CSV file path") parser.add_argument("--search", required=True, help="Text or pattern to search for") parser.add_argument("--replace", required=True, help="Replacement text") parser.add_argument( "--output", help="Output file path. Defaults to overwriting the input file.", ) parser.add_argument( "--sheet", help="Sheet name for Excel files. Defaults to the first sheet.", ) parser.add_argument( "--regex", action="store_true", help="Treat --search as a regular expression.", ) parser.add_argument( "--column", help="Specific column name to update. Defaults to all columns.", ) parser.add_argument( "--case", action="store_true", help="Use case-sensitive matching. Default is case-insensitive.", ) return parser def fail(message: str, exit_code: int = 1) -> int: print(f"Error: {message}", file=sys.stderr) return exit_code def require_dependencies(file_type: str) -> None: if pd is None: package_hint = "pandas openpyxl" if file_type == "excel" else "pandas" raise RuntimeError( f"Missing required dependencies. Install with: pip install {package_hint}" ) def compile_pattern(search: str, use_regex: bool, case_sensitive: bool) -> re.Pattern[str]: pattern_text = search if use_regex else re.escape(search) flags = 0 if case_sensitive else re.IGNORECASE try: return re.compile(pattern_text, flags) except re.error as exc: raise ValueError(f"Invalid regular expression: {exc}") from exc def replace_value( value: Any, pattern: re.Pattern[str], replacement: str, ) -> tuple[Any, int]: if value is None: return value, 0 if not isinstance(value, str): return value, 0 new_value, count = pattern.subn(replacement, value) if count == 0: return value, 0 return new_value, count def target_columns(dataframe: "pd.DataFrame", column_name: str | None) -> list[str]: if column_name is None: return list(dataframe.columns) if column_name not in dataframe.columns: available = ", ".join(str(col) for col in dataframe.columns) raise KeyError( f"Column '{column_name}' not found. Available columns: {available or '(none)'}" ) return [column_name] def replace_in_dataframe( dataframe: "pd.DataFrame", pattern: re.Pattern[str], replacement: str, column_name: str | None, ) -> int: replacements = 0 for column in target_columns(dataframe, column_name): new_values = [] for value in dataframe[column].tolist(): new_value, count = replace_value(value, pattern, replacement) new_values.append(new_value) replacements += count dataframe[column] = new_values return replacements def read_csv(input_path: Path) -> "pd.DataFrame": return pd.read_csv(input_path, dtype=object, keep_default_na=False, encoding="utf-8-sig") def write_csv(dataframe: "pd.DataFrame", output_path: Path) -> None: dataframe.to_csv(output_path, index=False, encoding="utf-8-sig") def load_excel_sheets(input_path: Path) -> dict[str, "pd.DataFrame"]: with pd.ExcelFile(input_path) as workbook: return { sheet_name: pd.read_excel( workbook, sheet_name=sheet_name, dtype=object, keep_default_na=False, ) for sheet_name in workbook.sheet_names } def resolve_sheet_name(sheet_names: list[str], requested_sheet: str | None) -> str: if not sheet_names: raise ValueError("The Excel file does not contain any sheets.") if requested_sheet is None: return sheet_names[0] if requested_sheet not in sheet_names: available = ", ".join(sheet_names) raise KeyError( f"Sheet '{requested_sheet}' not found. Available sheets: {available}" ) return requested_sheet def write_excel(sheets: dict[str, "pd.DataFrame"], output_path: Path) -> None: with pd.ExcelWriter(output_path, engine="openpyxl") as writer: for sheet_name, dataframe in sheets.items(): dataframe.to_excel(writer, sheet_name=sheet_name, index=False) def process_csv( input_path: Path, output_path: Path, pattern: re.Pattern[str], replacement: str, column_name: str | None, ) -> int: require_dependencies("csv") dataframe = read_csv(input_path) replacements = replace_in_dataframe(dataframe, pattern, replacement, column_name) write_csv(dataframe, output_path) return replacements def process_excel( input_path: Path, output_path: Path, sheet_name: str | None, pattern: re.Pattern[str], replacement: str, column_name: str | None, ) -> tuple[int, str]: require_dependencies("excel") sheets = load_excel_sheets(input_path) selected_sheet = resolve_sheet_name(list(sheets.keys()), sheet_name) replacements = replace_in_dataframe( sheets[selected_sheet], pattern, replacement, column_name ) write_excel(sheets, output_path) return replacements, selected_sheet def main() -> int: args = build_parser().parse_args() input_path = Path(args.input).expanduser() output_path = Path(args.output).expanduser() if args.output else input_path if not input_path.exists(): return fail(f"Input file not found: {input_path}") if not input_path.is_file(): return fail(f"Input path is not a file: {input_path}") if input_path.suffix.lower() not in SUPPORTED_SUFFIXES: return fail( "Unsupported file type. Only .xlsx and .csv files are supported." ) try: output_path.parent.mkdir(parents=True, exist_ok=True) except OSError as exc: return fail(f"Unable to create output directory: {exc}") try: pattern = compile_pattern(args.search, args.regex, args.case) if input_path.suffix.lower() == ".csv": replacements = process_csv( input_path=input_path, output_path=output_path, pattern=pattern, replacement=args.replace, column_name=args.column, ) print( f"Completed. Replacements made: {replacements}. Output: {output_path}" ) return 0 replacements, selected_sheet = process_excel( input_path=input_path, output_path=output_path, sheet_name=args.sheet, pattern=pattern, replacement=args.replace, column_name=args.column, ) print( "Completed. " f"Sheet: {selected_sheet}. Replacements made: {replacements}. " f"Output: {output_path}" ) return 0 except (ValueError, KeyError, RuntimeError) as exc: return fail(str(exc)) except PermissionError as exc: return fail(f"Permission denied: {exc}") except Exception as exc: return fail(f"Unexpected failure while processing the file: {exc}") if __name__ == "__main__": sys.exit(main())