feat: implement ERP AI Assistant Phase 1

Backend (FastAPI + SQLAlchemy + Claude API + RAG): - Config management with Pydantic v2 - Database engine with connection pooling and SQL injection prevention - AI engine with Claude API integration (support custom base URL) - RAG engine with ChromaDB and sentence-transformers - Requirement analysis service - Config generation service - Executor engine with SQL validation - REST API endpoints: /analyze, /generate, /execute Frontend (Vue 3 + Element Plus + Pinia): - Complete 3-step workflow: analyze → generate → execute - Step indicator with progress visualization - Analysis result display with field table - SQL preview with monospace font - Execute confirmation dialog with safety warning - Execution result display - State management with Pinia - API service integration Security: - SQL injection prevention with parameterized queries - Dangerous SQL operation blocking - Database password URL encoding - Transaction auto-rollback - Pydantic config validation Features: - Natural language requirement analysis - Automated SQL configuration generation - Safe execution with human review - LAN access support - Custom Claude API endpoint support Documentation: - README with quick start guide - Quick start guide - LAN access configuration - Dependency fixes guide - Claude API configuration - Git operation guide - Implementation report Dependencies fixed: - numpy<2.0.0 for chromadb compatibility - sentence-transformers==2.7.0 for huggingface_hub compatibility Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 14:23:20 +00:00
commit acd73431ae
60 changed files with 11284 additions and 0 deletions
--- a/backend/app/core/init.py
+++ b/backend/app/core/init.py
@@ -0,0 +1 @@
+"""Core modules"""
--- a/backend/app/core/ai_engine.py
+++ b/backend/app/core/ai_engine.py
@@ -0,0 +1,120 @@
+"""AI Engine for ERP AI Assistant.
+
+This module provides the ClaudeEngine class that wraps Claude API calls
+and provides JSON parsing utilities.
+"""
+
+import json
+import re
+from typing import Any
+
+import anthropic
+from loguru import logger
+
+from app.config import get_settings
+
+
+class ClaudeEngine:
+    """Engine for interacting with Claude API.
+
+    This class wraps the Anthropic Claude API client and provides
+    utilities for parsing JSON responses from Claude.
+    """
+
+    def __init__(self) -> None:
+        """Initialize Claude engine with settings."""
+        settings = get_settings()
+
+        # Initialize Anthropic client with optional custom base_url
+        client_kwargs = {"api_key": settings.ANTHROPIC_API_KEY}
+        if settings.ANTHROPIC_BASE_URL:
+            client_kwargs["base_url"] = settings.ANTHROPIC_BASE_URL
+            logger.info(f"Using custom Anthropic base URL: {settings.ANTHROPIC_BASE_URL}")
+
+        self.client = anthropic.AsyncAnthropic(**client_kwargs)
+        self.model = settings.CLAUDE_MODEL
+        self.max_tokens = settings.CLAUDE_MAX_TOKENS
+        self.temperature = settings.CLAUDE_TEMPERATURE
+
+    def parse_json_response(self, content: str) -> dict[str, Any]:
+        """Parse JSON from Claude responses.
+
+        Attempts multiple parsing strategies:
+        1. Direct JSON parse
+        2. Extract from markdown code blocks
+        3. Extract any {...} block
+
+        Args:
+            content: The response content from Claude
+
+        Returns:
+            Parsed JSON as a dictionary
+
+        Raises:
+            ValueError: If JSON cannot be parsed using any strategy
+        """
+        if not content or not content.strip():
+            raise ValueError("Empty content provided")
+
+        # Strategy 1: Try direct JSON parse
+        try:
+            return json.loads(content)
+        except json.JSONDecodeError:
+            pass
+
+        # Strategy 2: Try extracting from markdown code blocks
+        json_code_block_pattern = r'```json\s*(\{.*?\})\s*```'
+        json_match = re.search(json_code_block_pattern, content, re.DOTALL)
+        if json_match:
+            try:
+                return json.loads(json_match.group(1))
+            except json.JSONDecodeError:
+                pass
+
+        # Also try any code block (not just json tagged)
+        code_block_pattern = r'```\s*(\{.*?\})\s*```'
+        code_block_match = re.search(code_block_pattern, content, re.DOTALL)
+        if code_block_match:
+            try:
+                return json.loads(code_block_match.group(1))
+            except json.JSONDecodeError:
+                pass
+
+        # Strategy 3: Try extracting any {...} block
+        # Find balanced braces
+        brace_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
+        json_blocks = re.findall(brace_pattern, content, re.DOTALL)
+        for json_block in json_blocks:
+            try:
+                return json.loads(json_block)
+            except json.JSONDecodeError:
+                continue
+
+        # All strategies failed
+        logger.error(f"无法解析 Claude 返回的 JSON: {content[:200]}")
+        raise ValueError("无法解析 Claude 返回的 JSON，请检查响应格式")
+
+    async def call_claude(
+        self,
+        messages: list[dict[str, str]],
+        temperature: float | None = None
+    ) -> str:
+        """Call Claude API.
+
+        Args:
+            messages: List of message dictionaries with 'role' and 'content'
+            temperature: Optional temperature override (0-2)
+
+        Returns:
+            The text content from Claude's response
+
+        Raises:
+            Exception: If the API call fails
+        """
+        response = await self.client.messages.create(
+            model=self.model,
+            max_tokens=self.max_tokens,
+            temperature=temperature if temperature is not None else self.temperature,
+            messages=messages
+        )
+        return response.content[0].text
--- a/backend/app/core/db_engine.py
+++ b/backend/app/core/db_engine.py
@@ -0,0 +1,78 @@
+from typing import Optional
+from sqlalchemy import create_engine, text
+from sqlalchemy.orm import sessionmaker
+from contextlib import contextmanager
+from loguru import logger
+from app.config import get_settings
+
+
+class DatabaseEngine:
+    """数据库操作引擎"""
+
+    def __init__(self):
+        settings = get_settings()
+        self.engine = create_engine(
+            settings.DATABASE_URL,
+            pool_size=20,
+            max_overflow=10,
+            pool_pre_ping=True,
+            echo=settings.DEBUG
+        )
+        self.Session = sessionmaker(bind=self.engine)
+
+    @contextmanager
+    def get_session(self):
+        """获取数据库会话（上下文管理器）"""
+        session = self.Session()
+        try:
+            yield session
+            session.commit()
+        except Exception as e:
+            session.rollback()
+            logger.error(f"数据库操作失败：{e}")
+            raise
+        finally:
+            session.close()
+
+    def execute_sql(self, sql: str, params: Optional[dict] = None) -> list:
+        """执行单条 SQL"""
+        with self.get_session() as session:
+            result = session.execute(text(sql), params or {})
+            return result.fetchall()
+
+    def execute_transaction(self, sql_list: list, params_list: Optional[list] = None) -> bool:
+        """执行事务（多条 SQL）"""
+        params_list = params_list or [None] * len(sql_list)
+        with self.get_session() as session:
+            for sql, params in zip(sql_list, params_list):
+                session.execute(text(sql), params or {})
+        return True
+
+    def get_table_structure(self, table_name: str):
+        """获取表结构（安全参数化查询）"""
+        sql = """
+        SELECT
+            COLUMN_NAME,
+            DATA_TYPE,
+            CHARACTER_MAXIMUM_LENGTH,
+            IS_NULLABLE,
+            COLUMN_DEFAULT
+        FROM INFORMATION_SCHEMA.COLUMNS
+        WHERE TABLE_NAME = :table_name
+        ORDER BY ORDINAL_POSITION
+        """
+        return self.execute_sql(sql, {"table_name": table_name})
+
+    def table_exists(self, table_name: str) -> bool:
+        """检查表是否存在（安全参数化查询）"""
+        sql = """
+        SELECT COUNT(*)
+        FROM INFORMATION_SCHEMA.TABLES
+        WHERE TABLE_NAME = :table_name
+        """
+        result = self.execute_sql(sql, {"table_name": table_name})
+        return result[0][0] > 0
+
+    def dispose(self):
+        """关闭连接池，释放资源"""
+        self.engine.dispose()
--- a/backend/app/core/executor.py
+++ b/backend/app/core/executor.py
@@ -0,0 +1,147 @@
+"""Config Executor for ERP AI Assistant.
+
+This module provides the ConfigExecutor class for validating and executing
+SQL configuration statements with safety checks.
+"""
+
+import re
+from typing import List, Tuple, Dict, Any
+
+from loguru import logger
+
+from app.core.db_engine import DatabaseEngine
+
+
+class ConfigExecutor:
+    """Executor for SQL configuration statements with safety validation.
+
+    This class validates SQL statements against dangerous operations before
+    execution and provides transaction-based execution with rollback support.
+    """
+
+    # Dangerous SQL keywords that should be blocked
+    DANGEROUS_KEYWORDS = [
+        r"DROP\s+DATABASE",
+        r"DROP\s+TABLE",
+        r"TRUNCATE\s+TABLE",
+        r"DELETE\s+FROM",
+        r"UPDATE\s+.*\s+SET",
+        r"ALTER\s+TABLE\s+.*\s+DROP"
+    ]
+
+    def __init__(self) -> None:
+        """Initialize executor with database engine."""
+        self.db_engine = DatabaseEngine()
+        logger.info("ConfigExecutor initialized")
+
+    def validate_sql(self, sql: str) -> Tuple[bool, str]:
+        """Validate SQL statement for safety.
+
+        Checks SQL against a list of dangerous keywords/patterns to prevent
+        destructive operations.
+
+        Args:
+            sql: SQL statement to validate
+
+        Returns:
+            Tuple of (is_valid, message) where is_valid indicates if SQL is safe
+        """
+        if not sql or not sql.strip():
+            return False, "SQL语句为空"
+
+        sql_upper = sql.upper().strip()
+
+        # Check for dangerous operations
+        for pattern in self.DANGEROUS_KEYWORDS:
+            if re.search(pattern, sql_upper):
+                # Extract matched keyword for error message
+                match = re.search(pattern, sql_upper)
+                matched_keyword = match.group(0) if match else pattern
+                logger.warning(f"SQL validation failed: dangerous operation '{matched_keyword}' detected")
+                return False, f"危险操作被拦截: {matched_keyword}"
+
+        logger.debug(f"SQL validation passed: {sql[:50]}...")
+        return True, "SQL验证通过"
+
+    def execute_config(
+        self,
+        sql_list: List[str],
+        session_id: str
+    ) -> Dict[str, Any]:
+        """Execute a list of SQL statements in a transaction.
+
+        Validates all SQL statements before execution. If any validation fails,
+        no statements are executed.
+
+        Args:
+            sql_list: List of SQL statements to execute
+            session_id: Session ID for logging and tracking
+
+        Returns:
+            Dictionary containing:
+                - success: Boolean indicating overall success
+                - executed: List of executed SQL statements
+                - failed: Error message if execution failed, None otherwise
+                - message: Human-readable result message
+        """
+        logger.info(f"[{session_id}] Starting config execution with {len(sql_list)} SQL statements")
+
+        results: Dict[str, Any] = {
+            "success": True,
+            "executed": [],
+            "failed": None,
+            "message": ""
+        }
+
+        try:
+            # Step 1: Validate all SQL statements
+            logger.debug(f"[{session_id}] Validating {len(sql_list)} SQL statements")
+            for i, sql in enumerate(sql_list):
+                is_valid, msg = self.validate_sql(sql)
+                if not is_valid:
+                    error_msg = f"SQL #{i+1} 验证失败: {msg}"
+                    logger.error(f"[{session_id}] {error_msg}")
+                    raise ValueError(error_msg)
+
+            # Step 2: Execute transaction
+            logger.debug(f"[{session_id}] Executing transaction")
+            self.db_engine.execute_transaction(sql_list)
+
+            # Step 3: Record success
+            results["executed"] = sql_list
+            results["message"] = f"成功执行 {len(sql_list)} 条SQL"
+            logger.success(f"[{session_id}] {results['message']}")
+
+        except ValueError as e:
+            # Validation failure
+            results["success"] = False
+            results["failed"] = str(e)
+            results["message"] = f"执行失败: {e}"
+            logger.error(f"[{session_id}] {results['message']}")
+
+        except Exception as e:
+            # Execution failure
+            results["success"] = False
+            results["failed"] = str(e)
+            results["message"] = f"执行失败: {e}"
+            logger.error(f"[{session_id}] {results['message']}")
+
+        return results
+
+    def rollback(self, session_id: str) -> Dict[str, Any]:
+        """Rollback executed operations for a session.
+
+        This is a placeholder for rollback functionality. Actual implementation
+        would require recording inverse SQL operations during execution.
+
+        Args:
+            session_id: Session ID to rollback
+
+        Returns:
+            Dictionary with success status and message
+        """
+        logger.warning(f"[{session_id}] Rollback requested but not yet implemented")
+        return {
+            "success": False,
+            "message": "回滚功能待实现"
+        }
--- a/backend/app/core/prompts.py
+++ b/backend/app/core/prompts.py
@@ -0,0 +1,144 @@
+"""
+Prompt 模板定义
+
+模板说明:
+- SYSTEM_PROMPT: 系统提示词，定义 Claude 的角色和专业领域
+- ANALYZE_PROMPT_TEMPLATE: 需求解析模板，占位符：user_input, knowledge_context, existing_tables
+- GENERATE_PROMPT_TEMPLATE: 配置生成模板，占位符：requirements, platform_rules, similar_cases
+"""
+
+SYSTEM_PROMPT = """你是一个 ERP 平台配置专家助手，专门帮助开发人员配置一零软件结构化开发平台。
+
+## 你的职责
+
+你是 ERP 系统配置和开发的专业顾问，负责：
+1. 分析用户需求，理解业务场景
+2. 设计合理的数据库表结构
+3. 生成符合平台规范的配置方案
+4. 提供完整的 SQL 脚本和配置说明
+
+## 平台知识
+
+你熟悉以下平台概念：
+- 窗体类型：基础资料、单据、报表、系统设置等
+- 标准字段命名规范：F 开头的主键、FPrefix 前缀的自定义字段
+- 配置流程：需求分析 → 表结构设计 → 功能配置 → 页面配置 → 菜单配置
+- 命名约定：表名以 T_开头，功能号以功能类别前缀开头
+
+## 输出要求
+
+1. 提供完整的 SQL 脚本，包括建表语句、函数配置、页面配置等
+2. 确保配置符合平台规范和最佳实践
+3. 进行风险评估，提示潜在问题
+4. 使用 JSON 格式输出结构化结果
+5. 所有字段和表名使用英文，注释使用中文
+
+请始终保持专业、严谨的工作态度，确保输出的配置方案可落地执行。"""
+
+
+ANALYZE_PROMPT_TEMPLATE = """请分析以下用户需求，生成结构化的需求分析文档。
+
+## 用户输入
+{user_input}
+
+## 相关知识上下文
+{knowledge_context}
+
+## 现有表结构
+{existing_tables}
+
+## 分析要求
+
+请输出结构化的需求分析文档，使用 JSON 格式，包含以下字段：
+
+# Note: Use {{ and }} to escape braces for .format() - rendered as literal { and }
+```json
+{{
+    "功能名称": "功能的中文名称",
+    "功能号建议": "建议的功能编号，如 SAL001",
+    "窗体类型": "基础资料/单据/报表/系统设置",
+    "主表名建议": "建议的主表名，如 T_SAL_Order",
+    "从表名建议": "建议的从表名，如 T_SAL_OrderEntry",
+    "主表字段": [
+        {{"字段名": "FOrderId", "字段类型": "varchar(50)", "中文名称": "订单编号", "必填": true}},
+        ...
+    ],
+    "从表字段": [
+        {{"字段名": "FEntryId", "字段类型": "int", "中文名称": "分录 ID", "必填": true}},
+        ...
+    ],
+    "业务需求": "详细的业务需求描述",
+    "关联表": ["相关表 1", "相关表 2"],
+    "风险提示": ["潜在风险 1", "潜在风险 2"]
+}}
+```
+
+## 注意事项
+
+1. 字段命名遵循平台规范：主键以 F 开头，使用 PascalCase
+2. 表名以 T_开头，使用模块前缀
+3. 考虑必填字段、默认值、数据长度等约束
+4. 识别必要的业务关联关系
+5. 评估潜在的数据一致性和性能风险"""
+
+
+GENERATE_PROMPT_TEMPLATE = """请根据需求分析结果，生成完整的平台配置方案。
+
+## 需求分析结果
+{requirements}
+
+## 平台规则
+{platform_rules}
+
+## 类似案例参考
+{similar_cases}
+
+## 生成要求
+
+请生成完整的配置方案，使用 JSON 格式，包含以下内容：
+
+# Note: Use {{ and }} to escape braces for .format() - rendered as literal { and }
+```json
+{{
+    "table_sql": "建表 SQL 语句，包括主表和从表",
+    "function_config_sql": "功能配置 SQL 语句",
+    "page_config_sql": "页面配置 SQL 语句",
+    "menu_config_sql": "菜单配置 SQL 语句",
+    "ikey_config_sql": "IKEY 配置 SQL 语句",
+    "config_summary": {{
+        "created_tables": ["表 1", "表 2"],
+        "main_entities": ["主要实体 1", "主要实体 2"],
+        "relationships": "表间关系说明"
+    }},
+    "implementation_notes": "实施注意事项",
+    "validation_rules": ["验证规则 1", "验证规则 2"]
+}}
+```
+
+## 配置规范
+
+1. **建表 SQL**:
+   - 主键使用 FId 或 F+ 表名缩写 + Id
+   - 包含创建时间、创建人、更新时间、更新人等审计字段
+   - 使用合适的索引提高查询性能
+
+2. **功能配置**:
+   - 定义功能号、功能名称、功能类型
+   - 配置数据权限和操作权限
+
+3. **页面配置**:
+   - 配置表单布局、字段顺序
+   - 设置字段属性（必填、只读、可见性）
+
+4. **菜单配置**:
+   - 配置菜单层级、图标、排序
+
+5. **IKEY 配置**:
+   - 配置编码规则、生成策略
+
+## 注意事项
+
+- 所有 SQL 语句需要语法正确、可直接执行
+- 配置需要符合平台规范
+- 考虑扩展性和维护性
+- 提供必要的注释说明"""
--- a/backend/app/core/rag_engine.py
+++ b/backend/app/core/rag_engine.py
@@ -0,0 +1,251 @@
+"""RAG Engine for ERP AI Assistant.
+
+This module provides the RAGEngine class that handles knowledge document
+storage and retrieval using ChromaDB and sentence-transformers embeddings.
+"""
+
+from typing import Optional
+
+import chromadb
+from chromadb.config import Settings as ChromaSettings
+from sentence_transformers import SentenceTransformer
+from loguru import logger
+
+from app.config import get_settings
+
+
+class RAGEngine:
+    """RAG Engine for knowledge document retrieval.
+
+    This class wraps ChromaDB vector database and sentence-transformers
+    to provide semantic search over knowledge documents.
+    """
+
+    # Class-level singleton for embedding model (lazy loading)
+    _embedding_model: Optional[SentenceTransformer] = None
+
+    def __init__(self) -> None:
+        """Initialize RAG engine with ChromaDB and embedding model."""
+        settings = get_settings()
+
+        # Initialize ChromaDB persistent client
+        logger.info(f"Initializing ChromaDB at: {settings.CHROMA_DB_PATH}")
+        self.chroma_client = chromadb.PersistentClient(
+            path=settings.CHROMA_DB_PATH,
+            settings=ChromaSettings(anonymized_telemetry=False)
+        )
+
+        # Load sentence-transformers embedding model (lazy loading, singleton)
+        logger.info(f"Loading embedding model: {settings.EMBEDDING_MODEL}")
+        self.embedding_model = self._get_embedding_model(settings.EMBEDDING_MODEL)
+
+        # Get or create documents collection
+        self.documents_collection = self.chroma_client.get_or_create_collection(
+            name="documents"
+        )
+
+        # Store chunking settings
+        self.chunk_size = settings.CHUNK_SIZE
+        self.chunk_overlap = settings.CHUNK_OVERLAP
+
+        logger.info(
+            f"RAG Engine initialized: chunk_size={self.chunk_size}, "
+            f"chunk_overlap={self.chunk_overlap}"
+        )
+
+    @classmethod
+    def _get_embedding_model(cls, model_name: str) -> SentenceTransformer:
+        """Get or create the embedding model (lazy loading, singleton).
+
+        Args:
+            model_name: Name of the embedding model to load
+
+        Returns:
+            SentenceTransformer embedding model instance
+        """
+        if cls._embedding_model is None:
+            logger.info(f"Loading embedding model: {model_name}")
+            cls._embedding_model = SentenceTransformer(model_name)
+        return cls._embedding_model
+
+    def _split_text(self, text: str) -> list[str]:
+        """Split text into overlapping chunks.
+
+        Args:
+            text: The text to split
+
+        Returns:
+            List of chunk strings
+        """
+        if not text:
+            return []
+
+        chunks = []
+        start = 0
+        text_length = len(text)
+
+        while start < text_length:
+            end = start + self.chunk_size
+            chunk = text[start:end]
+
+            if chunk.strip():  # Only add non-empty chunks
+                chunks.append(chunk)
+
+            start += self.chunk_size - self.chunk_overlap
+
+            # Avoid infinite loop if overlap >= chunk_size
+            if self.chunk_overlap >= self.chunk_size:
+                start += 1
+
+        return chunks
+
+    def _delete_chunks_for_doc(self, doc_id: str) -> None:
+        """Delete all chunks associated with a document.
+
+        Args:
+            doc_id: The document ID to delete chunks for
+        """
+        try:
+            # Find all chunks for this document
+            results = self.documents_collection.get(
+                where={"doc_id": doc_id},
+                include=[]
+            )
+            if results and results.get("ids"):
+                self.documents_collection.delete(ids=results["ids"])
+                logger.debug(f"Deleted {len(results['ids'])} chunks for document '{doc_id}'")
+        except Exception as e:
+            logger.warning(f"Failed to delete chunks for document '{doc_id}': {e}")
+
+    def add_document(
+        self,
+        doc_id: str,
+        content: str,
+        metadata: Optional[dict] = None
+    ) -> int:
+        """Add a document to the knowledge base.
+
+        Args:
+            doc_id: Unique identifier for the document
+            content: The document content to index
+            metadata: Optional metadata dict to store with the document
+
+        Returns:
+            Number of chunks added
+
+        Raises:
+            ValueError: If content is empty
+        """
+        if not content or not content.strip():
+            raise ValueError("Cannot add empty document")
+
+        try:
+            # Delete existing chunks for this doc_id (handles duplicates)
+            self._delete_chunks_for_doc(doc_id)
+
+            # Split content into chunks
+            chunks = self._split_text(content)
+            logger.info(f"Split document '{doc_id}' into {len(chunks)} chunks")
+
+            if not chunks:
+                return 0
+
+            # Generate embeddings for all chunks
+            logger.debug(f"Generating embeddings for {len(chunks)} chunks")
+            embeddings = self.embedding_model.encode(chunks)
+
+            # Prepare chunk IDs and metadata
+            chunk_ids = [f"{doc_id}_chunk_{i}" for i in range(len(chunks))]
+
+            # Add metadata to each chunk
+            chunk_metadata = []
+            base_metadata = metadata or {}
+            for i, chunk in enumerate(chunks):
+                meta = {
+                    **base_metadata,
+                    "doc_id": doc_id,
+                    "chunk_index": i,
+                    "total_chunks": len(chunks)
+                }
+                chunk_metadata.append(meta)
+
+            # Add to ChromaDB
+            self.documents_collection.add(
+                ids=chunk_ids,
+                embeddings=embeddings.tolist(),
+                documents=chunks,
+                metadatas=chunk_metadata
+            )
+
+            logger.info(f"Added {len(chunks)} chunks for document '{doc_id}'")
+            return len(chunks)
+
+        except Exception as e:
+            logger.error(f"Failed to add document '{doc_id}': {e}")
+            raise
+
+    def search(self, query: str, top_k: int = 3) -> list[dict]:
+        """Search for relevant document chunks.
+
+        Args:
+            query: The search query
+            top_k: Number of results to return (default: 3, max: 100)
+
+        Returns:
+            List of dicts with 'content', 'metadata', and 'distance'
+
+        Raises:
+            ValueError: If top_k exceeds maximum limit
+        """
+        # Validate top_k
+        if top_k > 100:
+            raise ValueError(f"top_k cannot exceed 100 (got: {top_k})")
+
+        if not query or not query.strip():
+            logger.warning("Empty search query provided")
+            return []
+
+        try:
+            # Generate embedding for query
+            logger.debug(f"Generating embedding for query: {query[:50]}...")
+            query_embedding = self.embedding_model.encode([query])
+
+            # Query ChromaDB
+            results = self.documents_collection.query(
+                query_embeddings=query_embedding.tolist(),
+                n_results=top_k,
+                include=["documents", "metadatas", "distances"]
+            )
+
+            # Format results
+            formatted_results = []
+
+            if results and results.get("documents"):
+                documents = results["documents"][0]
+                metadatas = results["metadatas"][0] if results.get("metadatas") else []
+                distances = results["distances"][0] if results.get("distances") else []
+
+                for i, content in enumerate(documents):
+                    formatted_results.append({
+                        "content": content,
+                        "metadata": metadatas[i] if i < len(metadatas) else {},
+                        "distance": distances[i] if i < len(distances) else None
+                    })
+
+            logger.info(f"Found {len(formatted_results)} results for query")
+            return formatted_results
+
+        except Exception as e:
+            logger.error(f"Search failed: {e}")
+            raise
+
+    def close(self) -> None:
+        """Release resources and cleanup the RAG engine.
+
+        This method should be called when the engine is no longer needed
+        to free up memory and other resources.
+        """
+        logger.info("Closing RAG engine and releasing resources")
+        self.embedding_model = None
+        self.documents_collection = None
+        self.chroma_client = None