feat: implement ERP AI Assistant Phase 1

Backend (FastAPI + SQLAlchemy + Claude API + RAG):
- Config management with Pydantic v2
- Database engine with connection pooling and SQL injection prevention
- AI engine with Claude API integration (support custom base URL)
- RAG engine with ChromaDB and sentence-transformers
- Requirement analysis service
- Config generation service
- Executor engine with SQL validation
- REST API endpoints: /analyze, /generate, /execute

Frontend (Vue 3 + Element Plus + Pinia):
- Complete 3-step workflow: analyze → generate → execute
- Step indicator with progress visualization
- Analysis result display with field table
- SQL preview with monospace font
- Execute confirmation dialog with safety warning
- Execution result display
- State management with Pinia
- API service integration

Security:
- SQL injection prevention with parameterized queries
- Dangerous SQL operation blocking
- Database password URL encoding
- Transaction auto-rollback
- Pydantic config validation

Features:
- Natural language requirement analysis
- Automated SQL configuration generation
- Safe execution with human review
- LAN access support
- Custom Claude API endpoint support

Documentation:
- README with quick start guide
- Quick start guide
- LAN access configuration
- Dependency fixes guide
- Claude API configuration
- Git operation guide
- Implementation report

Dependencies fixed:
- numpy<2.0.0 for chromadb compatibility
- sentence-transformers==2.7.0 for huggingface_hub compatibility

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-21 14:23:20 +00:00
commit acd73431ae
60 changed files with 11284 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""Core modules"""

View File

@@ -0,0 +1,120 @@
"""AI Engine for ERP AI Assistant.
This module provides the ClaudeEngine class that wraps Claude API calls
and provides JSON parsing utilities.
"""
import json
import re
from typing import Any
import anthropic
from loguru import logger
from app.config import get_settings
class ClaudeEngine:
"""Engine for interacting with Claude API.
This class wraps the Anthropic Claude API client and provides
utilities for parsing JSON responses from Claude.
"""
def __init__(self) -> None:
"""Initialize Claude engine with settings."""
settings = get_settings()
# Initialize Anthropic client with optional custom base_url
client_kwargs = {"api_key": settings.ANTHROPIC_API_KEY}
if settings.ANTHROPIC_BASE_URL:
client_kwargs["base_url"] = settings.ANTHROPIC_BASE_URL
logger.info(f"Using custom Anthropic base URL: {settings.ANTHROPIC_BASE_URL}")
self.client = anthropic.AsyncAnthropic(**client_kwargs)
self.model = settings.CLAUDE_MODEL
self.max_tokens = settings.CLAUDE_MAX_TOKENS
self.temperature = settings.CLAUDE_TEMPERATURE
def parse_json_response(self, content: str) -> dict[str, Any]:
"""Parse JSON from Claude responses.
Attempts multiple parsing strategies:
1. Direct JSON parse
2. Extract from markdown code blocks
3. Extract any {...} block
Args:
content: The response content from Claude
Returns:
Parsed JSON as a dictionary
Raises:
ValueError: If JSON cannot be parsed using any strategy
"""
if not content or not content.strip():
raise ValueError("Empty content provided")
# Strategy 1: Try direct JSON parse
try:
return json.loads(content)
except json.JSONDecodeError:
pass
# Strategy 2: Try extracting from markdown code blocks
json_code_block_pattern = r'```json\s*(\{.*?\})\s*```'
json_match = re.search(json_code_block_pattern, content, re.DOTALL)
if json_match:
try:
return json.loads(json_match.group(1))
except json.JSONDecodeError:
pass
# Also try any code block (not just json tagged)
code_block_pattern = r'```\s*(\{.*?\})\s*```'
code_block_match = re.search(code_block_pattern, content, re.DOTALL)
if code_block_match:
try:
return json.loads(code_block_match.group(1))
except json.JSONDecodeError:
pass
# Strategy 3: Try extracting any {...} block
# Find balanced braces
brace_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
json_blocks = re.findall(brace_pattern, content, re.DOTALL)
for json_block in json_blocks:
try:
return json.loads(json_block)
except json.JSONDecodeError:
continue
# All strategies failed
logger.error(f"无法解析 Claude 返回的 JSON: {content[:200]}")
raise ValueError("无法解析 Claude 返回的 JSON请检查响应格式")
async def call_claude(
self,
messages: list[dict[str, str]],
temperature: float | None = None
) -> str:
"""Call Claude API.
Args:
messages: List of message dictionaries with 'role' and 'content'
temperature: Optional temperature override (0-2)
Returns:
The text content from Claude's response
Raises:
Exception: If the API call fails
"""
response = await self.client.messages.create(
model=self.model,
max_tokens=self.max_tokens,
temperature=temperature if temperature is not None else self.temperature,
messages=messages
)
return response.content[0].text

View File

@@ -0,0 +1,78 @@
from typing import Optional
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
from contextlib import contextmanager
from loguru import logger
from app.config import get_settings
class DatabaseEngine:
"""数据库操作引擎"""
def __init__(self):
settings = get_settings()
self.engine = create_engine(
settings.DATABASE_URL,
pool_size=20,
max_overflow=10,
pool_pre_ping=True,
echo=settings.DEBUG
)
self.Session = sessionmaker(bind=self.engine)
@contextmanager
def get_session(self):
"""获取数据库会话(上下文管理器)"""
session = self.Session()
try:
yield session
session.commit()
except Exception as e:
session.rollback()
logger.error(f"数据库操作失败:{e}")
raise
finally:
session.close()
def execute_sql(self, sql: str, params: Optional[dict] = None) -> list:
"""执行单条 SQL"""
with self.get_session() as session:
result = session.execute(text(sql), params or {})
return result.fetchall()
def execute_transaction(self, sql_list: list, params_list: Optional[list] = None) -> bool:
"""执行事务(多条 SQL"""
params_list = params_list or [None] * len(sql_list)
with self.get_session() as session:
for sql, params in zip(sql_list, params_list):
session.execute(text(sql), params or {})
return True
def get_table_structure(self, table_name: str):
"""获取表结构(安全参数化查询)"""
sql = """
SELECT
COLUMN_NAME,
DATA_TYPE,
CHARACTER_MAXIMUM_LENGTH,
IS_NULLABLE,
COLUMN_DEFAULT
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = :table_name
ORDER BY ORDINAL_POSITION
"""
return self.execute_sql(sql, {"table_name": table_name})
def table_exists(self, table_name: str) -> bool:
"""检查表是否存在(安全参数化查询)"""
sql = """
SELECT COUNT(*)
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_NAME = :table_name
"""
result = self.execute_sql(sql, {"table_name": table_name})
return result[0][0] > 0
def dispose(self):
"""关闭连接池,释放资源"""
self.engine.dispose()

View File

@@ -0,0 +1,147 @@
"""Config Executor for ERP AI Assistant.
This module provides the ConfigExecutor class for validating and executing
SQL configuration statements with safety checks.
"""
import re
from typing import List, Tuple, Dict, Any
from loguru import logger
from app.core.db_engine import DatabaseEngine
class ConfigExecutor:
"""Executor for SQL configuration statements with safety validation.
This class validates SQL statements against dangerous operations before
execution and provides transaction-based execution with rollback support.
"""
# Dangerous SQL keywords that should be blocked
DANGEROUS_KEYWORDS = [
r"DROP\s+DATABASE",
r"DROP\s+TABLE",
r"TRUNCATE\s+TABLE",
r"DELETE\s+FROM",
r"UPDATE\s+.*\s+SET",
r"ALTER\s+TABLE\s+.*\s+DROP"
]
def __init__(self) -> None:
"""Initialize executor with database engine."""
self.db_engine = DatabaseEngine()
logger.info("ConfigExecutor initialized")
def validate_sql(self, sql: str) -> Tuple[bool, str]:
"""Validate SQL statement for safety.
Checks SQL against a list of dangerous keywords/patterns to prevent
destructive operations.
Args:
sql: SQL statement to validate
Returns:
Tuple of (is_valid, message) where is_valid indicates if SQL is safe
"""
if not sql or not sql.strip():
return False, "SQL语句为空"
sql_upper = sql.upper().strip()
# Check for dangerous operations
for pattern in self.DANGEROUS_KEYWORDS:
if re.search(pattern, sql_upper):
# Extract matched keyword for error message
match = re.search(pattern, sql_upper)
matched_keyword = match.group(0) if match else pattern
logger.warning(f"SQL validation failed: dangerous operation '{matched_keyword}' detected")
return False, f"危险操作被拦截: {matched_keyword}"
logger.debug(f"SQL validation passed: {sql[:50]}...")
return True, "SQL验证通过"
def execute_config(
self,
sql_list: List[str],
session_id: str
) -> Dict[str, Any]:
"""Execute a list of SQL statements in a transaction.
Validates all SQL statements before execution. If any validation fails,
no statements are executed.
Args:
sql_list: List of SQL statements to execute
session_id: Session ID for logging and tracking
Returns:
Dictionary containing:
- success: Boolean indicating overall success
- executed: List of executed SQL statements
- failed: Error message if execution failed, None otherwise
- message: Human-readable result message
"""
logger.info(f"[{session_id}] Starting config execution with {len(sql_list)} SQL statements")
results: Dict[str, Any] = {
"success": True,
"executed": [],
"failed": None,
"message": ""
}
try:
# Step 1: Validate all SQL statements
logger.debug(f"[{session_id}] Validating {len(sql_list)} SQL statements")
for i, sql in enumerate(sql_list):
is_valid, msg = self.validate_sql(sql)
if not is_valid:
error_msg = f"SQL #{i+1} 验证失败: {msg}"
logger.error(f"[{session_id}] {error_msg}")
raise ValueError(error_msg)
# Step 2: Execute transaction
logger.debug(f"[{session_id}] Executing transaction")
self.db_engine.execute_transaction(sql_list)
# Step 3: Record success
results["executed"] = sql_list
results["message"] = f"成功执行 {len(sql_list)} 条SQL"
logger.success(f"[{session_id}] {results['message']}")
except ValueError as e:
# Validation failure
results["success"] = False
results["failed"] = str(e)
results["message"] = f"执行失败: {e}"
logger.error(f"[{session_id}] {results['message']}")
except Exception as e:
# Execution failure
results["success"] = False
results["failed"] = str(e)
results["message"] = f"执行失败: {e}"
logger.error(f"[{session_id}] {results['message']}")
return results
def rollback(self, session_id: str) -> Dict[str, Any]:
"""Rollback executed operations for a session.
This is a placeholder for rollback functionality. Actual implementation
would require recording inverse SQL operations during execution.
Args:
session_id: Session ID to rollback
Returns:
Dictionary with success status and message
"""
logger.warning(f"[{session_id}] Rollback requested but not yet implemented")
return {
"success": False,
"message": "回滚功能待实现"
}

144
backend/app/core/prompts.py Normal file
View File

@@ -0,0 +1,144 @@
"""
Prompt 模板定义
模板说明:
- SYSTEM_PROMPT: 系统提示词,定义 Claude 的角色和专业领域
- ANALYZE_PROMPT_TEMPLATE: 需求解析模板占位符user_input, knowledge_context, existing_tables
- GENERATE_PROMPT_TEMPLATE: 配置生成模板占位符requirements, platform_rules, similar_cases
"""
SYSTEM_PROMPT = """你是一个 ERP 平台配置专家助手,专门帮助开发人员配置一零软件结构化开发平台。
## 你的职责
你是 ERP 系统配置和开发的专业顾问,负责:
1. 分析用户需求,理解业务场景
2. 设计合理的数据库表结构
3. 生成符合平台规范的配置方案
4. 提供完整的 SQL 脚本和配置说明
## 平台知识
你熟悉以下平台概念:
- 窗体类型:基础资料、单据、报表、系统设置等
- 标准字段命名规范F 开头的主键、FPrefix 前缀的自定义字段
- 配置流程:需求分析 → 表结构设计 → 功能配置 → 页面配置 → 菜单配置
- 命名约定:表名以 T_开头功能号以功能类别前缀开头
## 输出要求
1. 提供完整的 SQL 脚本,包括建表语句、函数配置、页面配置等
2. 确保配置符合平台规范和最佳实践
3. 进行风险评估,提示潜在问题
4. 使用 JSON 格式输出结构化结果
5. 所有字段和表名使用英文,注释使用中文
请始终保持专业、严谨的工作态度,确保输出的配置方案可落地执行。"""
ANALYZE_PROMPT_TEMPLATE = """请分析以下用户需求,生成结构化的需求分析文档。
## 用户输入
{user_input}
## 相关知识上下文
{knowledge_context}
## 现有表结构
{existing_tables}
## 分析要求
请输出结构化的需求分析文档,使用 JSON 格式,包含以下字段:
# Note: Use {{ and }} to escape braces for .format() - rendered as literal { and }
```json
{{
"功能名称": "功能的中文名称",
"功能号建议": "建议的功能编号,如 SAL001",
"窗体类型": "基础资料/单据/报表/系统设置",
"主表名建议": "建议的主表名,如 T_SAL_Order",
"从表名建议": "建议的从表名,如 T_SAL_OrderEntry",
"主表字段": [
{{"字段名": "FOrderId", "字段类型": "varchar(50)", "中文名称": "订单编号", "必填": true}},
...
],
"从表字段": [
{{"字段名": "FEntryId", "字段类型": "int", "中文名称": "分录 ID", "必填": true}},
...
],
"业务需求": "详细的业务需求描述",
"关联表": ["相关表 1", "相关表 2"],
"风险提示": ["潜在风险 1", "潜在风险 2"]
}}
```
## 注意事项
1. 字段命名遵循平台规范:主键以 F 开头,使用 PascalCase
2. 表名以 T_开头使用模块前缀
3. 考虑必填字段、默认值、数据长度等约束
4. 识别必要的业务关联关系
5. 评估潜在的数据一致性和性能风险"""
GENERATE_PROMPT_TEMPLATE = """请根据需求分析结果,生成完整的平台配置方案。
## 需求分析结果
{requirements}
## 平台规则
{platform_rules}
## 类似案例参考
{similar_cases}
## 生成要求
请生成完整的配置方案,使用 JSON 格式,包含以下内容:
# Note: Use {{ and }} to escape braces for .format() - rendered as literal { and }
```json
{{
"table_sql": "建表 SQL 语句,包括主表和从表",
"function_config_sql": "功能配置 SQL 语句",
"page_config_sql": "页面配置 SQL 语句",
"menu_config_sql": "菜单配置 SQL 语句",
"ikey_config_sql": "IKEY 配置 SQL 语句",
"config_summary": {{
"created_tables": ["表 1", "表 2"],
"main_entities": ["主要实体 1", "主要实体 2"],
"relationships": "表间关系说明"
}},
"implementation_notes": "实施注意事项",
"validation_rules": ["验证规则 1", "验证规则 2"]
}}
```
## 配置规范
1. **建表 SQL**:
- 主键使用 FId 或 F+ 表名缩写 + Id
- 包含创建时间、创建人、更新时间、更新人等审计字段
- 使用合适的索引提高查询性能
2. **功能配置**:
- 定义功能号、功能名称、功能类型
- 配置数据权限和操作权限
3. **页面配置**:
- 配置表单布局、字段顺序
- 设置字段属性(必填、只读、可见性)
4. **菜单配置**:
- 配置菜单层级、图标、排序
5. **IKEY 配置**:
- 配置编码规则、生成策略
## 注意事项
- 所有 SQL 语句需要语法正确、可直接执行
- 配置需要符合平台规范
- 考虑扩展性和维护性
- 提供必要的注释说明"""

View File

@@ -0,0 +1,251 @@
"""RAG Engine for ERP AI Assistant.
This module provides the RAGEngine class that handles knowledge document
storage and retrieval using ChromaDB and sentence-transformers embeddings.
"""
from typing import Optional
import chromadb
from chromadb.config import Settings as ChromaSettings
from sentence_transformers import SentenceTransformer
from loguru import logger
from app.config import get_settings
class RAGEngine:
"""RAG Engine for knowledge document retrieval.
This class wraps ChromaDB vector database and sentence-transformers
to provide semantic search over knowledge documents.
"""
# Class-level singleton for embedding model (lazy loading)
_embedding_model: Optional[SentenceTransformer] = None
def __init__(self) -> None:
"""Initialize RAG engine with ChromaDB and embedding model."""
settings = get_settings()
# Initialize ChromaDB persistent client
logger.info(f"Initializing ChromaDB at: {settings.CHROMA_DB_PATH}")
self.chroma_client = chromadb.PersistentClient(
path=settings.CHROMA_DB_PATH,
settings=ChromaSettings(anonymized_telemetry=False)
)
# Load sentence-transformers embedding model (lazy loading, singleton)
logger.info(f"Loading embedding model: {settings.EMBEDDING_MODEL}")
self.embedding_model = self._get_embedding_model(settings.EMBEDDING_MODEL)
# Get or create documents collection
self.documents_collection = self.chroma_client.get_or_create_collection(
name="documents"
)
# Store chunking settings
self.chunk_size = settings.CHUNK_SIZE
self.chunk_overlap = settings.CHUNK_OVERLAP
logger.info(
f"RAG Engine initialized: chunk_size={self.chunk_size}, "
f"chunk_overlap={self.chunk_overlap}"
)
@classmethod
def _get_embedding_model(cls, model_name: str) -> SentenceTransformer:
"""Get or create the embedding model (lazy loading, singleton).
Args:
model_name: Name of the embedding model to load
Returns:
SentenceTransformer embedding model instance
"""
if cls._embedding_model is None:
logger.info(f"Loading embedding model: {model_name}")
cls._embedding_model = SentenceTransformer(model_name)
return cls._embedding_model
def _split_text(self, text: str) -> list[str]:
"""Split text into overlapping chunks.
Args:
text: The text to split
Returns:
List of chunk strings
"""
if not text:
return []
chunks = []
start = 0
text_length = len(text)
while start < text_length:
end = start + self.chunk_size
chunk = text[start:end]
if chunk.strip(): # Only add non-empty chunks
chunks.append(chunk)
start += self.chunk_size - self.chunk_overlap
# Avoid infinite loop if overlap >= chunk_size
if self.chunk_overlap >= self.chunk_size:
start += 1
return chunks
def _delete_chunks_for_doc(self, doc_id: str) -> None:
"""Delete all chunks associated with a document.
Args:
doc_id: The document ID to delete chunks for
"""
try:
# Find all chunks for this document
results = self.documents_collection.get(
where={"doc_id": doc_id},
include=[]
)
if results and results.get("ids"):
self.documents_collection.delete(ids=results["ids"])
logger.debug(f"Deleted {len(results['ids'])} chunks for document '{doc_id}'")
except Exception as e:
logger.warning(f"Failed to delete chunks for document '{doc_id}': {e}")
def add_document(
self,
doc_id: str,
content: str,
metadata: Optional[dict] = None
) -> int:
"""Add a document to the knowledge base.
Args:
doc_id: Unique identifier for the document
content: The document content to index
metadata: Optional metadata dict to store with the document
Returns:
Number of chunks added
Raises:
ValueError: If content is empty
"""
if not content or not content.strip():
raise ValueError("Cannot add empty document")
try:
# Delete existing chunks for this doc_id (handles duplicates)
self._delete_chunks_for_doc(doc_id)
# Split content into chunks
chunks = self._split_text(content)
logger.info(f"Split document '{doc_id}' into {len(chunks)} chunks")
if not chunks:
return 0
# Generate embeddings for all chunks
logger.debug(f"Generating embeddings for {len(chunks)} chunks")
embeddings = self.embedding_model.encode(chunks)
# Prepare chunk IDs and metadata
chunk_ids = [f"{doc_id}_chunk_{i}" for i in range(len(chunks))]
# Add metadata to each chunk
chunk_metadata = []
base_metadata = metadata or {}
for i, chunk in enumerate(chunks):
meta = {
**base_metadata,
"doc_id": doc_id,
"chunk_index": i,
"total_chunks": len(chunks)
}
chunk_metadata.append(meta)
# Add to ChromaDB
self.documents_collection.add(
ids=chunk_ids,
embeddings=embeddings.tolist(),
documents=chunks,
metadatas=chunk_metadata
)
logger.info(f"Added {len(chunks)} chunks for document '{doc_id}'")
return len(chunks)
except Exception as e:
logger.error(f"Failed to add document '{doc_id}': {e}")
raise
def search(self, query: str, top_k: int = 3) -> list[dict]:
"""Search for relevant document chunks.
Args:
query: The search query
top_k: Number of results to return (default: 3, max: 100)
Returns:
List of dicts with 'content', 'metadata', and 'distance'
Raises:
ValueError: If top_k exceeds maximum limit
"""
# Validate top_k
if top_k > 100:
raise ValueError(f"top_k cannot exceed 100 (got: {top_k})")
if not query or not query.strip():
logger.warning("Empty search query provided")
return []
try:
# Generate embedding for query
logger.debug(f"Generating embedding for query: {query[:50]}...")
query_embedding = self.embedding_model.encode([query])
# Query ChromaDB
results = self.documents_collection.query(
query_embeddings=query_embedding.tolist(),
n_results=top_k,
include=["documents", "metadatas", "distances"]
)
# Format results
formatted_results = []
if results and results.get("documents"):
documents = results["documents"][0]
metadatas = results["metadatas"][0] if results.get("metadatas") else []
distances = results["distances"][0] if results.get("distances") else []
for i, content in enumerate(documents):
formatted_results.append({
"content": content,
"metadata": metadatas[i] if i < len(metadatas) else {},
"distance": distances[i] if i < len(distances) else None
})
logger.info(f"Found {len(formatted_results)} results for query")
return formatted_results
except Exception as e:
logger.error(f"Search failed: {e}")
raise
def close(self) -> None:
"""Release resources and cleanup the RAG engine.
This method should be called when the engine is no longer needed
to free up memory and other resources.
"""
logger.info("Closing RAG engine and releasing resources")
self.embedding_model = None
self.documents_collection = None
self.chroma_client = None