"""Tests for RAG Engine.

This module tests the RAGEngine class for document indexing and retrieval.
"""

import pytest
from unittest.mock import MagicMock, patch

from app.core.rag_engine import RAGEngine


def test_rag_engine_init():
    """A freshly built engine exposes its client, collection and chunk settings."""
    engine = RAGEngine()

    assert engine.chroma_client is not None
    assert engine.documents_collection is not None
    # The overlap must be strictly smaller than the chunk size.
    assert engine.chunk_size > 0
    assert engine.chunk_overlap >= 0
    assert engine.chunk_overlap < engine.chunk_size


def test_split_text_basic():
    """Splitting a long string yields non-empty chunks within the size limit."""
    engine = RAGEngine()

    # 1000 characters without whitespace or sentence boundaries.
    long_text = "A" * 1000
    chunks = engine._split_text(long_text)

    assert len(chunks) > 0
    assert all(len(chunk) <= engine.chunk_size for chunk in chunks)
    assert all(chunk.strip() for chunk in chunks)  # No empty chunks


def test_split_text_empty():
    """Empty and whitespace-only input produce no chunks."""
    engine = RAGEngine()

    assert engine._split_text("") == []
    assert engine._split_text(" ") == []


def test_split_text_overlap():
    """Text longer than one chunk is split into several chunks without loss.

    The input length is derived from ``engine.chunk_size`` so that more than
    one chunk is required regardless of how the engine is configured. The
    previous version only asserted ``len(chunks) > 1`` inside an
    ``if len(chunks) > 1`` guard, which could never fail.
    """
    engine = RAGEngine()

    # Three chunk-sizes worth of text cannot fit in a single chunk.
    text = "A" * (engine.chunk_size * 3)
    chunks = engine._split_text(text)

    assert len(chunks) > 1
    assert all(len(chunk) <= engine.chunk_size for chunk in chunks)
    # Overlap may repeat characters between neighbouring chunks, but the
    # chunks together must contain at least as many characters as the input.
    # The exact overlap content is implementation-specific and not checked.
    assert sum(len(chunk) for chunk in chunks) >= len(text)


def test_add_document_success():
    """Adding a document stores its chunks through the collection's ``add``."""
    engine = RAGEngine()

    # Replace the collection's add method so nothing is actually persisted.
    engine.documents_collection.add = MagicMock()

    doc_id = "test_doc_1"
    content = "This is a test document for the knowledge base."
    metadata = {"source": "test", "type": "sample"}

    num_chunks = engine.add_document(doc_id, content, metadata)

    assert num_chunks > 0
    assert engine.documents_collection.add.called

    # add() must receive all four payload fields as keyword arguments.
    call_args = engine.documents_collection.add.call_args
    assert "ids" in call_args.kwargs
    assert "embeddings" in call_args.kwargs
    assert "documents" in call_args.kwargs
    assert "metadatas" in call_args.kwargs


def test_add_document_empty_content():
    """Empty or whitespace-only content is rejected with ValueError."""
    engine = RAGEngine()

    with pytest.raises(ValueError, match="Cannot add empty document"):
        engine.add_document("test_doc", "")

    with pytest.raises(ValueError, match="Cannot add empty document"):
        engine.add_document("test_doc", " ")


def test_search_basic():
    """Search maps the collection's query response to a list of result dicts."""
    engine = RAGEngine()

    # Canned query response: one inner list per field, two hits each.
    mock_results = {
        "documents": [["Result 1", "Result 2"]],
        "metadatas": [[{"doc_id": "doc1"}, {"doc_id": "doc2"}]],
        "distances": [[0.1, 0.2]]
    }
    engine.documents_collection.query = MagicMock(return_value=mock_results)

    results = engine.search("test query", top_k=2)

    assert len(results) == 2
    assert results[0]["content"] == "Result 1"
    assert results[0]["metadata"]["doc_id"] == "doc1"
    assert results[0]["distance"] == 0.1
    assert engine.documents_collection.query.called


def test_search_empty_query():
    """Empty and whitespace-only queries return an empty result list."""
    engine = RAGEngine()

    results = engine.search("", top_k=3)
    assert results == []

    results = engine.search(" ", top_k=3)
    assert results == []


def test_search_invalid_top_k():
    """A top_k above 100 is rejected with ValueError."""
    engine = RAGEngine()

    with pytest.raises(ValueError, match="top_k cannot exceed 100"):
        engine.search("test", top_k=101)


def test_delete_chunks_for_doc():
    """Deleting a document's chunks looks them up and then deletes them."""
    engine = RAGEngine()

    # Stub the lookup to report two stored chunks, and stub the delete call.
    engine.documents_collection.get = MagicMock(return_value={
        "ids": ["doc1_chunk_0", "doc1_chunk_1"]
    })
    engine.documents_collection.delete = MagicMock()

    engine._delete_chunks_for_doc("doc1")

    assert engine.documents_collection.get.called
    assert engine.documents_collection.delete.called


def test_close():
    """Closing the engine clears its model, collection and client references."""
    engine = RAGEngine()

    engine.close()

    assert engine.embedding_model is None
    assert engine.documents_collection is None
    assert engine.chroma_client is None