
Commit 3bc5b74

Merge branch 'main' into add-groq-client-support
2 parents: 1659045 + a698ec1

File tree

18 files changed: +2157 −1767 lines

docs/integrations/huggingface/QATestset.md

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@

# 📤 Push a QATestset to the Hugging Face Hub

**Learn how to upload and manage your QATestset on the Hugging Face Hub using the `push_to_hf_hub` feature.**

This tutorial will guide you through the steps to push a dataset to the Hugging Face Hub and load it back for reuse.

## Install Required Dependencies

Before you begin, ensure you have the necessary libraries installed. Run the following command to install the `datasets` and `huggingface_hub` packages:

```bash
pip install datasets huggingface_hub
```

## Authenticate with Hugging Face

To enable access to your account, set your Hugging Face authentication token (`HF_TOKEN`). You can generate your token from your [Hugging Face account settings](https://huggingface.co/settings/tokens).

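For example, you can authenticate programmatically with the `login` helper from `huggingface_hub` (a minimal sketch, assuming the token has already been exported as the `HF_TOKEN` environment variable):

```python
import os

from huggingface_hub import login

# Log in with the token stored in HF_TOKEN; alternatively, pass the token string directly.
login(token=os.environ["HF_TOKEN"])
```
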
## Push Your Dataset to the Hub

Use the `push_to_hf_hub` method to upload your dataset to the Hugging Face Hub. The example below loads a `QATestset` from the file `test_set.jsonl` and pushes it to the Hub. Replace `<username>` with your Hugging Face username and `<dataset_name>` with the desired name for your dataset:

```python
from giskard.rag.testset import QATestset
test_set = QATestset.load("test_set.jsonl")
test_set.push_to_hf_hub("<username>/<dataset_name>")
```

Once the dataset is successfully pushed, it will be available on your Hugging Face profile.

## Load the Dataset from the Hub

To reuse the dataset, you can load it back using the `load_from_hf_hub` method. This example demonstrates how to load the dataset and convert it to a pandas DataFrame for inspection:

```python
from giskard.rag.testset import QATestset
dset = QATestset.load_from_hf_hub("<username>/<dataset_name>")
dset.to_pandas().head()
```

Replace `<username>` and `<dataset_name>` with the appropriate values.

## Benefits of Using the Hugging Face Hub

By leveraging this integration, you can:

- Seamlessly share datasets across projects and collaborators.
- Reuse datasets without the need for manual file transfers.
- Access datasets directly from the Hugging Face Hub for streamlined workflows.

Start pushing your datasets today and take advantage of the collaborative power of the Hugging Face Hub!

docs/integrations/huggingface/index.md

Lines changed: 6 additions & 0 deletions
````diff
@@ -8,6 +8,7 @@
 :hidden:

 ./evaluator.md
+./QATestset.md

 ```

@@ -17,3 +18,8 @@
 :text-align: center
 :link: ./evaluator.md
 ::::
+
+::::{grid-item-card} <br/><h3>📤 Push a QATestset to the Hugging Face Hub</h3>
+:text-align: center
+:link: ./QATestset.md
+::::
````

docs/open_source/testset_generation/testset_generation/index.md

Lines changed: 2 additions & 0 deletions
````diff
@@ -279,6 +279,8 @@ from giskard.rag import QATestset
 loaded_testset = QATestset.load("my_testset.jsonl")
 ```

+You can push your generated test set to the Hugging Face Hub or load an existing dataset from it using [`QATestset.push_to_hf_hub`](giskard.rag.QATestset.push_to_hf_hub) and [`QATestset.load_from_hf_hub`](giskard.rag.QATestset.load_from_hf_hub). This allows you to share and reuse datasets easily. For detailed instructions, refer to the [Hugging Face Integration Documentation](../../../integrations/huggingface/QATestset.md).
+
 You can also convert it to a pandas DataFrame, for quick inspection or further processing:

 ```py
````

giskard/core/core.py

Lines changed: 4 additions & 4 deletions
```diff
@@ -8,13 +8,13 @@
 from enum import Enum
 from pathlib import Path

-from griffe import Docstring
-from griffe.docstrings.dataclasses import (
+from griffe import (
+    Docstring,
     DocstringSection,
+    DocstringSectionKind,
     DocstringSectionParameters,
     DocstringSectionReturns,
 )
-from griffe.enumerations import DocstringSectionKind

 from ..utils.artifacts import serialize_parameter


@@ -26,7 +26,7 @@
 from typing import Any, Callable, Dict, List, Literal, Optional, Type, TypeVar, Union

 logger = logging.getLogger(__name__)
-DEMILITER = f"\n{'='*20}\n"
+DEMILITER = f"\n{'=' * 20}\n"


 class Kwargs:
```

giskard/llm/client/base.py

Lines changed: 5 additions & 0 deletions
```diff
@@ -31,3 +31,8 @@ def complete(
         format=None,
     ) -> ChatMessage:
         ...
+
+    @abstractmethod
+    def get_config(self) -> dict:
+        """Return the configuration of the LLM client."""
+        ...
```

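To illustrate the new contract, here is a minimal sketch of a custom client that satisfies it. The `LLMClient` and `ChatMessage` names are assumed from this module; the `EchoClient` class and its simplified `complete` signature are illustrative only and are not part of this commit:

```python
from typing import Sequence

from giskard.llm.client.base import ChatMessage, LLMClient  # names assumed from this module


class EchoClient(LLMClient):
    """Toy client used only to illustrate the new get_config contract."""

    def __init__(self, model: str = "echo-1"):
        self.model = model

    def complete(self, messages: Sequence[ChatMessage], **kwargs) -> ChatMessage:
        # Echo the last message back instead of calling a real LLM.
        return ChatMessage(role="assistant", content=messages[-1].content)

    def get_config(self) -> dict:
        # Same convention as the concrete clients updated in this commit.
        return {"client_type": self.__class__.__name__, "model": self.model}
```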

giskard/llm/client/bedrock.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -61,6 +61,10 @@ def complete(

         return self._parse_completion(completion, caller_id)

+    def get_config(self) -> dict:
+        """Return the configuration of the LLM client."""
+        return {"client_type": self.__class__.__name__, "model": self.model}
+

 @deprecated("ClaudeBedrockClient is deprecated: https://docs.giskard.ai/en/latest/open_source/setting_up/index.html")
 class ClaudeBedrockClient(BaseBedrockClient):
```

giskard/llm/client/gemini.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -57,6 +57,10 @@ def __init__(self, model: str = "gemini-pro", _client=None):
         self.model = model
         self._client = _client or genai.GenerativeModel(self.model)

+    def get_config(self) -> dict:
+        """Return the configuration of the LLM client."""
+        return {"client_type": self.__class__.__name__, "model": self.model}
+
     def complete(
         self,
         messages: Sequence[ChatMessage],
```

giskard/llm/client/litellm.py

Lines changed: 9 additions & 0 deletions
```diff
@@ -151,3 +151,12 @@ def complete(
                 continue

         return ChatMessage(role=response_message.role, content=response_message.content)
+
+    def get_config(self) -> dict:
+        """Return the configuration of the LLM client."""
+        return {
+            "client_type": self.__class__.__name__,
+            "model": self.model,
+            "disable_structured_output": self.disable_structured_output,
+            "completion_params": self.completion_params,
+        }
```

giskard/llm/client/mistral.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -24,6 +24,10 @@ def __init__(self, model: str = "mistral-large-latest", client: Mistral = None):
         self.model = model
         self._client = client or Mistral(api_key=os.getenv("MISTRAL_API_KEY", ""))

+    def get_config(self) -> dict:
+        """Return the configuration of the LLM client."""
+        return {"client_type": self.__class__.__name__, "model": self.model}
+
     def complete(
         self,
         messages: Sequence[ChatMessage],
```

giskard/llm/client/openai.py

Lines changed: 8 additions & 0 deletions
```diff
@@ -37,6 +37,14 @@ def __init__(
         self._client = client or openai.OpenAI()
         self.json_mode = json_mode if json_mode is not None else _supports_json_format(model)

+    def get_config(self) -> dict:
+        """Return the configuration of the LLM client."""
+        return {
+            "client_type": self.__class__.__name__,
+            "model": self.model,
+            "json_mode": self.json_mode,
+        }
+
     def complete(
         self,
         messages: Sequence[ChatMessage],
```

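For illustration, the new method could be exercised as follows (a minimal sketch; the `OpenAIClient` class name and constructor are assumed from the file above, and the printed value is only indicative):

```python
from giskard.llm.client.openai import OpenAIClient  # class name assumed

# Build a client and inspect how it reports its configuration.
client = OpenAIClient(model="gpt-4o-mini")
print(client.get_config())
# Indicative output: {'client_type': 'OpenAIClient', 'model': 'gpt-4o-mini', 'json_mode': True}
```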

0 commit comments
