policyhub-mcp-server-mcp.postman_collection.json
{
"info": {
"_postman_id": "96cfb78e-bf4f-4349-89ef-9726fbdb8107",
"name": "PolicyHub MCP Server (MCP Protocol)",
"schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json",
"_exporter_id": "51837495",
"_collection_link": "https://go.postman.co/collection/51837495-96cfb78e-bf4f-4349-89ef-9726fbdb8107?source=collection_link"
},
"item": [
{
"name": "Keyword Search (MCP)",
"request": {
"method": "POST",
"header": [
{
"key": "Content-Type",
"value": "application/json"
},
{
"key": "Accept",
"value": "application/json, text/event-stream"
},
{
"key": "MCP-Session-ID",
"value": "{{mcp_session_id}}"
}
],
"body": {
"mode": "raw",
"raw": "{\n \"jsonrpc\": \"2.0\",\n \"id\": 1,\n \"method\": \"tools/call\",\n \"params\": {\n \"name\": \"keyword_search\",\n \"arguments\": {\n \"query\": \"example query\",\n \"top_k\": 5\n }\n }\n}"
},
"url": {
"raw": "http://localhost:8000/mcp",
"protocol": "http",
"host": [
"localhost"
],
"port": "8000",
"path": [
"mcp"
]
}
},
"response": []
},
{
"name": "Vector Search (MCP)",
"request": {
"method": "POST",
"header": [
{
"key": "Content-Type",
"value": "application/json"
},
{
"key": "Accept",
"value": "application/json, text/event-stream"
},
{
"key": "MCP-Session-ID",
"value": "{{mcp_session_id}}"
}
],
"body": {
"mode": "raw",
"raw": "{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/call\",\"params\":{\"name\":\"vector_search\",\"arguments\":{\"query\":\"example query\",\"top_k\":5}}}",
"options": {
"raw": {
"language": "json"
}
}
},
"url": {
"raw": "http://localhost:8000/mcp",
"protocol": "http",
"host": [
"localhost"
],
"port": "8000",
"path": [
"mcp"
]
}
},
"response": []
},
{
"name": "Hybrid Search (MCP)",
"request": {
"method": "POST",
"header": [
{
"key": "Content-Type",
"value": "application/json"
},
{
"key": "Accept",
"value": "application/json, text/event-stream"
},
{
"key": "MCP-Session-ID",
"value": "{{mcp_session_id}}"
}
],
"body": {
"mode": "raw",
"raw": "{\"jsonrpc\":\"2.0\",\"id\":3,\"method\":\"tools/call\",\"params\":{\"name\":\"hybrid_search\",\"arguments\":{\"query\":\"example query\",\"top_k\":5}}}",
"options": {
"raw": {
"language": "json"
}
}
},
"url": {
"raw": "http://localhost:8000/mcp",
"protocol": "http",
"host": [
"localhost"
],
"port": "8000",
"path": [
"mcp"
]
}
},
"response": []
},
{
"name": "Get Document (MCP)",
"request": {
"method": "POST",
"header": [
{
"key": "Content-Type",
"value": "application/json"
},
{
"key": "Accept",
"value": "application/json, text/event-stream"
},
{
"key": "MCP-Session-ID",
"value": "{{mcp_session_id}}"
}
],
"body": {
"mode": "raw",
"raw": "{\"jsonrpc\":\"2.0\",\"id\":4,\"method\":\"tools/call\",\"params\":{\"name\":\"get_document\",\"arguments\":{\"document_id\":\"example_id\"}}}",
"options": {
"raw": {
"language": "json"
}
}
},
"url": {
"raw": "http://localhost:8000/mcp",
"protocol": "http",
"host": [
"localhost"
],
"port": "8000",
"path": [
"mcp"
]
}
},
"response": []
},
{
"name": "Health Check (MCP)",
"request": {
"method": "POST",
"header": [
{
"key": "Content-Type",
"value": "application/json"
},
{
"key": "Accept",
"value": "application/json, text/event-stream"
},
{
"key": "MCP-Session-ID",
"value": "{{mcp_session_id}}"
}
],
"body": {
"mode": "raw",
"raw": "{\"jsonrpc\":\"2.0\",\"id\":5,\"method\":\"tools/call\",\"params\":{\"name\":\"health\",\"arguments\":{}}}",
"options": {
"raw": {
"language": "json"
}
}
},
"url": {
"raw": "http://localhost:8000/mcp",
"protocol": "http",
"host": [
"localhost"
],
"port": "8000",
"path": [
"mcp"
]
}
},
"response": []
},
{
"name": "Initialize Session",
"event": [
{
"listen": "test",
"script": {
"type": "text/javascript",
"exec": [
"const sessionId = pm.response.headers.get(\"Mcp-Session-Id\") || pm.response.headers.get(\"MCP-Session-ID\");",
"if (sessionId) {",
" pm.environment.set(\"mcp_session_id\", sessionId);",
" console.log(\"MCP Session ID set: \" + sessionId);",
"} else {",
" console.warn(\"Session ID header not found in response. Check response headers.\");",
"}"
]
}
}
],
"request": {
"method": "POST",
"header": [
{
"key": "Content-Type",
"value": "application/json"
},
{
"key": "Accept",
"value": "application/json, text/event-stream"
}
],
"body": {
"mode": "raw",
"raw": "{\"jsonrpc\":\"2.0\",\"id\":0,\"method\":\"initialize\",\"params\":{\"protocolVersion\":\"2024-11-05\",\"capabilities\":{},\"clientInfo\":{\"name\":\"Postman\",\"version\":\"1.0\"}}}",
"options": {
"raw": {
"language": "json"
}
}
},
"url": {
"raw": "http://localhost:8000/mcp",
"protocol": "http",
"host": [
"localhost"
],
"port": "8000",
"path": [
"mcp"
]
},
"description": "Generated from cURL: curl -X POST http://localhost:8000/mcp -H \"Content-Type: application/json\" -H \"Accept: application/json, text/event-stream\" -d '{\"jsonrpc\":\"2.0\",\"id\":0,\"method\":\"initialize\",\"params\":{\"protocolVersion\":\"2024-11-05\",\"capabilities\":{},\"clientInfo\":{\"name\":\"Postman\",\"version\":\"1.0\"}}}'"
},
"response": []
}
]
}
requirements.txt
fastmcp
uvicorn
gmf-forge-ai-data>=1.0.0,<2.0.0
gmf-forge-ai-shared-core>=1.0.0,<2.0.0
setup.py
"""PolicyHub MCP Server."""
from setuptools import setup, find_packages
setup(
    name="policyhub-mcp-server",
    version="0.1.0",
    packages=find_packages(where="src"),
    # The server code is a set of flat modules directly under src/
    # (azure_search_service.py, config.py, main.py, models.py).
    # find_packages() only collects packages, so the modules are listed
    # explicitly; otherwise nothing is installed and the console script
    # below cannot import `main`.
    py_modules=[
        "azure_search_service",
        "config",
        "main",
        "models",
    ],
    package_dir={"": "src"},
    python_requires=">=3.11",
    install_requires=[
        "gmf-forge-ai-shared-core>=1.0.0",  # Reference as a package
        "gmf-forge-ai-data>=1.0.0",  # Reference as a package
        "fastmcp>=1.0.0",
        "uvicorn[standard]>=0.29.0",
        "pydantic>=2.0.0",
        "python-dotenv>=1.0.0",
    ],
    entry_points={
        "console_scripts": [
            # Resolves to the `run` attribute of the `mcp` object in main.py.
            "policyhub-mcp-server=main:mcp.run",
        ],
    },
)
src/azure_search_service.py
from gmf_forge_ai_data.vector_stores.azure_ai_search_vector_store import AzureAISearchVectorStore
from gmf_forge_ai_data.embeddings import AzureOpenAIEmbeddings
from gmf_forge_ai_shared_core.observability.logger import BasicLogger
import dataclasses
from typing import Optional, List, Dict, Any
from models import PolicyDocument
class PolicyHubSearchService:
    """Search facade used by the MCP tools.

    Holds an ``AzureAISearchVectorStore`` typed to ``PolicyDocument`` and an
    ``AzureOpenAIEmbeddings`` client, and exposes keyword, vector, hybrid and
    filtered hybrid search plus lookup by the ``document_id`` field.
    """

    def __init__(self, search_endpoint, search_index, search_api_key, openai_endpoint, openai_api_key, openai_deployment, logger=None, ssl_cert_path=None):
        """Create the vector store and embedding clients.

        Args:
            search_endpoint: Passed to the vector store as ``endpoint``.
            search_index: Passed to the vector store as ``index_name``.
            search_api_key: Passed to the vector store as ``api_key``.
            openai_endpoint: Passed to the embeddings client as ``endpoint``.
            openai_api_key: Passed to the embeddings client as ``api_key``.
            openai_deployment: Passed to the embeddings client as
                ``deployment_name``.
            logger: Logger to use; when None a
                ``BasicLogger("PolicyHubSearchService")`` is created.
            ssl_cert_path: Passed to the embeddings client unchanged.
        """
        if logger is None:
            logger = BasicLogger("PolicyHubSearchService")
        self.logger = logger

        store_settings = {
            "endpoint": search_endpoint,
            "index_name": search_index,
            "api_key": search_api_key,
            "document_type": PolicyDocument,
            "vector_field_name": "chunkContent_vector",
            "content_field_name": "chunkContent",
        }
        self.vector_store = AzureAISearchVectorStore(**store_settings)

        embedder_settings = {
            "endpoint": openai_endpoint,
            "api_key": openai_api_key,
            "deployment_name": openai_deployment,
            "ssl_cert_path": ssl_cert_path,
            "logger": self.logger,
        }
        self.embedder = AzureOpenAIEmbeddings(**embedder_settings)

    def keyword_search(self, query, top_k=5):
        """Run a ``search_type="keyword"`` search and return the store's results as-is."""
        self.logger.info(f"Keyword search: {query}")
        return self.vector_store.search(query=query, top_k=top_k, search_type="keyword")

    def vector_search(self, query, top_k=5):
        """Embed ``query`` and run a ``search_type="vector"`` search with that embedding."""
        self.logger.info(f"Vector search: {query}")
        query_vector = self.embedder.embed_text(query)
        return self.vector_store.search(
            query_embedding=query_vector,
            top_k=top_k,
            search_type="vector",
        )

    def hybrid_search(self, query, top_k=5):
        """Run a ``search_type="hybrid"`` search using both the text and its embedding."""
        self.logger.info(f"Hybrid search: {query}")
        query_vector = self.embedder.embed_text(query)
        return self.vector_store.search(
            query=query,
            query_embedding=query_vector,
            top_k=top_k,
            search_type="hybrid",
        )

    def get_document(self, document_id):
        """Return the first index hit whose ``document_id`` field equals the given ID.

        Returns:
            A ``PolicyDocument`` built from the first hit, or None when the
            search yields nothing.
        """
        self.logger.info(f"Fetching document by ID: {document_id}")
        # Resolve by business field `document_id` (PolicyHub ID), not index key `id`.
        # Single quotes are doubled so the value can sit inside the quoted filter literal.
        quoted_safe_id = str(document_id).replace("'", "''")
        hits = self.vector_store.search_client.search(
            search_text="*",
            filter=f"document_id eq '{quoted_safe_id}'",
            top=1,
        )
        try:
            first_hit = next(iter(hits))
        except StopIteration:
            return None

        record = dict(first_hit)
        # PolicyDocument inherits required base fields id/content from Document,
        # while this index stores text under chunkContent.
        fallback_id = record.get("documentId") or record.get("document_id") or str(document_id)
        record.setdefault("id", fallback_id)
        fallback_content = record.get("chunkContent") or ""
        record.setdefault("content", fallback_content)
        return PolicyDocument.from_dict(record)

    def filter_search(
        self,
        query: str,
        top_k: int = 5,
        language: Optional[str] = None,
        locale: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Hybrid search optionally restricted by language and/or locale.

        Args:
            query: Search text; also embedded for the vector part of the search.
            top_k: Passed through to the vector store.
            language: Lower-cased before use as a filter; skipped when falsy.
            locale: Used as given as a filter; skipped when falsy.

        Returns:
            One dict per result, made from the result's document dataclass
            with the ``embedding`` key removed.
        """
        # language values: "en-us", "spanish", "french", "portuguese"
        # locale values: "US", "CA", "Global"
        active_filters: dict = {}
        if language:
            active_filters["language"] = language.lower()
        if locale:
            active_filters["locale"] = locale

        self.logger.info(
            "PolicyHubSearchService.filter_search",
            query=query, top_k=top_k, filters=active_filters
        )

        hits = self.vector_store.search(
            query=query,
            query_embedding=self.embedder.embed_text(query),
            top_k=top_k,
            search_type="hybrid",
            # An empty filter dict is sent as None.
            filters=active_filters or None,
        )

        rows = []
        for hit in hits:
            row = dataclasses.asdict(hit.document)
            row.pop('embedding', None)
            rows.append(row)
        return rows
src/config.py
"""Application configuration — loaded once from environment variables."""
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from dotenv import load_dotenv
from gmf_forge_ai_shared_core.observability import BasicLogger
# Module-level logger; src/main.py and src/models.py import it from this module.
logger = BasicLogger(__name__)
# The .env file is looked up one directory above the directory containing this file.
_ENV_PATH = Path(__file__).parent.parent / ".env"
@dataclass
class AppConfig:
    """Settings for the PolicyHub MCP server, populated by load_config()."""
    # ── Azure OpenAI ─────────────────────────────
    openai_endpoint: str  # read from AZURE_OPENAI_ENDPOINT
    openai_api_key: str  # read from AZURE_OPENAI_API_KEY
    embedding_deployment: str  # read from AZURE_OPENAI_EMBEDDING_MODEL
    # ── Azure AI Search ─────────────────────────
    search_endpoint: str  # read from AZURE_SEARCH_ENDPOINT
    search_api_key: str  # read from AZURE_SEARCH_API_KEY
    search_index: str = "policyhub-index"  # overridable via AZURE_SEARCH_INDEX
    # ── Optional infrastructure ─────────────────
    ssl_cert_path: Optional[str] = None  # SSL_CERT_PATH; None when unset or blank
    embedding_dimension: int = 1536  # overridable via EMBEDDING_DIMENSION
    top_k: int = 5  # overridable via TOP_K
def load_config() -> AppConfig:
    """Load and validate AppConfig from environment variables.

    Calls ``load_dotenv(_ENV_PATH)`` and then reads every setting with
    ``os.getenv``. Values are stripped of surrounding whitespace; a blank
    value counts as unset.

    Returns:
        The populated AppConfig.

    Raises:
        ValueError: If any required variable is unset or blank (all missing
            names are listed in one message), or if EMBEDDING_DIMENSION or
            TOP_K is set to something that is not an integer.
    """
    load_dotenv(_ENV_PATH)
    missing: list[str] = []

    def _require(name: str) -> str:
        # Record the name instead of raising so all missing variables are
        # reported together after the config has been assembled.
        val = os.getenv(name, "").strip()
        if not val:
            missing.append(name)
        return val

    def _optional(name: str, default: str = "") -> str:
        return os.getenv(name, default).strip() or default

    def _optional_int(name: str, default: str) -> int:
        # Name the offending variable; a bare int() error only shows the bad
        # literal and leaves the operator guessing which setting is wrong.
        raw = _optional(name, default)
        try:
            return int(raw)
        except ValueError as exc:
            raise ValueError(
                f"Environment variable {name} must be an integer, got {raw!r}"
            ) from exc

    config = AppConfig(
        openai_endpoint=_require("AZURE_OPENAI_ENDPOINT"),
        openai_api_key=_require("AZURE_OPENAI_API_KEY"),
        embedding_deployment=_require("AZURE_OPENAI_EMBEDDING_MODEL"),
        search_endpoint=_require("AZURE_SEARCH_ENDPOINT"),
        search_api_key=_require("AZURE_SEARCH_API_KEY"),
        search_index=_optional("AZURE_SEARCH_INDEX", "policyhub-index"),
        ssl_cert_path=_optional("SSL_CERT_PATH") or None,
        embedding_dimension=_optional_int("EMBEDDING_DIMENSION", "1536"),
        top_k=_optional_int("TOP_K", "5"),
    )
    if missing:
        raise ValueError(
            f"Missing required environment variables: {', '.join(missing)}"
        )
    logger.info(
        "Configuration loaded",
        index=config.search_index,
        top_k=config.top_k,
        embedding_dim=config.embedding_dimension,
    )
    return config
src/main.py
import dataclasses
from fastmcp import FastMCP
from typing import List, Optional
from azure_search_service import PolicyHubSearchService
from config import load_config, logger
# Settings are loaded once at import time; load_config() raises ValueError
# when a required environment variable is missing.
config = load_config()
# Single search service instance shared by every tool defined below.
search_service = PolicyHubSearchService(
    search_endpoint=config.search_endpoint,
    search_index=config.search_index,
    search_api_key=config.search_api_key,
    openai_endpoint=config.openai_endpoint,
    openai_api_key=config.openai_api_key,
    openai_deployment=config.embedding_deployment,
    logger=logger,
    ssl_cert_path=config.ssl_cert_path,
)
# Server object that the @mcp.resource / @mcp.tool decorators below register on.
mcp = FastMCP("PolicyHub MCP Server")
# Text resource published at resource://greeting
@mcp.resource("resource://greeting")
def get_greeting() -> str:
    """Provides a simple greeting message."""
    greeting = "Hello from PolicyHub MCP Server!"
    return greeting
# Liveness probe exposed as an MCP tool; takes no arguments
@mcp.tool
def health() -> dict:
    """Health check endpoint."""
    status_payload = dict(status="ok", message="MCP server is running")
    return status_payload
@mcp.tool
def keyword_search(query: str, top_k: int = config.top_k):
    """Keyword search over the PolicyHub index.

    Args:
        query: Search text.
        top_k: Number of results to request; passed through to the search
            service. Defaults to the configured TOP_K value.

    Returns:
        A list with one dict per result, built from the result's document
        dataclass with the ``embedding`` field removed.
    """
    logger.info("[ENTRY] /keyword_search", query=query, top_k=top_k)
    results = search_service.keyword_search(query, top_k=top_k)
    logger.info("[EXIT] /keyword_search", result_count=len(results))
    # Embeddings are dropped from the response payload.
    return [
        {k: v for k, v in dataclasses.asdict(r.document).items() if k != 'embedding'}
        for r in results
    ]
@mcp.tool
def vector_search(query: str, top_k: int = config.top_k):
    """Vector (embedding) search over the PolicyHub index.

    Args:
        query: Search text; the search service embeds it before searching.
        top_k: Number of results to request; passed through to the search
            service. Defaults to the configured TOP_K value.

    Returns:
        A list with one dict per result, built from the result's document
        dataclass with the ``embedding`` field removed.
    """
    logger.info("[ENTRY] /vector_search", query=query, top_k=top_k)
    results = search_service.vector_search(query, top_k=top_k)
    logger.info("[EXIT] /vector_search", result_count=len(results))
    # Embeddings are dropped from the response payload.
    return [
        {k: v for k, v in dataclasses.asdict(r.document).items() if k != 'embedding'}
        for r in results
    ]
@mcp.tool
def hybrid_search(query: str, top_k: int = config.top_k):
    """Hybrid (keyword plus vector) search over the PolicyHub index.

    Args:
        query: Search text; the search service uses it both as text and,
            after embedding, as a vector.
        top_k: Number of results to request; passed through to the search
            service. Defaults to the configured TOP_K value.

    Returns:
        A list with one dict per result, built from the result's document
        dataclass with the ``embedding`` field removed.
    """
    logger.info("[ENTRY] /hybrid_search", query=query, top_k=top_k)
    results = search_service.hybrid_search(query, top_k=top_k)
    logger.info("[EXIT] /hybrid_search", result_count=len(results))
    # Embeddings are dropped from the response payload.
    return [
        {k: v for k, v in dataclasses.asdict(r.document).items() if k != 'embedding'}
        for r in results
    ]
@mcp.tool
def get_document(document_id: str):
    """Fetch one policy document by its ``document_id``.

    Returns:
        The document's fields as a dict with ``embedding`` removed, or
        ``{"error": "Document not found", "document_id": ...}`` when the
        search service returns nothing.
    """
    logger.info("[ENTRY] /get_document", document_id=document_id)
    found = search_service.get_document(document_id)
    if not found:
        logger.warning("[EXIT] /get_document - Document not found", document_id=document_id)
        return {"error": "Document not found", "document_id": document_id}

    logger.info("[EXIT] /get_document", document_id=document_id)
    payload = dataclasses.asdict(found)
    payload.pop('embedding', None)
    return payload
@mcp.tool
def filter_search(
    query: str,
    top_k: int = config.top_k,
    language: Optional[str] = None,
    locale: Optional[str] = None,
) -> List[dict]:
    """Search policy documents filtered by language and/or locale.
    language values: 'en-us', 'spanish', 'french', 'portuguese'.
    locale values: 'US', 'CA', 'Global'.
    All filter parameters are optional — omit to leave that dimension unfiltered.
    top_k is the number of results to request and defaults to the configured TOP_K value."""
    logger.info("Tool: filter_search", query=query, top_k=top_k,
                language=language, locale=locale)
    # The service already returns plain dicts without the embedding field.
    return search_service.filter_search(
        query=query, top_k=top_k,
        language=language, locale=locale
    )
# ASGI app for uvicorn: uvicorn main:app --port 8000
# stateless_http=True means each POST is self-contained — no session handshake required.
app = mcp.http_app(stateless_http=True)
if __name__ == "__main__":
    # Running this file directly serves the same app on the loopback address, port 8000.
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8000)
src/models.py
from dataclasses import dataclass
from pydantic import BaseModel, model_validator
from typing import Optional, Dict
from config import logger
from gmf_forge_ai_data.vector_stores.azure_ai_search_vector_store import Document
@dataclass
class PolicyDocument(Document):
    """Custom document type matching the policyhub index schema.
    Top-level fields (not in base Document.metadata) are declared here so that
    AzureAISearchVectorStore._build_filter_expression can build OData filters
    on language and locale.
    Every field declared here has a default value."""
    # Chunk / document identity
    chunkId: Optional[str] = None
    documentId: str = ""  # base64 parent-document ID
    metadata_storage_name: str = ""  # blob filename
    metadata_storage_path: str = ""  # blob URL
    # Policy metadata
    document_id: str = ""  # numeric policy ID, e.g. "821"; the field get_document filters on
    document_name: str = ""
    documentlink: str = ""
    language: str = ""  # "en-us", "spanish", "french", "portuguese"
    locale: str = ""  # "US", "CA", "Global"
    revisionid: str = ""
    source: str = ""
    upload_date: str = ""
    version: str = ""
    pageNumber: int = 0
class SearchRequest(BaseModel):
    """Search request: a query string and an optional result count (default 5)."""
    query: str
    top_k: Optional[int] = 5

    @model_validator(mode="before")
    @classmethod
    def log_creation(cls, data):
        """Debug-log the raw input before field validation and return it unchanged."""
        logger.debug("Creating SearchRequest", values=data)
        return data
class SearchResult(BaseModel):
    """Search result: an id, the content text and a metadata dict."""
    id: str
    content: str
    metadata: Dict

    @model_validator(mode="before")
    @classmethod
    def log_creation(cls, data):
        """Debug-log the raw input before field validation and return it unchanged."""
        logger.debug("Creating SearchResult", values=data)
        return data