# Document Domain Schema — Layer 2 Base Type
# KNO Schema Version: 0.9.1
#
# Layer 2 base type that composes Layer 1 primitives (identity, history, quality).
# All textual documents extend from this schema.
#
# KEY PRINCIPLE: Three orthogonal dimensions:
#   - FORMAT:  How the file is encoded (markdown, yaml, d2)
#   - DOMAIN:  What it contains (document, diagram, knowledge-base)
#   - PURPOSE: Why it exists (readme, instruction, guide, etc.)
#
# PRODUCED BY: markdown, plaintext, html formats
# This schema consolidates document, doc, and instruction concepts.
#
# Schema Quality Principle (v0.5):
#   - Content lives ONLY in format layer (markdown.kno, plaintext.kno)
#   - Domain layer uses source_xri reference, NO content duplication
#   - Access content via: source_xri → resolve → format.content
#   - COMPOSES Layer 1 primitives for id, history, quality
#
# Schema Conformance (FOUND-P3 hardening, #2512 — v0.9.1):
#   Enum-constrained fields in this schema (notably `purpose`, but also
#   `instruction_platform`, `instruction_scope.activation`, `invocation_mode`,
#   etc.) describe the documented vocabulary. They are NOT enforced at the
#   `saveEntity` write boundary in `services/pspace-api/src/lib/hive-storage.ts`
#   (see Option B in #2512). Writers MAY persist values outside the listed
#   enum; query-time consumers and dedicated detectors are responsible for
#   surfacing such values. Promotion to strict write-time validation is
#   triggered by any of: (a) a concrete detector failure motivated by a
#   typo'd or drifted value, (b) ≥2 additional callers of `saveEntity`
#   writing `document-schema`-conforming entities, or (c) a generalised
#   schema-conformance pass landing across the substrate. Until then,
#   treat the enum as advisory.
# =============================================================================
# RICH TIER
# =============================================================================

# Origin record for this entity.
provenance:
  origin:
    id: '01KGK3V73PWJ4HPXHZ1P8VTBHZ'
    timestamp: '2026-02-04T01:47:56Z'
    tool: 'manual-migration'

# Classification terms used to find this entity.
taxonomy:
  topics:
    - 'documents'
    - 'content'
    - 'knowledge'
    - 'domain-schemas'
  keywords:
    - 'document'
    - 'content'
    - 'text'
    - 'readme'
    - 'guide'
    - 'reference'
    - 'documentation'

# Typed links to other entities; every link carries a human-readable reason.
relationships:
  # Format layer entities that produce documents.
  produced_by:
    - xri: 'kno://specs/markdown-format'
      reason: 'Primary authoring format — see CommonMark spec'
    - xri: 'kno://formats/plaintext'
      reason: 'Simple text documents'
    - xri: 'kno://formats/html'
      reason: 'Web documents'

  depends_on:
    - xri: 'kno://specs/kno-spec'
      reason: 'Conforms to KNO format specification'

  # Layer 1 primitives this Layer 2 type is built from.
  composes:
    - xri: 'kno://specs/identity-schema'
      reason: 'Layer 1: id, canonical_id, local_ids, equiv_ids'
    - xri: 'kno://specs/history-schema'
      reason: 'Layer 1: _history, changelog'
    - xri: 'kno://specs/quality-schema'
      reason: 'Layer 1: quality, validation, confidence'

  # Layer 3 schemas that extend this one.
  enables:
    - xri: 'kno://specs/spec-schema'
      reason: 'Layer 3: extends document for specifications'
    - xri: 'kno://specs/rfc-schema'
      reason: 'Layer 3: extends document for RFCs'
    - xri: 'kno://specs/glossary-schema'
      reason: 'Layer 3: extends document for glossaries'
    - xri: 'kno://specs/guide-schema'
      reason: 'Layer 3: extends document for how-to guides'

  related_to:
    - xri: 'kno://concepts/knowledge'
      reason: 'Documents contain knowledge'
    - xri: 'kno://concepts/knowledge-base'
      reason: 'Collections of documents'
    - xri: 'kno://concepts/diagram'
      reason: 'Visual documents'

  implements:
    - xri: 'kno://principles/P9'
      reason: 'Temporal/Historical — all documents in Hive are versioned'
    - xri: 'kno://principles/VCS-Mandatory-Rule'
      reason: 'Documents stored in Hive MUST be recorded in a VCS backend'

# Review state of this spec.
quality:
  completeness: 0.85
  last_reviewed: '2026-01-06'
  review_status: 'draft'
  reviewed_by: 'claude'

# =============================================================================
# HISTORY (P9 Temporal — composed from history-schema.kno)
# =============================================================================
_history:
  version: 6
  created: "2025-12-25T00:00:00Z"
  created_by: "pspace-core-team"
  modified: "2026-02-21T00:00:00Z"
  modified_by: "claude"

# =============================================================================
# SPECIFICATION CONTENT
# =============================================================================
# `spec` holds, in order: status, changelog (newest first), the Markdown
# description, the property schema for document entities, and worked examples.
spec:
  status: Draft

  # NOTE(review): the entity version is 0.9.1 but the newest entry below is
  # 0.8.0 — changelog entries for the 0.9.x releases appear to be missing.
  changelog:
    - version: "0.8.0"
      date: "2026-04-28"
      changes:
        - "Added purpose enum value: possibility-terms (Q-COL-14 Phase 6.7.5, #2133)"
        - "Added purpose enum value: rca (root cause analysis; M22 Wave 3, Epic #2156, issue #2161)"
        - "Companion: specs/rca-template.kno provides the structural template for RCA documents"
        - "Per Schema Minimization §0.6 Distinctness Gate, both extend document-schema rather than spawning new schemas."
        - "Backwards-compatible: new optional enum values."
    - version: "0.7.0"
      date: "2026-02-21"
      changes:
        - "Added VCS-Mandatory Rule cross-reference (implements relationship)"
        - "As Layer 2 base type, this cascades VCS requirement to all Layer 3 schemas"
    - version: "0.6.0"
      date: "2026-02-02"
      changes:
        - "Extended purpose enum with playbook component types (Issue #51)"
        - "Added purpose values: template, example, decision-record, api-spec"
        - "Updated documentation tables and ASCII diagram"
        - "Backwards-compatible: all new values are optional additions"
    - version: "0.5.0"
      date: "2026-01-06"
      changes:
        - "Refactored as Layer 2 base type composing Layer 1 primitives"
        - "Updated $schema to kno@0.0.9"
        - "Added _history block (P9 Temporal)"
        - "Added layer: 2 to facets"
        - "Added enables relationships to Layer 3 schemas (spec, rfc, glossary, guide)"
        - "Updated composes to reference identity, history, quality with full comments"
    - version: "0.4.0"
      date: "2025-12-31"
      changes:
        - "Deprecated content field - content lives ONLY in format layer"
        - "source_xri is now the canonical way to access source content"
        - "Aligns with schema quality guidelines (no content duplication)"
    - version: "0.3.0"
      date: "2025-12-28"
      changes:
        - "Added purpose variants: prompt, agent-profile, slash-command"
        - "Added invocable artifact fields: prompt_variables, agent_tools, allowed_tools"
        - "Added invocation_mode and invocation_name for explicit invocation"
        - "Extended three-dimension model for invocable artifacts"
    - version: "0.2.0"
      date: "2025-12-25"
      changes:
        - "Added purpose field for functional classification"
        - "Added instruction_platform for AI agent instructions"
        - "Merged doc.kno.yaml into document schema"  # kno-yaml-allowed: provenance entry preserves historical filename at merge time
        - "Removed document_type in favor of purpose"
        - "Added export_config for static site generation"
    - version: "0.1.0"
      date: "2025-12-25"
      changes:
        - "Initial document domain schema"
        - "Added document type classification"
        - "Added semantic extraction fields (topics, entities, keywords)"
        - "Added audience and relationship fields"

  description: |
    ## Three Orthogonal Dimensions

    ```
    ┌─────────────────────────────────────────────────────────────────────────┐
    │                       THREE ORTHOGONAL DIMENSIONS                       │
    ├─────────────────────────────────────────────────────────────────────────┤
    │                                                                         │
    │  FORMAT            DOMAIN                PURPOSE                        │
    │  (how encoded)     (what it is)          (why it exists)                │
    │                                                                         │
    │  ├─ markdown       ├─ document ◀───      ├─ readme                      │
    │  ├─ yaml           │  (this schema)      ├─ contributing                │
    │  ├─ d2             ├─ diagram            ├─ changelog                   │
    │  └─ mermaid        └─ knowledge-base     ├─ license                     │
    │                                          ├─ instruction                 │
    │                                          ├─ prompt (invocable)          │
    │                                          ├─ agent-profile (invocable)   │
    │                                          ├─ slash-command (invocable)   │
    │                                          ├─ guide                       │
    │                                          ├─ reference                   │
    │                                          ├─ template                    │
    │                                          ├─ example                     │
    │                                          ├─ decision-record             │
    │                                          ├─ api-spec                    │
    │                                          └─ (none = general)            │
    │                                                                         │
    │  README.md = format:markdown + domain:document + purpose:readme         │
    │  copilot-instructions.md = format:markdown + domain:document            │
    │                          + purpose:instruction                          │
    │                          + instruction_platform:copilot                 │
    │  review.prompt.md = format:markdown + domain:document                   │
    │                   + purpose:prompt + invocation_mode:ask                │
    │                                                                         │
    └─────────────────────────────────────────────────────────────────────────┘
    ```

    ## Purpose Field

    The `purpose` field classifies **why** a document exists:

    ### Passive Documents (always-active or reference)

    | Purpose       | Description                      | File Examples               |
    |---------------|----------------------------------|-----------------------------|
    | readme        | Project introduction             | README.md                   |
    | contributing  | Contribution guidelines          | CONTRIBUTING.md             |
    | changelog     | Version history                  | CHANGELOG.md                |
    | license       | Legal terms                      | LICENSE.md                  |
    | instruction   | AI agent instructions            | copilot-instructions.md     |
    | guide         | How-to documentation             | Getting Started guides      |
    | reference     | API/technical reference          | API docs                    |
    | tutorial      | Step-by-step learning            | Build your first X          |
    | config        | Configuration documentation      | Config reference            |
    | manifest      | Package/project manifest         | package.json docs           |
    | template      | Reusable document templates      | PR templates, ADR templates |
    | example       | Worked examples & case studies   | Code examples, scenarios    |
    | decision-record | Architecture decisions (ADRs)    | docs/decisions/*.md         |
    | api-spec      | API specifications               | OpenAPI, endpoint specs     |
    | possibility-terms | Per-possibility terms of participation accepted at invite-claim time | content/possibilities/{slug}/terms.kno |
    | rca           | Root cause analysis              | see specs/rca-template.kno  |
    | (omitted)     | General purpose document         | Any other document          |

    ### Invocable Artifacts (user-triggered)

    | Purpose        | Description                      | File Examples                    | Invocation    |
    |----------------|----------------------------------|----------------------------------|---------------|
    | prompt         | Copilot prompt templates         | .github/prompts/*.prompt.md      | @prompt-name  |
    | agent-profile  | Copilot custom agents            | .github/agents/*.agent.md        | @agent-name   |
    | slash-command  | Claude Code slash commands       | .claude/commands/*.md            | /command-name |

    **Key Difference:** Instructions are always active in context.
    Invocable artifacts are explicitly triggered by the user via @mention or /command.

    ## Instruction Platform (when purpose=instruction)

    | Platform  | Files                                     | Spec URL                                    |
    |-----------|-------------------------------------------|---------------------------------------------|
    | copilot   | copilot-instructions.md, *.instructions.md| docs.github.com/copilot                     |
    | cursor    | .cursorrules, .cursor/rules/*             | docs.cursor.com                             |
    | windsurf  | .windsurf/rules/*                         | docs.codeium.com/windsurf                   |
    | cline     | .clinerules                               | github.com/cline/cline                      |
    | warp      | WARP.md                                   | docs.warp.dev                               |
    | aider     | .aider*                                   | aider.chat                                  |

  # ===========================================================================
  # SCHEMA
  # ===========================================================================
  # Property schema for entities of type `document`.
  schema:
    type: object
    # `content` is intentionally NOT required: it was deprecated in 0.4.0
    # (content lives only in the format layer and is reached via source_xri),
    # so requiring it would reject entities that follow that principle.
    # Entities that still carry `content` continue to validate.
    required:
      - id
      - type
      - version
      - title
    properties:
      # -----------------------------------------------------------------------
      # BASIC TIER
      # -----------------------------------------------------------------------
      id:
        type: string
        description: |
          Unique identifier for this document.
          Convention: doc-{corpus_name}-{slug}
        examples:
          - "doc-awecelot-readme"
          - "doc-pspace-getting-started"

      type:
        const: document
        description: Always "document" for this schema

      version:
        type: string
        pattern: "^\\d+\\.\\d+\\.\\d+$"
        description: Version of this document entity

      # -----------------------------------------------------------------------
      # DOCUMENT IDENTITY
      # -----------------------------------------------------------------------
      title:
        type: string
        description: |
          Document title. Extracted from first heading or frontmatter.
        examples:
          - "Getting Started with Possibility"
          - "API Reference"

      slug:
        type: string
        description: |
          URL-safe identifier derived from title.
        pattern: "^[a-z0-9-]+$"
        examples:
          - "getting-started"
          - "api-reference"

      description:
        type: string
        description: |
          Brief description of the document's purpose.
          Extracted from frontmatter, first paragraph, or generated.
        examples:
          - "Step-by-step guide to setting up Possibility locally"

      # -----------------------------------------------------------------------
      # PURPOSE CLASSIFICATION
      # -----------------------------------------------------------------------
      purpose:
        type: string
        description: |
          The functional purpose this document serves.
          Detected from filename, location, and content patterns.
          Omit for general-purpose documents.

          **Invocable Purposes** (user-triggered artifacts):
          - prompt: Copilot prompt files (invoked via @prompt)
          - agent-profile: Copilot custom agents (invoked via @agent-name)
          - slash-command: Claude Code slash commands (invoked via /command)

          **Passive Purposes** (always-active or reference):
          - instruction: AI agent instructions (always active in context)
          - readme, guide, reference, etc.: Documentation
        enum:
          - readme
          - contributing
          - changelog
          - license
          - instruction
          - prompt
          - agent-profile
          - slash-command
          - guide
          - reference
          - tutorial
          - config
          - manifest
          - template
          - example
          - decision-record
          - api-spec
          - possibility-terms
          - rca
          # NOTE(review): agent-run-digest has no changelog entry and no row
          # in the purpose table of this file — confirm and document it.
          - agent-run-digest
        examples:
          - "readme"
          - "instruction"
          - "prompt"
          - "agent-profile"
          - "slash-command"
          - "template"
          - "example"
          - "decision-record"
          - "api-spec"
          - "possibility-terms"
          - "rca"
          - "agent-run-digest"

      # -----------------------------------------------------------------------
      # INSTRUCTION-SPECIFIC FIELDS
      # -----------------------------------------------------------------------
      instruction_platform:
        type: string
        description: |
          Target AI coding assistant platform (when purpose=instruction).
          Omit for platform-agnostic instructions.
        enum:
          - copilot
          - cursor
          - windsurf
          - cline
          - warp
          - aider
        examples:
          - "copilot"
          - "cursor"

      instruction_scope:
        type: object
        description: |
          Scope/activation rules for instruction files.
        properties:
          apply_to:
            type: string
            description: Glob pattern for file matching
            examples:
              - "**/*.ts"
              - "**"
          activation:
            type: string
            enum:
              - always
              - manual
              - auto
            description: How the instruction is activated

      # -----------------------------------------------------------------------
      # INVOCABLE ARTIFACT FIELDS
      # -----------------------------------------------------------------------
      # These fields apply to purpose: prompt, agent-profile, slash-command
      # They capture invocation patterns, variables, and tool permissions.
      invocation_mode:
        type: string
        description: |
          How this artifact is invoked by the user.
          - ask: Chat-style invocation (Copilot default prompt mode)
          - edit: Code modification mode (Copilot edit mode)
          - agent: Agent-style invocation (@agent-name)
          - slash: Slash command invocation (/command)
        enum:
          - ask
          - edit
          - agent
          - slash
        examples:
          - "ask"
          - "agent"
          - "slash"

      invocation_name:
        type: string
        description: |
          The name used to invoke this artifact.
          Derived from filename (stripped of extension).
          - Prompts: filename.prompt.md → @filename
          - Agents: agent-name.agent.md → @agent-name
          - Slash commands: command.md → /command
        examples:
          - "review-code"
          - "security-agent"
          - "optimize"

      prompt_variables:
        type: array
        description: |
          Variables that can be passed to this prompt/command.
          Extracted from ${input:name}, $ARGUMENTS, $1-$9 patterns.
        items:
          type: object
          properties:
            name:
              type: string
              description: Variable name
            description:
              type: string
              description: Description of what the variable expects
            placeholder:
              type: string
              description: Placeholder text shown to user
            required:
              type: boolean
              description: Whether the variable must be provided
              default: false
            position:
              type: integer
              description: Positional index for positional args ($1, $2, etc.)
        examples:
          # One example value, i.e. one array of variable objects.
          - - name: "focus"
              description: "Area to focus the review on"
              placeholder: "e.g., security, performance"
              required: false

      agent_tools:
        type: array
        description: |
          Tools this agent has access to (when purpose=agent-profile).
          Extracted from frontmatter tools[] array.
        items:
          type: string
        examples:
          - ["codebase", "githubRepo", "fetch"]

      allowed_tools:
        type: array
        description: |
          Tools explicitly allowed for this slash command.
          Extracted from allowed-tools frontmatter.
          If omitted, defaults to all available tools.
        items:
          type: string
        examples:
          - ["Read", "Bash"]

      # -----------------------------------------------------------------------
      # DOCUMENT SUBTYPE (refinement within purpose)
      # -----------------------------------------------------------------------
      subtype:
        type: string
        description: |
          More specific classification within the purpose.
        examples:
          - "getting-started"  # subtype of guide
          - "api-reference"  # subtype of reference
          - "adr"  # architecture decision record

      # -----------------------------------------------------------------------
      # CONTENT
      # -----------------------------------------------------------------------
      content:
        type: string
        description: |
          DEPRECATED since 0.4.0: content lives ONLY in the format layer;
          use source_xri to reach the source content instead.

          The full text content of the document.
          May be the original markdown or converted plaintext.

      content_format:
        type: string
        enum:
          - markdown
          - plaintext
          - html
        default: markdown
        description: Format of the content field

      summary:
        type: string
        description: |
          AI-generated or human-written summary of the document.
          Typically 1-3 sentences.

      word_count:
        type: integer
        description: Approximate word count
        minimum: 0

      reading_time_minutes:
        type: integer
        description: Estimated reading time in minutes
        minimum: 1

      # -----------------------------------------------------------------------
      # STRUCTURE
      # -----------------------------------------------------------------------
      sections:
        type: array
        description: |
          Document sections extracted from headings.
        items:
          type: object
          properties:
            heading:
              type: string
              description: Section heading text
            level:
              type: integer
              minimum: 1
              maximum: 6
            anchor:
              type: string
              description: URL anchor for this section
            summary:
              type: string
              description: Brief summary of section content

      table_of_contents:
        type: array
        description: |
          Flat list of headings for navigation.
        items:
          type: object
          properties:
            text:
              type: string
            level:
              type: integer
            anchor:
              type: string

      # -----------------------------------------------------------------------
      # SEMANTIC EXTRACTION
      # -----------------------------------------------------------------------
      topics:
        type: array
        description: |
          Main topics covered in the document.
        items:
          type: string
        examples:
          - ["authentication", "security", "OAuth"]

      keywords:
        type: array
        description: |
          Important keywords and terms.
        items:
          type: string

      entities_mentioned:
        type: array
        description: |
          Named entities mentioned in the document.
          Can reference other .kno entities via XRI.
        items:
          type: object
          properties:
            name:
              type: string
              description: Entity name as it appears
            type:
              type: string
              description: Entity type (person, org, tool, concept)
            xri:
              type: string
              description: XRI if this maps to a .kno entity

      technologies:
        type: array
        description: |
          Technologies, tools, or frameworks mentioned.
        items:
          type: string
        examples:
          - ["TypeScript", "PostgreSQL", "Docker"]

      # -----------------------------------------------------------------------
      # PREREQUISITES (audience and skill_level removed per DC-1/DC-2)
      # -----------------------------------------------------------------------
      # REMOVED: audience field (DC-1: all .kno files serve all audiences)
      # REMOVED: skill_level field (DC-2: qualitative judgments inferred, not declared)
      prerequisites:
        type: array
        description: |
          Documents or knowledge required before reading this.
        items:
          type: string
        examples:
          - ["doc-pspace-getting-started", "Familiarity with TypeScript"]

      # -----------------------------------------------------------------------
      # PROVENANCE
      # -----------------------------------------------------------------------
      source_format:
        type: string
        description: |
          The format schema this document was produced from.
        examples:
          - "markdown"
          - "plaintext"

      source_xri:
        type: string
        description: |
          XRI reference to the source format entity.
        examples:
          - "pspace://markdown:markdown-awecelot-readme"

      # -----------------------------------------------------------------------
      # RELATIONSHIPS
      # -----------------------------------------------------------------------
      relationships:
        type: object
        properties:
          related_to:
            type: array
            description: Related documents or entities
            items:
              type: string
          depends_on:
            type: array
            description: Documents this depends on
            items:
              type: string
          enables:
            type: array
            description: Documents this enables understanding of
            items:
              type: string
          supersedes:
            type: array
            description: Documents this replaces
            items:
              type: string

      # -----------------------------------------------------------------------
      # METADATA
      # -----------------------------------------------------------------------
      language:
        type: string
        description: |
          Primary language of the document (ISO 639-1).
        default: "en"
        examples:
          - "en"
          - "es"
          - "ja"

      last_modified:
        type: string
        format: date-time
        description: |
          When the source document was last modified.

      author:
        type: string
        description: |
          Author of the document (if known).

      license:
        type: string
        description: |
          License under which the document is published.

  # ===========================================================================
  # EXAMPLES
  # ===========================================================================
  # Each example embeds a complete document entity as a YAML string.
  examples:
    - title: "README Document"
      description: "A project README transformed to document entity"
      yaml: |
        $schema: document@0.0.3
        id: doc-awecelot-readme
        type: document
        version: 0.1.0

        title: "Awecelot"
        slug: "awecelot"
        description: "A methodology for agentic development"

        purpose: readme

        content: |
          # Awecelot

          A methodology for agentic development that emphasizes
          structured collaboration between humans and AI agents.

          ## Getting Started
          ...

        content_format: markdown
        summary: "Awecelot is a methodology for structured human-AI collaboration in software development."
        word_count: 2450
        reading_time_minutes: 10

        sections:
          - heading: "Awecelot"
            level: 1
            anchor: "awecelot"
          - heading: "Getting Started"
            level: 2
            anchor: "getting-started"

        topics:
          - "agentic development"
          - "AI collaboration"
          - "methodology"

        keywords:
          - "agents"
          - "collaboration"
          - "workflow"
          - "structured development"

        technologies:
          - "GitHub Copilot"
          - "VS Code"

        # REMOVED: audience and skill_level (deprecated per DC-1/DC-2)

        source_format: markdown
        source_xri: "pspace://markdown:markdown-awecelot-readme"

        language: "en"

    - title: "Copilot Instruction Document"
      description: "An AI instruction file transformed to document entity"
      yaml: |
        $schema: document@0.0.3
        id: doc-pspace-git-safety-instruction
        type: document
        version: 0.1.0

        title: "Git Safety Instructions"
        slug: "git-safety-instructions"
        description: "Safety protocols for git operations"

        purpose: instruction
        instruction_platform: copilot
        instruction_scope:
          apply_to: "**"
          activation: always

        content: |
          ---
          description: "Safety protocols for git operations"
          applyTo: "**"
          ---
          # Git Safety Instructions

          ## Core Principles
          - Never force push to main
          - Create backup branch before destructive operations

        content_format: markdown
        word_count: 150
        reading_time_minutes: 1

        topics:
          - "git"
          - "safety"
          - "version control"

        # REMOVED: audience (deprecated per DC-1)

        source_format: markdown
        source_xri: "pspace://markdown:markdown-pspace-git-safety"

    - title: "API Reference Document"
      description: "Technical reference documentation"
      yaml: |
        $schema: document@0.0.3
        id: doc-pspace-api-reference
        type: document
        version: 0.1.0

        title: "Possibility API Reference"
        slug: "api-reference"
        description: "Complete API reference for Possibility endpoints"

        purpose: reference
        subtype: api-reference

        content: |
          # Possibility API Reference

          ## Authentication
          All API requests require authentication via Bearer token.
          ...

        content_format: markdown
        word_count: 5200
        reading_time_minutes: 21

        topics:
          - "API"
          - "REST"
          - "authentication"
          - "endpoints"

        # REMOVED: audience and skill_level (deprecated per DC-1/DC-2)

        prerequisites:
          - "doc-pspace-getting-started"

        source_format: markdown
        source_xri: "pspace://markdown:markdown-pspace-api-ref"

    - title: "Copilot Prompt File"
      description: "An invocable prompt template for code review"
      yaml: |
        $schema: document@0.0.3
        id: doc-pspace-review-prompt
        type: document
        version: 0.1.0

        title: "Code Review Prompt"
        slug: "review-code"
        description: "Review code for quality, security, and best practices"

        purpose: prompt
        invocation_mode: ask
        invocation_name: "review-code"

        prompt_variables:
          - name: "focus"
            description: "Area to focus the review on"
            placeholder: "e.g., security, performance, readability"
            required: false

        content: |
          ---
          mode: ask
          description: "Review code for quality issues"
          ---
          Review the selected code for:
          - Code quality and readability
          - Security vulnerabilities
          - Performance issues
          - Best practice violations

          Focus area: ${input:focus}

        content_format: markdown
        word_count: 50
        reading_time_minutes: 1

        topics:
          - "code review"
          - "quality"
          - "security"

        # REMOVED: audience (deprecated per DC-1)

        source_format: markdown
        source_xri: "pspace://markdown:markdown-pspace-review-prompt"

    - title: "Copilot Custom Agent"
      description: "A custom agent with specific tool access"
      yaml: |
        $schema: document@0.0.3
        id: doc-pspace-security-agent
        type: document
        version: 0.1.0

        title: "Security Analysis Agent"
        slug: "security-agent"
        description: "An agent specialized in security analysis"

        purpose: agent-profile
        invocation_mode: agent
        invocation_name: "security"

        agent_tools:
          - codebase
          - githubRepo
          - fetch

        content: |
          ---
          name: security
          description: "Analyzes code for security vulnerabilities"
          tools:
            - codebase
            - githubRepo
            - fetch
          ---
          You are a security analysis expert. When analyzing code:

          1. Check for common vulnerability patterns (OWASP Top 10)
          2. Review authentication and authorization logic
          3. Look for data exposure risks
          4. Verify input validation

        content_format: markdown
        word_count: 75
        reading_time_minutes: 1

        topics:
          - "security"
          - "vulnerability analysis"
          - "code review"

        # REMOVED: audience (deprecated per DC-1)

        source_format: markdown
        source_xri: "pspace://markdown:markdown-pspace-security-agent"

    - title: "Claude Code Slash Command"
      description: "A slash command for quick optimization"
      yaml: |
        $schema: document@0.0.3
        id: doc-pspace-optimize-command
        type: document
        version: 0.1.0

        title: "Optimize Command"
        slug: "optimize"
        description: "Quick optimization suggestions for selected code"

        purpose: slash-command
        invocation_mode: slash
        invocation_name: "optimize"

        prompt_variables:
          - name: "ARGUMENTS"
            description: "Optional focus area for optimization"
            required: false

        allowed_tools:
          - Read
          - Bash

        content: |
          ---
          allowed-tools:
            - Read
            - Bash
          ---
          Analyze the selected code and suggest optimizations.

          Focus: $ARGUMENTS

          Consider:
          - Time complexity improvements
          - Memory usage optimization
          - Readability improvements

        content_format: markdown
        word_count: 40
        reading_time_minutes: 1

        topics:
          - "optimization"
          - "performance"

        # REMOVED: audience (deprecated per DC-1)

        source_format: markdown
        source_xri: "pspace://markdown:markdown-pspace-optimize-cmd"

# =============================================================================
# CONTAINER TIER — Navigation Index
# =============================================================================
# NOTE(review): `line` values are hand-maintained offsets into this file;
# re-verify them whenever the file is edited or reformatted.
_index:
  - path: "identity"
    line: 28
    keywords: [id, type, version, document]
  - path: "spec/purpose_types"
    line: 150
    keywords: [readme, changelog, guide, instruction, reference]
  # Keywords mirror the instruction_platform enum values.
  - path: "spec/instruction_platform"
    line: 250
    keywords: [copilot, cursor, windsurf, cline, warp, aider]
  - path: "spec/composition"
    line: 400
    keywords: [composes, identity, history, quality, Layer-2]
  - path: "spec/fields"
    line: 500
    keywords: [title, purpose, sections, source_xri]
  - path: "examples"
    line: 800
    keywords: [minimal, full, readme, instruction]

contains:
  - xri: "#identity"
    role: section
    title: "Schema Metadata"
    keywords: [id, type, version]
  - xri: "#spec"
    role: section
    title: "Document Schema"
    keywords: [purpose-types, fields, composition]
  - xri: "#examples"
    role: section
    title: "Usage Examples"
    keywords: [minimal, full, readme, instruction]