{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://nightboxllc.com/.well-known/commons-schema.json",
  "title": "Nightbox US Citizen AI Commons — Submission Schema",
  "description": "Schema for citizen contributions to the Nightbox AI Commons. Accepted contribution kinds: LoRA adapter, distilled prompt-response corpus, evaluation dataset, prompt pack, tokenizer extension. All accepted contributions must be licensed under Apache-2.0, CC-BY-4.0, or MIT; the submitter must self-attest to being a U.S. citizen or U.S. permanent resident; the base model (if applicable) must be from the US-only Tier 1 list.",
  "version": "1.0",
  "updated": "2026-05-08",

  "type": "object",
  "required": ["kind", "license", "base_model_or_na", "description", "submitter_handle", "us_person_attestation"],
  "properties": {
    "kind": {
      "type": "string",
      "enum": ["lora_adapter", "distilled_corpus", "eval_dataset", "prompt_pack", "tokenizer_extension"],
      "description": "Type of contribution."
    },
    "license": {
      "type": "string",
      "enum": ["Apache-2.0", "CC-BY-4.0", "MIT"],
      "description": "Must be one of the permissive open licenses accepted by the Commons."
    },
    "base_model_or_na": {
      "type": "string",
      "enum": [
        "llama-3.1-8b-instruct",
        "llama-3.1-70b-instruct",
        "phi-3.5-mini",
        "phi-3.5-moe",
        "n/a-corpus-or-eval-only"
      ],
      "description": "Base model the contribution targets. Must be from the US-only Tier 1 list (Meta or Microsoft). For corpus or eval contributions with no base model, use n/a-corpus-or-eval-only."
    },
    "description": {
      "type": "string",
      "minLength": 30,
      "maxLength": 2000,
      "description": "Plain-English description of what this contribution is, what it adds, and how it was prepared. Required."
    },
    "submitter_handle": {
      "type": "string",
      "minLength": 2,
      "maxLength": 80,
      "pattern": "^[A-Za-z0-9_.@+\\-]+$",
      "description": "Public handle (Twitter / GitHub / email). Used for attribution in the ledger."
    },
    "submitter_email_optional": {
      "type": "string",
      "format": "email",
      "description": "Optional contact email for review correspondence. Not published in ledger."
    },
    "us_person_attestation": {
      "type": "boolean",
      "const": true,
      "description": "Submitter must self-attest to being a U.S. citizen or U.S. permanent resident; the value must be true. Required by the US-only Zero Trust posture."
    },
    "artifact_url": {
      "type": "string",
      "format": "uri",
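      "description": "URL of the published artifact (Hugging Face, GitHub release, IPFS). This is the recommended way to supply the artifact. Either artifact_url or inline_artifact_b64 must be provided; note that this rule is not expressed in the schema's required list, so it is not checked by schema validation alone."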
    },
    "inline_artifact_b64": {
      "type": "string",
      "maxLength": 6000000,
      "description": "Base64-encoded inline artifact for small contributions (capped at ~4.5 MB decoded). For larger artifacts use artifact_url."
    },
    "checksum_sha256": {
      "type": "string",
      "pattern": "^[a-f0-9]{64}$",
      "description": "SHA-256 digest of the artifact bytes, written as 64 lowercase hexadecimal characters. Required when artifact_url is used; computed server-side for inline artifacts."
    },
    "size_bytes": {
      "type": "integer",
      "minimum": 1,
      "maximum": 21474836480,
      "description": "Size of the artifact in bytes. Up to 20 GiB (21,474,836,480 bytes) for URL-referenced LoRA bundles."
    },
    "metrics": {
      "type": "object",
      "description": "Optional self-reported metrics: eval scores, perplexity, training loss curve summary, etc.",
      "additionalProperties": true
    },
    "training_recipe_url": {
      "type": "string",
      "format": "uri",
      "description": "Optional URL to training recipe / training script / hyper-parameters used to produce the artifact."
    },
    "tags": {
      "type": "array",
      "items": { "type": "string", "minLength": 1, "maxLength": 40 },
      "maxItems": 12,
      "description": "Optional tags (domain, language, task)."
    }
  },
  "additionalProperties": false,

  "examples": [
    {
      "kind": "lora_adapter",
      "license": "Apache-2.0",
      "base_model_or_na": "llama-3.1-8b-instruct",
      "description": "LoRA adapter trained on 50K U.S.-citizen-curated prompt-response pairs covering federal contracting glossary (NAICS, FAR, DFARS, NIST SP 800-171, Section 889, OMB M-22-09). Rank 16, alpha 32, learning rate 2e-4, 3 epochs.",
      "submitter_handle": "@example_citizen",
      "us_person_attestation": true,
      "artifact_url": "https://huggingface.co/example-citizen/llama-3.1-fed-glossary-lora",
      "checksum_sha256": "0000000000000000000000000000000000000000000000000000000000000000",
      "size_bytes": 134217728,
      "metrics": {"eval_loss": 1.42, "task_accuracy": 0.78},
      "tags": ["federal-glossary", "contracting", "english"]
    },
    {
      "kind": "distilled_corpus",
      "license": "CC-BY-4.0",
      "base_model_or_na": "n/a-corpus-or-eval-only",
      "description": "12,400 high-quality prompt-response pairs distilled from public NIH SBIR award abstracts and reviewer feedback letters, manually curated for Phase I proposal coaching.",
      "submitter_handle": "@example2",
      "us_person_attestation": true,
      "artifact_url": "https://github.com/example2/nih-sbir-corpus/releases/download/v1/corpus.jsonl",
      "checksum_sha256": "0000000000000000000000000000000000000000000000000000000000000000",
      "size_bytes": 18874368,
      "tags": ["nih", "sbir", "phase-1", "english"]
    }
  ],

  "lifecycle": {
    "states": ["submitted", "reviewing", "accepted", "rejected"],
    "transitions": {
      "submitted": ["reviewing"],
      "reviewing": ["accepted", "rejected"],
      "accepted": [],
      "rejected": []
    },
    "review_sla_business_days": 5,
    "review_criteria": [
      "license is Apache-2.0 / CC-BY-4.0 / MIT",
      "submitter self-attests to being a U.S. citizen or U.S. permanent resident",
      "base model (if applicable) is on the US-only Tier 1 list",
      "artifact passes basic sanity check (loads as advertised, hash verifies)",
      "no obvious malware, injection payloads, or copyrighted-content leakage",
      "description is genuine and intelligible",
      "no PII or sensitive personal data in corpora"
    ]
  },

  "post_acceptance_workflow": "Accepted contributions are appended to /data/commons-ledger.jsonl (append-only public ledger). LoRA adapters and corpora become input for Nightbox's nightly federated re-training run. The merged model release is published under the same license as the input artifacts (re-share-alike) on the Nightbox Hugging Face org and announced via the news feed."
}