{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://nightboxllc.com/.well-known/commons-schema.json",
  "$comment": "Cross-field rules: (1) anyOf requires at least one artifact source, artifact_url or inline_artifact_b64; (2) dependentRequired makes checksum_sha256 mandatory whenever artifact_url is present. Both rules enforce what the property descriptions already state. Keys such as version, updated, lifecycle and post_acceptance_workflow are not JSON Schema keywords; they are carried as informational metadata.",
  "title": "Nightbox US Citizen AI Commons — Submission Schema",
  "description": "Schema for citizen contributions to the Nightbox AI Commons. Accepted contribution kinds: LoRA adapter, distilled prompt-response corpus, evaluation dataset, prompt pack, tokenizer extension. All accepted contributions must be Apache-2.0, CC-BY-4.0, or MIT licensed; submitter must self-attest U.S. citizen or U.S. permanent resident; base model (if applicable) must be from the US-only Tier 1 list.",
  "version": "1.0",
  "updated": "2026-05-08",
  "type": "object",
  "required": [
    "kind",
    "license",
    "base_model_or_na",
    "description",
    "submitter_handle",
    "us_person_attestation"
  ],
  "anyOf": [
    { "required": ["artifact_url"] },
    { "required": ["inline_artifact_b64"] }
  ],
  "dependentRequired": {
    "artifact_url": ["checksum_sha256"]
  },
  "properties": {
    "kind": {
      "type": "string",
      "enum": [
        "lora_adapter",
        "distilled_corpus",
        "eval_dataset",
        "prompt_pack",
        "tokenizer_extension"
      ],
      "description": "Type of contribution."
    },
    "license": {
      "type": "string",
      "enum": ["Apache-2.0", "CC-BY-4.0", "MIT"],
      "description": "Must be one of the permissive open licenses accepted by the Commons."
    },
    "base_model_or_na": {
      "type": "string",
      "enum": [
        "llama-3.1-8b-instruct",
        "llama-3.1-70b-instruct",
        "phi-3.5-mini",
        "phi-3.5-moe",
        "n/a-corpus-or-eval-only"
      ],
      "description": "Base model the contribution targets. Must be from the US-only Tier 1 list (Meta or Microsoft). For corpus or eval contributions with no base model, use n/a-corpus-or-eval-only."
    },
    "description": {
      "type": "string",
      "minLength": 30,
      "maxLength": 2000,
      "description": "Plain-English description of what this contribution is, what it adds, and how it was prepared. Required."
    },
    "submitter_handle": {
      "type": "string",
      "minLength": 2,
      "maxLength": 80,
      "pattern": "^[A-Za-z0-9_.@+\\-]+$",
      "description": "Public handle (Twitter / GitHub / email). Used for attribution in the ledger."
    },
    "submitter_email_optional": {
      "type": "string",
      "format": "email",
      "description": "Optional contact email for review correspondence. Not published in ledger."
    },
    "us_person_attestation": {
      "type": "boolean",
      "const": true,
      "description": "Submitter must self-attest U.S. citizen or U.S. permanent resident. Required by the US-only Zero Trust posture."
    },
    "artifact_url": {
      "type": "string",
      "format": "uri",
      "description": "URL to the published artifact (Hugging Face, GitHub release, IPFS). Recommended path. Either artifact_url or inline_artifact_b64 is required."
    },
    "inline_artifact_b64": {
      "type": "string",
      "maxLength": 6000000,
      "description": "Base64-encoded inline artifact for small contributions (capped at ~4.5 MB decoded). For larger artifacts use artifact_url."
    },
    "checksum_sha256": {
      "type": "string",
      "pattern": "^[a-f0-9]{64}$",
      "description": "SHA-256 hex digest of the artifact bytes. Required when artifact_url is used; computed server-side for inline."
    },
    "size_bytes": {
      "type": "integer",
      "minimum": 1,
      "maximum": 21474836480,
      "description": "Size of the artifact in bytes. Up to 20 GB for URL-referenced LoRA bundles."
    },
    "metrics": {
      "type": "object",
      "description": "Optional self-reported metrics: eval scores, perplexity, training loss curve summary, etc.",
      "additionalProperties": true
    },
    "training_recipe_url": {
      "type": "string",
      "format": "uri",
      "description": "Optional URL to training recipe / training script / hyper-parameters used to produce the artifact."
    },
    "tags": {
      "type": "array",
      "items": {
        "type": "string",
        "minLength": 1,
        "maxLength": 40
      },
      "maxItems": 12,
      "description": "Optional tags (domain, language, task)."
    }
  },
  "additionalProperties": false,
  "examples": [
    {
      "kind": "lora_adapter",
      "license": "Apache-2.0",
      "base_model_or_na": "llama-3.1-8b-instruct",
      "description": "LoRA adapter trained on 50K U.S.-citizen-curated prompt-response pairs covering federal contracting glossary (NAICS, FAR, DFARS, NIST SP 800-171, Section 889, OMB M-22-09). Rank 16, alpha 32, learning rate 2e-4, 3 epochs.",
      "submitter_handle": "@example_citizen",
      "us_person_attestation": true,
      "artifact_url": "https://huggingface.co/example-citizen/llama-3.1-fed-glossary-lora",
      "checksum_sha256": "0000000000000000000000000000000000000000000000000000000000000000",
      "size_bytes": 134217728,
      "metrics": {
        "eval_loss": 1.42,
        "task_accuracy": 0.78
      },
      "tags": ["federal-glossary", "contracting", "english"]
    },
    {
      "kind": "distilled_corpus",
      "license": "CC-BY-4.0",
      "base_model_or_na": "n/a-corpus-or-eval-only",
      "description": "12,400 high-quality prompt-response pairs distilled from public NIH SBIR award abstracts and reviewer feedback letters, manually curated for Phase I proposal coaching.",
      "submitter_handle": "@example2",
      "us_person_attestation": true,
      "artifact_url": "https://github.com/example2/nih-sbir-corpus/releases/download/v1/corpus.jsonl",
      "checksum_sha256": "0000000000000000000000000000000000000000000000000000000000000000",
      "size_bytes": 18874368,
      "tags": ["nih", "sbir", "phase-1", "english"]
    }
  ],
  "lifecycle": {
    "states": ["submitted", "reviewing", "accepted", "rejected"],
    "transitions": {
      "submitted": ["reviewing"],
      "reviewing": ["accepted", "rejected"],
      "accepted": [],
      "rejected": []
    },
    "review_sla_business_days": 5,
    "review_criteria": [
      "license is Apache-2.0 / CC-BY-4.0 / MIT",
      "submitter self-attests U.S. citizen or U.S. permanent resident",
      "base model (if applicable) is on the US-only Tier 1 list",
      "artifact passes basic sanity check (loads as advertised, hash verifies)",
      "no obvious malware, injection payloads, or copyrighted-content leakage",
      "description is genuine and intelligible",
      "no PII or sensitive personal data in corpora"
    ]
  },
  "post_acceptance_workflow": "Accepted contributions are appended to /data/commons-ledger.jsonl (append-only public ledger). LoRA adapters and corpora become input for Nightbox's nightly federated re-training run. The merged model release is published under the same license as the input artifacts (re-share-alike) on the Nightbox Hugging Face org and announced via the news feed."
}