@prefix this: . @prefix sub: . @prefix rdfs: . @prefix xsd: . @prefix np: . @prefix npx: . @prefix dcterms: . @prefix prov: . @prefix schema: . @prefix foaf: . sub:head { this: np:hasAssertion sub:assertion; np:hasProvenance sub:provenance; np:hasPublicationInfo sub:pubinfo; a np:Nanopublication . } sub:assertion { "webpage" . sub:assertion dcterms:creator ; ; ; rdfs:comment """ New paper alert! 🚨 We've been exploring the impact of context on LLM performance evaluation. Turns out, evaluating models on individual examples might not tell the whole story. #MachineLearning #AI Our findings suggest that batch evaluation allows models to identify patterns and tendencies, leading to more nuanced assessments. Plus, a two-step decision process (analysis + scoring) shows promising results. Exciting times for ML eval! 📊🧠 To learn more, check out the paper: https://arvix.org/abs/2207.15796 """; schema:keywords "AI", "LLM", "MachineLearning", "batch-evaluation", "performance-evaluation", "two-step-decision-process"; . } sub:provenance { a prov:SoftwareAgent; prov:actedOnBehalfOf . sub:activity a ; prov:wasAssociatedWith . sub:assertion prov:linksTo ; prov:wasAssociatedWith ; prov:wasAttributedTo ; prov:wasGeneratedBy sub:activity . foaf:account . } sub:pubinfo { sub:sig npx:hasAlgorithm "RSA"; npx:hasPublicKey "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArHtI92jm8pAYVsvJabxLGfOT+7G0JyJGh2gwjB5x2pFPga6wWTd+rNBWWUZViIFnaJrBEsJpgdnoupLU9ppwn+khMiGRfxqGsDDzwHcj3Jc75CRys7d3etwXdBdoXfBgjsJiZBazwm13idr6tljRrC1TaEJBnRQAqzBw9cLDeGY77cSznzXT39feUGT168dpCSE9O6u/48DvvWVqciHGsH9cQ+LroJJVsMrorwtsdZnAK+q48wtIP6pIpw5shSJ5LnA0qeN/f4TvTFDV6ItYIXjiWWpTECc/Bxmfnyat3B5xWCu9nvz8fEs7Ns0TuzQwT3/K55iSKDEIi/E0nO97xwIDAQAB"; npx:hasSignature "UfNKMSMxjJF6FmekyAFM3JrDGaiwLMq8OK0b3TsssKDHOlONBvYOzIWsO+Q5sDE3EGKMNcf0L9RvIgAwkaOJd4jgM4DgISHMEE7mOdJJ8+ogj3qea5jjPDXjwPAaUC1v51Hzc7v40LKALWGD3uJEyorHVpAL1z8FO9DlrLbu9sYzQ9zUHxvnRl0fJKGXkzzT1Z5ODlEs5c5/oq2L8LtKlDg5NSW/o2+5ELcKUDXF9cB2qGy8mymmlFXId4D4Q2BuE52/YtOLdRCJQhiEJaC9ZEw8NBUmDILw3NmKj7kOaU9BZzhQkWfc415rNBJSbXxwj6uM5JQIxTX6zUcwTEa9Sw=="; npx:hasSignatureTarget this:; npx:singedBy ; prov:wasAssociatedWith . this: dcterms:created "2024-09-27T15:38:26.351Z"^^xsd:dateTime; dcterms:creator ; dcterms:license ; npx:hasNanopubType ; npx:wasCreatedAt ; rdfs:label "CoSMO Semantic Post"; "0x5b9967FC42C160f6146d5ea1f0d08E88370f370b" . foaf:name "Quinn Zhang, PhD" . }