diff --git a/src/components/hardware.js b/src/components/hardware.js index 6e0caa4..1cffecf 100644 --- a/src/components/hardware.js +++ b/src/components/hardware.js @@ -25,12 +25,14 @@ export function hardware_plot(hardware, { width, height } = {}) { grid: true, label: "RAM (GB)", nice: true, + domain: [0, 140], }, x: { grid: true, label: "Release Date", type: "time", nice: true, + domain: [new Date("2020-01-01"), new Date("2025-01-01")], }, marks: [ Plot.dot(data), diff --git a/src/components/protein_table.js b/src/components/protein_table.js index 2516b26..a11253f 100644 --- a/src/components/protein_table.js +++ b/src/components/protein_table.js @@ -1,14 +1,8 @@ // - import * as Inputs from "npm:@observablehq/inputs"; -import * as Plot from "npm:@observablehq/plot"; -import * as d3 from "npm:d3"; import { html } from "npm:htl"; -import { parseDate, parseMemorySize } from "./utiities.js"; -// import * as Academicons from "npm:academicons"; -// import * as FontAwesome from "npm:font-awesome-icons"; +// import { hf_icon } from "./utilities.js"; -// Process individual fields function processName(row) { return row; } @@ -92,6 +86,60 @@ function processLicense(row) { } function processHuggingFace(row) { + if (!row.HuggingFace) return row; + + if (typeof row.HuggingFace !== "string") return row; + + let icon; + if (row.HuggingFace.includes("/")) { + row.HuggingFace = html`${row.HuggingFace}`; + } + + return row; +} + +function processLinks(row) { + let links = []; + + // Process Publication URL + if (row.Publication_URL && typeof row.Publication_URL === "string") { + let icon; + if (row.Publication_URL.includes("pubmed")) { + icon = html``; + } else if (row.Publication_URL.includes("biorxiv")) { + icon = html``; + } else if (row.Publication_URL.includes("arxiv")) { + icon = html``; + } else { + icon = html``; + } + links.push( + html`${icon}`, + ); + } + + // Process Source URL + if (row.SourceURL && typeof row.SourceURL === "string") { + let icon; + if (row.SourceURL.includes("github")) { + icon = html``; + } else if (row.SourceURL.includes("gitlab")) { + icon = html``; + } else { + icon = html``; + } + links.push(html`${icon}`); + } + + // Combine links with spacing + row.Links = html`
+ ${links} +
`; + return row; } @@ -101,16 +149,17 @@ function process_models(data) { data // .map((row) => processName(row)) .map((row) => processPublication(row)) - .map((row) => processPublicationURL(row)) + // .map((row) => processPublicationURL(row)) // .map((row) => processPublicationDate(row)) // .map((row) => processVersion(row)) - .map((row) => processSourceURL(row)) - // .map((row) => processWeightURL(row)) - // .map((row) => processTotalWeightsSize(row)) - // .map((row) => processArchitecture(row)) - // .map((row) => processTrainingDataSize(row)) - // .map((row) => processLicense(row)) - // .map((row) => processHuggingFace(row)) + // .map((row) => processSourceURL(row)) + // .map((row) => processWeightURL(row)) + // .map((row) => processTotalWeightsSize(row)) + // .map((row) => processArchitecture(row)) + // .map((row) => processTrainingDataSize(row)) + // .map((row) => processLicense(row)) + .map((row) => processHuggingFace(row)) + .map((row) => processLinks(row)) ); } @@ -121,15 +170,18 @@ export function protein_model_table( let data = process_models(models); console.log(data); return Inputs.table(data, { + rows: 25, columns: [ "Name", "Publication", - "Publication_URL", - "SourceURL", - "WeightURL", + // "Publication_URL", + // "SourceURL", + // "WeightURL", "TotalWeightsSize", "Architecture", "License", + "Links", + "HuggingFace", ], names: { Publication_URL: "pubURL", @@ -139,6 +191,8 @@ export function protein_model_table( format: { Publication_URL: (d) => d, SourceURL: (d) => d, + Links: (d) => d, + HuggingFace: (d) => d, }, }); } diff --git a/src/components/utiities.js b/src/components/utiities.js index 492eacf..c17506c 100644 --- a/src/components/utiities.js +++ b/src/components/utiities.js @@ -1,3 +1,4 @@ +import { svg } from "npm:htl"; import * as d3 from "npm:d3"; export const parseDate = d3.timeParse("%b %Y"); @@ -38,3 +39,19 @@ export function munge_protein_models(models) { return isValid; }); } + +export const hf_icon = svg` + + + + + + + + + + + + + + `; diff --git a/src/data/desktop_hardware.csv b/src/data/desktop_hardware.csv index 2bf93a2..577a3d0 100644 --- a/src/data/desktop_hardware.csv +++ b/src/data/desktop_hardware.csv @@ -10,3 +10,4 @@ Apple,M2 Ultra,64GB,Jun 2023,192GB,24 (16P+8E),60/76,3.68(P)/2.4(E),1.398 Apple,M3,8GB,Oct 2023,24GB,8 (4P+4E),10,4.05(P)/2.7(E),1.5 Apple,M3 Pro,18GB,Oct 2023,36GB,12 (6P+6E),18,4.05(P)/2.7(E),1.5 Apple,M3 Max,36GB,Oct 2023,128GB,16 (12P+4E),40,4.05(P)/2.7(E),1.5 +Tinygrad,Tinybox Green,128GB,Oct 2024,,,,, diff --git a/src/data/protein_language_models.tsv b/src/data/protein_language_models.tsv index d8dbf50..a6909fc 100644 --- a/src/data/protein_language_models.tsv +++ b/src/data/protein_language_models.tsv @@ -1,11 +1,12 @@ Name Publication Publication_URL Publication_Date Version SourceURL WeightURL TotalWeightsSize Architecture Training Data Size License HuggingFace -AlphaFold2 Highly accurate protein structure prediction with AlphaFold https://www.nature.com/articles/s41586-021-03819-2 Jul 2021 v2.3.1 https://github.com/google-deepmind/alphafold https://storage.googleapis.com/alphafold/ ~3GB Transformer-based ~170_000 structures (PDB) Apache 2.0 None -AlphaFold2 Multimer Protein complex prediction with AlphaFold-Multimer https://www.biorxiv.org/content/10.1101/2021.10.04.463034v1 Oct 2021 +AlphaFold2 Highly accurate protein structure prediction with AlphaFold https://www.nature.com/articles/s41586-021-03819-2 Jul 2021 v2.3.1 https://github.com/google-deepmind/alphafold https://storage.googleapis.com/alphafold/ ~3GB Transformer-based ~170_000 structures (PDB) Apache 2.0 +AlphaFold2 Multimer Protein complex prediction with AlphaFold-Multimer https://www.biorxiv.org/content/10.1101/2021.10.04.463034v2 Mar 2022 https://github.com/google-deepmind/alphafold https://storage.googleapis.com/alphafold/ ~3GB ESM2 Language models of protein sequences at the scale of evolution enable accurate structure prediction https://www.science.org/doi/10.1126/science.ade2574 Jan 2023 ESM-2 https://github.com/facebookresearch/esm https://dl.fbaipublicfiles.com/fair-esm/models/ ~3GB Transformer 250M sequences MIT facebook/esm2 ESMFold High-accuracy protein structure prediction with language models https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2 Dec 2022 v1 https://github.com/facebookresearch/esm Same as ESM2 ~3GB Transformer + Structure Module Based on ESM2 MIT facebook/esmfold -ProteinMPNN Neural network-based protein sequence design https://www.science.org/doi/10.1126/science.add2187 Jul 2022 v1 https://github.com/dauparas/ProteinMPNN https://files.ipd.uw.edu/pub/training/weights/ ~200MB Message Passing Neural Network PDB structures MIT None -OpenFold OpenFold: Retraining AlphaFold2 from scratch https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 Sep 2022 v1.0 https://github.com/aqlaboratory/openfold Available through repo ~3GB Similar to AlphaFold2 Same as AlphaFold2 Apache 2.0 None -RoseTTAFold Accurate prediction of protein structures and interactions using a three-track neural network https://pubmed.ncbi.nlm.nih.gov/34282049/ Jul 2021 v1 https://github.com/RosettaCommons/RoseTTAFold Available through repo ~1GB Three-track architecture PDB + MSA data MIT None -LigandMPNN Robust deep learning-based protein sequence design using ProteinMPNN https://www.biorxiv.org/content/10.1101/2023.12.22.573103v1 Dec 2023 v1 https://github.com/dauparas/LigandMPNN https://files.ipd.uw.edu/pub/protein_mpnn/ ~200MB Message Passing Neural Network PDB structures MIT None -Amplify Functional Protein Design by Artificial Intelligence https://www.biorxiv.org/content/10.1101/2024.09.23.614603v1 Dec 2023 v1 https://github.com/chandar-lab/AMPLIFY Available through repo ~500MB Transformer PDB + UniProt data MIT microsoft/amplify -SaProt SaProt: Protein Language Modeling with Structure-aware Vocabulary https://www.biorxiv.org/content/10.1101/2023.10.01.560349v1 Oct 2023 v1 https://github.com/westlake-repl/SaProt Available through repo ~1GB Transformer with structure-aware tokenization UniProt + PDB data Apache 2.0 None +ProteinMPNN Neural network-based protein sequence design https://www.science.org/doi/10.1126/science.add2187 Jul 2022 v1 https://github.com/dauparas/ProteinMPNN https://files.ipd.uw.edu/pub/training/weights/ ~10MB Message Passing Neural Network PDB structures MIT +OpenFold OpenFold: Retraining AlphaFold2 from scratch https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 Sep 2022 v1.0 https://github.com/aqlaboratory/openfold Available through repo ~3GB Similar to AlphaFold2 Same as AlphaFold2 Apache 2.0 +RoseTTAFold Accurate prediction of protein structures and interactions using a three-track neural network https://pubmed.ncbi.nlm.nih.gov/34282049/ Jul 2021 v1 https://github.com/RosettaCommons/RoseTTAFold Available through repo ~1GB Three-track architecture PDB + MSA data MIT +LigandMPNN Robust deep learning-based protein sequence design using ProteinMPNN https://www.biorxiv.org/content/10.1101/2023.12.22.573103v1 Dec 2023 v1 https://github.com/dauparas/LigandMPNN https://files.ipd.uw.edu/pub/protein_mpnn/ ~20MB Message Passing Neural Network PDB structures MIT +Amplify Functional Protein Design by Artificial Intelligence https://www.biorxiv.org/content/10.1101/2024.09.23.614603v1 Dec 2023 v1 https://github.com/chandar-lab/AMPLIFY Available through repo ~500MB Transformer PDB + UniProt data MIT chandar-lab/AMPLIFY_120M +SaProt SaProt: Protein Language Modeling with Structure-aware Vocabulary https://www.biorxiv.org/content/10.1101/2023.10.01.560349v1 Oct 2023 v1 https://github.com/westlake-repl/SaProt Available through repo ~1GB Transformer with structure-aware tokenization UniProt + PDB data Apache 2.0 +ProstT5 ProstT5: Bilingual Language Model for Protein Sequence and Structure https://www.biorxiv.org/content/10.1101/2023.07.23.550085v1 Jul 23 V1 https://github.com/mheinzinger/ProstT5 Available through repo 11GB MIT "Rostlab/ProstT5_fp16,Rostlab/ProstT5" diff --git a/src/index.md b/src/index.md index c94042e..8667cae 100644 --- a/src/index.md +++ b/src/index.md @@ -33,7 +33,7 @@ display(plmtable); ## Desktop Hardware -This is highlighting the release year and basic specs of available Desktop softare. At the moment focusing on the Mac M-series machines. This is related +This is highlighting the release year and basic specs of available Desktop hardware. At the moment focusing on the Mac M-series machines. This is related to a broader interest in making machine learning models that can be used locally. ```js