Skip to content

Commit

Permalink
Update the data and layout (#10)
Browse files Browse the repository at this point in the history
* clean it up a bit

* update the graph

* add logscale toggle

* update models

* update the plot of models and their weight sizes

* update
  • Loading branch information
zachcp authored Jan 6, 2025
1 parent 4e30aa0 commit 7b83f2a
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 43 deletions.
8 changes: 6 additions & 2 deletions src/components/hardware.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import * as Plot from "npm:@observablehq/plot";
import { parseDate, parseMemorySize } from "./utiities.js";
import * as d3 from "npm:d3";

// export const parseDate = d3.timeParse("%m %Y");

export function hardware_plot(hardware, { width, height } = {}) {
let data = hardware
Expand All @@ -21,6 +24,7 @@ export function hardware_plot(hardware, { width, height } = {}) {
});

return Plot.plot({
margin: 50,
y: {
grid: true,
label: "RAM (GB)",
Expand All @@ -32,6 +36,7 @@ export function hardware_plot(hardware, { width, height } = {}) {
label: "Release Date",
type: "time",
nice: true,
labelOffset: 40,
domain: [new Date("2020-01-01"), new Date("2025-01-01")],
},
marks: [
Expand All @@ -40,10 +45,9 @@ export function hardware_plot(hardware, { width, height } = {}) {
x: "x",
y: "y",
text: "model",
dy: -8,
}),
],
height: 400,
width: 800,
width: 1200,
});
}
123 changes: 101 additions & 22 deletions src/components/protein_models.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import * as Plot from "npm:@observablehq/plot";
import * as d3 from "npm:d3";
import { parseDate, parseMemorySize } from "./utiities.js";

function munge_protein_models(models) {
return models
.map((d) => {
const parsedDate = parseDate(d.Publication_Date);
const parsedSize = parseMemorySize(d.TotalWeightsSize);
const parsedSize = parseMemorySize(d.ModelSize);
return {
x: parsedDate,
y: parsedSize,
Expand All @@ -21,32 +22,110 @@ function munge_protein_models(models) {
});
}

export function protein_model_plot(models, { width = 800, height = 400 } = {}) {
let data = munge_protein_models(models);
return Plot.plot({
x: {
grid: true,
label: "Publication Date",
type: "time",
nice: true,
domain: [new Date("2020-01-01"), new Date("2024-12-31")],
},
y: {
/**
 * Build the two-line tooltip text for a plotted model point.
 *
 * @param {{name: *, y: *}} d - Point record; `name` and `y` are interpolated as-is.
 * @returns {string} "Model: <name>", a newline, then "Size in GB: <y>".
 */
function formatTooltip(d) {
  const modelLine = `Model: ${d.name}`;
  const sizeLine = `Size in GB: ${d.y}`;
  return [modelLine, sizeLine].join("\n");
}

export function createProteinModelPlot(models, container) {
let isLogScale = true; // Default to log scale
let showLabels = false; // Default to hiding labels

// Create button container for better layout
const buttonContainer = document.createElement("div");
buttonContainer.style.marginBottom = "10px";

// Scale toggle button
const scaleButton = document.createElement("button");
scaleButton.textContent = "Toggle Log/Linear Scale";
scaleButton.style.marginRight = "10px";

// Label toggle button
const labelButton = document.createElement("button");
labelButton.textContent = "Toggle Labels";

// Add buttons to container
buttonContainer.appendChild(scaleButton);
buttonContainer.appendChild(labelButton);

const plotDiv = document.createElement("div");
container.appendChild(buttonContainer);
container.appendChild(plotDiv);

function updatePlot() {
const data = munge_protein_models(models);

const yAxis = {
grid: true,
label: "Model Size (GB)",
nice: true,
domain: [0, 4],
},
marks: [
Plot.dot(data),
Plot.text(data, {
};

if (isLogScale) {
yAxis.type = "log";
yAxis.domain = [0.1, Math.max(...data.map((d) => d.y))];
} else {
yAxis.type = "linear";
yAxis.domain = [0, Math.max(...data.map((d) => d.y))];
}

const marks = [
// Points are always shown
Plot.dot(data, {
r: 4,
x: "x",
y: "y",
text: "name",
dy: -8,
fill: "red",
tip: true,
title: formatTooltip,
}),
],
height,
width,
];

// Only add labels if showLabels is true
if (showLabels) {
marks.push(
Plot.text(data, {
x: "x",
y: "y",
text: "name",
fontSize: 12,
fill: "black",
dy: -10,
}),
);
}

const plot = Plot.plot({
aspectRatio: false,
margin: 40,
x: {
grid: true,
label: "Publication Date",
type: "time",
nice: true,
domain: [new Date("2021-01-01"), new Date("2025-12-31")],
},
y: yAxis,
marks: marks,
height: 400,
width: 1200,
});

plotDiv.innerHTML = "";
plotDiv.appendChild(plot);
}

// Add click handlers to buttons
scaleButton.addEventListener("click", () => {
isLogScale = !isLogScale;
updatePlot();
});

labelButton.addEventListener("click", () => {
showLabels = !showLabels;
updatePlot();
});

// Initial plot render
updatePlot();
}
12 changes: 7 additions & 5 deletions src/components/protein_table.js
Original file line number Diff line number Diff line change
Expand Up @@ -174,17 +174,19 @@ export function protein_model_table(
columns: [
"Name",
"Publication",
"Links",
"HuggingFace",
"ModelSize",
"Publication_Date",
// "Publication_URL",
// "SourceURL",
// "WeightURL",
"TotalWeightsSize",
// "TotalWeightsSize",
"Architecture",
"License",
"Links",
"HuggingFace",
// "License",
],
names: {
Publication_URL: "pubURL",
// Publication_URL: "pubURL",
// pubURL: "Publication_URL",
SourceURL: "SourceURL",
},
Expand Down
16 changes: 8 additions & 8 deletions src/components/utiities.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions src/hf-logo-pirate.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
41 changes: 35 additions & 6 deletions src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,66 @@ title: Protein Language Models
toc: true
---

Data is found at this [gSheet](https://docs.google.com/spreadsheets/d/1iJ7bPG81_yYITVQn-huoQonKTel7GBQ7AMM9AixQbH8/edit?gid=1996577388#gid=1996577388)


<!-- Todo: move this to a proper import -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
</head>


## Protein Language Models
## Intro to Local-First Protein Language Models

- There has been a proliferation of really powerful AI models to assist in scientific discovery.
- We have seen a shrinking of the commercial LLMs where [LLAMA 3.3](https://ollama.com/library/llama3.3) can now be run on a machine with 548GB of RAM.
- Some of the powerful ML models like ProteinMPNN and Ligand MPNN are only ~20MB!!!
- At the same time we've gotten really powerful desktop hardware that increasingly can handle ML models locally via GPUs or CPU acceleration. Apple's Metal framework seems to be especially good.
- Can we develop local machine learning tools that can accelerate scientists in their effort to use these tools?
- That was the idea behind [ferritin-bio](https://ferritin-bio.github.io/ferritin/) - port common ML models to compile and run locally.




### Model Size by Release Date
```js
const plms = FileAttachment("./data/protein_language_models.tsv").tsv();
import {protein_model_plot } from "./components/protein_models.js";
// import {protein_model_plot } from "./components/protein_models.js";
import {createProteinModelPlot } from "./components/protein_models.js";
import {protein_model_table } from "./components/protein_table.js";

```



<div id="plm-plot-01"></div>

```js
let plmplot = protein_model_plot(plms);
display(plmplot)
const container = document.getElementById('plm-plot-01');
let plmplot = createProteinModelPlot(plms, container);
```

<br>

### Protein Language Model Table

```js
let plmtable = protein_model_table(plms);
display(plmtable);

```


<br>

## Desktop Hardware

This section highlights the release year and basic specs of available desktop hardware, currently focusing on the Mac M-series machines. It is related
to a broader interest in making machine learning models that can be used locally.


<br>

### Hardware Table

```js
const hardware = FileAttachment("./data/desktop_hardware.tsv").tsv();
import {hardware_plot} from "./components/hardware.js";
Expand All @@ -47,3 +72,7 @@ import {hardware_plot} from "./components/hardware.js";
let hplot = hardware_plot(hardware);
display(hplot)
```

<br>

Data for these charts can be found at this [gSheet](https://docs.google.com/spreadsheets/d/1iJ7bPG81_yYITVQn-huoQonKTel7GBQ7AMM9AixQbH8/edit?gid=1996577388#gid=1996577388). Source code for the graphs is [here](https://github.com/ferritin-bio/protein-language-models).

0 comments on commit 7b83f2a

Please sign in to comment.