From 1b1fd747706f44de1b7c2ea85ae806245f370667 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Tue, 12 Nov 2024 08:55:59 +0100 Subject: [PATCH 01/29] reorder sidebar --- src/content/docs/datahub/datahub-generate-pat.mdx | 2 -- .../datahub/{ => navigation-settings}/datahub-arc-panel.mdx | 0 .../datahub/{ => navigation-settings}/datahub-arc-settings.md | 0 .../datahub/{ => navigation-settings}/datahub-navigation.mdx | 0 .../docs/datahub/{ => navigation-settings}/datahub-projects.mdx | 0 .../datahub/{ => navigation-settings}/datahub-user-settings.mdx | 0 .../{ => working-together}/datahub-arc-members-isa-contacts.mdx | 0 .../docs/datahub/{ => working-together}/datahub-groups.mdx | 0 .../{ => working-together}/datahub-invite-collaborators.mdx | 0 .../index.mdx} | 0 10 files changed, 2 deletions(-) rename src/content/docs/datahub/{ => navigation-settings}/datahub-arc-panel.mdx (100%) rename src/content/docs/datahub/{ => navigation-settings}/datahub-arc-settings.md (100%) rename src/content/docs/datahub/{ => navigation-settings}/datahub-navigation.mdx (100%) rename src/content/docs/datahub/{ => navigation-settings}/datahub-projects.mdx (100%) rename src/content/docs/datahub/{ => navigation-settings}/datahub-user-settings.mdx (100%) rename src/content/docs/datahub/{ => working-together}/datahub-arc-members-isa-contacts.mdx (100%) rename src/content/docs/datahub/{ => working-together}/datahub-groups.mdx (100%) rename src/content/docs/datahub/{ => working-together}/datahub-invite-collaborators.mdx (100%) rename src/content/docs/datahub/{datahub-working-together.mdx => working-together/index.mdx} (100%) diff --git a/src/content/docs/datahub/datahub-generate-pat.mdx b/src/content/docs/datahub/datahub-generate-pat.mdx index a3fd7431a..16ed15cbe 100644 --- a/src/content/docs/datahub/datahub-generate-pat.mdx +++ b/src/content/docs/datahub/datahub-generate-pat.mdx @@ -3,8 +3,6 @@ title: Personal Access Token (PAT) authors: - dominik-brilhaus lastUpdated: 2023-07-07 -sidebar: 
- order: 10 --- import { Steps } from '@astrojs/starlight/components'; diff --git a/src/content/docs/datahub/datahub-arc-panel.mdx b/src/content/docs/datahub/navigation-settings/datahub-arc-panel.mdx similarity index 100% rename from src/content/docs/datahub/datahub-arc-panel.mdx rename to src/content/docs/datahub/navigation-settings/datahub-arc-panel.mdx diff --git a/src/content/docs/datahub/datahub-arc-settings.md b/src/content/docs/datahub/navigation-settings/datahub-arc-settings.md similarity index 100% rename from src/content/docs/datahub/datahub-arc-settings.md rename to src/content/docs/datahub/navigation-settings/datahub-arc-settings.md diff --git a/src/content/docs/datahub/datahub-navigation.mdx b/src/content/docs/datahub/navigation-settings/datahub-navigation.mdx similarity index 100% rename from src/content/docs/datahub/datahub-navigation.mdx rename to src/content/docs/datahub/navigation-settings/datahub-navigation.mdx diff --git a/src/content/docs/datahub/datahub-projects.mdx b/src/content/docs/datahub/navigation-settings/datahub-projects.mdx similarity index 100% rename from src/content/docs/datahub/datahub-projects.mdx rename to src/content/docs/datahub/navigation-settings/datahub-projects.mdx diff --git a/src/content/docs/datahub/datahub-user-settings.mdx b/src/content/docs/datahub/navigation-settings/datahub-user-settings.mdx similarity index 100% rename from src/content/docs/datahub/datahub-user-settings.mdx rename to src/content/docs/datahub/navigation-settings/datahub-user-settings.mdx diff --git a/src/content/docs/datahub/datahub-arc-members-isa-contacts.mdx b/src/content/docs/datahub/working-together/datahub-arc-members-isa-contacts.mdx similarity index 100% rename from src/content/docs/datahub/datahub-arc-members-isa-contacts.mdx rename to src/content/docs/datahub/working-together/datahub-arc-members-isa-contacts.mdx diff --git a/src/content/docs/datahub/datahub-groups.mdx b/src/content/docs/datahub/working-together/datahub-groups.mdx 
similarity index 100% rename from src/content/docs/datahub/datahub-groups.mdx rename to src/content/docs/datahub/working-together/datahub-groups.mdx diff --git a/src/content/docs/datahub/datahub-invite-collaborators.mdx b/src/content/docs/datahub/working-together/datahub-invite-collaborators.mdx similarity index 100% rename from src/content/docs/datahub/datahub-invite-collaborators.mdx rename to src/content/docs/datahub/working-together/datahub-invite-collaborators.mdx diff --git a/src/content/docs/datahub/datahub-working-together.mdx b/src/content/docs/datahub/working-together/index.mdx similarity index 100% rename from src/content/docs/datahub/datahub-working-together.mdx rename to src/content/docs/datahub/working-together/index.mdx From c00a89306c9063eee77352e036a49da4a7e616dd Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 11:05:11 +0100 Subject: [PATCH 02/29] try structure datahub section --- astro.config.mts | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/astro.config.mts b/astro.config.mts index 63752bead..486747ab1 100644 --- a/astro.config.mts +++ b/astro.config.mts @@ -87,12 +87,30 @@ export default defineConfig({ collapsed: true, autogenerate: { directory: 'arc-validation' }, }, + // { + // label: 'DataHUB', + // // Collapse the group by default. + // collapsed: true, + // autogenerate: { directory: 'datahub' }, + // }, { label: 'DataHUB', - // Collapse the group by default. collapsed: true, - autogenerate: { directory: 'datahub' }, - }, + items:[ + { + label: 'Navigation & Settings', + autogenerate: { directory: 'datahub/navigation-settings'}, + }, + { + label: 'Working Together', + autogenerate: { directory: 'datahub/working-together'}, + }, + { + label: 'Data Publications', + autogenerate: { directory: 'datahub/data-publications'}, + }, + ] + }, { label: 'ARC Commander', // Collapse the group by default. 
From 8960a708289ee447b9af51c61b567bf3714d6950 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 13:01:18 +0100 Subject: [PATCH 03/29] restructure datahub section --- astro.config.mts | 25 +++++++++++++------ .../index.mdx} | 0 .../datahub-arc-commits.mdx | 0 .../{ => arc-features}/datahub-arc-fork.mdx | 0 .../datahub-arc-license.mdx} | 0 .../{ => arc-features}/datahub-arc-wiki.md | 0 .../datahub/{ => arc-files}/datahub-files.mdx | 0 .../datahub/{ => arc-files}/datahub-lfs.mdx | 0 src/content/docs/datahub/index.mdx | 1 + .../datahub-generate-pat.mdx | 0 .../docs/datahub/working-together/index.mdx | 9 ------- src/content/docs/git/git-lfs.mdx | 2 +- src/content/docs/resources/galaxy.mdx | 1 + 13 files changed, 20 insertions(+), 18 deletions(-) rename src/content/docs/datahub/{datahub-account.mdx => account/index.mdx} (100%) rename src/content/docs/datahub/{ => arc-features}/datahub-arc-commits.mdx (100%) rename src/content/docs/datahub/{ => arc-features}/datahub-arc-fork.mdx (100%) rename src/content/docs/datahub/{datahub-license.mdx => arc-features/datahub-arc-license.mdx} (100%) rename src/content/docs/datahub/{ => arc-features}/datahub-arc-wiki.md (100%) rename src/content/docs/datahub/{ => arc-files}/datahub-files.mdx (100%) rename src/content/docs/datahub/{ => arc-files}/datahub-lfs.mdx (100%) rename src/content/docs/datahub/{ => navigation-settings}/datahub-generate-pat.mdx (100%) diff --git a/astro.config.mts b/astro.config.mts index 486747ab1..dfc0b3f68 100644 --- a/astro.config.mts +++ b/astro.config.mts @@ -87,24 +87,33 @@ export default defineConfig({ collapsed: true, autogenerate: { directory: 'arc-validation' }, }, - // { - // label: 'DataHUB', - // // Collapse the group by default. 
- // collapsed: true, - // autogenerate: { directory: 'datahub' }, - // }, { label: 'DataHUB', collapsed: true, - items:[ + // autogenerate: { directory: 'datahub' }, + items:[ + 'datahub/index', + { + label: 'DataPLANT Account', + collapsed: false, + autogenerate: { directory: 'datahub/account'}, + }, { label: 'Navigation & Settings', autogenerate: { directory: 'datahub/navigation-settings'}, }, { - label: 'Working Together', + label: 'Working together', autogenerate: { directory: 'datahub/working-together'}, }, + { + label: 'ARC files', + autogenerate: { directory: 'datahub/arc-files'}, + }, + { + label: 'ARC features', + autogenerate: { directory: 'datahub/arc-features'}, + }, { label: 'Data Publications', autogenerate: { directory: 'datahub/data-publications'}, diff --git a/src/content/docs/datahub/datahub-account.mdx b/src/content/docs/datahub/account/index.mdx similarity index 100% rename from src/content/docs/datahub/datahub-account.mdx rename to src/content/docs/datahub/account/index.mdx diff --git a/src/content/docs/datahub/datahub-arc-commits.mdx b/src/content/docs/datahub/arc-features/datahub-arc-commits.mdx similarity index 100% rename from src/content/docs/datahub/datahub-arc-commits.mdx rename to src/content/docs/datahub/arc-features/datahub-arc-commits.mdx diff --git a/src/content/docs/datahub/datahub-arc-fork.mdx b/src/content/docs/datahub/arc-features/datahub-arc-fork.mdx similarity index 100% rename from src/content/docs/datahub/datahub-arc-fork.mdx rename to src/content/docs/datahub/arc-features/datahub-arc-fork.mdx diff --git a/src/content/docs/datahub/datahub-license.mdx b/src/content/docs/datahub/arc-features/datahub-arc-license.mdx similarity index 100% rename from src/content/docs/datahub/datahub-license.mdx rename to src/content/docs/datahub/arc-features/datahub-arc-license.mdx diff --git a/src/content/docs/datahub/datahub-arc-wiki.md b/src/content/docs/datahub/arc-features/datahub-arc-wiki.md similarity index 100% rename from 
src/content/docs/datahub/datahub-arc-wiki.md rename to src/content/docs/datahub/arc-features/datahub-arc-wiki.md diff --git a/src/content/docs/datahub/datahub-files.mdx b/src/content/docs/datahub/arc-files/datahub-files.mdx similarity index 100% rename from src/content/docs/datahub/datahub-files.mdx rename to src/content/docs/datahub/arc-files/datahub-files.mdx diff --git a/src/content/docs/datahub/datahub-lfs.mdx b/src/content/docs/datahub/arc-files/datahub-lfs.mdx similarity index 100% rename from src/content/docs/datahub/datahub-lfs.mdx rename to src/content/docs/datahub/arc-files/datahub-lfs.mdx diff --git a/src/content/docs/datahub/index.mdx b/src/content/docs/datahub/index.mdx index bacedf87a..589155a17 100644 --- a/src/content/docs/datahub/index.mdx +++ b/src/content/docs/datahub/index.mdx @@ -1,5 +1,6 @@ --- title: General +slug: datahub/index lastUpdated: 2023-07-07 authors: - dominik-brilhaus diff --git a/src/content/docs/datahub/datahub-generate-pat.mdx b/src/content/docs/datahub/navigation-settings/datahub-generate-pat.mdx similarity index 100% rename from src/content/docs/datahub/datahub-generate-pat.mdx rename to src/content/docs/datahub/navigation-settings/datahub-generate-pat.mdx diff --git a/src/content/docs/datahub/working-together/index.mdx b/src/content/docs/datahub/working-together/index.mdx index fa23eb81e..20192964d 100644 --- a/src/content/docs/datahub/working-together/index.mdx +++ b/src/content/docs/datahub/working-together/index.mdx @@ -9,15 +9,6 @@ sidebar: import { Steps } from '@astrojs/starlight/components'; -:::note[About this guide] -In this guide we explain the different ways offered by the DataHUB to share an ARC with collaborators. -::: - -:::tip[Before we can start] -- You have a [DataPLANT account](https://register.nfdi4plants.org) -- You have created an ARC -::: - ## Options to share an ARC via the DataHUB To suit a wide range of collaborative scenarios, the DataHUB offers multiple, flexible ways of sharing an ARC. 
diff --git a/src/content/docs/git/git-lfs.mdx b/src/content/docs/git/git-lfs.mdx index 01a29969c..0e6ef3dcf 100644 --- a/src/content/docs/git/git-lfs.mdx +++ b/src/content/docs/git/git-lfs.mdx @@ -26,7 +26,7 @@ import ARCCommanderLFS from '../arc-commander/lfs.mdx' ### DataHUB -import DataHUBLFS from '../datahub/datahub-lfs.mdx' +import DataHUBLFS from '../datahub/arc-files/datahub-lfs.mdx' diff --git a/src/content/docs/resources/galaxy.mdx b/src/content/docs/resources/galaxy.mdx index c88a16de9..5ad6135c7 100644 --- a/src/content/docs/resources/galaxy.mdx +++ b/src/content/docs/resources/galaxy.mdx @@ -88,3 +88,4 @@ It is also possible to export datasets back to an ARC. You can do this as descri on the workload on the Galaxy server and the size of your file. +../datahub/navigation-settings/datahub-generate-pat.mdx \ No newline at end of file From 9cf314e614ac262d9fd22031411153b15aa71188 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 13:04:02 +0100 Subject: [PATCH 04/29] polish --- .../docs/datahub/arc-features/datahub-arc-commits.mdx | 2 -- .../docs/datahub/arc-features/datahub-arc-fork.mdx | 10 ---------- .../docs/datahub/arc-features/datahub-arc-license.mdx | 4 +--- src/content/docs/datahub/arc-files/datahub-files.mdx | 2 +- 4 files changed, 2 insertions(+), 16 deletions(-) diff --git a/src/content/docs/datahub/arc-features/datahub-arc-commits.mdx b/src/content/docs/datahub/arc-features/datahub-arc-commits.mdx index 4ddb64489..40bd2a24c 100644 --- a/src/content/docs/datahub/arc-features/datahub-arc-commits.mdx +++ b/src/content/docs/datahub/arc-features/datahub-arc-commits.mdx @@ -9,8 +9,6 @@ import { Steps } from '@astrojs/starlight/components'; All commit messages added to your ARC via [ARCitect](/nfdi4plants.knowledgebase/arcitect) or [ARC Commander](/nfdi4plants.knowledgebase/arc-commander) are available in the DataHUB. -## Commits - 1. 
Open your ARC in the [DataHUB](https://git.nfdi4plants.org/) diff --git a/src/content/docs/datahub/arc-features/datahub-arc-fork.mdx b/src/content/docs/datahub/arc-features/datahub-arc-fork.mdx index 34b71e2b4..fd8d44222 100644 --- a/src/content/docs/datahub/arc-features/datahub-arc-fork.mdx +++ b/src/content/docs/datahub/arc-features/datahub-arc-fork.mdx @@ -8,16 +8,6 @@ authors: import { Steps } from '@astrojs/starlight/components'; - -:::note[About this guide] -The DataHUB allows to "fork" an ARC. This creates an exact copy of an ARC under a different user or group account while staying connected to the original ARC. -::: - -:::tip[Before we can start] -- You have a [DataPLANT account](https://register.nfdi4plants.org) -- You have created an ARC -::: - 1. Navigate to the ARC that you would like to fork and click "Fork" in the top-right corner (1). diff --git a/src/content/docs/datahub/arc-features/datahub-arc-license.mdx b/src/content/docs/datahub/arc-features/datahub-arc-license.mdx index 63bf0a59c..95476c9a1 100644 --- a/src/content/docs/datahub/arc-features/datahub-arc-license.mdx +++ b/src/content/docs/datahub/arc-features/datahub-arc-license.mdx @@ -8,11 +8,9 @@ authors: import { Steps } from '@astrojs/starlight/components'; -:::note[About this guide] Licenses are essential for defining how others can use, modify, and distribute the code or data within a project. When you create and share an ARC, a license provides the formal framework that protects the data creators’ rights while clarifying the terms of use for the content. -::: -In the DataHUB, a license is essentially just a standardized text file. +In the DataHUB, a license is essentially a standardized text file. 
To add a `LICENSE` to your ARC diff --git a/src/content/docs/datahub/arc-files/datahub-files.mdx b/src/content/docs/datahub/arc-files/datahub-files.mdx index 4566a7444..f8163e10c 100644 --- a/src/content/docs/datahub/arc-files/datahub-files.mdx +++ b/src/content/docs/datahub/arc-files/datahub-files.mdx @@ -1,5 +1,5 @@ --- -title: Manages files in the DataHUB +title: Manage files in the DataHUB lastUpdated: 2024-07-22 authors: - dominik-brilhaus From db8a94f8eaf4b0765cc047b955893e95ccab915b Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 13:23:19 +0100 Subject: [PATCH 05/29] fix links --- src/content/docs/git/git-syncing-recommendation.mdx | 2 +- src/content/docs/resources/galaxy.mdx | 2 +- src/content/docs/start-here/share.mdx | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/content/docs/git/git-syncing-recommendation.mdx b/src/content/docs/git/git-syncing-recommendation.mdx index 511fb99d2..c7197c1b6 100644 --- a/src/content/docs/git/git-syncing-recommendation.mdx +++ b/src/content/docs/git/git-syncing-recommendation.mdx @@ -57,6 +57,6 @@ The following exemplifies Viola's commit messages along with the progress of her ## Follow your progress in the DataHUB -import DataHUBCommits from '../datahub/datahub-arc-commits.mdx' +import DataHUBCommits from '../datahub/arc-features/datahub-arc-commits.mdx' diff --git a/src/content/docs/resources/galaxy.mdx b/src/content/docs/resources/galaxy.mdx index 5ad6135c7..814a30f03 100644 --- a/src/content/docs/resources/galaxy.mdx +++ b/src/content/docs/resources/galaxy.mdx @@ -12,7 +12,7 @@ In this guide we introduce how to access ARCs from Galaxy. 
To access the data of ## Create an access token in DataHUB -import DataHubPat from '../datahub/datahub-generate-pat.mdx' +import DataHubPat from '../datahub/navigation-settings/datahub-generate-pat.mdx' diff --git a/src/content/docs/start-here/share.mdx b/src/content/docs/start-here/share.mdx index efc6719fc..fe688e3c5 100644 --- a/src/content/docs/start-here/share.mdx +++ b/src/content/docs/start-here/share.mdx @@ -29,7 +29,7 @@ If you do not have an account, please register with DataPLANT to use the PLANTDa
Follow these instructions to sign up -import DataHUBAccount from '../datahub/datahub-account.mdx' +import DataHUBAccount from '../datahub/account/index.mdx' From 91644c68734ebc0f15b16b319b485ba8cc0b598c Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 13:26:30 +0100 Subject: [PATCH 06/29] fix nav links --- astro.config.mts | 2 +- src/content/docs/datahub/arc-features/datahub-arc-wiki.md | 2 +- src/content/docs/datahub/arc-files/datahub-files.mdx | 4 ++-- src/content/docs/datahub/index.mdx | 2 +- .../docs/datahub/navigation-settings/datahub-arc-settings.md | 2 +- .../docs/datahub/navigation-settings/datahub-projects.mdx | 2 +- src/content/docs/guides/arc-enabling-platforms.md | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/astro.config.mts b/astro.config.mts index dfc0b3f68..ddf6f674c 100644 --- a/astro.config.mts +++ b/astro.config.mts @@ -92,7 +92,7 @@ export default defineConfig({ collapsed: true, // autogenerate: { directory: 'datahub' }, items:[ - 'datahub/index', + 'datahub', { label: 'DataPLANT Account', collapsed: false, diff --git a/src/content/docs/datahub/arc-features/datahub-arc-wiki.md b/src/content/docs/datahub/arc-features/datahub-arc-wiki.md index 3426d370c..e03954349 100644 --- a/src/content/docs/datahub/arc-features/datahub-arc-wiki.md +++ b/src/content/docs/datahub/arc-features/datahub-arc-wiki.md @@ -14,4 +14,4 @@ This is particularly useful to keep the ARC clean of files that are not consider ![](@images/datahub/datahub-wiki.png) -To open your ARC's wiki click on **Plan** and then **Wiki** in the sidebar of your [ARC panel](/nfdi4plants.knowledgebase/datahub/datahub-arc-panel). +To open your ARC's wiki click on **Plan** and then **Wiki** in the sidebar of your [ARC panel](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-arc-panel). 
diff --git a/src/content/docs/datahub/arc-files/datahub-files.mdx b/src/content/docs/datahub/arc-files/datahub-files.mdx index f8163e10c..ac41495ab 100644 --- a/src/content/docs/datahub/arc-files/datahub-files.mdx +++ b/src/content/docs/datahub/arc-files/datahub-files.mdx @@ -16,7 +16,7 @@ The DataHUB allows to directly operate files and folders without using any other -1. From the [ARC panel](/nfdi4plants.knowledgebase/datahub/datahub-arc-panel), navigate to subdirectory of your ARC. +1. From the [ARC panel](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-arc-panel), navigate to subdirectory of your ARC. 2. Click the plus sign next to your ARC's name (1). 3. Select to upload a file (2) or create a new directory (3) in the current directory. 4. This will open a new menu where you can upload your file or choose a name for your new directory. @@ -51,7 +51,7 @@ The DataHUB allows to directly operate files and folders without using any other -1. From the [ARC panel](/nfdi4plants.knowledgebase/datahub/datahub-arc-panel), navigate to subdirectory of your ARC that you would like to download +1. From the [ARC panel](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-arc-panel), navigate to subdirectory of your ARC that you would like to download 2. From the `Code` dropdown menu, you can... 3. ...download the complete ARC including all data files (1) or 4. 
...download only the currently displayed directory including all its files (2) diff --git a/src/content/docs/datahub/index.mdx b/src/content/docs/datahub/index.mdx index 589155a17..9773caf04 100644 --- a/src/content/docs/datahub/index.mdx +++ b/src/content/docs/datahub/index.mdx @@ -1,6 +1,6 @@ --- title: General -slug: datahub/index +slug: datahub lastUpdated: 2023-07-07 authors: - dominik-brilhaus diff --git a/src/content/docs/datahub/navigation-settings/datahub-arc-settings.md b/src/content/docs/datahub/navigation-settings/datahub-arc-settings.md index a9ace94ed..61910c2f4 100644 --- a/src/content/docs/datahub/navigation-settings/datahub-arc-settings.md +++ b/src/content/docs/datahub/navigation-settings/datahub-arc-settings.md @@ -8,7 +8,7 @@ sidebar: order: 4.5 --- -To open your ARC's settings click on *Settings* in the sidebar of your [ARC panel](/nfdi4plants.knowledgebase/datahub/datahub-arc-panel). +To open your ARC's settings click on *Settings* in the sidebar of your [ARC panel](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-arc-panel). ![](@images/datahub/datahub-arc-settings.drawio.png) diff --git a/src/content/docs/datahub/navigation-settings/datahub-projects.mdx b/src/content/docs/datahub/navigation-settings/datahub-projects.mdx index 288b7a007..4a973bd47 100644 --- a/src/content/docs/datahub/navigation-settings/datahub-projects.mdx +++ b/src/content/docs/datahub/navigation-settings/datahub-projects.mdx @@ -36,7 +36,7 @@ The projects panel lists all ARCs you have access to. :::note -Once you click on one of the listed ARCs, you are directed to the [ARC panel](/nfdi4plants.knowledgebase/datahub/datahub-arc-panel) of that ARC. +Once you click on one of the listed ARCs, you are directed to the [ARC panel](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-arc-panel) of that ARC. 
::: :::tip diff --git a/src/content/docs/guides/arc-enabling-platforms.md b/src/content/docs/guides/arc-enabling-platforms.md index c2a7e26bc..6920dc8d0 100644 --- a/src/content/docs/guides/arc-enabling-platforms.md +++ b/src/content/docs/guides/arc-enabling-platforms.md @@ -30,7 +30,7 @@ As a platform you manage a lot of projects in parallel. Keeping these projects u Here's a few tips to support your project management: - You can use the [wiki associated to the ARC](/nfdi4plants.knowledgebase/datahub/datahub-arc-wiki) to collect meeting minutes with your collaborators -- You can use the [ARC's issue board](/nfdi4plants.knowledgebase/datahub/datahub-arc-panel) to coordinate tasks between collaborators, team members, data analysts and others involved +- You can use the [ARC's issue board](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-arc-panel) to coordinate tasks between collaborators, team members, data analysts and others involved - You can use your established system of identifiers (e.g. 
for projects, samples) in [ISA metadata](/nfdi4plants.knowledgebase/core-concepts/isa) - You can also keep naming your ARCs with the same way you are used to name your project folders From 8f93639099d0f3360044b4e7d2b64ba1641d8294 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 13:27:12 +0100 Subject: [PATCH 07/29] fix account links --- src/content/docs/datahub/index.mdx | 2 +- src/content/docs/fundamentals/data-sharing.mdx | 2 +- src/content/docs/fundamentals/version-control-git.mdx | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/content/docs/datahub/index.mdx b/src/content/docs/datahub/index.mdx index 9773caf04..57856a247 100644 --- a/src/content/docs/datahub/index.mdx +++ b/src/content/docs/datahub/index.mdx @@ -50,4 +50,4 @@ A plant biologist's day-to-day routines circle around more than just data and co ### Register with DataPLANT -In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/datahub-account) with DataPLANT. +In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/account/index) with DataPLANT. diff --git a/src/content/docs/fundamentals/data-sharing.mdx b/src/content/docs/fundamentals/data-sharing.mdx index b4edfb15c..0ce36c7d0 100644 --- a/src/content/docs/fundamentals/data-sharing.mdx +++ b/src/content/docs/fundamentals/data-sharing.mdx @@ -33,5 +33,5 @@ In order to support [FAIR](/nfdi4plants.knowledgebase/fundamentals/fair-data-pri ### Register with DataPLANT -In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/datahub-account): with DataPLANT. 
+In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/account/index): with DataPLANT. diff --git a/src/content/docs/fundamentals/version-control-git.mdx b/src/content/docs/fundamentals/version-control-git.mdx index aac3534b7..fef27f9b8 100644 --- a/src/content/docs/fundamentals/version-control-git.mdx +++ b/src/content/docs/fundamentals/version-control-git.mdx @@ -37,4 +37,4 @@ Yes, although we spare the technical details here, Git at first glance is comple ### Register with DataPLANT -In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/datahub-account): with DataPLANT. +In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/account/index): with DataPLANT. 
From b0d339716b6b0532af13b851068058e0fbc055ab Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 13:37:17 +0100 Subject: [PATCH 08/29] fix links --- src/content/docs/datahub/index.mdx | 2 +- .../navigation-settings/datahub-arc-panel.mdx | 2 +- .../navigation-settings/datahub-generate-pat.mdx | 2 +- .../navigation-settings/datahub-navigation.mdx | 6 +++--- .../docs/datahub/working-together/datahub-groups.mdx | 2 +- .../datahub-invite-collaborators.mdx | 2 +- src/content/docs/datahub/working-together/index.mdx | 12 ++++++------ src/content/docs/fundamentals/data-sharing.mdx | 2 +- .../docs/fundamentals/version-control-git.mdx | 2 +- src/content/docs/git/git-selective-sharing.mdx | 2 +- src/content/docs/guides/arc-enabling-platforms.md | 8 ++++---- src/content/docs/resources/arc-vs-code.mdx | 2 +- 12 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/content/docs/datahub/index.mdx b/src/content/docs/datahub/index.mdx index 57856a247..ef98be282 100644 --- a/src/content/docs/datahub/index.mdx +++ b/src/content/docs/datahub/index.mdx @@ -50,4 +50,4 @@ A plant biologist's day-to-day routines circle around more than just data and co ### Register with DataPLANT -In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/account/index) with DataPLANT. +In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/account/) with DataPLANT. diff --git a/src/content/docs/datahub/navigation-settings/datahub-arc-panel.mdx b/src/content/docs/datahub/navigation-settings/datahub-arc-panel.mdx index e8780852b..981241e7b 100644 --- a/src/content/docs/datahub/navigation-settings/datahub-arc-panel.mdx +++ b/src/content/docs/datahub/navigation-settings/datahub-arc-panel.mdx @@ -21,7 +21,7 @@ In the sidebar to the left, you can 1. 
manage the project (1), e.g. invite members to the ARC 2. plan (2) and organize tasks in issue lists and boards or take notes in a wiki to your ARC, 3. check any merge requests or commit history (3), -4. adapt the [settings (4) of the ARC](/nfdi4plants.knowledgebase/datahub/datahub-arc-settings/). +4. adapt the [settings (4) of the ARC](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-arc-settings). diff --git a/src/content/docs/datahub/navigation-settings/datahub-generate-pat.mdx b/src/content/docs/datahub/navigation-settings/datahub-generate-pat.mdx index 16ed15cbe..ed75b4d1e 100644 --- a/src/content/docs/datahub/navigation-settings/datahub-generate-pat.mdx +++ b/src/content/docs/datahub/navigation-settings/datahub-generate-pat.mdx @@ -12,7 +12,7 @@ This guide shows you how to generate a Personal Access Token (PAT). The PAT can 1. Sign in to the [DataHUB](https://git.nfdi4plants.org/) -2. Navigate to the [user settings](/nfdi4plants.knowledgebase/datahub/datahub-user-settings) +2. Navigate to the [user settings](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-user-settings) 2. Go to the [Access Tokens](https://git.nfdi4plants.org/-/user_settings/personal_access_tokens) settings 3. Fill all required information: - Token name: e.g. the name of the machine to be linked ("Office PC") (1) diff --git a/src/content/docs/datahub/navigation-settings/datahub-navigation.mdx b/src/content/docs/datahub/navigation-settings/datahub-navigation.mdx index 0f39e318f..5b88d8da1 100644 --- a/src/content/docs/datahub/navigation-settings/datahub-navigation.mdx +++ b/src/content/docs/datahub/navigation-settings/datahub-navigation.mdx @@ -26,7 +26,7 @@ From the navigation bar you can -1. navigate directly to the [projects panel](/nfdi4plants.knowledgebase/datahub/datahub-projects) via the icon in the top-left (1) +1. 
navigate directly to the [projects panel](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-projects) via the icon in the top-left (1) 2. hide the [side bar](#sidebar) (2) 3. use the search field (3) to find ARCs, users and groups 4. open the [avatar menu](#avatar-menu) (4) @@ -40,7 +40,7 @@ import NavBar from "@images/datahub/datahub-navbar.drawio.png" ### Sidebar -From the side bar in the top-left corner (1) you can navigate to [projects](/nfdi4plants.knowledgebase/datahub/datahub-projects) (2) or [groups](/nfdi4plants.knowledgebase/datahub/datahub-groups) (3) panels. +From the side bar in the top-left corner (1) you can navigate to [projects](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-projects) (2) or [groups](/nfdi4plants.knowledgebase/datahub/working-together/datahub-groups) (3) panels. ![](@images/datahub/datahub-hamburger-menu.drawio.png) @@ -55,7 +55,7 @@ In the avatar menu (1) in the top-right corner of the navigation bar you can 1. find your profile name and user name (2) -2. navigate to the [user settings](/nfdi4plants.knowledgebase/datahub/datahub-user-settings) (3), and +2. navigate to the [user settings](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-user-settings) (3), and 3. sign out (4) of the DataHUB. diff --git a/src/content/docs/datahub/working-together/datahub-groups.mdx b/src/content/docs/datahub/working-together/datahub-groups.mdx index 0c44d67a0..1ab3acaf3 100644 --- a/src/content/docs/datahub/working-together/datahub-groups.mdx +++ b/src/content/docs/datahub/working-together/datahub-groups.mdx @@ -16,7 +16,7 @@ import { Steps } from '@astrojs/starlight/components'; **DataHUB Groups** -- A "Group" is a group of users (then "members" of that group) with specific [permissions](/nfdi4plants.knowledgebase/datahub/datahub-working-together#roles-and-permissions). 
+- A "Group" is a group of users (then "members" of that group) with specific [permissions](/nfdi4plants.knowledgebase/datahub/working-together/#roles-and-permissions). - A group (e.g. your lab or consortium) can share ARCs. - Groups help you communicate with all group members and manage permissions for the ARCs shared in that group. - Depending on their assigned roles, members of the group can view, edit, delete, and / or maintain the ARCs of that group. diff --git a/src/content/docs/datahub/working-together/datahub-invite-collaborators.mdx b/src/content/docs/datahub/working-together/datahub-invite-collaborators.mdx index bb647ab07..0e4e11e80 100644 --- a/src/content/docs/datahub/working-together/datahub-invite-collaborators.mdx +++ b/src/content/docs/datahub/working-together/datahub-invite-collaborators.mdx @@ -49,5 +49,5 @@ import MemberRole from "@images/datahub/datahub-members-seq6.png" :::note -For details about the roles, see [Roles and permissions](/nfdi4plants.knowledgebase/datahub/datahub-working-together#roles-and-permissions) +For details about the roles, see [Roles and permissions](/nfdi4plants.knowledgebase/datahub/working-together/#roles-and-permissions) ::: diff --git a/src/content/docs/datahub/working-together/index.mdx b/src/content/docs/datahub/working-together/index.mdx index 20192964d..e641d9c52 100644 --- a/src/content/docs/datahub/working-together/index.mdx +++ b/src/content/docs/datahub/working-together/index.mdx @@ -16,14 +16,14 @@ This flexibility can be confusing at first. Generally, ARCs can be uploaded to t The figure below is supposed to give a quick overview. Once an ARC exists in the DataHUB, you can choose between these options to share the ARC with collaborators. -1. You can [invite individual users](/nfdi4plants.knowledgebase/datahub/datahub-invite-collaborators) (1). This is helpful to share an ARC with selected colleagues. -2. To share an ARC with a group of users (e.g. 
a lab or consortium), you can [invite a group](/nfdi4plants.knowledgebase/datahub/datahub-invite-collaborators) (2).
+1. You can [invite individual users](/nfdi4plants.knowledgebase/datahub/working-together/datahub-invite-collaborators) (1). This is helpful to share an ARC with selected colleagues.
+2. To share an ARC with a group of users (e.g. a lab or consortium), you can [invite a group](/nfdi4plants.knowledgebase/datahub/working-together/datahub-invite-collaborators) (2).
 
In both cases (1 and 2), the ARC "stays associated" with the original owner only (visible by the [namespace](#namespaces) and URL address). Furthermore the [roles and permissions](#roles-and-permissions) can be set for individual users and groups.
 
-3. Alternatively, you [can create a fork of your ARC](/nfdi4plants.knowledgebase/datahub/datahub-arc-fork) (3). This generates a copy linked to the original ARC, but now associated with the group. This can be used to share an ARC at a certain stage, without sharing the full progress after that stage. However, since the two ARCs (the original and the fork) can now be developed independently. This can easily lead to divergence and requires a bit more technical expertise to keep both ARCs in sync (if desired).
+3. Alternatively, you [can create a fork of your ARC](/nfdi4plants.knowledgebase/datahub/arc-features/datahub-arc-fork) (3). This generates a copy linked to the original ARC, but now associated with the group. This can be used to share an ARC at a certain stage, without sharing the full progress after that stage. However, the two ARCs (the original and the fork) can now be developed independently. This can easily lead to divergence and requires a bit more technical expertise to keep both ARCs in sync (if desired).
 
-4. Finally, you can [transfer your ARC](/nfdi4plants.knowledgebase/datahub/datahub-arc-settings#advanced-settings) to a group (4). This moves the ARC to new namespace (that of the group).
+4. 
Finally, you can [transfer your ARC](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-arc-settings#advanced-settings) to a group (4). This moves the ARC to a new namespace (that of the group).
 
In both cases 3 and 4, you must have at least [maintainer access](#roles-and-permissions) to the group.
If you have maintainer access, you can also directly create or upload an ARC to a group.
 
@@ -32,7 +32,7 @@ If you have maintainer access, you can also directly create or upload an ARC to
 
### Visibility
 
-The visibility of ARCs and groups can be managed individually for each ARC (see [ARC settings](/nfdi4plants.knowledgebase/datahub/datahub-arc-settings)) or group see ([Creating a Group](/nfdi4plants.knowledgebase/datahub/datahub-groups)).
+The visibility of ARCs and groups can be managed individually for each ARC (see [ARC settings](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-arc-settings)) or group see ([Creating a Group](/nfdi4plants.knowledgebase/datahub/working-together/datahub-groups)).
 
- **Private** – Access must be granted explicitly to each user or a group.
- **Internal** – Can be accessed by any logged in user.
@@ -62,7 +62,7 @@ A detailed list of all permissions for the individual roles can be found [here](
 
In the DataHUB, namespaces help organize related projects.
 
- Every user has a personal namespace, where they can upload or create new ARCs.
-- Every [group and subgroup](/nfdi4plants.knowledgebase/datahub/datahub-groups) has an own namespace, respectively.
+- Every [group and subgroup](/nfdi4plants.knowledgebase/datahub/working-together/datahub-groups) has an own namespace, respectively.
 
You can check the namespace by looking at the URL.
For example:
 
diff --git a/src/content/docs/fundamentals/data-sharing.mdx b/src/content/docs/fundamentals/data-sharing.mdx
index 0ce36c7d0..91edbbd44 100644
--- a/src/content/docs/fundamentals/data-sharing.mdx
+++ b/src/content/docs/fundamentals/data-sharing.mdx
@@ -33,5 +33,5 @@ In order to support [FAIR](/nfdi4plants.knowledgebase/fundamentals/fair-data-pri
 
### Register with DataPLANT
 
-In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/account/index): with DataPLANT.
+In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/account/) with DataPLANT.
 
diff --git a/src/content/docs/fundamentals/version-control-git.mdx b/src/content/docs/fundamentals/version-control-git.mdx
index fef27f9b8..fdf2185ff 100644
--- a/src/content/docs/fundamentals/version-control-git.mdx
+++ b/src/content/docs/fundamentals/version-control-git.mdx
@@ -37,4 +37,4 @@ Yes, although we spare the technical details here, Git at first glance is comple
 
### Register with DataPLANT
 
-In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/account/index): with DataPLANT.
+In order to use the [DataHUB](/nfdi4plants.knowledgebase/datahub) and other DataPLANT infrastructure and services, please [sign up](/nfdi4plants.knowledgebase/datahub/account/) with DataPLANT.
diff --git a/src/content/docs/git/git-selective-sharing.mdx b/src/content/docs/git/git-selective-sharing.mdx index 3d0cb9679..b2f623af2 100644 --- a/src/content/docs/git/git-selective-sharing.mdx +++ b/src/content/docs/git/git-selective-sharing.mdx @@ -12,7 +12,7 @@ This requires to work with [git branches](/nfdi4plants.knowledgebase/git/git-wor ## Scenario -You have [shared an ARC](/nfdi4plants.knowledgebase/datahub/datahub-working-together) via fork with a group. +You have [shared an ARC](/nfdi4plants.knowledgebase/datahub/working-together/) via fork with a group. Since you forked your ARC, you have progressed on your private version of the ARC, which now includes files that should not all be visible to the group. You still want to share some data (e.g. metadata) files with the group. ## Solution diff --git a/src/content/docs/guides/arc-enabling-platforms.md b/src/content/docs/guides/arc-enabling-platforms.md index 6920dc8d0..8633c6cda 100644 --- a/src/content/docs/guides/arc-enabling-platforms.md +++ b/src/content/docs/guides/arc-enabling-platforms.md @@ -29,7 +29,7 @@ As a platform you manage a lot of projects in parallel. Keeping these projects u Here's a few tips to support your project management: -- You can use the [wiki associated to the ARC](/nfdi4plants.knowledgebase/datahub/datahub-arc-wiki) to collect meeting minutes with your collaborators +- You can use the [wiki associated to the ARC](/nfdi4plants.knowledgebase/datahub/arc-features/datahub-arc-wiki) to collect meeting minutes with your collaborators - You can use the [ARC's issue board](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-arc-panel) to coordinate tasks between collaborators, team members, data analysts and others involved - You can use your established system of identifiers (e.g. 
for projects, samples) in [ISA metadata](/nfdi4plants.knowledgebase/core-concepts/isa) - You can also keep naming your ARCs with the same way you are used to name your project folders @@ -109,7 +109,7 @@ They are optional and it depends on your platform, whether these are suitable. - Where can they find what information (protocols, datasets, results)? - How can they interact with the ARC (upload, download, edit) -☑️ **[Create a DataHUB group](/nfdi4plants.knowledgebase/datahub/datahub-groups) for your platform** +☑️ **[Create a DataHUB group](/nfdi4plants.knowledgebase/datahub/working-together/datahub-groups) for your platform** - A group can help you organize all running projects in one place - You can easily manage access for multiple ARCs @@ -153,7 +153,7 @@ Running a central platform, you probably follow an established project managemen This is particularly important for how project folders are named on your platform's data storage. When implementing ARCs, **you do not need to change** this system. You can simply name the ARCs the same way you are used to name your project folders. -This is also true for scenario B exemplified above. You can simply [fork](/nfdi4plants.knowledgebase/datahub/datahub-arc-fork) the collaborator's ARC and rename it according to your system. +This is also true for scenario B exemplified above. You can simply [fork](/nfdi4plants.knowledgebase/datahub/arc-features/datahub-arc-fork) the collaborator's ARC and rename it according to your system. :::tip You might want to consider requiring your collaborators to name the assays (folder name or assay identifier) according to your project management system. @@ -163,4 +163,4 @@ You might want to consider requiring your collaborators to name the assays (fold Not all of your collaborators use ARCs or are planning to do so, but you still want to interact with those collaborators in the same routines employed with everyone else. 
They only need a DataHUB account and they can simply sign in with their existing scientific account.
 
-The DataHUB comes with [built-in features](/nfdi4plants.knowledgebase/datahub/datahub-files) that allow interaction with the ARC solely via the web browser without any addional tools.
+The DataHUB comes with [built-in features](/nfdi4plants.knowledgebase/datahub/arc-files/datahub-files) that allow interaction with the ARC solely via the web browser without any additional tools.
diff --git a/src/content/docs/resources/arc-vs-code.mdx b/src/content/docs/resources/arc-vs-code.mdx
index 4afe665a3..2d87ca947 100644
--- a/src/content/docs/resources/arc-vs-code.mdx
+++ b/src/content/docs/resources/arc-vs-code.mdx
@@ -35,7 +35,7 @@ You can also use the extension right away in the [DataHUB](/nfdi4plants.knowledg
 
1. Log in to the [PLANTDataHUB](https://git.nfdi4plants.org)
 
-2. Navigate to the [user preferences](/nfdi4plants.knowledgebase/datahub/datahub-user-settings)
+2. Navigate to the [user preferences](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-user-settings)
 
3. Scroll to the section "Integrations" at the bottom
4. 
Check the box "Enable extension marketplace" to activate vscode extensions and click `Save changes` From a98b822aa267f6287b6daf885e094e53eed48b7a Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 13:48:49 +0100 Subject: [PATCH 09/29] polish datahub headings --- .../navigation-settings/datahub-navigation.mdx | 6 +++--- .../datahub/navigation-settings/datahub-projects.mdx | 4 ++-- .../docs/datahub/working-together/datahub-groups.mdx | 12 +++++------- .../datahub-invite-collaborators.mdx | 9 +-------- src/content/docs/datahub/working-together/index.mdx | 6 +++--- 5 files changed, 14 insertions(+), 23 deletions(-) diff --git a/src/content/docs/datahub/navigation-settings/datahub-navigation.mdx b/src/content/docs/datahub/navigation-settings/datahub-navigation.mdx index 5b88d8da1..c7f61e003 100644 --- a/src/content/docs/datahub/navigation-settings/datahub-navigation.mdx +++ b/src/content/docs/datahub/navigation-settings/datahub-navigation.mdx @@ -20,7 +20,7 @@ This is the DataHUB landing page: Once you are logged in, the following menus are available for navigation. ::: -### Navigation Bar +## Navigation Bar From the navigation bar you can @@ -38,7 +38,7 @@ import NavBar from "@images/datahub/datahub-navbar.drawio.png" -### Sidebar +## Sidebar From the side bar in the top-left corner (1) you can navigate to [projects](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-projects) (2) or [groups](/nfdi4plants.knowledgebase/datahub/working-together/datahub-groups) (3) panels. @@ -48,7 +48,7 @@ From the side bar in the top-left corner (1) you can navigate to [projects](/nfd Once you navigate to an ARC ("Project") or group, the sidebar shows other options. 
::: -### Avatar Menu +## Avatar Menu In the avatar menu (1) in the top-right corner of the navigation bar you can diff --git a/src/content/docs/datahub/navigation-settings/datahub-projects.mdx b/src/content/docs/datahub/navigation-settings/datahub-projects.mdx index 4a973bd47..14dbb54d7 100644 --- a/src/content/docs/datahub/navigation-settings/datahub-projects.mdx +++ b/src/content/docs/datahub/navigation-settings/datahub-projects.mdx @@ -9,13 +9,13 @@ sidebar: import { Steps } from '@astrojs/starlight/components'; -**Project = ARC** +## Project = ARC - In the DataHUB, ARCs are called "projects"; they are the same. - An ARC can be shared with individual users (invited as "members") or a group. :::note -"Projects" and "Groups" are not the same +"Projects" and ["Groups"](/nfdi4plants.knowledgebase/datahub/working-together/datahub-groups) are not the same ::: ## Projects panel diff --git a/src/content/docs/datahub/working-together/datahub-groups.mdx b/src/content/docs/datahub/working-together/datahub-groups.mdx index 1ab3acaf3..3c165a586 100644 --- a/src/content/docs/datahub/working-together/datahub-groups.mdx +++ b/src/content/docs/datahub/working-together/datahub-groups.mdx @@ -10,13 +10,7 @@ sidebar: import { Steps } from '@astrojs/starlight/components'; -:::note -"Projects" and "Groups" are not the same -::: - -**DataHUB Groups** - -- A "Group" is a group of users (then "members" of that group) with specific [permissions](/nfdi4plants.knowledgebase/datahub/working-together/#roles-and-permissions). +- A "Group" is a group of users (then called "members" of that group) with specific [permissions](/nfdi4plants.knowledgebase/datahub/working-together/#roles-and-permissions). - A group (e.g. your lab or consortium) can share ARCs. - Groups help you communicate with all group members and manage permissions for the ARCs shared in that group. - Depending on their assigned roles, members of the group can view, edit, delete, and / or maintain the ARCs of that group. 
@@ -24,6 +18,10 @@ import { Steps } from '@astrojs/starlight/components'; - The group Owner(s) can adjust group settings and manage group memberships. - Groups can have subgroups (with a subset of members). Subgroups have the same features as groups. +:::note +"Projects" and "Groups" are not the same +::: + ## Groups panel diff --git a/src/content/docs/datahub/working-together/datahub-invite-collaborators.mdx b/src/content/docs/datahub/working-together/datahub-invite-collaborators.mdx index 0e4e11e80..83ad863b2 100644 --- a/src/content/docs/datahub/working-together/datahub-invite-collaborators.mdx +++ b/src/content/docs/datahub/working-together/datahub-invite-collaborators.mdx @@ -10,14 +10,7 @@ sidebar: import { Steps } from '@astrojs/starlight/components'; -:::note[About this guide] -In this guide we show you how to invite lab colleagues or project partners to your ARC for collaboration. -::: - -:::tip[Before we can start] -- You have a [DataPLANT account](https://register.nfdi4plants.org) -- You have created an ARC -::: +Here we show you how to invite lab colleagues or project partners to your ARC for collaboration. import MemberSelect from "@images/datahub/datahub-members-seq5.png" import MemberRole from "@images/datahub/datahub-members-seq6.png" diff --git a/src/content/docs/datahub/working-together/index.mdx b/src/content/docs/datahub/working-together/index.mdx index e641d9c52..a2435e789 100644 --- a/src/content/docs/datahub/working-together/index.mdx +++ b/src/content/docs/datahub/working-together/index.mdx @@ -30,7 +30,7 @@ If you have maintainer access, you can also directly create or upload an ARC to ![](@images/datahub/arc-sharing.drawio.png) -### Visibility +## Visibility The visibility of ARCs and groups can be managed individually for each ARC (see [ARC settings](/nfdi4plants.knowledgebase/datahub/navigation-settings/datahub-arc-settings)) or group see ([Creating a Group](/nfdi4plants.knowledgebase/datahub/working-together/datahub-groups)). 
@@ -42,7 +42,7 @@ The visibility of ARCs and groups can be managed individually for each ARC (see By default every ARC and every group is set to **private**. ::: -### Roles and permissions +## Roles and permissions If you create or upload an ARC to the DataHUB, you are the *Owner* by default. When inviting new members to an ARC or group, you can choose between different roles. @@ -57,7 +57,7 @@ When inviting new members to an ARC or group, you can choose between different r A detailed list of all permissions for the individual roles can be found [here](https://docs.gitlab.com/ee/user/permissions.html) ::: -### Namespaces +## Namespaces In the DataHUB, namespaces help organize related projects. From 4301e3ee4b2bd1c2da1a83eb89cbec99c60d44e7 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 14:07:07 +0100 Subject: [PATCH 10/29] add article template --- .github/ISSUE_TEMPLATE/article.yml | 53 ++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/article.yml diff --git a/.github/ISSUE_TEMPLATE/article.yml b/.github/ISSUE_TEMPLATE/article.yml new file mode 100644 index 000000000..0f61c161e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/article.yml @@ -0,0 +1,53 @@ +name: Add article +description: Add an article to the DataPLANT Knowledge Base +title: "[Add article]: " +body: + - type: input + id: title + attributes: + label: Article Title + validations: + required: true + - type: input + id: section + attributes: + label: Section + description: Please name the section where this article should be added + validations: + required: false + - type: textarea + id: authors + attributes: + label: Name article authors + description: Please add the article author(s) + value: | + name: + # Optionally add socials and affiliation + socials: + - icon: simple-icons:github + href: https://github.com/ + - icon: simple-icons:orcid + href: https://orcid.org/ + affiliation: + # Optionally add author image. 
Can be uploaded via comment below this issue. + image: "@images/authors/" + render: markdown + validations: + required: true + - type: textarea + id: content + attributes: + label: Article content + value: | + # First level heading + + ## Second level heading + + + 1. Install ... + 2. Open ... + 2. Click ... + + render: markdown + validations: + required: true \ No newline at end of file From 7be944547a41a4476db08b725d89398b05a30cb2 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 14:09:40 +0100 Subject: [PATCH 11/29] article template --- .github/ISSUE_TEMPLATE/article.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/article.yml b/.github/ISSUE_TEMPLATE/article.yml index 0f61c161e..7dbef93e8 100644 --- a/.github/ISSUE_TEMPLATE/article.yml +++ b/.github/ISSUE_TEMPLATE/article.yml @@ -1,11 +1,11 @@ name: Add article -description: Add an article to the DataPLANT Knowledge Base +description: Add an article. This is just a low-barrier entrypoint. Please consider opening a PR for contributions. title: "[Add article]: " body: - type: input id: title attributes: - label: Article Title + label: Article title validations: required: true - type: input From aa549bb796e9bc0eed0f61a7dee8ecbb07021a57 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 14:11:14 +0100 Subject: [PATCH 12/29] simplify --- .github/ISSUE_TEMPLATE/article.yml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/article.yml b/.github/ISSUE_TEMPLATE/article.yml index 7dbef93e8..de98ae2cd 100644 --- a/.github/ISSUE_TEMPLATE/article.yml +++ b/.github/ISSUE_TEMPLATE/article.yml @@ -1,13 +1,9 @@ name: Add article description: Add an article. This is just a low-barrier entrypoint. Please consider opening a PR for contributions. 
title: "[Add article]: " +labels: + - content body: - - type: input - id: title - attributes: - label: Article title - validations: - required: true - type: input id: section attributes: @@ -15,10 +11,16 @@ body: description: Please name the section where this article should be added validations: required: false + - type: input + id: title + attributes: + label: Title + validations: + required: true - type: textarea id: authors attributes: - label: Name article authors + label: Authors description: Please add the article author(s) value: | name: @@ -37,7 +39,7 @@ body: - type: textarea id: content attributes: - label: Article content + label: Content value: | # First level heading From d2e56003ed77fa5d99bf706c355d7f5230bf0109 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 14:17:36 +0100 Subject: [PATCH 13/29] bug template --- .github/ISSUE_TEMPLATE/bug.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug.yml diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml new file mode 100644 index 000000000..96dbab790 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -0,0 +1,20 @@ +name: Report a bug +title: "[bug]" +description: If you found a bug, error, inconsistency, please let us know. +labels: + - bug +body: + - type: input + id: url + attributes: + label: Article URL + description: Please paste the URL of the buggy article + validations: + required: true + - type: textarea + id: description + attributes: + label: Bug + description: Please describe the bug. 
+ validations: + required: true From c2153766c865a99f416a2bd3fcf701935f034fd4 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 14:25:58 +0100 Subject: [PATCH 14/29] article request template --- .../{article.yml => article-add.yml} | 2 +- .github/ISSUE_TEMPLATE/article-request.yml | 43 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) rename .github/ISSUE_TEMPLATE/{article.yml => article-add.yml} (98%) create mode 100644 .github/ISSUE_TEMPLATE/article-request.yml diff --git a/.github/ISSUE_TEMPLATE/article.yml b/.github/ISSUE_TEMPLATE/article-add.yml similarity index 98% rename from .github/ISSUE_TEMPLATE/article.yml rename to .github/ISSUE_TEMPLATE/article-add.yml index de98ae2cd..afafaacbd 100644 --- a/.github/ISSUE_TEMPLATE/article.yml +++ b/.github/ISSUE_TEMPLATE/article-add.yml @@ -2,7 +2,7 @@ name: Add article description: Add an article. This is just a low-barrier entrypoint. Please consider opening a PR for contributions. title: "[Add article]: " labels: - - content + - new-content body: - type: input id: section diff --git a/.github/ISSUE_TEMPLATE/article-request.yml b/.github/ISSUE_TEMPLATE/article-request.yml new file mode 100644 index 000000000..d275a8ba7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/article-request.yml @@ -0,0 +1,43 @@ +name: Request an article +description: If you're missing a guide about a tool, software or concept, please let us know. +title: "[Add article]: " +labels: + - missing-content +body: + - type: dropdown + id: type + attributes: + label: Type + description: What type of article are you looking for? 
+ multiple: true + options: + - guide + - tool tutorial + - introduction to concept + - use-case + - type: textarea + id: description + attributes: + label: Description + description: Please describe the article you'd like to see with a few bullet points + value: | + - + - + - + render: markdown + validations: + required: true + - type: textarea + id: related + attributes: + label: Related to + description: Please link related knowledge base content or sections here. + validations: + required: false + - type: textarea + id: resources + attributes: + label: Resources + description: If you have suitable resources on a similar topic, please share them. + validations: + required: false \ No newline at end of file From 4858c8c72278efacf90c2e084cc32b92b0e6e234 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 14:51:20 +0100 Subject: [PATCH 15/29] polish contribution guide --- CONTRIBUTING.md | 107 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 90 insertions(+), 17 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0838f5ffd..a43ec45f8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,64 @@ Your contribution to the DataPLANT Knowledge Base is highly appreciated. This guide is intended to show you how to contribute new articles and tutorials or review and adapt parts of existing ones. For changes and suggestions, feel free to open a GitHub issue or pull request. -The DataPLANT Knowledge Base is built on [astro starlight](https://starlight.astro.build). Many features not covered here specifically, may be found in their docs. 
+ +- [Setup](#setup) + - [Installation](#installation) + - [Create content and watch locally](#create-content-and-watch-locally) + - [Markdown](#markdown) + - [VSCode](#vscode) +- [Where should I place my content?](#where-should-i-place-my-content) + - [Start here](#start-here) + - [Guides](#guides) + - [``](#specific-tool) + - [Core concepts](#core-concepts) +- [Style Guide](#style-guide) + - [Use Starlight components](#use-starlight-components) + - [General file information](#general-file-information) + - [Authors](#authors) + - [Images](#images) + - [html](#html) + - [Tables](#tables) + - [Lists](#lists) + - [Article cross-references](#article-cross-references) + +## Setup + +The DataPLANT Knowledge Base is built on [astro starlight](https://starlight.astro.build). Many features not covered here specifically may be found in their docs. + +Starlight itself builds on [Astro](https://astro.build). Please check out [their website](https://docs.astro.build/en/install-and-setup/) for detailed installation instructions. + +### Installation + +1. Install [Node JS](https://nodejs.org/) +2. Clone the Knowledge Base repository via `git clone https://github.com/nfdi4plants/nfdi4plants.knowledgebase` + +### Create content and watch locally + +1. Install package dependencies via `npm install` +2. Start the knowledge base in watch mode via `npm run dev` + +#### Check for dead links + +Especially when moving or cross-linking files (other articles or images), make sure to test build the site via `npm run build`! This validates all links (cross-references between articles and image links). + +### Markdown + +All articles are written in markdown (.md or .mdx). +See https://starlight.astro.build/guides/authoring-content/ for a short introduction. + +### VSCode + +We recommend working with VSCode to generate content. 
+ +Recommended VSCode extensions: + +- Astro Build: https://marketplace.visualstudio.com/items?itemName=astro-build.astro-vscode +- MDX: https://marketplace.visualstudio.com/items?itemName=unifiedjs.vscode-mdx + +:::note +The mdx files cannot be previewed in VSCode. Please use `npm run dev` as described above. +::: ## Where should I place my content? @@ -14,7 +71,7 @@ This is a place for content required for the recommended way to work with an ARC Here you can place content for a specific topic or task. Do not place specific tools documentation here. -### +### `` If you have a tool that is used in the context of an ARC, you can place the documentation here. @@ -46,6 +103,31 @@ Always follow **DRY (Don't Repeat Yourself)** principle. If you have the same co In `.mdx` you can not only link to other content, but also directly insert other content in the current file. +### Authors + +Authors listed via a file in [`src/content/authors`](src/content/authors) can easily be mentioned in the yaml header of articles. + +For example `src/content/authors/kevin-frey.yml`: + +```yaml +name: Kevin Frey +image: "@images/authors/kevin-frey.jpg" +socials: + - icon: simple-icons:github + href: https://github.com/Freymaurer + - icon: simple-icons:orcid + href: https://orcid.org/0000-0002-8510-6810 +affiliation: DataPLANT +styling: + text: KFR +``` + +The author is linked simply via yaml article metadata + +```yaml +authors: + - kevin-frey +``` ### Images @@ -83,9 +165,12 @@ import MacOSSecurity from "@images/arcitect/macos-security.png" ``` -### `
` +### html -Don't! +Try to avoid html as it will usually override the consistent page design. +**Don't use `
`!** + +If you really need some special design or styling, raise an issue or contact the main contributors to discuss. ### Tables @@ -133,16 +218,4 @@ Instead, use references starting from the `docs` folder as root and add `/nfdi4p ```md [wiki associated to the ARC](/nfdi4plants.knowledgebase/datahub/datahub-arc-wiki) -``` - -### Test-build locally - -On a fresh clone of this repository, run - -1. `npm install` to install the latest package dependencies -2. `npm run dev` to render the knowledge base in watch mode - -#### Check for dead links - -Especially when editing / adding / moving files, make sure to test build the site via `npm run build`. -This validates all links (cross-references between articles and image links). +``` \ No newline at end of file From d1222d410cfb26b35bc423804eadc08d2977e5da Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 14:54:41 +0100 Subject: [PATCH 16/29] small note --- CONTRIBUTING.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a43ec45f8..94cab08ab 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,6 +6,7 @@ Your contribution to the DataPLANT Knowledge Base is highly appreciated. This gu - [Setup](#setup) - [Installation](#installation) - [Create content and watch locally](#create-content-and-watch-locally) + - [Check for dead links](#check-for-dead-links) - [Markdown](#markdown) - [VSCode](#vscode) - [Where should I place my content?](#where-should-i-place-my-content) @@ -39,9 +40,9 @@ Starlight itself builds on [Astro](https://astro.build). Please check out [their 1. Install package dependencies via `npm install` 2. Start the knowledge base in watch mode via `npm run dev` -#### Check for dead links +### Check for dead links -Especially when moving or cross-linking files (other articles or images), make sure to test build the site via `npm run build`! This validates all links (cross-references between articles and image links). 
+Especially when moving or cross-linking files (other articles or images), make sure to test-build the site via `npm run build`! This validates all links. ### Markdown From 24b4a7553cb0191447e3eedd34a46b887a7d210c Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 14:59:15 +0100 Subject: [PATCH 17/29] Update CONTRIBUTING.md --- CONTRIBUTING.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 94cab08ab..d77da0955 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -58,9 +58,7 @@ Recommended VSCode extensions: - Astro Build: https://marketplace.visualstudio.com/items?itemName=astro-build.astro-vscode - MDX: https://marketplace.visualstudio.com/items?itemName=unifiedjs.vscode-mdx -:::note -The mdx files cannot be previewed in VSCode. Please use `npm run dev` as described above. -::: +💡 The mdx files cannot be previewed in VSCode. Please use `npm run dev` as described above. ## Where should I place my content? 
@@ -219,4 +217,4 @@ Instead, use references starting from the `docs` folder as root and add `/nfdi4p ```md [wiki associated to the ARC](/nfdi4plants.knowledgebase/datahub/datahub-arc-wiki) -``` \ No newline at end of file +``` From 5b61737e32e4fb726e9387ae55833a5db4b1a0db Mon Sep 17 00:00:00 2001 From: Heinrich Lukas Weil Date: Wed, 13 Nov 2024 16:45:38 +0100 Subject: [PATCH 18/29] add some more introduction text to datamap and datafragments part --- src/content/docs/start-here/datamap.mdx | 32 +++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/content/docs/start-here/datamap.mdx b/src/content/docs/start-here/datamap.mdx index 3a2728f49..0af6ce1ca 100644 --- a/src/content/docs/start-here/datamap.mdx +++ b/src/content/docs/start-here/datamap.mdx @@ -13,13 +13,23 @@ import { Card } from '@astrojs/starlight/components'; import Datamap from "@images/start-here/arc-prototypic-datamap.svg" - + +The data file I added contained multiple columns, but in the assay annotation, I just pointed to the file. Is there a way to specify which column belongs to which sample? + -A Datamap allows you to precisely annotate data points in dataset files. +Yes! For this, you can make use of data fragment selectors. + +:::note +A data fragment selector is a short text which can be added to a file path to specify a portion of the file. +::: + +There exist various different file formats and each requires its own format for annotating fragments as well. +In this case, we have a tabular file format, for which our tooling provides help. ## Point into results – Fragment selectors -Using fragment selectors the provenence from the laboratory sample to the data point can persistently be described. +We will now add fragment selectors to the previously created annotation table to connect our sample to specific data points. +This way, provenance is persistently tracked. Data scientists down the line will thank you for the increase in machine-actionability. 
![](@images/start-here/arc-prototypic-datamap-fragment-selectors1.svg) @@ -51,7 +61,21 @@ Using fragment selectors the provenence from the laboratory sample to the data p ## DataMAP: Annotation for the fragment selectors -A datamap allows to add structured annotation to data in a similar manner as with biological materials. + +Wow, this is very verbose, even though it only took a few clicks! And you say data scientists will now finally be content? + + +Yes, but with a final bit of annotation you might even raise a smile from them. +Using data fragment selectors in the annotation table, you described the provenance between your samples and your data fragments. + +In addition, you can further annotate these data fragments and even those which do not directly originate from any of the samples. +For example, the column containing the names of the sugars. This annotation is done in the datamap. + +:::note +A datamap is a collection of data fragment selectors with additional annotation, meant to aid with parsing and understanding the contents of the file. +::: + +We'll now use the datamap to annotate all columns in the file, even the ones we didn't cover in the annotation table. 
![](@images/start-here/arc-prototypic-datamap-fragment-selectors2.svg) From f6961f0f7c3c294983453ad12b441a5e22df5d36 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 16:57:07 +0100 Subject: [PATCH 19/29] remove old user journey (now replaced by start here guide) --- src/content/docs/vault/arc-user-journey.mdx | 71 --------------------- 1 file changed, 71 deletions(-) delete mode 100644 src/content/docs/vault/arc-user-journey.mdx diff --git a/src/content/docs/vault/arc-user-journey.mdx b/src/content/docs/vault/arc-user-journey.mdx deleted file mode 100644 index 67c48df56..000000000 --- a/src/content/docs/vault/arc-user-journey.mdx +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: ARC User Journey -lastUpdated: 2022-08-05 -authors: - - martin-kuhl -status: published -pagefind: false ---- - -## About this guide - -In this guide we focus on explaining the ARC structure and its different components. - -## Viola's ARC - -Let's imagine a scenario where your project partner suggests at a conference to use this cool new Annotated Research Context (ARC) for your collaboration. Convinced by the versioning system and the single point of entry logic, you are motivated to set up your first own ARC after returning to the lab and fill it with your latest project results. Back home, however, you only remember the basic ARC structure and something about some isa.xlsx files. So how do you transfer your project into the empty ARC your project partner shared with you? - -import BaseArc from '@components/mdx/BaseARC.mdx' - - - -To answer this question, we will first take a look back at Viola's [metadata](/nfdi4plants.knowledgebase/fundamentals/metadata)example: - -> Viola investigates the effect of the plant circadian clock on sugar metabolism in *W. mirabilis*. For her PhD project, which is part of an EU-funded consortium in Prof. Beetroot's lab, she acquires seeds from a South-African Botanical Society. 
Viola grows the plants under different light regimes, harvests leaves from a two-day time series experiment, extracts polar metabolites as well as RNA and submits the samples to nearby core facilities for metabolomics and transcriptomics measurements, respectively. After a few weeks of iterative consultation with the facilities' heads as well as technicians and computational biologists involved, Viola receives back a wealth of raw and processed data. From the data she produces figures and wraps everything up to publish the results in the *Journal of Wonderful Plant Sciences*. - -The entire information given in this example can be stored within an ARC. To illustrate the [ARC specifications](/nfdi4plants.knowledgebase/core-concepts/arc/#arc-specification), we will highlight and explain every (sub)directory and ISA-file of the ARC with references to Viola's example. - -## isa.investigation.xlsx - -The ISA investigation workbook allows you to record administrative metadata of your project. In Viola's example, the title of the project, the contact persons, and related publications correspond to such metadata. Besides that, the workbook can also contain a short description of your project, but also lists included studies with respective design types, assays, protocols, etc.. Although we recommend to use the [ARC Commander](/nfdi4plants.knowledgebase/arc-commander) for adding these metadata, you can of course fill the workbook (and also the [isa.study.xlsx](#isastudyxlsx) and [isa.assay.xlsx](#isaassayxlsx)) manually. - -## Studies - -In the `studies` (sub)folders you can collect material and resources used within your studies. Corresponding information in Viola's project include the source of her seeds (South-African Botanical Society), how she grew the plants, and the design of the experiment (two-day time series, etc.). - -In case your investigation contains more than one study, each of these studies is placed in an individual subdirectory. 
The "resources" directory allows you to store material samples or external data as virtual sample files. You can use the protocol subdirectory to store free-text protocols that describe how the samples or materials were created. - -### isa.study.xlsx - -Every study contains one `isa.study.xlsx` file to specify the characteristics of all material and resources. Resources described in a study file can be the input for one or multiple assays or workflows. The workbook contains (at least) two worksheets: - -- "CircadianClock_Light regimes": One or more worksheets, depending on the number of used protocols, to annotate the properties of your source material following the ISA model. The sheet name is not obligatory to be the exact same as the "Study Identifier". While this can be done manually, we recommend using our ontology supported annotation tool [Swate](/nfdi4plants.knowledgebase/swate). -- "Study": Viola collected the administrative metadata of her study in this worksheet. This information can later be transferred into the `isa.investigation.xlsx` using the [ARC Commander](/nfdi4plants.knowledgebase/arc-commander). - -## Assays - -The `assays` folder allows you to store data and metadata from experimental processes or analytical measurements. Each assay is a collection of files stored in a single directory, including corresponding metadata files in form of an `isa.assay.xlsx`. Viola needs two subdirectories, one for her metabolomics and one for her transcriptomics dataset, respectively. Assay data files and free-text protocols are placed in individual subdirectories. Data files produced by an assay can be the input for one or multiple [workflows](#workflows). - -### isa.assay.xlsx - -Viola can annotate her experimental workflows of the metabolomics and transcriptomics assays with process parameters in the `isa.assay.xlsx` file, which needs to be present for every assay. 
The workbook contains two or more worksheets, depending on the number of used protocols: - - -- "MetaboliteExtraction": A worksheet to annotate the experimental workflow, in this case for extraction of metabolites. While this can be done manually, we recommend using our ontology supported annotation tool [Swate](/nfdi4plants.knowledgebase/swate). -> Note: Using the name of the protocol for the name of the worksheet can provide clarity. -- "MetaboliteMeasurement": A worksheet that describes the quantification of polar metabolites using gas-chromatography mass-spectrometry. -- "Assay": Viola collected the administrative metadata of her assay in this worksheet. This information can later be transferred into the `isa.investigation.xlsx` using the ARC Commander. - -## Workflows - -In an ARC `workflows` represent the processing steps used in computational analyses and other transformations of data originating from studies and assays. Typical examples include data cleaning and preprocessing, computational analysis, or visualization. The outcomes of these workflows ("run results") are stored in [runs](#runs). - -Viola received for her transcriptome and metabolome assays various processed data files, which she now can use to generate some nice plots. Additionally, the computational biologists sent her the code used for data processing, including an executable Common Workflow Language (CWL) file, which contains a standardized tool or workflow description. She stores these files in individual subdirectories for each workflow. - -## Runs - -After Viola generated her plots, she placed them in individual subdirectories, specific to the run they were generated with. In general, you can use the runs folder to store plots, tables, or similar result files that derive from computations on study, assay, external data or other runs. - -## Cheat sheet - -We hope that these examples nicely illustrated the ARC structure and that you are now ready to produce your own ARCs. 
Use the figure below as a cheat sheet to remember where to store which files. Or follow the [ARC Commander QuickStart](/nfdi4plants.knowledgebase/arc-commander/arc-commander-quick-start) to try it out yourself. From 5442750fb7bdc18029eb081f3f2a2b739f0cdcec Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 17:41:24 +0100 Subject: [PATCH 20/29] move and polish entry guide --- .../docs/guides/arc-practical-entry.mdx | 40 ++ .../vault/ARC-practical-entry-stepwise.md | 209 ------ src/content/docs/vault/ARC-practical-entry.md | 623 ------------------ 3 files changed, 40 insertions(+), 832 deletions(-) create mode 100644 src/content/docs/guides/arc-practical-entry.mdx delete mode 100644 src/content/docs/vault/ARC-practical-entry-stepwise.md delete mode 100644 src/content/docs/vault/ARC-practical-entry.md diff --git a/src/content/docs/guides/arc-practical-entry.mdx b/src/content/docs/guides/arc-practical-entry.mdx new file mode 100644 index 000000000..7cfdef67f --- /dev/null +++ b/src/content/docs/guides/arc-practical-entry.mdx @@ -0,0 +1,40 @@ +--- +title: Practical guide into the ARC ecosystem +lastUpdated: 2024-11-13 +authors: + - dominik-brilhaus +draft: true +hidden: true +pagefind: false +--- + +In this guide we collect recommendations and considerations on creating an ARC based on your current project and datasets + +## A "final" ARC does not exist – Immutable, yet evolving! + +Filling and structuring an ARC is a gradual process +- You (currently) won't win an award for the best ARC. And while there may be some ARCs that are more intuitive, helpful, insightful or reusable than others, at the very first they should be of help to you. Not being able to produce the perfect ARC right away should not keep you from creating an ARC at all. +- reserachers have different affinities and priorities for what needs to be FAIR first and what can be polished later + +So, try to take it easy, when converting your project into an ARC + +1. 
At first you might want to just dump the files into your ARC + - One of the ARC's core-concepts is that "everything is a file". + - Your daily work is probably a bunch of files and folders + - So why not just create an "empty" ARC to pack and decorate your files? + - don't worry too much about where to put everything in the first place + - dump what you have into an "additional payload" folder inside the ARC + - Already by doing that, your files are version-controlled via git. + - If you now upload the ARC to the DataHUB, you also have a save copy. + + :::tip + If you have large files (e.g. raw data). You can also dump them first anywhere. If you make sure they are properly Git LFS-tracked, it will be an easy task to later on move the LFS file pointers (rather than the actual large files). This can be done from anywhere. + ::: +2. Add a bit of metadata (e.g. about the project and the data creators) to your `investigation`. This makes it more sharable and citable right away. +2. Sketch your laboratory workflows + - One goal of the ARC is to be able to tell, which finding or result originated from which biological experiment. This would ultimately require to link the dataset files back to the individual sample. To do so, we essentially follow a path of *processes* with *inputs* and *outputs*. Some of the inputs and outputs want to be reused or reproduced, some of the processes want to be applied to other inputs. So before structuring your ARC for the existing dataset, it might help to sketch what was done in the lab. +2. Once you have a good overview and structure that suits your investigation, you can move your files into better places in `studies` and `assays`. Already at that stage, everyone will know where to find your raw data (`dataset`) and the steps you followed to create the data (`protocols`) +3. 
Before breaking down ("parameterizing") your protocols or computational workflows in all detail into annotation tables, as a first step it already helps a lot to just connect your `studies` and `assays` via the `Input` and `Output` nodes. You can basically re-draw your sketch of lab workflows via tables. And then simply reference the existing free-text protocols (`Protocol REF`).
To do so, we essentially follow a path of *processes* with *inputs* and *outputs*. Some of the inputs and outputs want to be reused or reproduced, some of the processes want to be applied to other inputs. - -Before creating an ARC for an existing dataset, it might help to visualize what was done in the lab. The following is very simplified example that most plant biologists can hopefully relate to. - -```mermaid - -%%{ - init: { - 'theme': 'base', - 'themeVariables': { - 'background': '#fff', - 'lineColor': '#2d3e50', - 'primaryTextColor': '#2d3e50' - } - } -}%% - -graph TD - -%% Nodes - S1(Seeds) - - S2(Leaves) - - M1(RNA) - M2(protein) - M3(cDNA) - M4(RNASeq Libraries) - M5(SDS-gel) - M6(western blot) - - P1>plant growth] - P2>RNA extraction] - P3>protein extraction] - P4>cDNA synthesis] - P5>qRT-PCR] - P6>Library preparation] - P7>Next Generation Sequencing] - P8>SDS Page] - P9>taking a photo] - P10>Immunoblotting] - P11>mapping] - - D1("qRT results") - D2(fastq files) - D3(Image of \n SDS gel) - D4(reference \n genome) - D5(count table) - - -%% Links - -subgraph Studies - subgraph study:drought - S1 ---P1--drought\nstress--> S2 - end - - subgraph study:heat - P13>plant growth] - - S1 ---P13--heat\nstress--> S4(Leaves) - end - - subgraph study:genome-ref - P12>Download] - x(Paper supplement) ---P12--> D4 - end - -end - - -subgraph Assays - - subgraph assay:Another Assay - P14>Process XY] - D6(Output XY) - - S4 ---P14--> D6 - end - - subgraph assay:qRT-PCR - S2 ---P2--> M1 - M1 ---P4--> M3 - M3 ---P5--> D1 - end - - subgraph assay:SDS-gel - S2 ---P3--> M2 - M2 ---P8--> M5 - M5 ---P9--> D3 - end - - subgraph assay:RNA-Seq - M1 ---P6--> M4 - M4 ---P7--> D2 - end - - subgraph assay:western Blot - M5 ---P10--> M6 - end - -end - -subgraph Worklows/Runs - - subgraph workflow:mapping - D2 --- P11 - D4 --- P11 - end - - subgraph run - P11 --> D5 - end - -end - - - - -%% Add legend -subgraph Legend - Sx(Sample) - Mx(Material) - Dx(Data) - Px>Process] -end - -%% Defining 
node styles - classDef S fill:#b4ce82, stroke:#333; - classDef M fill:#ffc000; - classDef D fill:#c21f3a,color:white; - classDef P stroke-width:0px; - -%% Assigning styles to nodes - class Sx,S1,S2,S4 S; - class Mx,M1,M2,M3,M4,M5,M6 M; - class Dx,D1,D2,D3,D4,D5,D6 D; - class Px,P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14 P; - -%% Box style -style Worklows/Runs fill:#fff, stroke-width:2px, stroke:#333; -style Studies fill:#fff, stroke-width:2px, stroke:#333; -style Assays fill:#fff, stroke-width:2px, stroke:#333; - -``` - -:bulb: On a side note, the above is a very wet-lab heavy example. However, conceptually the same applies to computational workflows. Coders oftentimes design their scripts, workflows and pipelines in successive modules with defined inputs and outputs. - - -## Now action - -Once - - - - -## Work with identifiers - -The ARC and the ISA metadata model offer determined places to - -- `Input` and `Output` fields such as Source Name, Sample Name, Data File Names -- `Protocol REF` - - -### Every file (name) is an identifier diff --git a/src/content/docs/vault/ARC-practical-entry.md b/src/content/docs/vault/ARC-practical-entry.md deleted file mode 100644 index be98e1102..000000000 --- a/src/content/docs/vault/ARC-practical-entry.md +++ /dev/null @@ -1,623 +0,0 @@ ---- -title: Practical Guide into the ARC ecosystem -lastUpdated: 2023-11-29 -authors: - - dominik-brilhaus -draft: true -hidden: true -pagefind: false ---- - -## About this guide - -In this guide we collect recommendations and considerations on creating an ARC based on your current project and datasets - - -## Convert your project into an ARC - -- you have files and folders -- they are stored somewhere -- pack them / decorate them in an ARC. - - -## Sketch your laboratory workflows - -One goal of the ARC is to be able to tell, which finding or result originated from which biological experiment. This would ultimately require to link the dataset files back to the individual sample. 
To do so, we essentially follow a path of *processes* with *inputs* and *outputs*. Some of the inputs and outputs want to be reused or reproduced, some of the processes want to be applied to other inputs. - -Before creating an ARC for an existing dataset, it might help to visualize what was done in the lab. The following is very simplified example that most plant biologists can hopefully relate to. - - -### Green-house to gene expression - -Consider you want to investigate the effect of drought stress on the transcript levels of you gene of interest (GOI) via qRT-PCR. You grow plants from seeds, drought-stress the plants and collect leaves at the end of the growth study. From the leave samples – homogenized to powder and stored in a freezer – you take an aliquot to extract RNA, from which you synthesize cDNA. The cDNA (together with other biologicals and chemicals) is the input for a qRT-PCR yielding relative transcript levels as the output. - -```mermaid - -%%{ - init: { - 'theme': 'base', - 'themeVariables': { - 'background': '#fff', - 'lineColor': '#2d3e50', - 'primaryTextColor': '#2d3e50' - } - } -}%% - -flowchart LR - -%% Nodes - S1(Seeds) - S2(Leaves) - - M1(RNA) - M3(cDNA) - - P1>plant growth] - P2>RNA extraction] - P4>cDNA synthesis] - P5>qRT-PCR] - - D1("qRT results") - -%% Links - - S1 ---P1--drought\nstress--> S2 - S2 ---P2--> M1 - M1 ---P4--> M3 - M3 ---P5--> D1 - -%% Defining node styles - classDef S fill:#b4ce82, stroke:#333; - classDef M fill:#ffc000; - classDef D fill:#c21f3a,color:white; - classDef P stroke-width:0px; - -%% Assigning styles to nodes - class Sx,S1,S2 S; - class Mx,M1,M2,M3,M4,M5,M6 M; - class Dx,D1,D2,D3,D4,D5 D; - class Px,P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13 P; - -``` - -### Confirm findings on protein level - -You found your GOI affected by drought stress on transcript level. 
To confirm that the expression of the encoded protein is likewise affected, you take another aliquot from the same leave samples, extract proteins, separate them by SDS-PAGE and immunoblot the SDS gel with antibodies specific for your GOI. - -```mermaid - -%%{ - init: { - 'theme': 'base', - 'themeVariables': { - 'background': '#fff', - 'lineColor': '#2d3e50', - 'primaryTextColor': '#2d3e50' - } - } -}%% - - -graph LR - -%% Nodes - S1(Seeds) - S2(Leaves) - - M1(RNA) - M2(protein) - M3(cDNA) - M5(SDS-gel) - M6(western blot) - - P1>plant growth] - P2>RNA extraction] - P3>protein extraction] - P4>cDNA synthesis] - P5>qRT-PCR] - P8>SDS Page] - P9>taking a photo] - P10>Immunoblotting] - - D1("qRT results") - D3(Image of \n SDS gel) - -%% Links - - S1 ---P1--drought\nstress--> S2 - - S2 ---P2--> M1 - S2 ---P3--> M2 - M1 ---P4--> M3 - M3 ---P5--> D1 - M2 ---P8--> M5 - M5 ---P9--> D3 - M5 ---P10--> M6 - -%% Defining node styles - classDef S fill:#b4ce82, stroke:#333; - classDef M fill:#ffc000; - classDef D fill:#c21f3a,color:white; - classDef P stroke-width:0px; - -%% Assigning styles to nodes - class Sx,S1,S2 S; - class Mx,M1,M2,M3,M4,M5,M6 M; - class Dx,D1,D2,D3,D4,D5 D; - class Px,P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13 P; - -``` - -### Global overview of gene expression - -You could show that the expression of your GOI was affected by drought on both transcript and protein level. In order to identify transcripts that correlate with your GOI under drought stress, you prepare RNA extracted earlier and submit it to a company for mRNA-Seq. 
- - -```mermaid - -%%{ - init: { - 'theme': 'base', - 'themeVariables': { - 'background': '#fff', - 'lineColor': '#2d3e50', - 'primaryTextColor': '#2d3e50' - } - } -}%% - - -graph LR - -%% Nodes - S1(Seeds) - - S2(Leaves) - - M1(RNA) - M2(protein) - M3(cDNA) - M4(RNASeq Libraries) - M5(SDS-gel) - M6(western blot) - - P1>plant growth] - P2>RNA extraction] - P3>protein extraction] - P4>cDNA synthesis] - P5>qRT-PCR] - P6>Library preparation] - P7>Next Generation Sequencing] - P8>SDS Page] - P9>taking a photo] - P10>Immunoblotting] - - D1("qRT results") - D2(fastq files) - D3(Image of \n SDS gel) - -%% Links - -S1 ---P1--drought\nstress--> S2 - - S2 ---P2--> M1 - S2 ---P3--> M2 - M1 ---P4--> M3 - M3 ---P5--> D1 - M1 ---P6--> M4 - M4 ---P7--> D2 - M2 ---P8--> M5 - M5 ---P9--> D3 - M5 ---P10--> M6 - -%% Defining node styles - classDef S fill:#b4ce82, stroke:#333; - classDef M fill:#ffc000; - classDef D fill:#c21f3a,color:white; - classDef P stroke-width:0px; - -%% Assigning styles to nodes - class Sx,S1,S2,S4 S; - class Mx,M1,M2,M3,M4,M5,M6 M; - class Dx,D1,D2,D3,D4,D5 D; - class Px,P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13 P; - -``` - -### Adding external data - -From the company you receive the RNA-Seq reads in form of fastq files. In order to quantify the reads and generate a count table, you map them against a suitable reference genome downloaded from an online database or publication's supplemental data. 
- - -```mermaid - -%%{ - init: { - 'theme': 'base', - 'themeVariables': { - 'background': '#fff', - 'lineColor': '#2d3e50', - 'primaryTextColor': '#2d3e50' - } - } -}%% - - -graph LR - -%% Nodes - S1(Seeds) - - S2(Leaves) - - M1(RNA) - M2(protein) - M3(cDNA) - M4(RNASeq Libraries) - M5(SDS-gel) - M6(western blot) - - P1>plant growth] - P2>RNA extraction] - P3>protein extraction] - P4>cDNA synthesis] - P5>qRT-PCR] - P6>Library preparation] - P7>Next Generation Sequencing] - P8>SDS Page] - P9>taking a photo] - P10>Immunoblotting] - P11>mapping] - - D1("qRT results") - D2(fastq files) - D3(Image of \n SDS gel) - D4(reference \n genome) - D5(count table) - -%% Links - S1 ---P1--drought\nstress--> S2 - P12>Download] - x(Paper supplement) ---P12--> D4 - - S2 ---P2--> M1 - S2 ---P3--> M2 - M1 ---P4--> M3 - M3 ---P5--> D1 - M1 ---P6--> M4 - M4 ---P7--> D2 - D2 --- P11 - D4 --- P11 - P11 --> D5 - M2 ---P8--> M5 - M5 ---P9--> D3 - M5 ---P10--> M6 - - -%% Defining node styles - classDef S fill:#b4ce82, stroke:#333; - classDef M fill:#ffc000; - classDef D fill:#c21f3a,color:white; - classDef P stroke-width:0px; - -%% Assigning styles to nodes - class Sx,S1,S2,S4 S; - class Mx,M1,M2,M3,M4,M5,M6 M; - class Dx,D1,D2,D3,D4,D5 D; - class Px,P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13 P; - -``` - -### What this could look like in an ARC - -```mermaid - -%%{ - init: { - 'theme': 'base', - 'themeVariables': { - 'background': '#fff', - 'lineColor': '#2d3e50', - 'primaryTextColor': '#2d3e50' - } - } -}%% - - -graph LR - -%% Nodes - S1(Seeds) - - S2(Leaves) - - M1(RNA) - M2(protein) - M3(cDNA) - M4(RNASeq Libraries) - M5(SDS-gel) - M6(western blot) - - P1>plant growth] - P2>RNA extraction] - P3>protein extraction] - P4>cDNA synthesis] - P5>qRT-PCR] - P6>Library preparation] - P7>Next Generation Sequencing] - P8>SDS Page] - P9>taking a photo] - P10>Immunoblotting] - P11>mapping] - - D1("qRT results") - D2(fastq files) - D3(Image of \n SDS gel) - D4(reference \n genome) - D5(count table) - 
- -%% Links - -subgraph Studies - subgraph study:drought - S1 ---P1--drought\nstress--> S2 - end - - subgraph study:genome-ref - P12>Download] - x(Paper supplement) ---P12--> D4 - end - -end - - -subgraph Assays - - subgraph assay:qRT-PCR - S2 ---P2--> M1 - M1 ---P4--> M3 - M3 ---P5--> D1 - end - - subgraph assay:SDS-gel - S2 ---P3--> M2 - M2 ---P8--> M5 - M5 ---P9--> D3 - end - - subgraph assay:RNA-Seq - M1 ---P6--> M4 - M4 ---P7--> D2 - end - - subgraph assay:western Blot - M5 ---P10--> M6 - end - -end - -subgraph Worklows/Runs - - subgraph workflow:mapping - D2 --- P11 - D4 --- P11 - end - - subgraph run - P11 --> D5 - end - -end - - -%% Add legend -subgraph Legend - Sx(Sample) - Px>Process] - Mx(Material) - Dx(Data) -end - -%% Defining node styles - classDef S fill:#b4ce82, stroke:#333; - classDef M fill:#ffc000; - classDef D fill:#c21f3a,color:white; - classDef P stroke-width:0px; - -%% Assigning styles to nodes - class Sx,S1,S2,S4 S; - class Mx,M1,M2,M3,M4,M5,M6 M; - class Dx,D1,D2,D3,D4,D5 D; - class Px,P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13 P; - -%% Box style -style Worklows/Runs fill:#fff, stroke-width:2px, stroke:#333; -style Studies fill:#fff, stroke-width:2px, stroke:#333; -style Assays fill:#fff, stroke-width:2px, stroke:#333; - -``` - - -### Add a new study and sample set - -```mermaid - -%%{ - init: { - 'theme': 'base', - 'themeVariables': { - 'background': '#fff', - 'lineColor': '#2d3e50', - 'primaryTextColor': '#2d3e50' - } - } -}%% - - -graph LR - -%% Nodes - S1(Seeds) - - S2(Leaves) - - M1(RNA) - M2(protein) - M3(cDNA) - M4(RNASeq Libraries) - M5(SDS-gel) - M6(western blot) - - P1>plant growth] - P2>RNA extraction] - P3>protein extraction] - P4>cDNA synthesis] - P5>qRT-PCR] - P6>Library preparation] - P7>Next Generation Sequencing] - P8>SDS Page] - P9>taking a photo] - P10>Immunoblotting] - P11>mapping] - - D1("qRT results") - D2(fastq files) - D3(Image of \n SDS gel) - D4(reference \n genome) - D5(count table) - - -%% Links - -subgraph 
Studies - subgraph study:drought - S1 ---P1--drought\nstress--> S2 - end - - subgraph study:heat - P13>plant growth] - - S1 ---P13--heat\nstress--> S4(Leaves) - end - - subgraph study:genome-ref - P12>Download] - x(Paper supplement) ---P12--> D4 - end - -end - - -subgraph Assays - - subgraph assay:Another Assay - P14>Process XY] - D6(Output XY) - - S4 ---P14--> D6 - end - - subgraph assay:qRT-PCR - S2 ---P2--> M1 - M1 ---P4--> M3 - M3 ---P5--> D1 - end - - subgraph assay:SDS-gel - S2 ---P3--> M2 - M2 ---P8--> M5 - M5 ---P9--> D3 - end - - subgraph assay:RNA-Seq - M1 ---P6--> M4 - M4 ---P7--> D2 - end - - subgraph assay:western Blot - M5 ---P10--> M6 - end - -end - -subgraph Worklows/Runs - - subgraph workflow:mapping - D2 --- P11 - D4 --- P11 - end - - subgraph run - P11 --> D5 - end - -end - - - - -%% Add legend -subgraph Legend - Sx(Sample) - Px>Process] - Mx(Material) - Dx(Data) -end - -%% Defining node styles - classDef S fill:#b4ce82, stroke:#333; - classDef M fill:#ffc000; - classDef D fill:#c21f3a,color:white; - classDef P stroke-width:0px; - -%% Assigning styles to nodes - class Sx,S1,S2,S4 S; - class Mx,M1,M2,M3,M4,M5,M6 M; - class Dx,D1,D2,D3,D4,D5,D6 D; - class Px,P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14 P; - -%% Box style -style Worklows/Runs fill:#fff, stroke-width:2px, stroke:#333; -style Studies fill:#fff, stroke-width:2px, stroke:#333; -style Assays fill:#fff, stroke-width:2px, stroke:#333; - -``` - -:bulb: On a side note, the above is a very wet-lab heavy example. However, conceptually the same applies to computational workflows. Coders oftentimes design their scripts, workflows and pipelines in successive modules with defined inputs and outputs. 
- - -## Now action - -Once - - - - -## Work with identifiers - -The ARC and the ISA metadata model offer determined places to - -- `Input` and `Output` fields such as Source Name, Sample Name, Data File Names -- `Protocol REF` - - -### Every file (name) is an identifier From d8d3e39544c7c872182c5ab2bbf12c53a62246e0 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 18:03:18 +0100 Subject: [PATCH 21/29] publish practical entry --- .../docs/guides/arc-practical-entry.mdx | 119 +++++++++++++----- 1 file changed, 85 insertions(+), 34 deletions(-) diff --git a/src/content/docs/guides/arc-practical-entry.mdx b/src/content/docs/guides/arc-practical-entry.mdx index 7cfdef67f..67f196690 100644 --- a/src/content/docs/guides/arc-practical-entry.mdx +++ b/src/content/docs/guides/arc-practical-entry.mdx @@ -1,40 +1,91 @@ --- -title: Practical guide into the ARC ecosystem +title: Creating an ARC for Your Project lastUpdated: 2024-11-13 authors: - dominik-brilhaus -draft: true -hidden: true -pagefind: false +sidebar: + order: 0 + badge: + text: new + variant: tip --- -In this guide we collect recommendations and considerations on creating an ARC based on your current project and datasets - -## A "final" ARC does not exist – Immutable, yet evolving! - -Filling and structuring an ARC is a gradual process -- You (currently) won't win an award for the best ARC. And while there may be some ARCs that are more intuitive, helpful, insightful or reusable than others, at the very first they should be of help to you. Not being able to produce the perfect ARC right away should not keep you from creating an ARC at all. -- reserachers have different affinities and priorities for what needs to be FAIR first and what can be polished later - -So, try to take it easy, when converting your project into an ARC - -1. At first you might want to just dump the files into your ARC - - One of the ARC's core-concepts is that "everything is a file". 
- - Your daily work is probably a bunch of files and folders - - So why not just create an "empty" ARC to pack and decorate your files? - - don't worry too much about where to put everything in the first place - - dump what you have into an "additional payload" folder inside the ARC - - Already by doing that, your files are version-controlled via git. - - If you now upload the ARC to the DataHUB, you also have a save copy. - - :::tip - If you have large files (e.g. raw data). You can also dump them first anywhere. If you make sure they are properly Git LFS-tracked, it will be an easy task to later on move the LFS file pointers (rather than the actual large files). This can be done from anywhere. - ::: -2. Add a bit of metadata (e.g. about the project and the data creators) to your `investigation`. This makes it more sharable and citable right away. -2. Sketch your laboratory workflows - - One goal of the ARC is to be able to tell, which finding or result originated from which biological experiment. This would ultimately require to link the dataset files back to the individual sample. To do so, we essentially follow a path of *processes* with *inputs* and *outputs*. Some of the inputs and outputs want to be reused or reproduced, some of the processes want to be applied to other inputs. So before structuring your ARC for the existing dataset, it might help to sketch what was done in the lab. -2. Once you have a good overview and structure that suits your investigation, you can move your files into better places in `studies` and `assays`. Already at that stage, everyone will know where to find your raw data (`dataset`) and the steps you followed to create the data (`protocols`) -3. Before breaking down ("parameterizing") your protocols or computational workflows too all detail into annotation tables, as a first step it already helps a lot to just connect your `studies` and `assays` via the `Input` and `Output` nodes. 
You can basically re-draw your sketch of lab workflows via tables. And then simply reference the existing free-text protocols (`Protocol REF`).
-4. Once you decide to make the data more machine-readable and searchable, you can start to parameterize your protocols and fill out the `study` and `assay` annotation tables.
-5. The same applies to your data analysis. No matter if it's based on clickable softwares or code and scripts. Keep it simple first: create virtual `assays` where you simply treat your data analysis as `protocols` and store the results in `dataset`
-6. If you want to make scripts more reusable and the data analysis reproducible, wrap them with CWL, use containers and other means of software dependency management
\ No newline at end of file
+You followed Viola's steps during the [start here](/nfdi4plants.knowledgebase/start-here/) guide and are now overwhelmed? Sure, a guide streamlined around a demo dataset is a whole different story than achieving this with your own complex data.
+Here we provide recommendations and considerations for structuring an ARC based on **your current project and datasets**. Remember: creating an ARC is an ongoing process, and it’s meant to evolve over time.
+
+
+## The "final" ARC does not exist – Immutable, Yet Evolving!
+
+Think of your ARC as an evolving entity that adapts and improves as your project progresses.
+
+- **Don't aim for perfection right away:** At first, your ARC doesn't need to be flawless. You’re not expected to win an award for the best ARC from the outset. The goal is for it to be useful to **you**. As long as your ARC serves its purpose—whether by organizing data, tracking workflows, or aiding in reproducibility—that’s a win.
+- **Priorities vary across researchers:** Different people may have different ideas about what should be made FAIR first and what can be polished later. Allow yourself to start with the basics and improve it step by step.
+ +So, **don't stress** about making your ARC perfect from the get-go—focus on making it functional. + +## Start Simple: Just Dump the Files Into Your ARC + +An ARC’s core principle is that "everything is a file." It’s common to work with a collection of files and folders in your daily research. Why not just start by organizing them into an ARC? + +- **Initial File Dump:** At first, don’t worry too much about the precise structure. Simply place your files into an “**additional payload**” folder within the ARC. This will help you get started without overthinking the details. +- **Version Control with Git:** By putting your files in the ARC, you instantly gain the benefit of [version control through Git](/nfdi4plants.knowledgebase/fundamentals/version-control-git). This helps you track changes and maintain a history of your files. +- **Safe Backup via [DataHUB](/nfdi4plants.knowledgebase/datahub):** Once you upload your ARC to the [DataHUB](/nfdi4plants.knowledgebase/datahub), you’ll also have a secure backup of your files. + +:::tip +If you’re dealing with large files (e.g., raw sequencing data), you can initially store them anywhere. Just make sure they’re tracked with [Git LFS (Large File Storage)](/nfdi4plants.knowledgebase/git/git-lfs.mdx). This way, you can later move the LFS pointers into your ARC without dealing with the actual large files. +::: + +## Add Metadata to Make Your ARC More Shareable and Citable + +Next, enrich your ARC with some **basic metadata**: + +- **Project and Creator Info:** Include metadata about your project and the researchers involved. This step makes your ARC more sharable and **citable** from the start. +- **Link to the Investigation:** Add this metadata to your `investigation` section. This is an easy way to ensure your work is discoverable and properly credited. + +## Sketch Your Laboratory Workflows + +A key goal of an ARC is to trace each finding or result back to its originating biological experiment. 
To achieve this, your ARC will need to link dataset files to individual samples through a series of **processes** (laboratory or computational steps) with defined **inputs** and **outputs**. + +- **Map Out Your Lab Workflows:** Before diving into the structure of your ARC, take some time to **sketch** what you did in the lab. What experiments did you perform? What samples did you analyze? Which protocols did you follow? This sketch will help you understand how to organize your data and workflows later. + +--- + +## Organize Your Files into `studies` and `assays` + +Once you have a better understanding of your lab processes, you can begin organizing your ARC: + +- **Define `studies` and `assays`:** Structure your data by moving files into relevant folders, such as `studies` and `assays`. This makes it clear where the raw data (`dataset`) is stored and which protocols were used to generate that data. +- **Reference Protocols:** As you organize, simply reference the **existing protocols** (stored as free-text documents) in your ARC. This ensures consistency without overwhelming you with unnecessary details at this stage. + +## Simple First: Link `Input` and `Output` Nodes + +Before delving into complex parameterization or detailed annotation tables, start simple: + +- **Connect Inputs and Outputs:** Begin by connecting your `studies` and `assays` through **input** and **output** nodes. This allows you to trace the flow of data through your workflows without getting bogged down by excessive detail. +- **Re-draw Lab Workflows:** At this stage, you can essentially redraw your lab workflows as tables, mapping each process step to its inputs and outputs. + +## Parameterize Your Protocols for Machine Readability + +Once you have the basic structure in place, you can start making your data more **machine-readable** and **searchable**: + +- **Parameterize Protocols:** To improve reproducibility, break down your protocols and workflows into structured annotation tables. 
This will allow you to capture the parameters used at each step of your research.
+- **Make It Searchable:** This will make your study more **discoverable** and ensure that your methods are clear and reproducible.
+
+## Keep It Simple for Your Data Analysis Workflows
+
+The same approach applies to your data analysis workflows:
+
+- **Treat Data Analysis as Protocols:** Regardless of whether your data analysis involves clickable software or custom code, treat it like a **protocol**. For now, just store the results in your `dataset` folder.
+- **Iterate as You Go:** You don’t need to go into deep detail at first. Just focus on capturing the core analysis steps, and refine them later as your project progresses.
+
+## Making Data Analysis More Reproducible: Use CWL, Containers, and Dependency Management
+
+If you want to make your data analysis more **reproducible** and ensure that your workflows are **easily reusable**, consider wrapping your analysis tools in **CWL** (Common Workflow Language) and using **containers**:
+
+- **CWL for Reproducibility:** Use CWL to describe your computational workflows in a standardized way. This ensures that others can run your analysis with the same inputs and parameters, regardless of their system.
+- **Containerization:** Leverage Docker or Singularity containers to encapsulate all software dependencies. This makes it easier to share your workflows and ensures they run consistently across different environments.
+- **Manage Dependencies:** Use tools like Conda or Docker to manage your software dependencies, avoiding issues with mismatched versions or missing libraries.
+
+## Conclusion: The ARC is a Living FAIR Digital Object
+
+The process of creating an ARC is **gradual** and **evolving**. Start simple, and focus on getting the basics in place. Over time, you can refine and enhance your ARC to improve its usefulness and functionality, making it a valuable tool for organizing, sharing, and reproducing your research.
From ed2a217747937a9c43adb472e8615d42b9abba64 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 18:04:54 +0100 Subject: [PATCH 22/29] move best practice for data annotation guide --- .../best-practices-for-data-annotation.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/content/docs/{fundamentals => guides}/best-practices-for-data-annotation.md (100%) diff --git a/src/content/docs/fundamentals/best-practices-for-data-annotation.md b/src/content/docs/guides/best-practices-for-data-annotation.md similarity index 100% rename from src/content/docs/fundamentals/best-practices-for-data-annotation.md rename to src/content/docs/guides/best-practices-for-data-annotation.md From 816758dbe1fa0bc85f6d8e37ed4132129c87a861 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 18:07:17 +0100 Subject: [PATCH 23/29] fix link --- src/content/docs/guides/arc-practical-entry.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/content/docs/guides/arc-practical-entry.mdx b/src/content/docs/guides/arc-practical-entry.mdx index 67f196690..2e6496ed0 100644 --- a/src/content/docs/guides/arc-practical-entry.mdx +++ b/src/content/docs/guides/arc-practical-entry.mdx @@ -32,7 +32,7 @@ An ARC’s core principle is that "everything is a file." It’s common to work - **Safe Backup via [DataHUB](/nfdi4plants.knowledgebase/datahub):** Once you upload your ARC to the [DataHUB](/nfdi4plants.knowledgebase/datahub), you’ll also have a secure backup of your files. :::tip -If you’re dealing with large files (e.g., raw sequencing data), you can initially store them anywhere. Just make sure they’re tracked with [Git LFS (Large File Storage)](/nfdi4plants.knowledgebase/git/git-lfs.mdx). This way, you can later move the LFS pointers into your ARC without dealing with the actual large files. +If you’re dealing with large files (e.g., raw sequencing data), you can initially store them anywhere. 
Just make sure they’re tracked with [Git LFS (Large File Storage)](/nfdi4plants.knowledgebase/git/git-lfs). This way, you can later move the LFS pointers into your ARC without dealing with the actual large files. ::: ## Add Metadata to Make Your ARC More Shareable and Citable From 5db7182d25c203bf503c4e2f24bd6271df7a3c1f Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 18:17:31 +0100 Subject: [PATCH 24/29] move reproduce draft --- .../docs/fundamentals/reproduce-reuse.md | 80 ++++++++++++ src/content/docs/vault/cwl.md | 114 ------------------ src/content/docs/vault/reproduce-reuse.md | 69 ----------- 3 files changed, 80 insertions(+), 183 deletions(-) create mode 100644 src/content/docs/fundamentals/reproduce-reuse.md delete mode 100644 src/content/docs/vault/cwl.md delete mode 100644 src/content/docs/vault/reproduce-reuse.md diff --git a/src/content/docs/fundamentals/reproduce-reuse.md b/src/content/docs/fundamentals/reproduce-reuse.md new file mode 100644 index 000000000..eef925141 --- /dev/null +++ b/src/content/docs/fundamentals/reproduce-reuse.md @@ -0,0 +1,80 @@ +--- +title: Reproduce and reuse +lastUpdated: 2022-09-23 +authors: + - dominik-brilhaus +draft: true +pagefind: false +--- + +This guide outlines key principles and practical steps for achieving reproducibility in both wet-lab and computational (dry-lab) environments. It aims to help researchers in biological sciences understand and implement reproducibility in their work, ensuring that experimental outcomes, data, and analyses can be reliably repeated by others. 
+ +## Reproducibility in Science: Wet-Lab vs Dry-Lab + +| Wet Lab | Dry Lab | +| ------- | ------- | +| Company RNA extraction kit with all buffers and most materials and tools | Established/commercial software; somewhat contained, isolated, self-sustained | +| "Manual" protocol where you mix buffers together yourself | Scripts or combinations of scripts (pipelines) with varying inputs (reference datasets) and tool dependencies (code interpreters, packages, functions) | +| Version, batch, or lot number of materials | Software/package version | +| Laboratory environment (humidity, temperature, equipment) | Operating system (Linux, Windows, Mac) | + +## Challenges in Wet-Lab Reproducibility + +In the wet-lab, many factors influence reproducibility, making it difficult to recreate the exact same results. These factors include: + +- **Biological variance**: Even with the same protocols and conditions, biological systems often exhibit inherent variability. +- **Hands-on factors**: More individuals handling the experiment can introduce variability. +- **Environmental factors**: Humidity, temperature, and even the specific equipment used (e.g., centrifuges, growth chambers) can affect results. + +## Reproducibility in Computational Analyses + +Reproducibility in computational analyses generally focuses on two key aspects: + +1. **Exact output reproduction**: Ensuring that the same input data will consistently yield the same result when the analysis is rerun. +2. **Flexible workflow application**: Ensuring that workflows and analysis pipelines can be applied to different datasets, producing analogous results that can be fed into other analyses or workflows (e.g., generating similar figures). + +## How We Typically Work with Scripts in Computational Workflows + +In computational biology, scripts are often: + +- **Interactive and iterative**: Researchers frequently modify and rerun scripts in response to their data or research questions. 
+- **Adapted for specific needs**: Researchers often adapt generic scripts to their specific datasets, tweaking them as they go. +- **Hard-coded**: Inputs, outputs, and parameters are sometimes hard-coded directly into the script, which can lead to issues when sharing or transferring the script to others. + +## Common Problems with Reproducibility in Computational Workflows + +One of the main challenges in reproducibility is sharing scripts with others: + +- **Missing dependencies**: When passing a script to a colleague, it might not work because of missing software dependencies, different versions of libraries, or changed file paths. +- **Environmental differences**: Different operating systems, system configurations, or setups may lead to issues in running the script as intended. + +## Common Sources for Scripts + +Researchers typically source scripts from: + +- **Workshops or summer schools**: Scripts often come from educational events and are adapted for specific use. +- **Colleagues**: Researchers share their scripts with peers, who then modify them for their own needs. +- **Manuals or tutorials**: Many scripts are adapted from tutorials available online (e.g., from GitHub repositories). +- **Community forums**: Script snippets often come from community-driven sites like Stack Overflow. + +## Software Dependencies and Environment Management + +Reproducibility can break down due to the numerous dependencies and system requirements involved in computational workflows. These include: + +- **Operating systems**: Different platforms (Linux, Windows, Mac) can affect how software runs. +- **Programming environments**: Variations in the programming language (e.g., Python, R, Julia) or the environment (e.g., Shell, Jupyter notebooks) can cause inconsistencies. +- **Package versions**: Even the same software package can behave differently between versions, leading to unexpected results. 
+- **Virtual environments**: Without using tools like virtual environments or containers, different users might have conflicting software setups. + +## Solutions for Reproducibility + +Several tools and approaches can help address these issues and improve reproducibility: + +- **Containers**: Using Docker or Singularity allows you to package software, dependencies, and environments into a portable container that can be executed consistently across different systems. +- **Workflow languages**: Tools like **CWL** (Common Workflow Language), **Snakemake**, and **Nextflow** help create standardized workflows that are environment-agnostic, specifying input/output parameters and dependencies in a way that’s easy to share and reproduce. + +## Towards a Reproducible Research Environment + +Reproducibility is a critical principle in both biological and computational research. By carefully structuring your workflows, using version control, managing dependencies with tools like containers and CWL, and applying FAIR principles to your data, you can ensure that your research can be reliably reproduced and shared. + +By adopting these practices, you’ll not only improve the robustness and transparency of your own work, but also make it easier for others to build upon your research in the future. diff --git a/src/content/docs/vault/cwl.md b/src/content/docs/vault/cwl.md deleted file mode 100644 index a439387c9..000000000 --- a/src/content/docs/vault/cwl.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -title: Reproduce and reuse -lastUpdated: 2022-08-08 -authors: - - dominik-brilhaus -draft: true -hidden: true -pagefind: false ---- - -> Note: This is just a first collection of thoughts. 
-> Could be partitioned into fundamentals/implementation/tutorial - -## Fundamentals: (code / software) reproducibility - - -Reproducibility in science overall - -wet lab | dry lab ---- | --- -company RNA extraction kit with all buffers and most of materials and tools| established / (commercial) software; somewhat contained, isolated, self-sustained -"manual" protocol where you buy and mix buffers together yourself | script or combinations of scripts (pipeline) with varying inputs (reference data sets) and tool dependencies (code interpreters, packages, functions) -version, batch or LOT number | software / package version -laboratory environment | operating system - - -In the wet-lab many more factors affect reproducibility, making it close to impossible to reproduce the exact same outcomes (results, datasets) -- biological variance -- hands-on factor (more hands, bigger variance) -- environment (humidity, temperature), but also standard devices (growth chamber, centrifuge) - - - -- Reproducibility of computational analyses - - a) you can "reproduce" that exact same output (run result) using the exact same inputs - - b) you can apply the analysis onto other data to produce analogous outputs, that can be fed into other workflows (e.g. generate similar figures) - -- How we usually (learn to) work with scripts - - interactive, iterative - - adapt script to specific needs - - write (hard-code) inputs, outputs into script - -- Problem - - hand script to colleague - - script not working due to missing (software) dependencies, changed (absolute) paths to environments / inputs / other dependencies (e.g. 
database resources) - -- Example sources for scripts - - workshop / summer school - - colleagues - - manual / tutorial to a tool (downloaded and adapted from GitHub) - - copy/pasted from stack overflow - -- Software dependencies - - on multiple levels / in different shapes - - operating system (Linux, Windows, Mac) - - programming environment / interpreter (shell, python, r, julia, f#) - - packages / libraries within the programming environment - - version of one of above - - (use of) virtual environments - -- Towards solutions - - containers - - docker, singularity - - workflow languages - - CWL, snakemake, neftflow - - environment-agnostic - - formulate ins, outs, parameters - - - - -## Implementation: Make your ARC reproducible / executable with CWL - -1. add workflows / scripts to `workflows` -2. Make workflows CWL-executable, by adding (parallel to the workflow / in the same workflows subdir) a .cwl file that - - describes the expected inputs, outputs, and parameters -3. Execute the workflow - 1. "directly", calling the parameters via CLI - - ```bash - cwltool my_workflow.cwl -p1 parameter1 -p2 parameter2 - ``` - - 2. referencing to a YAML file, that collects the required parameters - ```bash - cwltool my_workflow.cwl my_workflow_parameters.yml - ``` - -- use of paths / working directories -- runs folder -- Workflow metadata: my_workflow_parameters.yml - -## Tutorial: CWL Generator quickstart - -### Install - -[gh-CWLgenerator][https://github.com/nfdi4plants/CWLGenerator] - -### Dependencies - -- Node.js (required for CWL Generator) -- cwltool / cwl-runner -- Docker (?) - -### Recommendations - -- VS code extension [CWL (Rabix/Benten)](https://marketplace.visualstudio.com/items?itemName=sbg-rabix.benten-cwl) - - -### Note / Typical errors - -- (re)moved a required input or output -- cwltool can neither resolve "~" nor $HOME ?! -- let recurrent variables (script name, outfolder, etc.) 
come first diff --git a/src/content/docs/vault/reproduce-reuse.md b/src/content/docs/vault/reproduce-reuse.md deleted file mode 100644 index be95aa100..000000000 --- a/src/content/docs/vault/reproduce-reuse.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -title: Reproduce and reuse -lastUpdated: 2022-09-23 -authors: - - dominik-brilhaus -draft: true -hidden: true -pagefind: false ---- - -> This article is work-in-progress. - -Key aspects of the [FAIR principles](/nfdi4plants.knowledgebase/fundamentals/fair-data-principles) and driver for the development of good [RDM](/nfdi4plants.knowledgebase/fundamentals/research-data-management) are *reproducibility* and *re-usability* (FAI**R**) of scientific outputs as well as workflows leading to these outputs. Although here we focus more on data and the "computational side", we would like to emphasize some analogies between **Data** science and **PLANT** science. Especially as some requirements in both environments can at least in part be met with similar approaches. - -Consider our PhD Viola (see [metadata](/nfdi4plants.knowledgebase/fundamentals/metadata)). In the wet lab, she extracts RNA from her plant samples using a ready-to-use commercial extraction kit with all buffers and some required materials and tools included. Similarly in the dry lab she would use an established, commercial office software that is mostly contained/isolated, for small spread-sheet calculations. There is no commercial kit available to extract metabolites suitable with the special plant species Viola is interested in. So she uses a "manual" protocol established in her lab, for which she orders and prepares buffers and solutions herself and gathers the required devices, tubes and materials. Once she receives her RNA-Seq data, she sets up her own combinations of scripts (pipeline) with varying inputs (reference data sets) and tool dependencies (code interpreters, packages, functions). 
In the end, Viola's complete workspace, be it the laboratory environment or her computer's operating system, comes with its specific setup, tools, resources and limitations. And her research routine would likely differ if she were to pursue it in a different lab or using another computer. - -For both types of workflows, there are (clearly) defined inputs and outputs, e.g. the state of the or the data format. And Viola makes sure to document as much metadata as possible to make her workflows reproducible, including e.g. version, batch or LOT numbers of a kit or chemical and the versions of software and packages. Also trouble-shooting with a colleague, company, data steward or seeking help in online forums, is always easier if you share information about your setting. - - - -## On the shoulders of giants - - -"In real life" you can take a sample once and only once. You can take replicate samples – technical (same plant different leaf) or biological (different plant) –, but in the end this is a new and different sample. In the wet-lab many more factors affect reproducibility, making it close to impossible to reproduce the exact same outcome (results, datasets). These include biological variance, hands-on factors (more hands, bigger variance), the environment (humidity, temperature), but also deviations in standard devices (growth chamber, centrifuge). - - Still for other researchers to be able to re-use (i.e. build on) your findings, it will be helpful to document, metadata... - - -1. re-use an outcome (data or sample) -2. reproduce an outcome (peer-review) -3. re-use a workflow (lab protocol or analysis) - - - -- Reproducibility of computational analyses - - a) you can "reproduce" that exact same output (run result) using the exact same inputs - - b) you can apply the analysis onto other data to produce analogous outputs, that can be fed into other workflows (e.g. 
generate similar figures) - -- How we usually (learn to) work with scripts - - interactive, iterative - - adapt script to specific needs - - write (hard-code) inputs, outputs into script - -- Problem - - hand script to colleague - - script not working due to missing (software) dependencies, changed (absolute) paths to environments / inputs / other dependencies (e.g. database resources) - -- Example sources for scripts - - workshop / summer school - - colleagues - - manual / tutorial to a tool (downloaded and adapted from GitHub) - - copy/pasted from stack overflow - -- Software dependencies - - on multiple levels / in different shapes - - operating system (Linux, Windows, Mac) - - programming environment / interpreter (shell, python, r, julia, f#) - - packages / libraries within the programming environment - - version of one of above - - (use of) virtual environments - -- Towards solutions - - containers - - docker, singularity - - workflow languages - - CWL, snakemake, nextflow - - environment-agnostic - - formulate ins, outs, parameters - - workflow management systems - - galaxy From 95b236f10c4aff4bac736665c32d910fad75424a Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 18:41:18 +0100 Subject: [PATCH 25/29] reorder cwl articles --- src/content/docs/cwl/cwl-examples.md | 2 ++ src/content/docs/cwl/cwl-introduction.md | 4 +++- src/content/docs/cwl/cwl-metadata.md | 2 ++ src/content/docs/cwl/cwl-runner-installation.md | 4 ++-- src/content/docs/cwl/index.mdx | 2 ++ 5 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/content/docs/cwl/cwl-examples.md b/src/content/docs/cwl/cwl-examples.md index 3f15c6bcd..ebb5f667c 100644 --- a/src/content/docs/cwl/cwl-examples.md +++ b/src/content/docs/cwl/cwl-examples.md @@ -3,6 +3,8 @@ title: "CWL Examples" lastUpdated: 2024-02-05 authors: - caro-ott +sidebar: + order: 4 --- # CWL Examples diff --git a/src/content/docs/cwl/cwl-introduction.md b/src/content/docs/cwl/cwl-introduction.md index 
cade3a10a..eb5d243a9 100644 --- a/src/content/docs/cwl/cwl-introduction.md +++ b/src/content/docs/cwl/cwl-introduction.md @@ -3,11 +3,13 @@ title: "CWL Introduction" lastUpdated: 2024-01-18 authors: - caro-ott +sidebar: + order: 1 --- # What is CWL? -CWL is short for Common Workflow Language. +CWL is short for **Common Workflow Language**. It is an open standard for describing how to run command line tools and connect them to create workflows, which can then be incorporated in other workflows if needed (nested workflows). Descriptions in CWL are portable across a variety of platforms that support the CWL diff --git a/src/content/docs/cwl/cwl-metadata.md b/src/content/docs/cwl/cwl-metadata.md index 4fb13aab9..73f4aa080 100644 --- a/src/content/docs/cwl/cwl-metadata.md +++ b/src/content/docs/cwl/cwl-metadata.md @@ -3,6 +3,8 @@ title: "CWL Metadata" lastUpdated: 2024-02-05 authors: - caro-ott +sidebar: + order: 3 --- # CWL Metadata diff --git a/src/content/docs/cwl/cwl-runner-installation.md b/src/content/docs/cwl/cwl-runner-installation.md index f1754d262..d785ef640 100644 --- a/src/content/docs/cwl/cwl-runner-installation.md +++ b/src/content/docs/cwl/cwl-runner-installation.md @@ -3,10 +3,10 @@ title: "CWL Runner Installation" lastUpdated: 2024-01-18 authors: - caro-ott +sidebar: + order: 2 --- -# CWL runner installation - The recommended CWL runner is [cwltool](https://github.com/common-workflow-language/cwltool), the reference implementation for the CWL standards. 
diff --git a/src/content/docs/cwl/index.mdx b/src/content/docs/cwl/index.mdx index ff25a627e..73877e7d1 100644 --- a/src/content/docs/cwl/index.mdx +++ b/src/content/docs/cwl/index.mdx @@ -3,6 +3,8 @@ title: Computational Workflows lastUpdated: 2023-02-05 authors: - kevin-frey +sidebar: + order: 0 --- import { CardGrid } from '@astrojs/starlight/components'; import { LinkCard } from '@astrojs/starlight/components'; From 624fd72f6e7554314d0a515439b403f17c89e29c Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Wed, 13 Nov 2024 18:48:30 +0100 Subject: [PATCH 26/29] polish cwl installation --- .../docs/cwl/cwl-runner-installation.md | 48 ------------ .../docs/cwl/cwl-runner-installation.mdx | 78 +++++++++++++++++++ 2 files changed, 78 insertions(+), 48 deletions(-) delete mode 100644 src/content/docs/cwl/cwl-runner-installation.md create mode 100644 src/content/docs/cwl/cwl-runner-installation.mdx diff --git a/src/content/docs/cwl/cwl-runner-installation.md b/src/content/docs/cwl/cwl-runner-installation.md deleted file mode 100644 index d785ef640..000000000 --- a/src/content/docs/cwl/cwl-runner-installation.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: "CWL Runner Installation" -lastUpdated: 2024-01-18 -authors: - - caro-ott -sidebar: - order: 2 ---- - -The recommended CWL runner is [cwltool](https://github.com/common-workflow-language/cwltool), the -reference implementation for the CWL standards. - -## Installation Windows - -The installation can be done following the guide [here](https://cwltool.readthedocs.io/en/latest/#ms-windows-users). 
- - - Install Windows Subsystem for Linux from the Microsoft Store - - ![WSL](@images/guides/cwl/wsl.png) - - - Install Debian from the Microsoft Store - - ![Debian](@images/guides/cwl/debian.png) - - Set Debian as your default WSL 2 distro: `wsl --set-default debian` - - Install [Docker Desktop for Windows](https://desktop.docker.com/win/main/amd64/Docker%20Desktop%20Installer.exe) - - Start Docker Desktop and Navigate to Settings - - Select "Use WSL 2 based engine" in the general tab and apply - ![Docker WSL2](@images/guides/cwl/docker-wsl2.png) - - Select "Enable Integration with my default distro" in the resources tab under WSL Integration - ![Docker WSL Integration](@images/guides/cwl/docker-wsl-integration.png) - - Start WSL - - Follow the Instructions for Linux (Debian/Ubuntu) - - ## Installation Linux (Debian/Ubuntu) - - - Run `sudo apt-get update` - - Install Python 3 if it is not already preinstalled `sudo apt install python3` - - Install python virtual environment `sudo apt install python3.[your version here]-venv` - - Create a virtual environment `python3 -m venv env` (named env here, name can vary) - - Activate the virtual environment `source env/bin/activate` - - Install `cwltool` with pip `pip install cwltool` - - ## cwltool usage - - - If you are on Windows, start the WSL - - Activate the virtual environment `source env/bin/activate` - - Navigate to the results destination directory - - Run `cwltool` by specifying the CWL `Workflow` or `CommandLineTool` description file path and the (optional) inputs file path: `cwltool path/to/cwlfile.cwl path/to/jobfile.yml` (you can use relative or full paths) diff --git a/src/content/docs/cwl/cwl-runner-installation.mdx b/src/content/docs/cwl/cwl-runner-installation.mdx new file mode 100644 index 000000000..6859f47c7 --- /dev/null +++ b/src/content/docs/cwl/cwl-runner-installation.mdx @@ -0,0 +1,78 @@ +--- +title: "CWL Runner Installation" +lastUpdated: 2024-01-18 +authors: + - caro-ott +sidebar: + order: 2 +--- 
+ +The recommended CWL runner is [cwltool](https://github.com/common-workflow-language/cwltool), the reference implementation for the CWL standards. + +import { Steps } from '@astrojs/starlight/components'; +import { Tabs, TabItem } from '@astrojs/starlight/components'; + + + + + +The installation on Windows can be done following the guide [here](https://cwltool.readthedocs.io/en/latest/#ms-windows-users). + + + +1. Install Windows Subsystem for Linux from the Microsoft Store + + ![WSL](@images/guides/cwl/wsl.png) + +2. Install Debian from the Microsoft Store + + ![Debian](@images/guides/cwl/debian.png) + +3. Set Debian as your default WSL 2 distro: `wsl --set-default debian` +4. Install [Docker Desktop for Windows](https://desktop.docker.com/win/main/amd64/Docker%20Desktop%20Installer.exe) + - Start Docker Desktop and Navigate to Settings + - Select "Use WSL 2 based engine" in the general tab and apply + ![Docker WSL2](@images/guides/cwl/docker-wsl2.png) + - Select "Enable Integration with my default distro" in the resources tab under WSL Integration + ![Docker WSL Integration](@images/guides/cwl/docker-wsl-integration.png) +5. Start WSL +6. Follow the Instructions for Linux (Debian/Ubuntu) + + + + +{/* + + + */} + + + +For installation on Linux (Debian/Ubuntu): + + + +1. Run `sudo apt-get update` +2. Install Python 3 if it is not already preinstalled `sudo apt install python3` +3. Install python virtual environment `sudo apt install python3.[your version here]-venv` +4. Create a virtual environment `python3 -m venv env` (named env here, name can vary) +5. Activate the virtual environment `source env/bin/activate` +6. 
Install `cwltool` with pip `pip install cwltool` + + + + + + + + +## cwltool usage + +- If you are on Windows, start the WSL +- Activate the virtual environment `source env/bin/activate` +- Navigate to the results destination directory +- Run `cwltool` by specifying the CWL `Workflow` or `CommandLineTool` description file path and the (optional) inputs file path (you can use relative or full paths): + +```bash +cwltool path/to/cwlfile.cwl path/to/jobfile.yml +``` From 662d09c749aaf7dd04c9e8707017a1d7201ae7c1 Mon Sep 17 00:00:00 2001 From: Kevin F Date: Thu, 14 Nov 2024 09:22:15 +0100 Subject: [PATCH 27/29] fix "edit this page" url :bug: --- astro.config.mts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astro.config.mts b/astro.config.mts index 22c6cef24..ba03f006e 100644 --- a/astro.config.mts +++ b/astro.config.mts @@ -27,7 +27,7 @@ export default defineConfig({ Footer: '@components/starlight/Footer.astro', }, editLink: { - baseUrl: 'https://github.com/nfdi4plants/nfdi4plants.knowledgebase/edit/main/docs/' + baseUrl: 'https://github.com/nfdi4plants/nfdi4plants.knowledgebase/edit/main/' }, social: { github: 'https://github.com/nfdi4plants/nfdi4plants.knowledgebase', From 7d5afd762ac4f4b851b0f65e895e6eedc605126f Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Thu, 14 Nov 2024 09:31:43 +0100 Subject: [PATCH 28/29] add macos installation --- .../docs/cwl/cwl-runner-installation.mdx | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/content/docs/cwl/cwl-runner-installation.mdx b/src/content/docs/cwl/cwl-runner-installation.mdx index 6859f47c7..7b8cc9aa9 100644 --- a/src/content/docs/cwl/cwl-runner-installation.mdx +++ b/src/content/docs/cwl/cwl-runner-installation.mdx @@ -3,15 +3,20 @@ title: "CWL Runner Installation" lastUpdated: 2024-01-18 authors: - caro-ott + - dominik-brilhaus sidebar: order: 2 --- -The recommended CWL runner is [cwltool](https://github.com/common-workflow-language/cwltool), the 
reference implementation for the CWL standards. - import { Steps } from '@astrojs/starlight/components'; import { Tabs, TabItem } from '@astrojs/starlight/components'; +The recommended CWL runner is [cwltool](https://github.com/common-workflow-language/cwltool), the reference implementation for the CWL standards. + +:::tip +- Please explore the [cwltool docs](https://cwltool.readthedocs.io/en/latest/) for latest installation instructions. +- We also recommend to install a software container engine (e.g. [Docker](https://docs.docker.com/engine/install/) or [Podman](https://podman.io/getting-started/installation)). +::: @@ -41,11 +46,6 @@ The installation on Windows can be done following the guide [here](https://cwlto
-{/* - - - */} - For installation on Linux (Debian/Ubuntu): @@ -53,7 +53,7 @@ For installation on Linux (Debian/Ubuntu): 1. Run `sudo apt-get update` -2. Install Python 3 if it is not already preinstalled `sudo apt install python3` +2. Install Python 3 if it is not already installed `sudo apt install python3` 3. Install python virtual environment `sudo apt install python3.[your version here]-venv` 4. Create a virtual environment `python3 -m venv env` (named env here, name can vary) 5. Activate the virtual environment `source env/bin/activate` @@ -63,9 +63,21 @@ For installation on Linux (Debian/Ubuntu): + + + +1. Install [conda-forge](https://conda-forge.org/) +2. Install cwltool via `conda install -c conda-forge cwltool` + + + + + + + ## cwltool usage - If you are on Windows, start the WSL From e49127ef5f539e220d91cffed2700b11bbd9512a Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus Date: Thu, 14 Nov 2024 09:53:48 +0100 Subject: [PATCH 29/29] add minimal example --- .../docs/cwl/cwl-runner-installation.mdx | 58 +++++++++++++++++-- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/src/content/docs/cwl/cwl-runner-installation.mdx b/src/content/docs/cwl/cwl-runner-installation.mdx index 7b8cc9aa9..5c1f7d181 100644 --- a/src/content/docs/cwl/cwl-runner-installation.mdx +++ b/src/content/docs/cwl/cwl-runner-installation.mdx @@ -75,16 +75,66 @@ For installation on Linux (Debian/Ubuntu): - - - ## cwltool usage - If you are on Windows, start the WSL - Activate the virtual environment `source env/bin/activate` -- Navigate to the results destination directory - Run `cwltool` by specifying the CWL `Workflow` or `CommandLineTool` description file path and the (optional) inputs file path (you can use relative or full paths): ```bash cwltool path/to/cwlfile.cwl path/to/jobfile.yml ``` + +### Minimal example + +Here is a very simplified example to check, that your cwltool installation functions + + + +1. 
Store the following as `echo-tool.cwl` + + ```yaml + #!/usr/bin/env cwl-runner + + cwlVersion: v1.2 + + class: CommandLineTool + + baseCommand: [echo] + + stdout: message.txt + + inputs: + message: + type: string + inputBinding: + position: 1 + + outputs: + output: + type: stdout + ``` + +2. In the same folder, store the following as `job.yml` + + ```yaml + message: "I love ARCs and CWL" + ``` + +3. Now you can execute the tool + - providing an `input` directly via CLI: + + ``` + cwltool echo-tool.cwl --message "ARCs are great" + ``` + or + + - providing the `input` via the `job.yml`: + + ``` + cwltool echo-tool.cwl job.yml + ``` + +4. Both create an `output` file called `message.txt` with your specified message. + +