Skip to content

Commit

Permalink
feat(smrtlink): adds v25 SMRTLink config
Browse files Browse the repository at this point in the history
  • Loading branch information
BenTopping committed Jan 9, 2025
1 parent 83860ce commit b411d5a
Show file tree
Hide file tree
Showing 9 changed files with 352 additions and 260 deletions.
5 changes: 3 additions & 2 deletions app/exchanges/run_csv/pacbio_sample_sheet.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,10 @@ def generate_smrt_cell_settings(well) # rubocop:disable Metrics/MethodLength
'Indexes' => well.barcode_set, # 244d96c6-f3b2-4997-5ae3-23ed33ab925f
'Sample is indexed' => well.tagged?, # Set to True to Multiplex
'Bio Sample Name' => well.tagged? ? nil : well.bio_sample_name,
'Use Adaptive Loading' => false, # this will likely be a well default in future
'Use Adaptive Loading' => well.use_adaptive_loading,
'Consensus Mode' => 'molecule', # (default to molecule do we need a custom field)
'Same Barcodes on Both Ends of Sequence' => well.same_barcodes_on_both_ends_of_sequence
'Same Barcodes on Both Ends of Sequence' => well.same_barcodes_on_both_ends_of_sequence,
'Full Resolution Base Qual' => well.full_resolution_base_qual
}
end

Expand Down
31 changes: 31 additions & 0 deletions app/exchanges/run_csv/pacbio_sample_sheet_v25.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# frozen_string_literal: true

# A class specifically for SMRT-Link v25 Sample Sheets, based on the PacbioSampleSheet class
# See https://www.pacb.com/wp-content/uploads/SMRT-Link-User-Guide-v25.1.pdf (page 28) for details.

module RunCsv
# RunCsv::PacbioSampleSheetV25
class PacbioSampleSheetV25 < PacbioSampleSheet
# Generate a hash of settings for a single cell
# Overrides the method in the parent class
# Only difference is removal of 'Polymerase Kit' key
def generate_smrt_cell_settings(well) # rubocop:disable Metrics/MethodLength
  # The cell is named after the barcode of the tube behind the well's first used aliquot.
  tube_barcode = well.used_aliquots.first.source.tube.barcode # e.g. TRAC-2-7242

  {
    'Well Name' => tube_barcode,
    'Library Type' => well.library_type, # e.g. Standard
    'Movie Acquisition Time (hours)' => well.movie_acquisition_time, # e.g. 24
    'Insert Size (bp)' => well.insert_size, # e.g. 500
    # Hard-coded for now (maybe a run needs to be assigned a project in traction?)
    'Assign Data To Project' => 1,
    'Library Concentration (pM)' => well.library_concentration, # e.g. 250
    'Include Base Kinetics' => well.include_base_kinetics,
    'Indexes' => well.barcode_set, # e.g. 244d96c6-f3b2-4997-5ae3-23ed33ab925f
    'Sample is indexed' => well.tagged?, # true means the well is multiplexed
    # Left as nil for tagged wells; only untagged wells carry a bio sample name here.
    'Bio Sample Name' => (well.bio_sample_name unless well.tagged?),
    'Use Adaptive Loading' => well.use_adaptive_loading,
    # Always 'molecule' at present (a custom field may be needed later)
    'Consensus Mode' => 'molecule',
    'Same Barcodes on Both Ends of Sequence' => well.same_barcodes_on_both_ends_of_sequence,
    'Full Resolution Base Qual' => well.full_resolution_base_qual
  }
end
end
end
4 changes: 4 additions & 0 deletions app/resources/v1/pacbio/runs/well_resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ class WellResource < JSONAPI::Resource
# @return [String] the polymerase kit
# @!attribute [rw] library_type
# @return [String] the library type
# @!attribute [rw] use_adaptive_loading
# @return [Boolean] whether to use adaptive loading
# @!attribute [rw] full_resolution_base_qual
# @return [Boolean] whether to apply full resolution base qual
attributes :row, :column, :comment, :pacbio_plate_id, :position,
*Rails.configuration.pacbio_smrt_link_versions.options.keys

Expand Down
28 changes: 28 additions & 0 deletions config/pacbio_smrt_link_versions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,34 @@ default: &default
- v13_revio
- v13_1_revio
- v25_1_revio
use_adaptive_loading:
key: use_adaptive_loading
label: "Use Adaptive Loading"
default_value: "False"
validations:
presence: {}
inclusion:
in: *true_false
data_type: list
select_options: *select_true_false
versions:
- v13_revio
- v13_1_revio
- v25_1_revio
full_resolution_base_qual:
key: full_resolution_base_qual
label: "Full Resolution Base Qual"
default_value: "False"
validations:
presence: {}
inclusion:
in: *true_false
data_type: list
select_options: *select_true_false
versions:
- v13_revio
- v13_1_revio
- v25_1_revio

development: *default
test: *default
Expand Down
3 changes: 2 additions & 1 deletion config/pipelines/pacbio.yml
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ default: &default

v13_1_revio: *v13_revio

v25_1_revio: *v13_revio
v25_1_revio:
sample_sheet_class: PacbioSampleSheetV25

# For any EMQ message creation, the section and key name must be same as schema subject in bunny.yml
volume_tracking:
Expand Down
261 changes: 4 additions & 257 deletions spec/exchanges/run_csv/pacbio_sample_sheet_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,9 @@
'Library Concentration (pM)' => well.library_concentration.to_s,
'Include Base Kinetics' => well.include_base_kinetics.downcase == 'true', # is a string
'Polymerase Kit' => well.polymerase_kit,
'Use Adaptive Loading' => false,
'Use Adaptive Loading' => well.use_adaptive_loading == 'true',
'Consensus Mode' => 'molecule',
'Full Resolution Base Qual' => well.full_resolution_base_qual == 'true',

# specific to tagged wells
'Bio Sample Name' => '',
Expand Down Expand Up @@ -254,263 +255,9 @@
'Library Concentration (pM)' => well.library_concentration.to_s,
'Include Base Kinetics' => well.include_base_kinetics.downcase == 'true', # is a string
'Polymerase Kit' => well.polymerase_kit,
'Use Adaptive Loading' => false,
'Consensus Mode' => 'molecule',

# specific to untagged wells
'Bio Sample Name' => well.bio_sample_name,
'Sample is indexed' => false,
'Indexes' => '', # well.barcode_set
'Same Barcodes on Both Ends of Sequence' => '' # well.same_barcodes_on_both_ends_of_sequence.to_s
)
end
end

it 'must not have sample rows' do
expect(parsed_sample_sheet['Samples']).to be_empty
end
end
end

context 'v25_1_revio' do
before do
run.smrt_link_version = Pacbio::SmrtLinkVersion.find_by(name: 'v25_1_revio')
end

it 'must return a string' do
expect(sample_sheet_string.class).to eq String
end

it 'must have the three required sections' do
expect(sample_sheet_string).to include('[Run Settings]')
expect(sample_sheet_string).to include('[SMRT Cell Settings]')
expect(sample_sheet_string).to include('[Samples]')
end

it 'must have the three required sections in the correct order' do
run_settings_index = sample_sheet_string.index('[Run Settings]')
cell_settings_index = sample_sheet_string.index('[SMRT Cell Settings]')
samples_index = sample_sheet_string.index('[Samples]')

expect(run_settings_index).to be < cell_settings_index
expect(cell_settings_index).to be < samples_index
end

it 'must have the correct run settings' do
expect(parsed_sample_sheet['Run Settings']).to eq(
{
'Instrument Type' => 'Revio',
'Run Name' => run.name,
'Run Comments' => run.comments,
'Plate 1' => run.plates[0].sequencing_kit_box_barcode,
'Plate 2' => run.plates[1].sequencing_kit_box_barcode,
'CSV Version' => '1'
}
)
end

it 'must have the cells used listed on the same line as the section header' do
# get the line from sample_sheet_string containing [SMRT Cell Settings]
smrt_cell_settings_line = sample_sheet_string.lines.find { |line| line.include?('[SMRT Cell Settings]') }.strip
expect(smrt_cell_settings_line).to eq('[SMRT Cell Settings],1_A01,1_B01,2_A01')
end

it 'must have the correct SMRT cell settings' do
smrt_cell_settings = parsed_sample_sheet['SMRT Cell Settings']

# create a hash of plate_well_name => well for easy comparison
plate_wells = run.plates.flat_map(&:wells).each_with_object({}) do |well, hash|
plate_well_name = "#{well.plate.plate_number}_#{well.position_leading_zero}"
hash[plate_well_name] = well
end

# confirm that the wells are as expected
plate_well_names = plate_wells.keys
expect(plate_well_names).to contain_exactly('1_A01', '1_B01', '2_A01')
expect(smrt_cell_settings.keys).to match_array(plate_well_names)

plate_well_names.each do |plate_well_name|
well = plate_wells[plate_well_name]
expected_settings = {
# for all wells
'Well Name' => well.used_aliquots.first.source.tube.barcode,
'Library Type' => 'Standard',
'Movie Acquisition Time (hours)' => well.movie_acquisition_time.to_s,
'Insert Size (bp)' => well.insert_size.to_s,
'Assign Data To Project' => '1',
'Library Concentration (pM)' => well.library_concentration.to_s,
'Include Base Kinetics' => well.include_base_kinetics.downcase == 'true', # is a string
'Polymerase Kit' => well.polymerase_kit,
'Use Adaptive Loading' => false,
'Consensus Mode' => 'molecule',

# specific to tagged wells
'Bio Sample Name' => '',
'Sample is indexed' => true,
'Indexes' => well.barcode_set,
'Same Barcodes on Both Ends of Sequence' => true
}

expect(smrt_cell_settings[plate_well_name]).to eq(expected_settings)
end
end

context 'when the libraries are tagged' do
let(:well1) do
create(
:pacbio_well,
pre_extension_time: 2,
generate_hifi: 'In SMRT Link',
ccs_analysis_output: 'Yes',
row: 'A',
column: 1
)
end
let(:well2) do
create(
:pacbio_well,
pre_extension_time: 2,
generate_hifi: 'In SMRT Link',
ccs_analysis_output: 'No',
row: 'A',
column: 1
)
end
let(:well3) do
create(
:pacbio_well,
pre_extension_time: 2,
generate_hifi: 'In SMRT Link',
ccs_analysis_output: 'No',
row: 'B',
column: 1
)
end
let(:plate1_wells) { [well1] }
let(:plate2_wells) { [well2, well3] }
let(:plate1) { build(:pacbio_plate, wells: plate1_wells, plate_number: 1) }
let(:plate2) { build(:pacbio_plate, wells: plate2_wells, plate_number: 2) }
let(:run) { create(:pacbio_revio_run, plates: [plate1, plate2]) }

it 'must have the correct headers' do
# get the line from sample_sheet_string after the one containing [Samples]
sample_sheet_lines = sample_sheet_string.lines
samples_section_index = sample_sheet_lines.find_index { |line| line.include?('[Samples]') }
headers_line = sample_sheet_lines[samples_section_index + 1]
headers = headers_line.strip.split(',')
expected_headers = ['Bio Sample Name', 'Plate Well', 'Adapter', 'Adapter2']
expect(headers).to eq(expected_headers)
end

it 'must have the correct sample rows' do
# 5 pools per well
sample_data_1 = parsed_sample_sheet['Samples'][0]
sample_data_2 = parsed_sample_sheet['Samples'][1 * 5]
sample_data_3 = parsed_sample_sheet['Samples'][2 * 5]

# iterate through the samples under test
sample_expectations = [
[sample_data_1, well1],
[sample_data_2, well2],
[sample_data_3, well3]
]
sample_expectations.each do |sample_data, well|
expect(sample_data).to eq(
{
'Bio Sample Name' => well.base_used_aliquots.first.bio_sample_name,
'Plate Well' => well.plate_well_position,
'Adapter' => well.base_used_aliquots.first.tag.group_id,
'Adapter2' => well.base_used_aliquots.first.tag.group_id
}
)
end
end
end

context 'when the libraries are untagged' do
let(:pool1) { create_list(:pacbio_pool, 1, :untagged) }
let(:pool2) { create_list(:pacbio_pool, 1, :untagged) }
let(:pool3) { create_list(:pacbio_pool, 1, :untagged) }
let(:well1) do
create(
:pacbio_well,
pre_extension_time: 2,
generate_hifi: 'In SMRT Link',
ccs_analysis_output: 'Yes',
pools: pool1, # untagged pool
row: 'A',
column: 1
)
end
let(:well2) do
create(
:pacbio_well,
pre_extension_time: 2,
generate_hifi: 'In SMRT Link',
ccs_analysis_output: 'No',
pools: pool2, # untagged pool
row: 'A',
column: 1
)
end
let(:well3) do
create(
:pacbio_well,
pre_extension_time: 2,
generate_hifi: 'In SMRT Link',
ccs_analysis_output: 'No',
pools: pool3, # untagged pool
row: 'B',
column: 1
)
end
let(:plate1_wells) { [well1] }
let(:plate2_wells) { [well2, well3] }
let(:plate1) { build(:pacbio_plate, wells: plate1_wells, plate_number: 1) }
let(:plate2) { build(:pacbio_plate, wells: plate2_wells, plate_number: 2) }
let(:run) { create(:pacbio_revio_run, plates: [plate1, plate2]) }

it 'must have the correct headers' do
# get the line from sample_sheet_string after the one containing [Samples]
sample_sheet_lines = sample_sheet_string.lines
samples_section_index = sample_sheet_lines.find_index { |line| line.include?('[Samples]') }
headers_line = sample_sheet_lines[samples_section_index + 1]
headers = headers_line.strip.split(',')
expected_headers = ['Bio Sample Name', 'Plate Well', 'Adapter', 'Adapter2']
expect(headers).to eq(expected_headers)
end

it 'must have the wells added to the SMRT Cell Settings section' do
smrt_cell_settings = parsed_sample_sheet['SMRT Cell Settings']

# create a hash of plate_well_name => well for easy comparison
plate_wells = run.plates.flat_map(&:wells).each_with_object({}) do |well, hash|
plate_well_name = "#{well.plate.plate_number}_#{well.position_leading_zero}"
hash[plate_well_name] = well
end

# confirm that the wells are as expected
plate_well_names = plate_wells.keys
expect(plate_well_names).to contain_exactly('1_A01', '2_A01', '2_B01')
expect(smrt_cell_settings.keys).to match_array(plate_well_names)

# iterate through the wells under test
plate_well_names.each do |plate_well_name|
well_data = smrt_cell_settings[plate_well_name]
well = plate_wells[plate_well_name]

expect(well_data).to eq(
# for all wells
'Well Name' => well.used_aliquots.first.source.tube.barcode,
'Library Type' => 'Standard',
'Movie Acquisition Time (hours)' => well.movie_acquisition_time.to_s,
'Insert Size (bp)' => well.insert_size.to_s,
'Assign Data To Project' => '1',
'Library Concentration (pM)' => well.library_concentration.to_s,
'Include Base Kinetics' => well.include_base_kinetics.downcase == 'true', # is a string
'Polymerase Kit' => well.polymerase_kit,
'Use Adaptive Loading' => false,
'Use Adaptive Loading' => well.use_adaptive_loading == 'true',
'Consensus Mode' => 'molecule',
'Full Resolution Base Qual' => well.full_resolution_base_qual == 'true',

# specific to untagged wells
'Bio Sample Name' => well.bio_sample_name,
Expand Down
Loading

0 comments on commit b411d5a

Please sign in to comment.