Skip to content

Latest commit

 

History

History
68 lines (53 loc) · 1.79 KB

README.md

File metadata and controls

68 lines (53 loc) · 1.79 KB

Config

  • When treccast/main.py is run, it loads the default configuration from defaults/general.yaml and defaults/{{year}}.yaml. NB! Do not modify these files unless you are certain that doing so will not break the current pipeline.
  • Defaults can be overridden by creating a new configuration file and specifying the desired parameters in it.
# General config
# How many paragraphs to retrieve.
k: 1000
# How many previous turns' results to add to the pool of results.
num_prev_turns: 0

# Output path
# Output file name, given without an extension.
output_name: filename


# Rewrite defaults
# Which of the organizers' query rewrites to use.
# Choose between null|automatic|manual (null defaults to raw).
query_rewrite: null

# If true and rewrite_path is specified, custom rewrites are loaded from that
# file.
rewrite: false
rewrite_path: null

# Query expansion via pseudo relevance feedback (PRF)
prf:
  # Feedback type; the currently implemented one is "RM3".
  type: null
  # Number of documents used for feedback.
  num_documents: 10
  # Number of highest-scoring tokens appended to the query.
  num_tokens: 10

# Retrieval defaults
# If a first-pass file is specified, results are loaded from it; otherwise
# passages are retrieved from the Elasticsearch index configured below.
first_pass_file: null
es:
  # Host and port of the Elasticsearch instance.
  host_name: "localhost:9204"
  # NOTE(review): k1 and b look like BM25 similarity parameters — confirm
  # against the code that consumes them.
  k1: 1.2
  b: 0.75
  # Name of the index and the field to query.
  index_name: "ms_marco_kilt_wapo_clean"
  field: "catch_all"


# Re-ranking parameters
# Choose option (t5); null leaves the option unset.
reranker: null

# Re-ranking with duoT5
# Change to true to use the pairwise duoT5 reranker, and specify the top k
# documents for reranking.
duot5: false
duot5_topk: 50

# ANCE dense retrieval
# Change to true and specify the path to the ANN index.
# Written as true/false rather than yes/no: yes/no are booleans only under
# YAML 1.1 and would be read as plain strings by a YAML 1.2 parser.
ance: false
ance_index: null

# Rewriter for re-ranking
# Specify the path to the rewrites to use for the re-ranking stage if they
# should differ from the ones used for first-pass retrieval.
# Explicit null (rather than a bare empty value) makes "not set" unambiguous.
reranker_rewrite_path: null