From 1ffe19c6e66f01d620d5fc1615b68859d3f500d9 Mon Sep 17 00:00:00 2001 From: Aethor Date: Thu, 18 Jul 2024 17:26:17 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20CompNet/?= =?UTF-8?q?Renard@bd3d6d3e50e2105461386e34c8aa3e9c15e6329d=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- _sources/contributing.rst.txt | 7 +- _sources/extending.rst.txt | 6 +- _sources/pipeline.rst.txt | 47 +++- contributing.html | 7 +- extending.html | 6 +- genindex.html | 30 ++- objects.inv | Bin 1832 -> 1896 bytes pipeline.html | 46 +++- reference.html | 408 +++++++++++++++++++++++----------- searchindex.js | 2 +- 10 files changed, 413 insertions(+), 146 deletions(-) diff --git a/_sources/contributing.rst.txt b/_sources/contributing.rst.txt index b365357..940d906 100644 --- a/_sources/contributing.rst.txt +++ b/_sources/contributing.rst.txt @@ -36,4 +36,9 @@ the ``tests`` directory. We use ``pytest`` to test code, and also use ``hypothesis`` when applicable. If you open a patch, make sure that all tests are passing. In particular, do not rely on the CI, as it does not run time costly tests! Check for yourself locally, using -``RENARD_TEST_ALL=1 python -m pytest tests`` +``RENARD_TEST_ALL=1 python -m pytest tests``. Note that there are +specific tests and environment variable for optional dependencies such +as *stanza* (``RENARD_TEST_STANZA_OPTDEP``). These must be explicitely +set to ``1`` if you want to test optional dependencies, as +``RENARD_TEST_ALL=1`` does not enable test on these optional +dependencies. 
diff --git a/_sources/extending.rst.txt b/_sources/extending.rst.txt index b52edb8..2431c47 100644 --- a/_sources/extending.rst.txt +++ b/_sources/extending.rst.txt @@ -8,8 +8,10 @@ Creating new steps Usually, steps must implement at least four functions : -- :meth:`.PipelineStep.__init__`: is used to pass options at step init time -- :meth:`.PipelineStep.__call__`: is called at pipeline run time +- :meth:`.PipelineStep.__init__`: is used to pass options at step init + time. Options passed at step init time should be valid for a + collection of texts, and not be text specific. +- :meth:`.PipelineStep.__call__`: is called at pipeline run time. - :meth:`.PipelineStep.needs`: declares the set of informations needed from the pipeline state by this step. Each returned string should be an attribute of :class:`.PipelineState`. diff --git a/_sources/pipeline.rst.txt b/_sources/pipeline.rst.txt index daeb95e..74af798 100644 --- a/_sources/pipeline.rst.txt +++ b/_sources/pipeline.rst.txt @@ -68,7 +68,7 @@ In that case, the ``tokens`` requirements is fulfilled at run time. If you don't pass the parameter, Renard will throw the following exception: ->>> ValueError: ["step 1 (NLTKNamedEntityRecognizer) has unsatisfied needs (needs : {'tokens'}, available : {'text'})"] +>>> ValueError: ["step 1 (NLTKNamedEntityRecognizer) has unsatisfied needs. needs: {'tokens'}. available: {'text'}). missing: {'tokens'}."] For simplicity, one can use one of the preconfigured pipelines: @@ -252,6 +252,51 @@ graph to a directory. Meanwhile, dynamic graph to the Gephi format. +Custom Segmentation +------------------- + +The ``dynamic_window`` parameter of +:class:`.CoOccurencesGraphExtractor` determines the segmentation of +the dynamic networks, in number of interactions. In the example above, +a new graph will be created for each 20 interactions. 
+ +While one can rely on the arguments of the graph extractor of the +pipeline to determine the dynamic window, Renard allows to specify a +custom segmentation of a text with the ``dynamic_blocks`` +argument. When running a pipeline, you can cut your text however you +want and pass this argument instead of the usual text: + + +.. code-block:: python + + from renard.pipeline import Pipeline + from renard.pipeline.tokenization import NLTKTokenizer + from renard.pipeline.ner import NLTKNamedEntityRecognizer + from renard.pipeline.character_unification import GraphRulesCharacterUnifier + from renard.pipeline.graph_extraction import CoOccurrencesGraphExtractor + from renard.utils import block_bounds + + with open("./my_doc.txt") as f: + text = f.read() + + # let's suppose the 'cut_into_chapters' function cut the text into chapters. + chapters = cut_into_chapters(text) + + pipeline = Pipeline( + [ + NLTKTokenizer(), + NLTKNamedEntityRecognizer(), + GraphRulesCharacterUnifier(), + CoOccurrencesGraphExtractor(co_occurrences_dist=25, dynamic=True) + ] + ) + + # the 'block_bounds' function automatically extracts the boundaries of your + # block of text. + out = pipeline(text, dynamic_blocks=block_bounds(chapters)) + + + Multilingual Support ==================== diff --git a/contributing.html b/contributing.html index e637da4..33f032c 100644 --- a/contributing.html +++ b/contributing.html @@ -108,7 +108,12 @@

Code Quality Guidelineshypothesis when applicable. If you open a patch, make sure that all tests are passing. In particular, do not rely on the CI, as it does not run time costly tests! Check for yourself locally, using -RENARD_TEST_ALL=1 python -m pytest tests

+RENARD_TEST_ALL=1 python -m pytest tests. Note that there are +specific tests and environment variables for optional dependencies such +as stanza (RENARD_TEST_STANZA_OPTDEP). These must be explicitly +set to 1 if you want to test optional dependencies, as +RENARD_TEST_ALL=1 does not enable tests on these optional +dependencies.

diff --git a/extending.html b/extending.html index 1431c68..d23d424 100644 --- a/extending.html +++ b/extending.html @@ -84,8 +84,10 @@

Extending Renard

Usually, steps must implement at least four functions :

- + @@ -284,15 +290,15 @@

B

C

@@ -492,6 +500,8 @@

P

  • Pipeline (class in renard.pipeline.core) +
  • +
  • PipelineParameter (renard.pipeline.core.Pipeline attribute)
  • PipelineState (class in renard.pipeline.core)
  • diff --git a/objects.inv b/objects.inv index 26ef636df157117ebe64626835d8bdc66ca46e1b..c275dd82fb94b524c449ec4eb1d5b8f0d9b65444 100644 GIT binary patch delta 1801 zcmV+k2ln`=4(JY$bbq~@&vxS`6vp>{3iq_z=%llpU6V6`>`~svYwxm_fb6pBP|9|-<>kOpWbGj{aM<7~| zHB_520_Z+Nu?9t`*0NmI8dL=8YpFNHbP@?+bT4X+p>U9xA?llD@2IA?f|)TA@>e0B z#53?)MwKd8WlpXBC)2ANs`Jeq&qY!4wUx?k6~CTgBr-yB$`~QdWr5y^Vp6e^ml`2< zfA?th6zI_3WPfOhS2r`kTwM7L2;~K#tXy-#=n@#hKG|knxc-op6kC!D&e)sl>o*7# z6SNorWLXLX#C`&x~5KW za=nrpX0!a7{DK#5$R<^6mU==Z7<-%e0VasAY;zO!m@V6iFO*H+3xGmRt|BU1gEI1c zI+BgYwyDT2o9QVEUxy1*1u z=5?jivw!7vmDq_QBxE0LPUyNBG7}D(VpFPP1Tud*h9IZIF$9?zS;#~CALiHW+V&qF z%xIe)xG2?csxd@#EP6qS_<08k7o3=ll_@NU(om^L-f?_th>=~)H;_LQlPQ!%97uw! zd9(eC;w(%?9c8Ho5Hn3*f`-W&w^~G{*sHu5t+L9Q0QqpA%zM zBAzK)C1Q1lj7ui+=S0lI1B?W;on1Z;PeFI91m0PLCi&sXNF=XfLJw!N$zb-1S0#g_ zP-Uq_nc}iP*FvWnX=RLK8>PmPN-!!DT2fU&pOZ~z4LhBPWJ6(sUO^SNkU^Cun`uc& zmVX{girgig0C78%6HMfe=Y$uruQ_2Q*uAV&b^}u1ioX6MD8asj@9almQI;FU;{7*< zjW^>69qptMW}K}?z-c_TsA#_L@Q}=H7qPLv^AU)0poc)-eM@Xwv~{~|GAt+4s3w^K z9y-caAzJQD;3t~Z1_&^pU6Pd%jqQy-ZQPllTsiVVrcFOQDs*)@|jHsk?*MEov z-d^0SvUW|or&}oQ?2+025puD%k2Zx4@hA&Dq?f3;A-q71OA5!~OZ)uW!NX$d$S9|c zBPViJY^1XA`@2VnMcA}&i@eqGl7YJ=`I3US@*xFxW8;{jHs#6;P8>wjHR2SoVSIS{ zBUMB?%ZiFfW7LRYJWEAS?eoaEsegT*8#%Qr>tA7V8`VDU?Ehr->x}HSR$6Qq^-`Ih z*7kxQ`tO7Dm=HADVSyqz!v%?;{1>Evw_}h@q&u5CO{4$~E-m}Z{{OD|H`q#}EVOqn zQE_G&r=r|3j>OwxoEtg*_I6%rk0uEY)uJ{*Q8op5ZV$rS$5P!fK#9IL6 z6>mD;Htp@y{+%Ri9h<){Tsd-%ds6Y^>di+Et1mxp$K)%6Gmj@c!aJJsm5@bq3xl%? 
znYO&dWQ(@Ip6%X$ctAcg=!?ah&|d)QLSF%#KYZ!w43f2fO*rF1H-|dPzN|&eo(b{F zRr@kp`v9;Bj8E;`X`$ijJbwsnj!`7e4r9o;I5a{=S!;xcH`53dncdiDvJh{w4(;4W zzH1*BlR%Z=bjkMKUY4LlElrZ1ZT6Kg)1y>dmHpO#eow!F*5K=CZ(OJe1HiN>$WST0 zz`@H)$=UV}JTLR-{*CD5^h%ih+L8f@m+*GX*;}jr?(4gPm@jUs+<(|RgEyk)E}Ym` ze@Cy`wQ{G%B`;t{6fl#qcu%bsU}ed&*9~ZDpHN*c-80V1Wng0kRGOYwCi?=(LmO9= zd-OIJyiM)+daiWwwh*};?8EY=_WbssWClA8ym-;3FLVv=`Z|wI$7X92YV9aAZJJYj z+o~&djefTuXG66XeSc8n6R=!tVXj5X{qLWD)!fvp?RKUK?LQcR$s6qJ^!CD!I74|S zIFy)XdRSYInhoV$v!P_RwJ+qKTv)Zg>b`JoK03R*OYGL9Lz#Y^YN|hNVBRHAvjf-# rm505F1^ig7ANWJHV>g%|v;BAGe0MD4%3RHb`~HEhVfH^z`vT)5$-{#t delta 1736 zcmV;(1~>WW4yX>0bbrO0&z9RJ6vp>{3iq_z(Mhw+u9;5Lo+i`j%=Bc*!P2$O84L=b z#LlX((bwyf6of4Qm*Ilam2C{aPZA%O3z8HFs;Xk4XuTwjVO&q^&6}018V=J}Qw4KS zAWA5fX2rkvixpfFw3H!FUohTckU`$(!^q0S!JQa^hEHQq zmsBsu;6_gr1 z*__8u7-LK6rmm%Oy~n9oWIDBbYE$M>V#rx?=J0VzBWL6|uaVJR=LN>_@p+}(7Cx_% z>Rzd!_iM+$SAOo>nLis}MT+6C7kOI|+b^(>dd9pcaQvbJrlreP-+KsG!yCi{@ z!BtyShJof$HfGjs?&a3hI?tJQ%zU&istYh*qfJy)6F}bNnP#g2a2}66)wJAf@sQ1JSFx$S z3lT_iV1Pim{g=43Xyq5gdr9>;s#!)rfR1)mh*2BjH;dhG81KAN#@=;$`eYF#lRVf% zBmd0_G;$8Pb_5Y|A^2FXGf$7IdwOAQ+<%gzy`d%lD9c8*?R2|Sedqs_v7sskjSg3l zBDjzhC(_j;2Tz)O8%h5x%l{!fzEl_gwe3T@HnxSM%Aw*|ltNm_jFQ5K%_y0~jOmP) zut7^xf1Ps2qRZ@@!Dvt=TQnL_$>pvA3B0|8ozvY6dQVqS-MJ0A%_Ed@;T}Ya?SJAy z7P?EXQE^>(g_@Rv4#Q{eX|=sayYi7yP8&v!2>sj~VQo!3W zN+!{r?R_m$fdQYE{po)H-2RNV(jW`%ooiH_S%#@7cMK!(b{OU+_P@J%$lOAhP@9Ht zG}CP)eA*WOn<9~rve7^?s zk=+`~Pg^mC%HYfs2v6{ip<*so)!r}>yhf%i$C%=&D{wo>_wOH&4-^KElHp-^b#d1?iYhtEXy7;yK z{*HbHV<6Ob!MIqfM1W~kkzJ+y0(&n6WoNs)@`9Bw{aeP_>9w@0su>58E)ndQi?>_D z-Pa!qV!ZgF;&*A8s9;SraPW7#f(D5PXrRgxYMXU{60wbIDgx$#Qh$mrr4fs%Hr1O- zmhSTHm$$X2w|gZMSQ`-4QJcQdH@NHTJhlfmS=iv^u14FY-Mx0Lwxzz&Z|>`4SFJV5<`yV~E^%|GPx54r|3$XL0d+FPq_yK2Ez7^b+*k)!Itq+>*%D2sS zC6kqVZT#fK>dnpe4?*ANgEx>r#ch<|m+2#aTm5+n({17~*@InCdDz&|B@V^Graph Extraction
-
  • Dynamic Graphs
  • +
  • Dynamic Graphs +
  • Multilingual Support
  • @@ -152,7 +155,7 @@

    The Pipelinetokens requirements is fulfilled at run time. If you don’t pass the parameter, Renard will throw the following exception:

    -
    >>> ValueError: ["step 1 (NLTKNamedEntityRecognizer) has unsatisfied needs (needs : {'tokens'}, available : {'text'})"]
    +
    >>> ValueError: ["step 1 (NLTKNamedEntityRecognizer) has unsatisfied needs. needs: {'tokens'}. available: {'text'}). missing: {'tokens'}."]
     

    For simplicity, one can use one of the preconfigured pipelines:

    @@ -318,6 +321,45 @@

    Dynamic GraphsPipelineState.export_graph_to_gexf() correctly exports the dynamic graph to the Gephi format.

    +
    +

    Custom Segmentation

    +

    The dynamic_window parameter of +CoOccurrencesGraphExtractor determines the segmentation of +the dynamic networks, in number of interactions. In the example above, +a new graph will be created every 20 interactions.

    +

    While one can rely on the arguments of the graph extractor of the +pipeline to determine the dynamic window, Renard allows you to specify a +custom segmentation of a text with the dynamic_blocks +argument. When running a pipeline, you can cut your text however you +want and pass this argument along with the usual text:

    +
    from renard.pipeline import Pipeline
    +from renard.pipeline.tokenization import NLTKTokenizer
    +from renard.pipeline.ner import NLTKNamedEntityRecognizer
    +from renard.pipeline.character_unification import GraphRulesCharacterUnifier
    +from renard.pipeline.graph_extraction import CoOccurrencesGraphExtractor
    +from renard.utils import block_bounds
    +
    +with open("./my_doc.txt") as f:
    +    text = f.read()
    +
    +# let's suppose the 'cut_into_chapters' function cuts the text into chapters.
    +chapters = cut_into_chapters(text)
    +
    +pipeline = Pipeline(
    +    [
    +        NLTKTokenizer(),
    +        NLTKNamedEntityRecognizer(),
    +        GraphRulesCharacterUnifier(),
    +        CoOccurrencesGraphExtractor(co_occurrences_dist=25, dynamic=True)
    +    ]
    +)
    +
    +# the 'block_bounds' function automatically extracts the boundaries of your
    +# block of text.
    +out = pipeline(text, dynamic_blocks=block_bounds(chapters))
    +
    +
    +

    Multilingual Support

    diff --git a/reference.html b/reference.html index 2572883..b907050 100644 --- a/reference.html +++ b/reference.html @@ -148,6 +148,14 @@

    Pipeline +
    +PipelineParameter
    +

    all the possible parameters of the whole pipeline, that are +shared between steps

    +

    alias of Literal[‘lang’, ‘progress_reporter’, ‘character_ner_tag’]

    +
    +
    __call__(text=None, ignored_steps=None, **kwargs)
    @@ -203,9 +211,10 @@

    Pipeline -
    -_pipeline_init_steps(ignored_steps=None)
    -
    +
    +_pipeline_init_steps_(ignored_steps=None)
    +

    Initialise steps with global pipeline parameters.

    +
    Parameters

    ignored_steps (Optional[List[str]]) – a list of steps production. All steps with a production in ignored_steps will be ignored.

    @@ -278,15 +287,15 @@

    Pipeline

    -class renard.pipeline.core.PipelineState(text, chapters=None, tokens=None, chapter_tokens=None, sentences=None, quotes=None, speakers=None, sentences_polarities=None, entities=None, corefs=None, characters=None, character_network=None)
    +class renard.pipeline.core.PipelineState(text, dynamic_blocks=None, tokens=None, char2token=None, sentences=None, quotes=None, speakers=None, sentences_polarities=None, entities=None, corefs=None, characters=None, character_network=None)

    The state of a pipeline, annotated in a Pipeline lifetime

    Parameters
    @@ -459,7 +472,7 @@

    Pipeline State
    -plot_graph_to_file(path, name_style='most_frequent', layout=None, fig=None)
    +plot_graph_to_file(path, name_style='most_frequent', layout=None, fig=None, node_kwargs=None, edge_kwargs=None, label_kwargs=None)

    Plot self.character_graph using reasonable parameters, and save the produced figure to a file

    @@ -470,6 +483,9 @@

    Pipeline StateUnion[Dict[Character, Tuple[float, float]], Dict[Character, ndarray], None]) – pre-computed graph layout

  • fig (Optional[Figure]) – if specified, this matplotlib figure will be used for plotting

  • +
  • node_kwargs (Optional[Dict[str, Any]]) – passed to nx.draw_networkx_nodes()

  • +
  • edge_kwargs (Optional[Dict[str, Any]]) – passed to nx.draw_networkx_edges()

  • +
  • label_kwargs (Optional[Dict[str, Any]]) – passed to nx.draw_networkx_labels()

  • path (str) –

  • @@ -478,7 +494,7 @@

    Pipeline State
    -plot_graphs_to_dir(directory, name_style='most_frequent', cumulative=False, stable_layout=False, layout=None)
    +plot_graphs_to_dir(directory, name_style='most_frequent', cumulative=False, stable_layout=False, layout=None, node_kwargs=None, edge_kwargs=None, label_kwargs=None)

    Plot self.character_graph using reasonable default parameters, and save the produced figures in the specified directory.

    @@ -494,6 +510,9 @@

    Pipeline StateUnion[Dict[Character, Tuple[float, float]], Dict[Character, ndarray], None]) – pre-computed graph layout

    +
  • node_kwargs (Optional[List[Dict[str, Any]]]) – passed to nx.draw_networkx_nodes()

  • +
  • edge_kwargs (Optional[List[Dict[str, Any]]]) – passed to nx.draw_networkx_edges()

  • +
  • label_kwargs (Optional[List[Dict[str, Any]]]) – passed to nx.draw_networkx_labels()

  • directory (str) –

  • @@ -578,16 +597,24 @@

    Pipeline Steps
    -_pipeline_init_(lang, progress_reporter)
    -

    Set the step configuration that is common to the whole pipeline.

    +_pipeline_init_(lang, progress_reporter, **kwargs) +

    Set the step configuration that is common to the whole +pipeline.

    Parameters
      -
    • lang (str) – ISO 639-3 language string

    • -
    • progress_report

    • +
    • lang (str) – the lang of the whole pipeline

    • progress_reporter (ProgressReporter) –

    • +
    • kwargs – additional pipeline parameters.

    +
    Return type
    +

    Optional[Dict[Literal[‘lang’, ‘progress_reporter’, ‘character_ner_tag’], Any]]

    +
    +
    Returns
    +

    a step can return a dictionary of pipeline params if +it wishes to modify some of these.

    +

    @@ -743,16 +770,14 @@

    NLTKTokenizer
    class renard.pipeline.tokenization.NLTKTokenizer
    -

    Construct a nltk word tokenizer

    +

    A NLTK-based tokenizer

    -__call__(text, chapters=None, **kwargs)
    -
    +__call__(text, **kwargs) +

    Call self as a function.

    +
    Parameters
    -
      -
    • text (str) –

    • -
    • chapters (Optional[List[str]]) –

    • -
    +

    text (str) –

    Return type

    Dict[str, Any]

    @@ -766,6 +791,26 @@

    NLTKTokenizerPipelineStep with a given configuration.

    +
    +
    +_pipeline_init_(lang, **kwargs)
    +

    Set the step configuration that is common to the whole +pipeline.

    +
    +
    Parameters
    +
      +
    • lang (str) – the lang of the whole pipeline

    • +
    • progress_reporter

    • +
    • kwargs – additional pipeline parameters.

    • +
    +
    +
    Returns
    +

    a step can return a dictionary of pipeline params if +it wishes to modify some of these.

    +
    +
    +
    +
    needs()
    @@ -958,16 +1003,21 @@

    BertNamedEntityRecognizer
    -_pipeline_init_(lang, progress_reporter)
    -

    Set the step configuration that is common to the whole pipeline.

    +_pipeline_init_(lang, **kwargs) +

    Set the step configuration that is common to the whole +pipeline.

    Parameters
      -
    • lang (str) – ISO 639-3 language string

    • -
    • progress_report

    • -
    • progress_reporter (ProgressReporter) –

    • +
    • lang (str) – the lang of the whole pipeline

    • +
    • progress_reporter

    • +
    • kwargs – additional pipeline parameters.

    +
    Returns
    +

    a step can return a dictionary of pipeline params if +it wishes to modify some of these.

    +

    @@ -1185,7 +1235,7 @@

    Coreference ResolutionBertCoreferenceResolver

    -class renard.pipeline.corefs.BertCoreferenceResolver(model=None, hugginface_model_id=None, batch_size=1, device='auto', tokenizer=None, block_size=512)
    +class renard.pipeline.corefs.BertCoreferenceResolver(model=None, hugginface_model_id=None, batch_size=1, device='auto', tokenizer=None, block_size=512, hierarchical_merging=False)

    A coreference resolver using BERT. Loosely based on ‘End-to-end Neural Coreference Resolution’ (Lee et al. 2017) and ‘BERT for coreference resolution’ (Joshi et al. 2019).

    @@ -1198,6 +1248,7 @@

    BertCoreferenceResolver

    device (Literal[‘auto’, ‘cuda’, ‘cpu’]) –

  • tokenizer (Optional[PreTrainedTokenizerFast]) –

  • block_size (int) –

  • +
  • hierarchical_merging (bool) –

  • @@ -1217,7 +1268,7 @@

    BertCoreferenceResolver
    -__init__(model=None, hugginface_model_id=None, batch_size=1, device='auto', tokenizer=None, block_size=512)
    +__init__(model=None, hugginface_model_id=None, batch_size=1, device='auto', tokenizer=None, block_size=512, hierarchical_merging=False)

    Note

    In the future, only mentions_per_tokens, @@ -1234,6 +1285,10 @@

    BertCoreferenceResolver

    device (Literal[‘auto’, ‘cuda’, ‘cpu’]) – computation device

  • block_size (int) – size of blocks to pass to the coreference model

  • +
  • hierarchical_merging (bool) – if True, attempts to use +tibert’s hierarchical merging feature. In that case, +blocks of size block_size are merged to perform +inference on the whole document.

  • model (Optional[BertForCoreferenceResolution]) –

  • hugginface_model_id (Optional[str]) –

  • tokenizer (Optional[PreTrainedTokenizerFast]) –

  • @@ -1244,16 +1299,21 @@

    BertCoreferenceResolver
    -_pipeline_init_(lang, progress_reporter)
    -

    Set the step configuration that is common to the whole pipeline.

    +_pipeline_init_(lang, **kwargs) +

    Set the step configuration that is common to the whole +pipeline.

    Parameters
      -
    • lang (str) – ISO 639-3 language string

    • -
    • progress_report

    • -
    • progress_reporter (ProgressReporter) –

    • +
    • lang (str) – the lang of the whole pipeline

    • +
    • progress_reporter

    • +
    • kwargs – additional pipeline parameters.

    +
    Returns
    +

    a step can return a dictionary of pipeline params if +it wishes to modify some of these.

    +

    @@ -1325,14 +1385,14 @@

    SpacyCorefereeCoreferenceResolver
    -__call__(text, tokens, chapter_tokens=None, **kwargs)
    +__call__(text, tokens, dynamic_blocks_tokens=None, **kwargs)

    Call self as a function.

    Parameters
    • text (str) –

    • tokens (List[str]) –

    • -
    • chapter_tokens (Optional[List[List[str]]]) –

    • +
    • dynamic_blocks_tokens (Optional[List[List[str]]]) –

    Return type
    @@ -1379,15 +1439,20 @@

    SpacyCorefereeCoreferenceResolver
    _pipeline_init_(lang, progress_reporter)
    -

    Set the step configuration that is common to the whole pipeline.

    +

    Set the step configuration that is common to the whole +pipeline.

    Parameters
      -
    • lang (str) – ISO 639-3 language string

    • -
    • progress_report

    • +
    • lang (str) – the lang of the whole pipeline

    • progress_reporter (ProgressReporter) –

    • +
    • kwargs – additional pipeline parameters.

    +
    Returns
    +

    a step can return a dictionary of pipeline params if +it wishes to modify some of these.

    +

    @@ -1706,6 +1771,27 @@

    NaiveCharacterUnifier

    +
    +
    +_pipeline_init_(lang, character_ner_tag, **kwargs)
    +

    Set the step configuration that is common to the whole +pipeline.

    +
    +
    Parameters
    +
      +
    • lang (str) – the lang of the whole pipeline

    • +
    • progress_reporter

    • +
    • kwargs – additional pipeline parameters.

    • +
    • character_ner_tag (str) –

    • +
    +
    +
    Returns
    +

    a step can return a dictionary of pipeline params if +it wishes to modify some of these.

    +
    +
    +
    +
    needs()
    @@ -1836,16 +1922,22 @@

    GraphRulesCharacterUnifier
    -_pipeline_init_(lang, progress_reporter)
    -

    Set the step configuration that is common to the whole pipeline.

    +_pipeline_init_(lang, character_ner_tag, **kwargs) +

    Set the step configuration that is common to the whole +pipeline.

    Parameters
      -
    • lang (str) – ISO 639-3 language string

    • -
    • progress_report

    • -
    • progress_reporter (ProgressReporter) –

    • +
    • lang (str) – the lang of the whole pipeline

    • +
    • progress_reporter

    • +
    • kwargs – additional pipeline parameters.

    • +
    • character_ner_tag (str) –

    +
    Returns
    +

    a step can return a dictionary of pipeline params if +it wishes to modify some of these.

    +

    @@ -2000,16 +2092,21 @@

    Speaker Attribution
    -_pipeline_init_(lang, progress_reporter)
    -

    Set the step configuration that is common to the whole pipeline.

    +_pipeline_init_(lang, **kwargs) +

    Set the step configuration that is common to the whole +pipeline.

    Parameters
      -
    • lang (str) – ISO 639-3 language string

    • -
    • progress_report

    • -
    • progress_reporter (ProgressReporter) –

    • +
    • lang (str) – the lang of the whole pipeline

    • +
    • progress_reporter

    • +
    • kwargs – additional pipeline parameters.

    +
    Returns
    +

    a step can return a dictionary of pipeline params if +it wishes to modify some of these.

    +

    @@ -2044,7 +2141,7 @@

    Graph Extraction

    -class renard.pipeline.graph_extraction.CoOccurrencesGraphExtractor(co_occurrences_dist, dynamic=False, dynamic_window=None, dynamic_overlap=0, co_occurences_dist=None)
    +class renard.pipeline.graph_extraction.CoOccurrencesGraphExtractor(co_occurrences_dist=None, dynamic=False, dynamic_window=None, dynamic_overlap=0, additional_ner_classes=None)

    A simple character graph extractor using co-occurrences

    Parameters
    @@ -2053,21 +2150,26 @@

    CoOccurrencesGraphExtractorbool) –

  • dynamic_window (Optional[int]) –

  • dynamic_overlap (int) –

  • -
  • co_occurences_dist (Union[int, Tuple[int, Literal[‘tokens’, ‘sentences’]], None]) –

  • +
  • additional_ner_classes (Optional[List[str]]) –

  • -__call__(characters, sentences, chapter_tokens=None, sentences_polarities=None, **kwargs)
    -

    Extract a characters graph

    +__call__(characters, sentences, char2token=None, dynamic_blocks=None, sentences_polarities=None, entities=None, co_occurrences_blocks=None, **kwargs) +

    Extract a co-occurrence character network.

    Parameters
      +
    • co_occurrences_blocks (Optional[Tuple[List[Tuple[int, int]], Literal[‘characters’, ‘tokens’]]]) – custom blocks where +co-occurrences should be recorded. For example, this can +be used to perform chapter level co-occurrences.

    • characters (Set[Character]) –

    • sentences (List[List[str]]) –

    • -
    • chapter_tokens (Optional[List[List[str]]]) –

    • +
    • char2token (Optional[List[int]]) –

    • +
    • dynamic_blocks (Optional[Tuple[List[Tuple[int, int]], Literal[‘characters’, ‘tokens’]]]) –

    • sentences_polarities (Optional[List[float]]) –

    • +
    • entities (Optional[List[NEREntity]]) –

    Return type
    @@ -2083,7 +2185,7 @@

    CoOccurrencesGraphExtractor
    -__init__(co_occurrences_dist, dynamic=False, dynamic_window=None, dynamic_overlap=0, co_occurences_dist=None)
    +__init__(co_occurrences_dist=None, dynamic=False, dynamic_window=None, dynamic_overlap=0, additional_ner_classes=None)
    Parameters

  • dynamic_window (Optional[int]) – dynamic window, in number of interactions. a dynamic window of n means that each returned graph will be formed by n interactions.

  • dynamic_overlap (int) – overlap, in number of interactions.

  • -
  • co_occurences_dist (Union[int, Tuple[int, Literal[‘tokens’, ‘sentences’]], None]) – same as co_occurrences_dist. -Included because of retro-compatibility, as it was a -previously included typo.

  • +
  • additional_ner_classes (Optional[List[str]]) – if specified, will include +entities other than characters in the final graph. No +attempt will be made at unifying the entities (for example, +“New York” will be distinct from “New York City”).

  • -
    -_extract_dynamic_graph(mentions, window, overlap, chapter_tokens, sentences, sentences_polarities)
    -
    -

    Note

    -

    only one of window or chapter_tokens should be specified

    -
    +
    +_create_co_occurrences_blocks(sentences, mentions)
    +

    Create co-occurrences blocks using +self.co_occurrences_dist. All entities within a block are +considered as co-occurring.

    Parameters
      -
    • mentions (List[Tuple[Character, NEREntity]]) – A list of character mentions, ordered by appearance

    • -
    • window (Optional[int]) – dynamic window, in tokens.

    • -
    • overlap (int) – window overlap

    • -
    • chapter_tokens (Optional[List[List[str]]]) – list of tokens for each chapter. If -given, one graph will be extracted per chapter.

    • sentences (List[List[str]]) –

    • -
    • sentences_polarities (Optional[List[float]]) –

    • +
    • mentions (List[Tuple[Any, NEREntity]]) –

    Return type
    -

    List[Graph]

    +

    Tuple[List[Tuple[int, int]], Literal[‘characters’, ‘tokens’]]

    -
    -_extract_gephi_dynamic_graph(mentions, sentences)
    -
    +
    +_extract_dynamic_graph(mentions, window, overlap, dynamic_blocks, sentences, sentences_polarities, co_occurrences_blocks)
    +
    +

    Note

    +

    only one of window or dynamic_blocks should be specified

    +
    +
    Parameters
      -
    • mentions (List[Tuple[Character, NEREntity]]) – A list of character mentions, ordered by appearance

    • +
    • mentions (List[Tuple[Any, NEREntity]]) – A list of entity mentions, ordered by +appearance, each of the form (KEY MENTION). KEY +determines the unicity of the entity.

    • +
    • window (Optional[int]) – dynamic window, in tokens.

    • +
    • overlap (int) – window overlap

    • +
    • dynamic_blocks (Optional[Tuple[List[Tuple[int, int]], Literal[‘characters’, ‘tokens’]]]) – boundaries of each dynamic block

    • +
    • co_occurrences_blocks (Optional[Tuple[List[Tuple[int, int]], Literal[‘characters’, ‘tokens’]]]) – boundaries of each co-occurrences blocks

    • sentences (List[List[str]]) –

    • +
    • sentences_polarities (Optional[List[float]]) –

    Return type
    -

    Graph

    +

    List[Graph]

    -_extract_graph(mentions, sentences, sentences_polarities)
    +_extract_graph(mentions, sentences, sentences_polarities, co_occurrences_blocks)
    Parameters
      -
    • mentions (List[Tuple[Character, NEREntity]]) – A list of character mentions, ordered by -appearance

    • +
    • mentions (List[Tuple[Any, NEREntity]]) – A list of entity mentions, ordered by +appearance, each of the form (KEY MENTION). KEY +determines the unicity of the entity.

    • sentences (List[List[str]]) – if specified, sentences_polarities must be specified as well.

    • sentences_polarities (Optional[List[float]]) – if specified, sentences must @@ -2181,32 +2290,11 @@

      CoOccurrencesGraphExtractor -
      -_mentions_interact(mention_1, mention_2, sentences=None)
      -

      Check if two mentions are close enough to be in interactions.

      -
      -

      Note

      -

      the attribute self.co_occurrences_dist is used to know wether mentions are in co_occurences

      -
      -
      -
      Parameters
      -
        -
      • mention_1 (NEREntity) –

      • -
      • mention_2 (NEREntity) –

      • -
      • sentences (Optional[List[List[str]]]) –

      • +
      • co_occurrences_blocks (Optional[Tuple[List[Tuple[int, int]], Literal[‘characters’, ‘tokens’]]]) – only unit ‘tokens’ is accepted.

      Return type
      -

      bool

      -
      -
      Returns
      -

      a boolean indicating wether the two mentions are co-occuring

      +

      Graph

    @@ -2277,16 +2365,20 @@

    CoOccurrencesGraphExtractor

    -class renard.pipeline.graph_extraction.ConversationalGraphExtractor(conversation_dist)
    -

    A graph extractor using conversation between characters

    +class renard.pipeline.graph_extraction.ConversationalGraphExtractor(graph_type, conversation_dist=None, ignore_self_mention=True) +

    A graph extractor using conversation between characters or +mentions.

    Note

    -

    This is an early version, that only supports static graphs -for now.

    +

    Does not support dynamic networks yet.

    Parameters
    -

    conversation_dist (Union[int, Tuple[int, Literal[‘tokens’, ‘sentences’]]]) –

    +
      +
    • graph_type (Literal[‘conversation’, ‘mention’]) –

    • +
    • conversation_dist (Union[int, Tuple[int, Literal[‘tokens’, ‘sentences’]], None]) –

    • +
    • ignore_self_mention (bool) –

    • +
    @@ -2310,11 +2402,22 @@

    ConversationalGraphExtractor
    -__init__(conversation_dist)
    -

    Initialize the PipelineStep with a given configuration.

    -
    +__init__(graph_type, conversation_dist=None, ignore_self_mention=True) +
    Parameters
    -

    conversation_dist (Union[int, Tuple[int, Literal[‘tokens’, ‘sentences’]]]) –

    +
      +
    • graph_type (Literal[‘conversation’, ‘mention’]) – either ‘conversation’ or ‘mention’. +‘conversation’ extracts an undirected graph with +interactions being extracted from the conversations +occurring between characters. ‘mention’ extracts a +directed graph where interactions are character mentions +of one another in quoted speech.

    • +
    • conversation_dist (Union[int, Tuple[int, Literal[‘tokens’, ‘sentences’]], None]) – must be supplied if graph_type is +‘conversation’. The distance between two quotations for +them to be considered as interacting.

    • +
    • ignore_self_mention (bool) – if True, self mentions are +ignored for graph_type=='mention'

    • +
    @@ -2570,6 +2673,16 @@

    Hypocorism

    Utils

    +
    +
    +renard.utils.BlockBounds
    +

    A BlockBounds delimits blocks in either raw text (“characters”) or +tokenized text (“tokens”). It has the following form:

    +

    ([(block start, block end), …], unit)

    +

    see block_bounds() to easily create BlockBounds

    +

    alias of Tuple[List[Tuple[int, int]], Literal[‘characters’, ‘tokens’]]

    +
    +
    renard.utils.batch_index_select(input, dim, index)
    @@ -2595,6 +2708,46 @@

    Hypocorism +
    +renard.utils.block_bounds(blocks)
    +

    Return the boundaries of a series of blocks.

    +
    +
    Parameters
    +

    blocks (Union[List[str], List[List[str]]]) – either a list of raw texts or a list of tokenized +texts.

    +
    +
    Return type
    +

    Tuple[List[Tuple[int, int]], Literal[‘characters’, ‘tokens’]]

    +
    +
    Returns
    +

    A BlockBounds with the correct unit.

    +
    +
    +

    + +
    +
    +renard.utils.charbb2tokenbb(char_bb, char2token)
    +

    Convert a BlockBounds in characters to a BlockBounds in +tokens.

    +
    +
    Parameters
    +
      +
    • char_bb (Tuple[List[Tuple[int, int]], Literal[‘characters’, ‘tokens’]]) – block bounds, in ‘characters’.

    • +
    • char2token (List[int]) – a list with char2token[i] being the index +of the token corresponding to character i.

    • +
    +
    +
    Return type
    +

    Tuple[List[Tuple[int, int]], Literal[‘characters’, ‘tokens’]]

    +
    +
    Returns
    +

    a BlockBounds, in ‘tokens’.

    +
    +
    +
    +
    renard.utils.search_pattern(seq, pattern)
    @@ -2737,7 +2890,7 @@

    Hypocorism

    Plot utils

    -renard.plot_utils.plot_nx_graph_reasonably(G, ax=None, layout=None)
    +renard.plot_utils.plot_nx_graph_reasonably(G, ax=None, layout=None, node_kwargs=None, edge_kwargs=None, label_kwargs=None)

    Try to plot a nx.Graph with ‘reasonable’ parameters

    Parameters
    @@ -2746,6 +2899,9 @@

    HypocorismOptional[dict]) – if given, this graph layout will be applied. Otherwise, use layout_nx_graph_reasonably().

    +
  • node_kwargs (Optional[Dict[str, Any]]) – passed to nx.draw_networkx_nodes()

  • +
  • edge_kwargs (Optional[Dict[str, Any]]) – passed to nx.draw_networkx_edges()

  • +
  • label_kwargs (Optional[Dict[str, Any]]) – passed to nx.draw_networkx_labels()

  • diff --git a/searchindex.js b/searchindex.js index 111614b..e7fa982 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["contributing","extending","index","installation","introduction","pipeline","reference"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":5,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["contributing.rst","extending.rst","index.rst","installation.rst","introduction.rst","pipeline.rst","reference.rst"],objects:{"renard.graph_utils":[[6,1,1,"","cumulative_graph"],[6,1,1,"","dynamic_graph_to_gephi_graph"],[6,1,1,"","graph_edges_attributes"],[6,1,1,"","graph_with_names"],[6,1,1,"","layout_with_names"]],"renard.ner_utils":[[6,2,1,"","DataCollatorForTokenClassificationWithBatchEncoding"],[6,2,1,"","NERDataset"],[6,1,1,"","_tokenize_and_align_labels"],[6,1,1,"","hgdataset_from_conll2002"],[6,1,1,"","load_conll2002_bio"],[6,1,1,"","ner_entities"]],"renard.ner_utils.DataCollatorForTokenClassificationWithBatchEncoding":[[6,3,1,"","__call__"],[6,3,1,"","__init__"]],"renard.ner_utils.NERDataset":[[6,3,1,"","__init__"]],"renard.pipeline":[[6,0,0,"-","preprocessing"],[6,0,0,"-","stanford_corenlp"]],"renard.pipeline.character_unification":[[6,2,1,"","Character"],[6,2,1,"","GraphRulesCharacterUnifier"],[6,2,1,"","NaiveCharacterUnifier"]],"renard.pipeline.character_unification.Character":[[6,3,1,"","__delattr__"],[6,3,1,"","__eq__"],[6,3,1,"","__hash__"],[6,3,1,"","__init__"],[6,3,1,"","__repr__"],[6,3,1,"","__setattr__"]],"renard.pipeline.character_unification.GraphRulesCharacterUnifier":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","infer_name_gender"],[6,3,1,"","names_are_related_after_title_removal"],[6,3,1,"","needs"],[6,3,1,"","optional_needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]
],"renard.pipeline.character_unification.NaiveCharacterUnifier":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","optional_needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.core":[[6,2,1,"","Mention"],[6,2,1,"","Pipeline"],[6,2,1,"","PipelineState"],[6,2,1,"","PipelineStep"]],"renard.pipeline.core.Mention":[[6,3,1,"","__eq__"],[6,3,1,"","__hash__"],[6,3,1,"","__init__"],[6,3,1,"","__repr__"]],"renard.pipeline.core.Pipeline":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_non_ignored_steps"],[6,3,1,"","_pipeline_init_steps"],[6,3,1,"","check_valid"],[6,3,1,"","rerun_from"]],"renard.pipeline.core.PipelineState":[[6,3,1,"","__eq__"],[6,4,1,"","__hash__"],[6,3,1,"","__init__"],[6,3,1,"","__repr__"],[6,4,1,"","chapter_tokens"],[6,4,1,"","chapters"],[6,4,1,"","character_network"],[6,4,1,"","characters"],[6,4,1,"","corefs"],[6,4,1,"","entities"],[6,3,1,"","export_graph_to_gexf"],[6,3,1,"","get_character"],[6,3,1,"","plot_graph"],[6,3,1,"","plot_graph_to_file"],[6,3,1,"","plot_graphs_to_dir"],[6,4,1,"","quotes"],[6,4,1,"","sentences"],[6,4,1,"","sentences_polarities"],[6,4,1,"","speakers"],[6,4,1,"","text"],[6,4,1,"","tokens"]],"renard.pipeline.core.PipelineStep":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","needs"],[6,3,1,"","optional_needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.corefs":[[6,2,1,"","BertCoreferenceResolver"],[6,2,1,"","SpacyCorefereeCoreferenceResolver"]],"renard.pipeline.corefs.BertCoreferenceResolver":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.corefs.SpacyCorefereeCoreferenceResolver":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_coreferee_get_mention_tokens"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","_spacy_try_infer_spaces"],[6,3,1,"","needs"],[6,3,1,"","optional_needs"],[6,3,1,"","production
"]],"renard.pipeline.graph_extraction":[[6,2,1,"","CoOccurrencesGraphExtractor"],[6,2,1,"","ConversationalGraphExtractor"]],"renard.pipeline.graph_extraction.CoOccurrencesGraphExtractor":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_extract_dynamic_graph"],[6,3,1,"","_extract_gephi_dynamic_graph"],[6,3,1,"","_extract_graph"],[6,3,1,"","_mentions_interact"],[6,3,1,"","needs"],[6,3,1,"","optional_needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.graph_extraction.ConversationalGraphExtractor":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"]],"renard.pipeline.ner":[[6,2,1,"","BertNamedEntityRecognizer"],[6,2,1,"","NEREntity"],[6,2,1,"","NLTKNamedEntityRecognizer"]],"renard.pipeline.ner.BertNamedEntityRecognizer":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","batch_labels"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.ner.NEREntity":[[6,3,1,"","__eq__"],[6,3,1,"","__hash__"],[6,3,1,"","__init__"],[6,3,1,"","__repr__"],[6,3,1,"","shifted"],[6,4,1,"","tag"]],"renard.pipeline.ner.NLTKNamedEntityRecognizer":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.preprocessing":[[6,2,1,"","CustomSubstitutionPreprocessor"]],"renard.pipeline.preprocessing.CustomSubstitutionPreprocessor":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.quote_detection":[[6,2,1,"","QuoteDetector"]],"renard.pipeline.quote_detection.QuoteDetector":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.sentiment_analysis":[[6,2,1,"","NLTKSentimentAnalyzer"]],"renard.pipeline.sentiment_analysis.NLTKSentimentAnalyzer":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production
"]],"renard.pipeline.speaker_attribution":[[6,2,1,"","BertSpeakerDetector"]],"renard.pipeline.speaker_attribution.BertSpeakerDetector":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","needs"],[6,3,1,"","production"]],"renard.pipeline.stanford_corenlp":[[6,2,1,"","StanfordCoreNLPPipeline"],[6,1,1,"","corenlp_annotations_bio_tags"]],"renard.pipeline.stanford_corenlp.StanfordCoreNLPPipeline":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"]],"renard.pipeline.tokenization":[[6,2,1,"","NLTKTokenizer"]],"renard.pipeline.tokenization.NLTKTokenizer":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.plot_utils":[[6,1,1,"","plot_nx_graph_reasonably"]],"renard.resources.hypocorisms":[[6,2,1,"","HypocorismGazetteer"]],"renard.resources.hypocorisms.HypocorismGazetteer":[[6,3,1,"","__init__"],[6,3,1,"","_add_hypocorism_"],[6,3,1,"","are_related"],[6,3,1,"","get_nicknames"],[6,3,1,"","get_possible_names"]],"renard.utils":[[6,1,1,"","batch_index_select"],[6,1,1,"","search_pattern"],[6,1,1,"","spans"]],renard:[[6,0,0,"-","graph_utils"],[6,0,0,"-","ner_utils"],[6,0,0,"-","plot_utils"],[6,0,0,"-","utils"]]},objnames:{"0":["py","module","Python module"],"1":["py","function","Python function"],"2":["py","class","Python class"],"3":["py","method","Python method"],"4":["py","attribute","Python 
attribute"]},objtypes:{"0":"py:module","1":"py:function","2":"py:class","3":"py:method","4":"py:attribute"},terms:{"0":6,"0x7fd9e9115900":5,"1":[0,5,6],"10":5,"10000":6,"14":6,"2":[5,6],"20":5,"2002":6,"2014":6,"2015":6,"2017":6,"2019":6,"25":5,"3":[1,5,6],"4":6,"512":6,"639":[1,5,6],"8":6,"8g":6,"9115":6,"9999999":6,"abstract":6,"boolean":6,"case":[5,6],"class":[0,1,5,6],"default":[1,5,6],"do":[0,5,6],"export":[5,6],"final":[5,6],"float":6,"function":[0,1,5,6],"import":[1,5],"int":6,"long":5,"new":[0,2],"return":[0,1,5,6],"static":6,"throw":5,"true":[5,6],"try":6,"while":6,A:[5,6],As:[1,5],By:[1,5],For:5,If:[0,3,5,6],In:[0,5,6],It:5,One:6,The:[0,2,3,6],These:[4,5],To:5,__call__:[1,5,6],__delattr__:6,__eq__:6,__hash__:6,__init__:[1,6],__repr__:6,__setattr__:6,_add_hypocorism_:6,_context_mask:6,_coreferee_get_mention_token:6,_extract_dynamic_graph:6,_extract_gephi_dynamic_graph:6,_extract_graph:6,_mentions_interact:6,_must_:6,_non_ignored_step:6,_pipeline_init_:[1,6],_pipeline_init_step:6,_spacy_try_infer_spac:6,_tokenize_and_align_label:6,abov:5,accept:6,access:5,accordingli:6,ad:6,adapt:6,add:[0,6],addit:[4,6],addition:1,additional_hypocor:6,after:6,al:6,algorithm:[4,6],alia:[5,6],align:6,all:[0,5,6],alloc:6,allow:[4,5,6],along:6,alreadi:[5,6],also:[0,3,4,5,6],an:[0,1,2,4,6],analysi:[2,4],analyz:6,ani:[1,5,6],ann:6,annot:[0,5,6],annotate_coref:6,anoth:5,antecedents_nb:6,apach:6,apparit:6,appear:6,appli:[5,6],applic:[0,4],applyfinegrain:6,ar:[0,4,5,6],arbor:6,are_rel:6,arg:6,argument:[0,1,5,6],arrai:6,assign:5,associ:6,attribut:[1,2],audienc:2,auto:6,autom:4,automat:6,avail:2,avoid:6,ax:6,b:6,base:[5,6],basic:[1,6],basictokenizerstep:1,batch:6,batch_i:6,batch_index_select:6,batch_label:6,batch_siz:6,batchencod:6,be_quiet:6,becaus:[5,6],befor:5,begin:6,behind:0,being:6,below:5,bert:6,bert_pipelin:5,bertcoreferenceresolv:5,bertforcoreferenceresolut:6,bertnamedentityrecogn:5,bertspeakerdetector:[5,6],better:0,between:[4,6],biggest:6,bio:6,bio_tag:6,black:0,blob:6,block
:[5,6],block_siz:6,bool:6,both:6,bring:4,bypass:6,c:6,call:[1,5,6],callabl:6,can:[0,1,3,4,5,6],cannot:6,carltonnorthern:6,central:5,certain:5,chain:6,chapter:6,chapter_token:6,charact:[2,4],character_graph:6,character_network:[5,6],character_unif:[5,6],check:[0,5,6],check_valid:6,choos:6,chosen:6,chunk:6,chunk_siz:6,ci:0,client_properti:6,close:6,co:6,co_occur:6,co_occurences_dist:6,co_occurrences_dist:[5,6],code:[1,2,5,6],colab:6,collect:6,com:6,come:6,common:6,comparison:6,compat:6,complet:[0,4],comput:[5,6],concept:5,confer:6,config:6,configur:[5,6],conll2022:6,conll:6,consid:6,consist:0,constant:6,construct:6,contain:5,context:6,context_mask:6,context_retriev:6,contribut:2,conveni:5,convers:6,conversation_dist:6,conversationalgraphextractor:5,convert:6,cooccurencesgraphextractor:5,cooccurrencesgraphextractor:5,core:[0,1,2],coref:6,coref_model:6,corefer:[2,3],corefere:[3,6],corefereebrok:6,corefre:6,corefs_algorithm:6,corenlp:[2,3],corenlp_annotations_bio_tag:6,corenlp_custom_properti:6,correct:6,correctli:[5,6],correspond:6,costli:0,cpu:6,creat:[0,2,5,6],cuda:6,cumul:6,cumulative_graph:6,current:6,custom:6,customsubstitutionpreprocessor:[5,6],cut:[5,6],data:6,datacollatorfortokenclassif:6,datacollatorfortokenclassificationwithbatchencod:6,dataset:6,debug:5,declar:1,def:1,default_quote_pair:6,defin:6,delattr:6,depend:[1,3],deriv:[4,6],descript:6,detail:[5,6],detect:2,determinist:6,devic:6,dict:[1,6],dictionari:6,differ:5,digit:4,dim:6,dimens:6,diminut:6,directli:6,directori:[0,5,6],disabl:6,discard:6,discuss:[0,6],displai:5,distanc:6,doc:[0,6],docstr:0,document:[0,4,5,6],doe:[0,5,6],doesn:5,don:[5,6],done:5,downstream:4,draw:6,dure:5,dweight:6,dynam:[2,6],dynamic_graph_to_gephi_graph:6,dynamic_overlap:6,dynamic_window:[5,6],e:[3,6],each:[1,4,5,6],earli:6,easili:5,edg:[4,6],eighth:6,either:6,element:6,embed:4,en:6,encod:[4,6],encount:0,encourag:0,end:6,end_idx:6,eng:[1,6],english:5,enough:6,entir:[0,6],entiti:2,environ:3,equal:6,error:[5,6],escap:6,et:6,even:6,eve
r:3,evolv:5,exampl:[1,4,5,6],except:[5,6],execut:[5,6],exist:0,expect:[5,6],explain:[0,5],explicit:5,explor:5,export_graph_to_gexf:[5,6],extend:2,extra:[3,6],extract:[2,4],extractor:6,f:5,fals:6,featur:[0,5,6],few:5,fig:6,figur:6,file:[0,6],first:[5,6],fledg:5,flexibl:6,follow:[1,3,5],forget:0,form:6,format:[0,5,6],found:6,four:1,fra:5,french:5,from:[1,4,5,6],from_step:6,frozenset:6,fulfil:5,full:[5,6],fulli:5,further:0,futur:6,g:6,gazeet:6,gazett:6,gender:6,gener:6,gephi:[5,6],get:[3,6],get_charact:6,get_nicknam:6,get_possible_nam:6,gexf:[5,6],gilbert:6,github:[0,6],give:0,given:6,global:6,googl:6,graph2vec:4,graph:[2,4],graph_edges_attribut:6,graph_extract:[5,6],graph_extractor_kwarg:5,graph_start_idx:6,graph_util:6,graph_with_nam:6,graphrulescharacterunifi:5,guidelin:2,ha:[5,6],hack:6,hash:6,hatch:6,have:[4,5,6],head:6,help:5,here:[1,5,6],hgdataset_from_conll2002:6,high:[0,6],hname_const:6,hopefulli:0,howev:6,html:6,http:6,hugginfac:6,hugginface_model_id:6,huggingfac:6,huggingface_model_id:6,human:4,humannam:6,hutto:6,hypocorismgazett:6,hypothesi:0,i:6,icwsm:6,id:6,ignor:6,ignore_lone_titl:6,ignored_step:6,implement:[1,6],implemt:6,includ:6,index:[2,6],index_select:6,indic:[5,6],infer:6,infer_name_gend:6,inform:[0,1,4,5],init:[1,6],initi:6,input:[4,6],insensit:6,inspir:6,instal:[2,6],instead:6,intend:[2,6],interact:6,intern:6,intract:5,introduct:2,intuit:[4,5],invalid:5,io:6,ipynb:6,iso:[1,5,6],issu:[0,5,6],iter:6,its:[5,6],j:6,joshi:6,june:6,keep:6,kei:6,kept:6,know:6,kwarg:[1,6],labatutandbost2019:4,label:6,label_all_token:6,lang:[5,6],languag:[1,4,5,6],last:6,layout:6,layout_nx_graph_reason:6,layout_with_nam:6,least:1,lee:6,length:6,letter:6,level:0,leverag:5,librari:[5,6],licens:6,lifetim:6,limit:6,line:6,link:6,link_corefs_ment:6,list:[5,6],liter:6,literari:4,live:0,load:6,load_conll2002_bio:6,local:0,locat:6,longest:6,lookup:6,loos:6,lot:6,m:[0,6],made:6,mai:[4,5,6],maintain:0,make:[0,5],manag:3,manual:[2,5,6],map:6,mask:6,master:6,match:[0,6],mathemat:4,ma
tplotlib:[5,6],max:6,max_char_length:6,max_chunk_s:6,max_len:6,max_span_s:6,maximum:6,mean:6,meanwhil:5,media:6,memori:6,mention:[0,5,6],mention_1:6,mention_2:6,mention_head:6,mentions_per_token:6,merg:[0,6],messag:5,method:[1,5,6],mi:6,might:6,min_appear:[5,6],minimum:6,misc:6,miss:6,model:[5,6],modul:[0,2,5],more:[4,5,6],most_frequ:6,mr:6,ms:6,multilingu:2,multipl:5,must:[1,6],my_doc:5,my_doc_in_french:5,my_script:3,my_tokenization_funct:5,n:6,naivecharacterunifi:5,name1:6,name2:6,name:2,name_styl:6,names_are_related_after_title_remov:6,narr:4,natur:4,ndarrai:6,necessari:0,need:[1,5,6],neeed:6,ner:5,ner_ent:6,ner_util:6,nercontextretriev:6,nerdataset:6,nerent:6,network:[4,5,6],networkx:[5,6],neural:6,newlin:6,next:6,nicknam:6,nlp:[4,5,6],nltk:[5,6],nltknamedentityrecogn:5,nltksentimentanalyz:5,nltktoken:5,nnp:6,node:[4,6],non:0,none:6,normal:6,note:[5,6],notebook:6,novel:6,now:6,number:6,nx:6,object:[4,5,6],occur:[5,6],onc:0,one:[5,6],ones:0,onli:[5,6],onlin:0,open:[0,5,6],option:[1,6],optional_ne:[1,6],order:[4,5,6],org:6,organ:6,origin:6,other:6,otherwis:6,our:0,out:[5,6],output:[2,6],overlap:6,overrid:5,overridden:[1,6],overriden:6,overview:[0,2,4],own:6,pad_to_multiple_of:6,page:2,paramet:[5,6],parsimoni:6,part:[5,6],partial_match:6,particular:[0,6],pass:[0,1,5,6],patch:0,path:6,pattern:6,per:6,perform:[4,5,6],person:6,pip:2,pipelin:[1,2,3,4],pipelinest:[1,5,6],pipelinestep:[1,5,6],platform:5,plot:5,plot_graph:[5,6],plot_graph_to_fil:[5,6],plot_graphs_to_dir:[5,6],plot_nx_graph_reason:6,plot_util:6,plt:5,po:6,poetri:[3,6],polar:6,posit:6,possibl:[0,4,5,6],practition:4,pre:6,preconfigur:5,predict:6,prefix:6,preprocess:2,preprocessor:6,pretrainedmodel:6,pretrainedtokenizerfast:6,previou:[5,6],previous:6,problem:0,process:4,produc:[1,6],product:[1,6],progress:6,progress_report:6,progressreport:6,project:3,pronoun:6,propag:5,properti:6,provid:5,pull:0,py:3,pyplot:5,pytest:0,python:[0,3,4,6],pytorch:6,qualiti:2,quot:2,quote_detect:6,quote_pair:6,quotedetector:5,r:6
,ram:6,rather:[5,6],rational:0,re:6,read:[5,6],readm:6,reason:6,recogn:6,recognit:2,recomput:6,refer:[0,2],regardless:5,regex:[5,6],regroup:5,relabel:6,relat:6,relationship:[4,6],relev:[0,6],reli:0,remov:6,renard:[0,3,4,5,6],renard_test_al:0,report:6,repositori:0,repr:6,repres:[4,5],represent:4,representend:5,request:0,requir:[5,6],rerun_from:6,research:[4,6],resolut:2,resolv:[3,6],resolve_inconsist:6,resort:6,resourc:2,result:[5,6],retriev:6,retro:6,richardpaulhudson:6,rst:0,rule:6,run:[0,1,3,5,6],runtim:6,runtm:6,s:[0,5,6],same:[1,5,6],satisfi:[5,6],satisifi:0,save:[5,6],script:3,scrollto:6,search:[2,6],search_pattern:6,second:6,see:[4,5,6],seen:5,select:6,self:[1,6],sentenc:[5,6],sentences_polar:6,sentiment:2,sentiment_analysi:6,separ:6,seq:6,sequenc:6,sequenti:[5,6],server:6,server_kwarg:6,server_timeout:6,set:[1,5,6],setattr:6,sever:[4,5,6],shall:6,shape:6,shell:3,shift:6,shortest:6,should:[0,1,5,6],show:[0,5],simpl:[5,6],simpli:3,simplic:5,sinc:6,singl:[4,6],size:6,slider:[5,6],smallest:6,so:[0,6],social:6,solver:3,some:6,sometim:6,sourc:0,space:6,spaci:[3,6],spacycorefereecoreferenceresolv:5,span:6,speaker:2,speaker_attribut:6,special:[1,5],specif:0,specifi:[1,5,6],spinx:0,split:[1,6],spuriou:6,stable_layout:6,stai:0,stand:6,stanford:[2,3],stanford_corenlp:6,stanfordcorenlppipelin:[5,6],stanfordnlp:6,stanza:[3,6],start:6,start_idx:6,state:[1,2],statist:6,step:2,still:6,store:6,stori:4,str:[1,6],string:[1,5,6],strongest:6,style:[0,6],substit:6,substition_rul:6,substitut:[5,6],support:[1,2,6],supported_lang:[1,5,6],sure:0,t:[5,6],tag:6,tag_conversion_map:6,target:2,task:[4,5],tensor:6,termin:6,test:0,text:[1,4,5,6],than:[5,6],thei:[4,6],them:[5,6],therefor:5,thi:[1,5,6],thing:5,those:[5,6],though:6,through:5,tibert:5,time:[0,1,5,6],timeout:6,timestep:6,titl:6,token:[1,2],token_classif:6,tool:[4,5],torch:6,tqdm:6,trade:6,train:6,transform:6,trivial:0,troubleshoot:5,tupl:6,turn:6,two:[4,6],txt:5,type:[0,1,6],typevar:6,typo:6,under:[3,6],unif:2,unifi:6,union:6,uni
qu:5,unit:6,unknown:6,unsatisfi:5,up:[0,5,6],us:[0,1,2,4,5,6],usag:[2,5,6],usual:[1,5,6],util:2,vader:[5,6],vala:6,valid:[5,6],valu:[1,5,6],valueerror:5,variabl:6,vc0bsbliirjq:6,vectori:4,version:6,virtual:3,visual:5,visualis:[4,5],wa:[5,6],wai:[1,6],want:[0,3,5],warn:6,we:0,weblog:6,weight:6,weirdli:6,welcom:0,well:6,wether:6,when:[0,4,5,6],where:[4,6],which:[5,6],whole:6,why:[5,6],wide:1,window:6,wise:6,without:6,wont:6,word:6,wordpiec:6,work:5,would:5,wp_label:6,write:0,write_gexf:6,written:4,yet:6,you:[0,3,5,6],your:[0,5],yourself:[0,5]},titles:["Contributing","Extending Renard","Welcome to Renard\u2019s documentation!","Installation","Introduction","The Pipeline","Reference"],titleterms:{"new":1,The:5,an:5,analysi:[5,6],attribut:[5,6],audienc:4,avail:5,bertcoreferenceresolv:6,bertnamedentityrecogn:6,charact:[5,6],code:0,content:2,contribut:0,conversationalgraphextractor:6,cooccurrencesgraphextractor:6,core:6,corefer:[5,6],corenlp:6,creat:1,detect:[5,6],document:2,dynam:5,entiti:[5,6],extend:1,extract:[5,6],graph:[5,6],graphrulescharacterunifi:6,guidelin:0,hypocor:6,indic:2,instal:3,intend:4,introduct:4,manual:3,multilingu:5,naivecharacterunifi:6,name:[5,6],ner:6,nltknamedentityrecogn:6,nltksentimentanalyz:6,nltktoken:6,output:5,overview:5,pip:3,pipelin:[5,6],plot:6,preprocess:[5,6],qualiti:0,quot:[5,6],quotedetector:6,recognit:[5,6],refer:6,renard:[1,2],resolut:[5,6],resourc:6,s:2,sentiment:[5,6],spacycorefereecoreferenceresolv:6,speaker:[5,6],stanford:6,state:[5,6],step:[1,5,6],support:5,tabl:2,target:4,token:[5,6],unif:6,us:3,usag:4,util:6,welcom:2}}) \ No newline at end of file 
+Search.setIndex({docnames:["contributing","extending","index","installation","introduction","pipeline","reference"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":5,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["contributing.rst","extending.rst","index.rst","installation.rst","introduction.rst","pipeline.rst","reference.rst"],objects:{"renard.graph_utils":[[6,1,1,"","cumulative_graph"],[6,1,1,"","dynamic_graph_to_gephi_graph"],[6,1,1,"","graph_edges_attributes"],[6,1,1,"","graph_with_names"],[6,1,1,"","layout_with_names"]],"renard.ner_utils":[[6,2,1,"","DataCollatorForTokenClassificationWithBatchEncoding"],[6,2,1,"","NERDataset"],[6,1,1,"","_tokenize_and_align_labels"],[6,1,1,"","hgdataset_from_conll2002"],[6,1,1,"","load_conll2002_bio"],[6,1,1,"","ner_entities"]],"renard.ner_utils.DataCollatorForTokenClassificationWithBatchEncoding":[[6,3,1,"","__call__"],[6,3,1,"","__init__"]],"renard.ner_utils.NERDataset":[[6,3,1,"","__init__"]],"renard.pipeline":[[6,0,0,"-","preprocessing"],[6,0,0,"-","stanford_corenlp"]],"renard.pipeline.character_unification":[[6,2,1,"","Character"],[6,2,1,"","GraphRulesCharacterUnifier"],[6,2,1,"","NaiveCharacterUnifier"]],"renard.pipeline.character_unification.Character":[[6,3,1,"","__delattr__"],[6,3,1,"","__eq__"],[6,3,1,"","__hash__"],[6,3,1,"","__init__"],[6,3,1,"","__repr__"],[6,3,1,"","__setattr__"]],"renard.pipeline.character_unification.GraphRulesCharacterUnifier":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","infer_name_gender"],[6,3,1,"","names_are_related_after_title_removal"],[6,3,1,"","needs"],[6,3,1,"","optional_needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.character_unification.NaiveCharacterUnifier":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_ini
t_"],[6,3,1,"","needs"],[6,3,1,"","optional_needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.core":[[6,2,1,"","Mention"],[6,2,1,"","Pipeline"],[6,2,1,"","PipelineState"],[6,2,1,"","PipelineStep"]],"renard.pipeline.core.Mention":[[6,3,1,"","__eq__"],[6,3,1,"","__hash__"],[6,3,1,"","__init__"],[6,3,1,"","__repr__"]],"renard.pipeline.core.Pipeline":[[6,4,1,"","PipelineParameter"],[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_non_ignored_steps"],[6,3,1,"","_pipeline_init_steps_"],[6,3,1,"","check_valid"],[6,3,1,"","rerun_from"]],"renard.pipeline.core.PipelineState":[[6,3,1,"","__eq__"],[6,4,1,"","__hash__"],[6,3,1,"","__init__"],[6,3,1,"","__repr__"],[6,4,1,"","char2token"],[6,4,1,"","character_network"],[6,4,1,"","characters"],[6,4,1,"","corefs"],[6,4,1,"","dynamic_blocks"],[6,4,1,"","entities"],[6,3,1,"","export_graph_to_gexf"],[6,3,1,"","get_character"],[6,3,1,"","plot_graph"],[6,3,1,"","plot_graph_to_file"],[6,3,1,"","plot_graphs_to_dir"],[6,4,1,"","quotes"],[6,4,1,"","sentences"],[6,4,1,"","sentences_polarities"],[6,4,1,"","speakers"],[6,4,1,"","text"],[6,4,1,"","tokens"]],"renard.pipeline.core.PipelineStep":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","needs"],[6,3,1,"","optional_needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.corefs":[[6,2,1,"","BertCoreferenceResolver"],[6,2,1,"","SpacyCorefereeCoreferenceResolver"]],"renard.pipeline.corefs.BertCoreferenceResolver":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.corefs.SpacyCorefereeCoreferenceResolver":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_coreferee_get_mention_tokens"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","_spacy_try_infer_spaces"],[6,3,1,"","needs"],[6,3,1,"","optional_needs"],[6,3,1,"","production"]],"renard.pipeline.graph_extraction":[[6,2,1,"","CoOccurrencesGraphE
xtractor"],[6,2,1,"","ConversationalGraphExtractor"]],"renard.pipeline.graph_extraction.CoOccurrencesGraphExtractor":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_create_co_occurrences_blocks"],[6,3,1,"","_extract_dynamic_graph"],[6,3,1,"","_extract_graph"],[6,3,1,"","needs"],[6,3,1,"","optional_needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.graph_extraction.ConversationalGraphExtractor":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"]],"renard.pipeline.ner":[[6,2,1,"","BertNamedEntityRecognizer"],[6,2,1,"","NEREntity"],[6,2,1,"","NLTKNamedEntityRecognizer"]],"renard.pipeline.ner.BertNamedEntityRecognizer":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","batch_labels"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.ner.NEREntity":[[6,3,1,"","__eq__"],[6,3,1,"","__hash__"],[6,3,1,"","__init__"],[6,3,1,"","__repr__"],[6,3,1,"","shifted"],[6,4,1,"","tag"]],"renard.pipeline.ner.NLTKNamedEntityRecognizer":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.preprocessing":[[6,2,1,"","CustomSubstitutionPreprocessor"]],"renard.pipeline.preprocessing.CustomSubstitutionPreprocessor":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.quote_detection":[[6,2,1,"","QuoteDetector"]],"renard.pipeline.quote_detection.QuoteDetector":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.pipeline.sentiment_analysis":[[6,2,1,"","NLTKSentimentAnalyzer"]],"renard.pipeline.sentiment_analysis.NLTKSentimentAnalyzer":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"]],"renard.pipeline.speaker_attribution":[[6,2,1,"","BertSpeakerDetector"]],"renard.pipeline.speaker
_attribution.BertSpeakerDetector":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","needs"],[6,3,1,"","production"]],"renard.pipeline.stanford_corenlp":[[6,2,1,"","StanfordCoreNLPPipeline"],[6,1,1,"","corenlp_annotations_bio_tags"]],"renard.pipeline.stanford_corenlp.StanfordCoreNLPPipeline":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","needs"],[6,3,1,"","production"]],"renard.pipeline.tokenization":[[6,2,1,"","NLTKTokenizer"]],"renard.pipeline.tokenization.NLTKTokenizer":[[6,3,1,"","__call__"],[6,3,1,"","__init__"],[6,3,1,"","_pipeline_init_"],[6,3,1,"","needs"],[6,3,1,"","production"],[6,3,1,"","supported_langs"]],"renard.plot_utils":[[6,1,1,"","plot_nx_graph_reasonably"]],"renard.resources.hypocorisms":[[6,2,1,"","HypocorismGazetteer"]],"renard.resources.hypocorisms.HypocorismGazetteer":[[6,3,1,"","__init__"],[6,3,1,"","_add_hypocorism_"],[6,3,1,"","are_related"],[6,3,1,"","get_nicknames"],[6,3,1,"","get_possible_names"]],"renard.utils":[[6,5,1,"","BlockBounds"],[6,1,1,"","batch_index_select"],[6,1,1,"","block_bounds"],[6,1,1,"","charbb2tokenbb"],[6,1,1,"","search_pattern"],[6,1,1,"","spans"]],renard:[[6,0,0,"-","graph_utils"],[6,0,0,"-","ner_utils"],[6,0,0,"-","plot_utils"],[6,0,0,"-","utils"]]},objnames:{"0":["py","module","Python module"],"1":["py","function","Python function"],"2":["py","class","Python class"],"3":["py","method","Python method"],"4":["py","attribute","Python attribute"],"5":["py","data","Python 
data"]},objtypes:{"0":"py:module","1":"py:function","2":"py:class","3":"py:method","4":"py:attribute","5":"py:data"},terms:{"0":6,"0x7fd9e9115900":5,"1":[0,5,6],"10":5,"10000":6,"14":6,"2":[5,6],"20":5,"2002":6,"2014":6,"2015":6,"2017":6,"2019":6,"25":5,"3":[1,5,6],"4":6,"512":6,"639":[1,5,6],"8":6,"8g":6,"9115":6,"9999999":6,"abstract":6,"case":[5,6],"class":[0,1,5,6],"default":[1,5,6],"do":[0,5,6],"export":[5,6],"final":[5,6],"float":6,"function":[0,1,5,6],"import":[1,5],"int":6,"long":5,"new":[0,2,5,6],"return":[0,1,5,6],"static":6,"throw":5,"true":[5,6],"try":6,"while":[5,6],A:[5,6],As:[1,5],By:[1,5],For:[5,6],If:[0,3,5,6],In:[0,5,6],It:[5,6],No:6,One:6,The:[0,2,3,6],These:[0,4,5],To:5,__call__:[1,5,6],__delattr__:6,__eq__:6,__hash__:6,__init__:[1,6],__repr__:6,__setattr__:6,_add_hypocorism_:6,_context_mask:6,_coreferee_get_mention_token:6,_create_co_occurrences_block:6,_extract_dynamic_graph:6,_extract_graph:6,_must_:6,_non_ignored_step:6,_pipeline_init_:[1,6],_pipeline_init_step:6,_pipeline_init_steps_:6,_spacy_try_infer_spac:6,_tokenize_and_align_label:6,abov:5,accept:6,access:5,accord:6,accordingli:6,ad:6,adapt:6,add:[0,6],addit:[4,6],addition:1,additional_hypocor:6,additional_ner_class:6,after:6,al:6,algorithm:[4,6],alia:[5,6],align:6,all:[0,5,6],alloc:6,allow:[4,5,6],along:6,alreadi:[5,6],also:[0,3,4,5,6],an:[0,1,2,4,6],analysi:[2,4],analyz:6,ani:[1,5,6],ann:6,annot:[0,5,6],annotate_coref:6,anoth:[5,6],antecedents_nb:6,apach:6,apparit:6,appear:6,appli:[5,6],applic:[0,4],applyfinegrain:6,ar:[0,4,5,6],arbor:6,are_rel:6,arg:6,argument:[0,1,5,6],arrai:6,assign:5,associ:6,attempt:6,attribut:[1,2],audienc:2,auto:6,autom:4,automat:[5,6],avail:2,avoid:6,ax:6,b:6,base:[5,6],basic:[1,6],basictokenizerstep:1,batch:6,batch_i:6,batch_index_select:6,batch_label:6,batch_siz:6,batchencod:6,be_quiet:6,becaus:[5,6],befor:5,begin:6,behind:0,being:6,below:5,bert:6,bert_pipelin:5,bertcoreferenceresolv:5,bertforcoreferenceresolut:6,bertnamedentityrecogn:5,bertspeakerdetector:[5
,6],better:0,between:[4,6],biggest:6,bio:6,bio_tag:6,black:0,blob:6,block:[5,6],block_bound:[5,6],block_indic:6,block_siz:6,blockbound:6,bool:6,both:6,bound:6,boundari:[5,6],bring:4,bypass:6,c:6,call:[1,5,6],callabl:6,can:[0,1,3,4,5,6],cannot:6,carltonnorthern:6,central:5,certain:5,chain:6,chapter:[5,6],char2token:6,char_bb:6,charact:[2,4],character_graph:6,character_ner_tag:6,character_network:[5,6],character_unif:[5,6],charbb2tokenbb:6,check:[0,5,6],check_valid:6,choos:6,chosen:6,chunk:6,chunk_siz:6,ci:0,citi:6,client_properti:6,co:6,co_occurrences_block:6,co_occurrences_dist:[5,6],code:[1,2,5,6],colab:6,collect:[1,6],com:6,come:6,common:6,comparison:6,compat:6,complet:[0,4],comput:[5,6],concept:5,confer:6,config:6,configur:[5,6],conll2022:6,conll:6,consid:6,consist:0,constant:6,contain:5,context:6,context_mask:6,context_retriev:6,contribut:2,conveni:5,convers:6,conversation_dist:6,conversationalgraphextractor:5,convert:6,cooccurencesgraphextractor:5,cooccurrencesgraphextractor:5,core:[0,1,2],coref:6,coref_model:6,corefer:[2,3],corefere:[3,6],corefereebrok:6,corefre:6,corefs_algorithm:6,corenlp:[2,3],corenlp_annotations_bio_tag:6,corenlp_custom_properti:6,correct:6,correctli:[5,6],correspond:6,costli:0,cpu:6,creat:[0,2,5,6],cuda:6,cumul:6,cumulative_graph:6,current:6,custom:6,customsubstitutionpreprocessor:[5,6],cut:[5,6],cut_into_chapt:5,data:6,datacollatorfortokenclassif:6,datacollatorfortokenclassificationwithbatchencod:6,dataset:6,debug:5,declar:1,def:1,default_quote_pair:6,defin:6,delattr:6,delimit:6,depend:[0,1,3],deriv:[4,6],descript:6,detail:[5,6],detect:2,determin:[5,6],determinist:6,devic:6,dict:[1,6],dictionari:6,differ:5,digit:4,dim:6,dimens:6,diminut:6,direct:6,directli:6,directori:[0,5,6],disabl:6,discard:6,discuss:[0,6],displai:5,distanc:6,distinct:6,doc:[0,6],docstr:0,document:[0,4,5,6],doe:[0,5,6],doesn:5,don:[5,6],done:5,downstream:4,draw:6,draw_networkx_label:6,draw_networkx_nod:6,dure:5,dweight:6,dynam:[2,6],dynamic_block:[5,6],dynamic_blocks_t
oken:6,dynamic_graph_to_gephi_graph:6,dynamic_overlap:6,dynamic_window:[5,6],e:[3,6],each:[1,4,5,6],easili:[5,6],edg:[4,6],edge_kwarg:6,eighth:6,either:6,element:6,embed:4,en:6,enabl:0,encod:[4,6],encount:0,encourag:0,end:6,end_idx:6,eng:[1,6],english:5,entir:[0,6],entiti:2,environ:[0,3],equal:6,error:[5,6],escap:6,et:6,even:6,ever:3,evolv:5,exampl:[1,4,5,6],except:[5,6],execut:[5,6],exist:0,expect:[5,6],explain:[0,5],explicit:[0,5],explor:5,export_graph_to_gexf:[5,6],extend:2,extra:[3,6],extract:[2,4],extractor:[5,6],f:5,fals:6,featur:[0,5,6],few:5,fig:6,figur:6,file:[0,6],first:[5,6],fledg:5,flexibl:6,follow:[1,3,5,6],forget:0,form:6,format:[0,5,6],found:6,four:1,fra:5,french:5,from:[1,4,5,6],from_step:6,frozenset:6,fulfil:5,full:[5,6],fulli:5,further:0,futur:6,g:6,gazeet:6,gazett:6,gender:6,gener:6,gephi:[5,6],get:[3,6],get_charact:6,get_nicknam:6,get_possible_nam:6,gexf:[5,6],gilbert:6,github:[0,6],give:0,given:6,global:6,googl:6,graph2vec:4,graph:[2,4],graph_edges_attribut:6,graph_extract:[5,6],graph_extractor_kwarg:5,graph_start_idx:6,graph_typ:6,graph_util:6,graph_with_nam:6,graphrulescharacterunifi:5,guidelin:2,ha:[5,6],hack:6,hash:6,hatch:6,have:[4,5,6],head:6,help:5,here:[1,5,6],hgdataset_from_conll2002:6,hierarch:6,hierarchical_merg:6,high:[0,6],hname_const:6,hopefulli:0,howev:[5,6],html:6,http:6,hugginfac:6,hugginface_model_id:6,huggingfac:6,huggingface_model_id:6,human:4,humannam:6,hutto:6,hypocorismgazett:6,hypothesi:0,i:6,icwsm:6,id:6,ignor:6,ignore_lone_titl:6,ignore_self_ment:6,ignored_step:6,implement:[1,6],implemt:6,includ:6,index:[2,6],index_select:6,indic:[5,6],infer:6,infer_name_gend:6,inform:[0,1,4,5],init:[1,6],initi:6,initialis:6,input:[4,6],insensit:6,inspir:6,instal:[2,6],instead:[5,6],intend:[2,6],interact:[5,6],intern:6,intract:5,introduct:2,intuit:[4,5],invalid:5,io:6,ipynb:6,iso:[1,5,6],issu:[0,5,6],iter:6,its:[5,6],j:6,joshi:6,june:6,keep:6,kei:6,kept:6,know:6,kwarg:[1,6],labatutandbost2019:4,label:6,label_all_token:6,label_kwarg:6,la
ng:[5,6],languag:[1,4,5,6],last:6,layout:6,layout_nx_graph_reason:6,layout_with_nam:6,least:1,lee:6,length:6,let:5,letter:6,level:[0,6],leverag:5,librari:[5,6],licens:6,lifetim:6,limit:6,line:6,link:6,link_corefs_ment:6,list:[5,6],liter:6,literari:4,live:0,load:6,load_conll2002_bio:6,local:0,locat:6,longest:6,lookup:6,loos:6,lot:6,m:[0,6],made:6,mai:[4,5,6],maintain:0,make:[0,5],manag:3,manual:[2,5,6],map:6,mask:6,master:6,match:[0,6],mathemat:4,matplotlib:[5,6],max:6,max_char_length:6,max_chunk_s:6,max_len:6,max_span_s:6,maximum:6,mean:6,meanwhil:5,media:6,memori:6,mention:[0,5,6],mention_head:6,mentions_per_token:6,merg:[0,6],messag:5,method:[1,5,6],mi:6,might:6,min_appear:[5,6],minimum:6,misc:6,miss:[5,6],model:[5,6],modifi:6,modul:[0,2,5],more:[4,5,6],most_frequ:6,mr:6,ms:6,multilingu:2,multipl:5,must:[0,1,6],my_doc:5,my_doc_in_french:5,my_script:3,my_tokenization_funct:5,n:6,naivecharacterunifi:5,name1:6,name2:6,name:2,name_styl:6,names_are_related_after_title_remov:6,narr:4,natur:4,ndarrai:6,necessari:0,need:[1,5,6],neeed:6,ner:5,ner_ent:6,ner_util:6,nercontextretriev:6,nerdataset:6,nerent:6,network:[4,5,6],networkx:[5,6],neural:6,newlin:6,next:6,nicknam:6,nlp:[4,5,6],nltk:[5,6],nltknamedentityrecogn:5,nltksentimentanalyz:5,nltktoken:5,nnp:6,node:[4,6],node_kwarg:6,non:0,none:6,normal:6,note:[0,5,6],notebook:6,novel:6,number:[5,6],nx:6,object:[4,5,6],occur:[5,6],occurr:6,onc:0,one:[5,6],ones:0,onli:[5,6],onlin:0,open:[0,5,6],option:[0,1,6],optional_ne:[1,6],order:[4,5,6],org:6,organ:6,origin:6,other:6,otherwis:6,our:0,out:[5,6],output:[2,6],overlap:6,overrid:5,overridden:[1,6],overriden:6,overview:[0,2,4],own:6,pad_to_multiple_of:6,page:2,param:6,paramet:[5,6],parsimoni:6,part:[5,6],partial_match:6,particular:[0,6],pass:[0,1,5,6],patch:0,path:6,pattern:6,per:6,perform:[4,5,6],person:6,pip:2,pipelin:[1,2,3,4],pipelineparamet:6,pipelinest:[1,5,6],pipelinestep:[1,5,6],platform:5,plot:5,plot_graph:[5,6],plot_graph_to_fil:[5,6],plot_graphs_to_dir:[5,6],plot_nx_grap
h_reason:6,plot_util:6,plt:5,po:6,poetri:[3,6],polar:6,posit:6,possibl:[0,4,5,6],practition:4,pre:6,preconfigur:5,predict:6,prefix:6,preprocess:2,preprocessor:6,pretrainedmodel:6,pretrainedtokenizerfast:6,previou:[5,6],previous:6,problem:0,process:4,produc:[1,6],product:[1,6],progress:6,progress_report:6,progressreport:6,project:3,pronoun:6,propag:5,properti:6,provid:5,pull:0,py:3,pyplot:5,pytest:0,python:[0,3,4,6],pytorch:6,qualiti:2,quot:2,quotat:6,quote_detect:6,quote_pair:6,quotedetector:5,r:6,ram:6,rather:[5,6],rational:0,raw:6,re:6,read:[5,6],readm:6,reason:6,recogn:6,recognit:2,recomput:6,record:6,refer:[0,2],regardless:5,regex:[5,6],regroup:5,relabel:6,relat:6,relationship:[4,6],relev:[0,6],reli:[0,5],remov:6,renard:[0,3,4,5,6],renard_test_al:0,renard_test_stanza_optdep:0,report:6,repositori:0,repr:6,repres:[4,5],represent:4,representend:5,request:0,requir:[5,6],rerun_from:6,research:[4,6],resolut:2,resolv:[3,6],resolve_inconsist:6,resort:6,resourc:2,result:[5,6],retriev:6,richardpaulhudson:6,rst:0,rule:6,run:[0,1,3,5,6],runtim:6,runtm:6,s:[0,5,6],same:[1,5,6],satisfi:[5,6],satisifi:0,save:[5,6],script:3,scrollto:6,search:[2,6],search_pattern:6,second:6,see:[4,5,6],seen:5,select:6,self:[1,6],sentenc:[5,6],sentences_polar:6,sentiment:2,sentiment_analysi:6,separ:6,seq:6,sequenc:6,sequenti:[5,6],seri:6,server:6,server_kwarg:6,server_timeout:6,set:[0,1,5,6],setattr:6,sever:[4,5,6],shall:6,shape:6,share:6,shell:3,shift:6,shortest:6,should:[0,1,5,6],show:[0,5],simpl:[5,6],simpli:3,simplic:5,sinc:6,singl:[4,6],size:6,slider:[5,6],smallest:6,so:[0,6],social:6,solver:3,some:6,sometim:6,sourc:0,space:6,spaci:[3,6],spacycorefereecoreferenceresolv:5,span:6,speaker:2,speaker_attribut:6,special:[1,5],specif:[0,1],specifi:[1,5,6],speech:6,spinx:0,split:[1,6],spuriou:6,stable_layout:6,stai:0,stand:6,stanford:[2,3],stanford_corenlp:6,stanfordcorenlppipelin:[5,6],stanfordnlp:6,stanza:[0,3,6],start:6,start_idx:6,state:[1,2],statist:6,step:2,still:6,store:6,stori:4,str:[1,6],st
ring:[1,5,6],strongest:6,style:[0,6],substit:6,substition_rul:6,substitut:[5,6],suppli:6,support:[1,2,6],supported_lang:[1,5,6],suppos:5,sure:0,t:[5,6],tag:6,tag_conversion_map:6,target:2,task:[4,5],tensor:6,termin:6,test:0,text:[1,4,5,6],than:[5,6],thei:[4,6],them:[5,6],therefor:5,thi:[1,5,6],thing:5,those:[5,6],though:6,through:5,tibert:[5,6],time:[0,1,5,6],timeout:6,timestep:6,titl:6,token:[1,2],token_classif:6,tool:[4,5],torch:6,tqdm:6,trade:6,train:6,transform:6,trivial:0,troubleshoot:5,tupl:6,turn:6,two:[4,6],txt:5,type:[0,1,6],typevar:6,under:[3,6],undirect:6,unic:6,unif:2,unifi:6,union:6,uniqu:5,unit:6,unknown:6,unsatisfi:5,up:[0,5,6],us:[0,1,2,4,5,6],usag:[2,5,6],usual:[1,5,6],util:[2,5],vader:[5,6],vala:6,valid:[1,5,6],valu:[1,5,6],valueerror:5,variabl:[0,6],vc0bsbliirjq:6,vectori:4,version:6,virtual:3,visual:5,visualis:[4,5],wa:[5,6],wai:[1,6],want:[0,3,5],warn:6,we:0,weblog:6,weight:6,weirdli:6,welcom:0,well:6,when:[0,4,5,6],where:[4,6],which:[5,6],whole:6,why:[5,6],wide:1,window:[5,6],wise:6,wish:6,within:6,without:6,wont:6,wordpiec:6,work:5,would:5,wp_label:6,write:0,write_gexf:6,written:4,yet:6,york:6,you:[0,3,5,6],your:[0,5],yourself:[0,5]},titles:["Contributing","Extending Renard","Welcome to Renard\u2019s documentation!","Installation","Introduction","The 
Pipeline","Reference"],titleterms:{"new":1,The:5,an:5,analysi:[5,6],attribut:[5,6],audienc:4,avail:5,bertcoreferenceresolv:6,bertnamedentityrecogn:6,charact:[5,6],code:0,content:2,contribut:0,conversationalgraphextractor:6,cooccurrencesgraphextractor:6,core:6,corefer:[5,6],corenlp:6,creat:1,custom:5,detect:[5,6],document:2,dynam:5,entiti:[5,6],extend:1,extract:[5,6],graph:[5,6],graphrulescharacterunifi:6,guidelin:0,hypocor:6,indic:2,instal:3,intend:4,introduct:4,manual:3,multilingu:5,naivecharacterunifi:6,name:[5,6],ner:6,nltknamedentityrecogn:6,nltksentimentanalyz:6,nltktoken:6,output:5,overview:5,pip:3,pipelin:[5,6],plot:6,preprocess:[5,6],qualiti:0,quot:[5,6],quotedetector:6,recognit:[5,6],refer:6,renard:[1,2],resolut:[5,6],resourc:6,s:2,segment:5,sentiment:[5,6],spacycorefereecoreferenceresolv:6,speaker:[5,6],stanford:6,state:[5,6],step:[1,5,6],support:5,tabl:2,target:4,token:[5,6],unif:6,us:3,usag:4,util:6,welcom:2}}) \ No newline at end of file