Программирование для лингвистов
Курс “Программирование для лингвистов” (2023/2024)
Курс “Программирование для лингвистов” (2024/2025)
Technical Track of Computer Tools for Linguistic Research (2023/2024)
Technical Track of Computer Tools for Linguistic Research (2024/2025)
Курс “Информационный поиск и извлечение данных” (2023/2024)
Курс “Информационный поиск и извлечение данных” (2024/2025)
Программирование для лингвистов
Index
Index
_
|
A
|
B
|
C
|
D
|
E
|
F
|
G
|
H
|
I
|
K
|
L
|
M
|
N
|
O
|
P
|
Q
|
R
|
S
|
T
|
U
|
V
|
W
|
И
|
П
|
С
|
Т
|
Ч
_
__algo (lab_4_retrieval_w_clustering.main.ClusteringSearchEngine attribute)
__centroid (lab_4_retrieval_w_clustering.main.ClusterDTO attribute)
__clusters (lab_4_retrieval_w_clustering.main.KMeans attribute)
__documents (lab_4_retrieval_w_clustering.main.DocumentVectorDB attribute)
__getitem__() (lab_7_llm.main.TaskDataset method)
,
[1]
__indices (lab_4_retrieval_w_clustering.main.ClusterDTO attribute)
__init__() (config.lab_settings.SFTParams method)
(core_utils.ctlr.article.article.Article method)
,
[1]
(core_utils.ctlr.config_dto.ConfigDTO method)
,
[1]
(core_utils.llm.llm_pipeline.AbstractLLMPipeline method)
,
[1]
(core_utils.llm.llm_pipeline.HFModelLike method)
,
[1]
(core_utils.llm.raw_data_importer.AbstractRawDataImporter method)
,
[1]
(core_utils.llm.raw_data_preprocessor.AbstractRawDataPreprocessor method)
,
[1]
(core_utils.llm.task_evaluator.AbstractTaskEvaluator method)
,
[1]
(lab_3_ann_retriever.main.AdvancedSearchEngine method)
(lab_3_ann_retriever.main.BasicSearchEngine method)
(lab_3_ann_retriever.main.NaiveKDTree method)
(lab_3_ann_retriever.main.Node method)
(lab_3_ann_retriever.main.NodeLike method)
(lab_3_ann_retriever.main.SearchEngine method)
(lab_3_ann_retriever.main.Tokenizer method)
(lab_3_ann_retriever.main.Vectorizer method)
(lab_3_generate_by_ngrams.main.BackOffGenerator method)
(lab_3_generate_by_ngrams.main.BeamSearcher method)
(lab_3_generate_by_ngrams.main.BeamSearchTextGenerator method)
(lab_3_generate_by_ngrams.main.GreedyTextGenerator method)
(lab_3_generate_by_ngrams.main.NGramLanguageModel method)
(lab_3_generate_by_ngrams.main.NGramLanguageModelReader method)
(lab_3_generate_by_ngrams.main.TextProcessor method)
(lab_4_fill_words_by_ngrams.main.Examiner method)
(lab_4_fill_words_by_ngrams.main.GenerationResultDTO method)
(lab_4_fill_words_by_ngrams.main.GeneratorRuleStudent method)
(lab_4_fill_words_by_ngrams.main.GeneratorTypes method)
(lab_4_fill_words_by_ngrams.main.QualityChecker method)
(lab_4_fill_words_by_ngrams.main.TopPGenerator method)
(lab_4_retrieval_w_clustering.main.BM25Vectorizer method)
(lab_4_retrieval_w_clustering.main.ClusterDTO method)
(lab_4_retrieval_w_clustering.main.ClusteringSearchEngine method)
(lab_4_retrieval_w_clustering.main.DocumentVectorDB method)
(lab_4_retrieval_w_clustering.main.KMeans method)
(lab_4_retrieval_w_clustering.main.VectorDBAdvancedSearchEngine method)
(lab_4_retrieval_w_clustering.main.VectorDBEngine method)
(lab_4_retrieval_w_clustering.main.VectorDBSearchEngine method)
(lab_4_retrieval_w_clustering.main.VectorDBTreeSearchEngine method)
(lab_5_scraper.scraper.Config method)
(lab_5_scraper.scraper.Crawler method)
(lab_5_scraper.scraper.CrawlerRecursive method)
(lab_5_scraper.scraper.HTMLParser method)
(lab_7_llm.main.LLMPipeline method)
,
[1]
(lab_7_llm.main.TaskDataset method)
,
[1]
(lab_7_llm.main.TaskEvaluator method)
,
[1]
__len__() (lab_4_retrieval_w_clustering.main.ClusterDTO method)
(lab_7_llm.main.TaskDataset method)
,
[1]
__perplexity (lab_4_fill_words_by_ngrams.main.GenerationResultDTO attribute)
__str__() (lab_4_fill_words_by_ngrams.main.GenerationResultDTO method)
__text (lab_4_fill_words_by_ngrams.main.GenerationResultDTO attribute)
__type (lab_4_fill_words_by_ngrams.main.GenerationResultDTO attribute)
__vectors (lab_4_retrieval_w_clustering.main.DocumentVectorDB attribute)
_abc_impl (core_utils.llm.llm_pipeline.AbstractLLMPipeline attribute)
,
[1]
(core_utils.llm.llm_pipeline.HFModelLike attribute)
,
[1]
(core_utils.llm.raw_data_importer.AbstractRawDataImporter attribute)
,
[1]
(core_utils.llm.raw_data_preprocessor.AbstractRawDataPreprocessor attribute)
,
[1]
(core_utils.llm.task_evaluator.AbstractTaskEvaluator attribute)
,
[1]
(lab_3_ann_retriever.main.Node attribute)
(lab_3_ann_retriever.main.NodeLike attribute)
(lab_7_llm.main.LLMPipeline attribute)
,
[1]
(lab_7_llm.main.RawDataImporter attribute)
,
[1]
(lab_7_llm.main.RawDataPreprocessor attribute)
,
[1]
(lab_7_llm.main.TaskEvaluator attribute)
,
[1]
_avg_doc_len (lab_4_retrieval_w_clustering.main.BM25Vectorizer attribute)
_beam_width (lab_3_generate_by_ngrams.main.BeamSearcher attribute)
(lab_3_generate_by_ngrams.main.BeamSearchTextGenerator attribute)
_calculate_bm25() (lab_4_retrieval_w_clustering.main.BM25Vectorizer method)
_calculate_knn() (lab_3_ann_retriever.main.BasicSearchEngine method)
_calculate_perplexity() (lab_4_fill_words_by_ngrams.main.QualityChecker method)
_calculate_tf_idf() (lab_3_ann_retriever.main.Vectorizer method)
_conllu_info (core_utils.ctlr.article.article.Article attribute)
,
[1]
_content (lab_3_generate_by_ngrams.main.NGramLanguageModelReader attribute)
_corpus (lab_3_ann_retriever.main.Vectorizer attribute)
(lab_4_retrieval_w_clustering.main.BM25Vectorizer attribute)
_data (core_utils.llm.raw_data_preprocessor.AbstractRawDataPreprocessor attribute)
,
[1]
(lab_7_llm.main.RawDataPreprocessor attribute)
_date_to_text() (core_utils.ctlr.article.article.Article method)
,
[1]
_db (lab_4_retrieval_w_clustering.main.ClusteringSearchEngine attribute)
(lab_4_retrieval_w_clustering.main.KMeans attribute)
(lab_4_retrieval_w_clustering.main.VectorDBEngine attribute)
(lab_4_retrieval_w_clustering.main.VectorDBSearchEngine attribute)
_decode() (lab_3_generate_by_ngrams.main.TextProcessor method)
_document_vectors (lab_3_ann_retriever.main.BasicSearchEngine attribute)
_documents (lab_3_ann_retriever.main.BasicSearchEngine attribute)
_dump_documents() (lab_3_ann_retriever.main.BasicSearchEngine method)
_encoded_corpus (lab_3_generate_by_ngrams.main.NGramLanguageModel attribute)
_end_of_word_token (lab_3_generate_by_ngrams.main.TextProcessor attribute)
_engine (lab_4_retrieval_w_clustering.main.VectorDBEngine attribute)
_eow_token (lab_3_generate_by_ngrams.main.NGramLanguageModelReader attribute)
_extract_config_content() (lab_5_scraper.scraper.Config method)
_extract_n_grams() (lab_3_generate_by_ngrams.main.NGramLanguageModel method)
_extract_url() (lab_5_scraper.scraper.Crawler method)
_fill_article_with_meta_information() (lab_5_scraper.scraper.HTMLParser method)
_fill_article_with_text() (lab_5_scraper.scraper.HTMLParser method)
_find_closest() (lab_3_ann_retriever.main.KDTree method)
(lab_3_ann_retriever.main.NaiveKDTree method)
_generator (lab_4_fill_words_by_ngrams.main.GeneratorRuleStudent attribute)
_generator_type (lab_4_fill_words_by_ngrams.main.GeneratorRuleStudent attribute)
_generators (lab_4_fill_words_by_ngrams.main.QualityChecker attribute)
_get_next_token() (lab_3_generate_by_ngrams.main.BackOffGenerator method)
(lab_3_generate_by_ngrams.main.BeamSearchTextGenerator method)
_idf_values (lab_3_ann_retriever.main.Vectorizer attribute)
_index_document() (lab_3_ann_retriever.main.BasicSearchEngine method)
_infer_batch() (lab_7_llm.main.LLMPipeline method)
,
[1]
_is_convergence_reached() (lab_4_retrieval_w_clustering.main.KMeans method)
_is_protocol (core_utils.llm.llm_pipeline.HFModelLike attribute)
,
[1]
(lab_3_ann_retriever.main.Node attribute)
(lab_3_ann_retriever.main.NodeLike attribute)
_json_path (lab_3_generate_by_ngrams.main.NGramLanguageModelReader attribute)
(lab_4_fill_words_by_ngrams.main.Examiner attribute)
_language_model (lab_3_generate_by_ngrams.main.BeamSearchTextGenerator attribute)
(lab_4_fill_words_by_ngrams.main.QualityChecker attribute)
_language_models (lab_3_generate_by_ngrams.main.BackOffGenerator attribute)
_load_documents() (lab_3_ann_retriever.main.BasicSearchEngine method)
_load_from_json() (lab_4_fill_words_by_ngrams.main.Examiner method)
_local_path (core_utils.llm.raw_data_importer.AbstractRawDataImporter attribute)
,
[1]
(lab_7_llm.main.RawDataImporter attribute)
_model (core_utils.llm.llm_pipeline.AbstractLLMPipeline attribute)
,
[1]
(lab_3_generate_by_ngrams.main.BeamSearcher attribute)
(lab_3_generate_by_ngrams.main.GreedyTextGenerator attribute)
(lab_4_fill_words_by_ngrams.main.TopPGenerator attribute)
(lab_7_llm.main.LLMPipeline attribute)
_n_clusters (lab_4_retrieval_w_clustering.main.KMeans attribute)
_n_gram_frequencies (lab_3_generate_by_ngrams.main.NGramLanguageModel attribute)
_n_gram_size (lab_3_generate_by_ngrams.main.NGramLanguageModel attribute)
_p_value (lab_4_fill_words_by_ngrams.main.TopPGenerator attribute)
_postprocess_decoded_text() (lab_3_generate_by_ngrams.main.TextProcessor method)
(lab_4_fill_words_by_ngrams.main.WordProcessor method)
_put() (lab_3_generate_by_ngrams.main.TextProcessor method)
(lab_4_fill_words_by_ngrams.main.WordProcessor method)
_questions_and_answers (lab_4_fill_words_by_ngrams.main.Examiner attribute)
_raw_data (core_utils.llm.raw_data_importer.AbstractRawDataImporter attribute)
,
[1]
(lab_7_llm.main.RawDataImporter attribute)
_remove_stop_words() (lab_3_ann_retriever.main.Tokenizer method)
_root (lab_3_ann_retriever.main.NaiveKDTree attribute)
_stop_words (lab_3_ann_retriever.main.Tokenizer attribute)
_storage (lab_3_generate_by_ngrams.main.TextProcessor attribute)
_text_processor (lab_3_generate_by_ngrams.main.BackOffGenerator attribute)
(lab_3_generate_by_ngrams.main.BeamSearchTextGenerator attribute)
(lab_3_generate_by_ngrams.main.GreedyTextGenerator attribute)
(lab_3_generate_by_ngrams.main.NGramLanguageModelReader attribute)
_token2ind (lab_3_ann_retriever.main.Vectorizer attribute)
_tokenize() (lab_3_generate_by_ngrams.main.TextProcessor method)
(lab_4_fill_words_by_ngrams.main.WordProcessor method)
_tokenizer (lab_3_ann_retriever.main.BasicSearchEngine attribute)
(lab_4_retrieval_w_clustering.main.DocumentVectorDB attribute)
_tree (lab_3_ann_retriever.main.AdvancedSearchEngine attribute)
(lab_3_ann_retriever.main.SearchEngine attribute)
_validate_config_content() (lab_5_scraper.scraper.Config method)
_vectorizer (lab_3_ann_retriever.main.BasicSearchEngine attribute)
(lab_4_retrieval_w_clustering.main.DocumentVectorDB attribute)
_vocabulary (lab_3_ann_retriever.main.Vectorizer attribute)
_word_processor (lab_4_fill_words_by_ngrams.main.QualityChecker attribute)
(lab_4_fill_words_by_ngrams.main.TopPGenerator attribute)
A
AbstractLLMPipeline (class in core_utils.llm.llm_pipeline)
,
[1]
AbstractRawDataImporter (class in core_utils.llm.raw_data_importer)
,
[1]
AbstractRawDataPreprocessor (class in core_utils.llm.raw_data_preprocessor)
,
[1]
AbstractTaskEvaluator (class in core_utils.llm.task_evaluator)
,
[1]
ACCURACY (core_utils.llm.metrics.Metrics attribute)
,
[1]
add_document_index() (lab_4_retrieval_w_clustering.main.ClusterDTO method)
AdvancedSearchEngine (class in lab_3_ann_retriever.main)
analyze() (core_utils.llm.raw_data_preprocessor.AbstractRawDataPreprocessor method)
,
[1]
(lab_7_llm.main.RawDataPreprocessor method)
,
[1]
analyze_model() (core_utils.llm.llm_pipeline.AbstractLLMPipeline method)
,
[1]
(lab_7_llm.main.LLMPipeline method)
,
[1]
Article (class in core_utils.ctlr.article.article)
,
[1]
ArtifactType (class in core_utils.ctlr.article.article)
,
[1]
assess_exam() (lab_4_fill_words_by_ngrams.main.Examiner method)
B
BackOffGenerator (class in lab_3_generate_by_ngrams.main)
BasicSearchEngine (class in lab_3_ann_retriever.main)
batch_size (config.lab_settings.SFTParams attribute)
beam_search (lab_4_fill_words_by_ngrams.main.GeneratorTypes attribute)
beam_searcher (lab_3_generate_by_ngrams.main.BeamSearchTextGenerator attribute)
BeamSearcher (class in lab_3_generate_by_ngrams.main)
BeamSearchTextGenerator (class in lab_3_generate_by_ngrams.main)
BLEU (core_utils.llm.metrics.Metrics attribute)
,
[1]
BM25Vectorizer (class in lab_4_retrieval_w_clustering.main)
build() (lab_3_ann_retriever.main.NaiveKDTree method)
(lab_3_ann_retriever.main.Vectorizer method)
(lab_3_generate_by_ngrams.main.NGramLanguageModel method)
build_vocabulary() (in module lab_2_retrieval_w_bm25.main)
C
calculate_bleu() (in module lab_2_tokenize_by_bpe.main)
calculate_bm25() (in module lab_2_retrieval_w_bm25.main)
calculate_bm25_with_cutoff() (in module lab_2_retrieval_w_bm25.main)
calculate_distance() (in module lab_3_ann_retriever.main)
calculate_frequencies() (in module lab_1_classify_by_unigrams.main)
,
[1]
calculate_idf() (in module lab_2_retrieval_w_bm25.main)
calculate_mse() (in module lab_1_classify_by_unigrams.main)
,
[1]
calculate_precision() (in module lab_2_tokenize_by_bpe.main)
calculate_spearman() (in module lab_2_retrieval_w_bm25.main)
calculate_square_sum() (lab_4_retrieval_w_clustering.main.ClusteringSearchEngine method)
(lab_4_retrieval_w_clustering.main.KMeans method)
calculate_tf() (in module lab_2_retrieval_w_bm25.main)
calculate_tf_idf() (in module lab_2_retrieval_w_bm25.main)
CLEANED (core_utils.ctlr.article.article.ArtifactType attribute)
,
[1]
ClusterDTO (class in lab_4_retrieval_w_clustering.main)
ClusteringSearchEngine (class in lab_4_retrieval_w_clustering.main)
collect_frequencies() (in module lab_2_tokenize_by_bpe.main)
collect_ngrams() (in module lab_2_tokenize_by_bpe.main)
collect_profiles() (in module lab_1_classify_by_unigrams.main)
,
[1]
ColumnNames (class in core_utils.llm.raw_data_preprocessor)
,
[1]
compare_profiles() (in module lab_1_classify_by_unigrams.main)
,
[1]
Config (class in lab_5_scraper.scraper)
config.lab_settings
module
ConfigDTO (class in core_utils.ctlr.config_dto)
,
[1]
CONTEXT (core_utils.llm.raw_data_preprocessor.ColumnNames attribute)
,
[1]
continue_sequence() (lab_3_generate_by_ngrams.main.BeamSearcher method)
core_utils.ctlr.article.article
module
,
[1]
core_utils.ctlr.article.io
module
,
[1]
core_utils.ctlr.config_dto
module
,
[1]
core_utils.llm.llm_pipeline
module
,
[1]
core_utils.llm.metrics
module
,
[1]
core_utils.llm.raw_data_importer
module
,
[1]
core_utils.llm.raw_data_preprocessor
module
,
[1]
core_utils.llm.task_evaluator
module
,
[1]
Corpus (in module lab_4_retrieval_w_clustering.main)
count_tokens_pairs() (in module lab_2_tokenize_by_bpe.main)
Crawler (class in lab_5_scraper.scraper)
CrawlerRecursive (class in lab_5_scraper.scraper)
create_language_profile() (in module lab_1_classify_by_unigrams.main)
,
[1]
D
data (core_utils.llm.raw_data_preprocessor.AbstractRawDataPreprocessor property)
,
[1]
(lab_7_llm.main.TaskDataset property)
,
[1]
date (core_utils.ctlr.article.article.Article attribute)
,
[1]
date_from_meta() (in module core_utils.ctlr.article.article)
,
[1]
decode() (in module lab_2_tokenize_by_bpe.main)
(lab_3_generate_by_ngrams.main.TextProcessor method)
detect_language() (in module lab_1_classify_by_unigrams.main)
,
[1]
detect_language_advanced() (in module lab_1_classify_by_unigrams.main)
,
[1]
device (config.lab_settings.SFTParams attribute)
DocumentVectorDB (class in lab_4_retrieval_w_clustering.main)
E
encode() (in module lab_2_tokenize_by_bpe.main)
(lab_3_generate_by_ngrams.main.TextProcessor method)
encoding (core_utils.ctlr.config_dto.ConfigDTO attribute)
,
[1]
erase_indices() (lab_4_retrieval_w_clustering.main.ClusterDTO method)
Examiner (class in lab_4_fill_words_by_ngrams.main)
F
F1 (core_utils.llm.metrics.Metrics attribute)
,
[1]
fill_from_ngrams() (lab_3_generate_by_ngrams.main.TextProcessor method)
find_articles() (lab_5_scraper.scraper.Crawler method)
(lab_5_scraper.scraper.CrawlerRecursive method)
finetuned_model_path (config.lab_settings.SFTParams attribute)
from_meta() (in module core_utils.ctlr.article.io)
,
[1]
from_raw() (in module core_utils.ctlr.article.io)
,
[1]
G
generate_next_token() (lab_3_generate_by_ngrams.main.NGramLanguageModel method)
GenerationResultDTO (class in lab_4_fill_words_by_ngrams.main)
GeneratorRuleStudent (class in lab_4_fill_words_by_ngrams.main)
GeneratorTypes (class in lab_4_fill_words_by_ngrams.main)
geo_mean() (in module lab_2_tokenize_by_bpe.main)
get_article_id_from_filepath() (in module core_utils.ctlr.article.article)
,
[1]
get_centroid() (lab_4_retrieval_w_clustering.main.ClusterDTO method)
get_cleaned_text() (core_utils.ctlr.article.article.Article method)
,
[1]
get_clusters_info() (lab_4_retrieval_w_clustering.main.KMeans method)
get_conllu_info() (core_utils.ctlr.article.article.Article method)
,
[1]
get_conllu_text() (core_utils.ctlr.article.article.Article method)
,
[1]
get_conversion_generator_type() (lab_4_fill_words_by_ngrams.main.GeneratorTypes method)
get_encoding() (lab_5_scraper.scraper.Config method)
get_end_of_word_token() (lab_3_generate_by_ngrams.main.TextProcessor method)
get_file_path() (core_utils.ctlr.article.article.Article method)
,
[1]
get_generator_type() (lab_4_fill_words_by_ngrams.main.GeneratorRuleStudent method)
get_headers() (lab_5_scraper.scraper.Config method)
get_headless_mode() (lab_5_scraper.scraper.Config method)
get_id() (lab_3_generate_by_ngrams.main.TextProcessor method)
get_indices() (lab_4_retrieval_w_clustering.main.ClusterDTO method)
get_meta() (core_utils.ctlr.article.article.Article method)
,
[1]
get_meta_file_path() (core_utils.ctlr.article.article.Article method)
,
[1]
get_n_gram_size() (lab_3_generate_by_ngrams.main.NGramLanguageModel method)
get_next_token() (lab_3_generate_by_ngrams.main.BeamSearcher method)
get_num_articles() (lab_5_scraper.scraper.Config method)
get_paragraphs() (in module lab_4_retrieval_w_clustering.main)
get_perplexity() (lab_4_fill_words_by_ngrams.main.GenerationResultDTO method)
get_pos_freq() (core_utils.ctlr.article.article.Article method)
,
[1]
get_raw_documents() (lab_4_retrieval_w_clustering.main.DocumentVectorDB method)
get_raw_text() (core_utils.ctlr.article.article.Article method)
,
[1]
get_raw_text_path() (core_utils.ctlr.article.article.Article method)
,
[1]
get_search_urls() (lab_5_scraper.scraper.Crawler method)
get_seed_urls() (lab_5_scraper.scraper.Config method)
get_text() (lab_4_fill_words_by_ngrams.main.GenerationResultDTO method)
get_text_processor() (lab_3_generate_by_ngrams.main.NGramLanguageModelReader method)
get_timeout() (lab_5_scraper.scraper.Config method)
get_token() (lab_3_generate_by_ngrams.main.TextProcessor method)
get_tokenizer() (lab_4_retrieval_w_clustering.main.DocumentVectorDB method)
get_type() (lab_4_fill_words_by_ngrams.main.GenerationResultDTO method)
get_vectorizer() (lab_4_retrieval_w_clustering.main.DocumentVectorDB method)
get_vectors() (lab_4_retrieval_w_clustering.main.DocumentVectorDB method)
get_verify_certificate() (lab_5_scraper.scraper.Config method)
get_vocabulary() (in module lab_2_tokenize_by_bpe.main)
greedy (lab_4_fill_words_by_ngrams.main.GeneratorTypes attribute)
GreedyTextGenerator (class in lab_3_generate_by_ngrams.main)
H
headers (core_utils.ctlr.config_dto.ConfigDTO attribute)
,
[1]
headless_mode (core_utils.ctlr.config_dto.ConfigDTO attribute)
,
[1]
HFModelLike (class in core_utils.llm.llm_pipeline)
,
[1]
HTMLParser (class in lab_5_scraper.scraper)
HYPOTHESIS (core_utils.llm.raw_data_preprocessor.ColumnNames attribute)
,
[1]
I
index_documents() (lab_3_ann_retriever.main.BasicSearchEngine method)
(lab_3_ann_retriever.main.SearchEngine method)
infer() (in module lab_7_llm.service)
,
[1]
(lab_4_retrieval_w_clustering.main.KMeans method)
infer_dataset() (core_utils.llm.llm_pipeline.AbstractLLMPipeline method)
,
[1]
(lab_7_llm.main.LLMPipeline method)
,
[1]
infer_sample() (core_utils.llm.llm_pipeline.AbstractLLMPipeline method)
,
[1]
(lab_7_llm.main.LLMPipeline method)
,
[1]
init_application() (in module lab_7_llm.service)
,
[1]
K
KDTree (class in lab_3_ann_retriever.main)
KMeans (class in lab_4_retrieval_w_clustering.main)
L
lab_1_classify_by_unigrams.main
module
,
[1]
lab_2_retrieval_w_bm25.main
module
lab_2_tokenize_by_bpe.main
module
lab_3_ann_retriever.main
module
lab_3_ann_retriever.start
module
lab_3_generate_by_ngrams.main
module
lab_4_fill_words_by_ngrams.main
module
lab_4_retrieval_w_clustering.main
module
lab_5_scraper.scraper
module
lab_7_llm.main
module
,
[1]
lab_7_llm.service
module
,
[1]
learning_rate (config.lab_settings.SFTParams attribute)
left_node (lab_3_ann_retriever.main.Node attribute)
LLMPipeline (class in lab_7_llm.main)
,
[1]
load() (lab_3_ann_retriever.main.BasicSearchEngine method)
(lab_3_ann_retriever.main.NaiveKDTree method)
(lab_3_ann_retriever.main.Node method)
(lab_3_ann_retriever.main.NodeLike method)
(lab_3_ann_retriever.main.SearchEngine method)
(lab_3_ann_retriever.main.Vectorizer method)
(lab_3_generate_by_ngrams.main.NGramLanguageModelReader method)
load_index() (in module lab_2_retrieval_w_bm25.main)
load_profile() (in module lab_1_classify_by_unigrams.main)
,
[1]
load_vector() (in module lab_3_ann_retriever.main)
load_vocabulary() (in module lab_2_tokenize_by_bpe.main)
M
main() (in module lab_3_ann_retriever.start)
(in module lab_5_scraper.scraper)
make_report() (lab_4_retrieval_w_clustering.main.ClusteringSearchEngine method)
make_request() (in module lab_5_scraper.scraper)
max_fine_tuning_steps (config.lab_settings.SFTParams attribute)
max_length (config.lab_settings.SFTParams attribute)
merge_tokens() (in module lab_2_tokenize_by_bpe.main)
Metrics (class in core_utils.llm.metrics)
,
[1]
module
config.lab_settings
core_utils.ctlr.article.article
,
[1]
core_utils.ctlr.article.io
,
[1]
core_utils.ctlr.config_dto
,
[1]
core_utils.llm.llm_pipeline
,
[1]
core_utils.llm.metrics
,
[1]
core_utils.llm.raw_data_importer
,
[1]
core_utils.llm.raw_data_preprocessor
,
[1]
core_utils.llm.task_evaluator
,
[1]
lab_1_classify_by_unigrams.main
,
[1]
lab_2_retrieval_w_bm25.main
lab_2_tokenize_by_bpe.main
lab_3_ann_retriever.main
lab_3_ann_retriever.start
lab_3_generate_by_ngrams.main
lab_4_fill_words_by_ngrams.main
lab_4_retrieval_w_clustering.main
lab_5_scraper.scraper
lab_7_llm.main
,
[1]
lab_7_llm.service
,
[1]
N
N-граммы
NaiveKDTree (class in lab_3_ann_retriever.main)
NGramLanguageModel (class in lab_3_generate_by_ngrams.main)
NGramLanguageModelReader (class in lab_3_generate_by_ngrams.main)
Node (class in lab_3_ann_retriever.main)
NodeLike (class in lab_3_ann_retriever.main)
O
obtain() (core_utils.llm.raw_data_importer.AbstractRawDataImporter method)
,
[1]
(lab_7_llm.main.RawDataImporter method)
,
[1]
open_files() (in module lab_3_ann_retriever.start)
P
parse() (lab_5_scraper.scraper.HTMLParser method)
payload (lab_3_ann_retriever.main.Node attribute)
PRECISION (core_utils.llm.metrics.Metrics attribute)
,
[1]
PREDICTION (core_utils.llm.raw_data_preprocessor.ColumnNames attribute)
,
[1]
PREMISE (core_utils.llm.raw_data_preprocessor.ColumnNames attribute)
,
[1]
prepare_environment() (in module lab_5_scraper.scraper)
prepare_word() (in module lab_2_tokenize_by_bpe.main)
preprocess_profile() (in module lab_1_classify_by_unigrams.main)
,
[1]
print_report() (in module lab_1_classify_by_unigrams.main)
,
[1]
provide_questions() (lab_4_fill_words_by_ngrams.main.Examiner method)
prune_sequence_candidates() (lab_3_generate_by_ngrams.main.BeamSearcher method)
put_corpus() (lab_4_retrieval_w_clustering.main.DocumentVectorDB method)
Q
QualityChecker (class in lab_4_fill_words_by_ngrams.main)
Query (class in lab_7_llm.service)
,
[1]
query() (lab_3_ann_retriever.main.NaiveKDTree method)
QUESTION (core_utils.llm.raw_data_preprocessor.ColumnNames attribute)
,
[1]
question (lab_7_llm.service.Query attribute)
,
[1]
R
rank_documents() (in module lab_2_retrieval_w_bm25.main)
raw_data (core_utils.llm.raw_data_importer.AbstractRawDataImporter property)
,
[1]
RawDataImporter (class in lab_7_llm.main)
,
[1]
RawDataPreprocessor (class in lab_7_llm.main)
,
[1]
RECALL (core_utils.llm.metrics.Metrics attribute)
,
[1]
remove_stopwords() (in module lab_2_retrieval_w_bm25.main)
retrieve_relevant_documents() (lab_3_ann_retriever.main.BasicSearchEngine method)
(lab_3_ann_retriever.main.SearchEngine method)
(lab_4_retrieval_w_clustering.main.ClusteringSearchEngine method)
(lab_4_retrieval_w_clustering.main.VectorDBEngine method)
(lab_4_retrieval_w_clustering.main.VectorDBSearchEngine method)
retrieve_vectorized() (lab_3_ann_retriever.main.BasicSearchEngine method)
right_node (lab_3_ann_retriever.main.Node attribute)
root() (in module lab_7_llm.service)
,
[1]
ROUGE (core_utils.llm.metrics.Metrics attribute)
,
[1]
run() (core_utils.llm.task_evaluator.AbstractTaskEvaluator method)
,
[1]
(lab_3_generate_by_ngrams.main.BackOffGenerator method)
(lab_3_generate_by_ngrams.main.BeamSearchTextGenerator method)
(lab_3_generate_by_ngrams.main.GreedyTextGenerator method)
(lab_4_fill_words_by_ngrams.main.QualityChecker method)
(lab_4_fill_words_by_ngrams.main.TopPGenerator method)
(lab_7_llm.main.TaskEvaluator method)
,
[1]
run_single_train_iteration() (lab_4_retrieval_w_clustering.main.KMeans method)
S
save() (lab_3_ann_retriever.main.BasicSearchEngine method)
(lab_3_ann_retriever.main.NaiveKDTree method)
(lab_3_ann_retriever.main.Node method)
(lab_3_ann_retriever.main.NodeLike method)
(lab_3_ann_retriever.main.SearchEngine method)
(lab_3_ann_retriever.main.Vectorizer method)
save_index() (in module lab_2_retrieval_w_bm25.main)
save_vector() (in module lab_3_ann_retriever.main)
SearchEngine (class in lab_3_ann_retriever.main)
seed_urls (core_utils.ctlr.config_dto.ConfigDTO attribute)
,
[1]
set_conllu_info() (core_utils.ctlr.article.article.Article method)
,
[1]
set_n_grams() (lab_3_generate_by_ngrams.main.NGramLanguageModel method)
set_new_centroid() (lab_4_retrieval_w_clustering.main.ClusterDTO method)
set_patterns_info() (core_utils.ctlr.article.article.Article method)
,
[1]
set_pos_info() (core_utils.ctlr.article.article.Article method)
,
[1]
set_tokenized_corpus() (lab_4_retrieval_w_clustering.main.BM25Vectorizer method)
SFTParams (class in config.lab_settings)
should_verify_certificate (core_utils.ctlr.config_dto.ConfigDTO attribute)
,
[1]
SOURCE (core_utils.llm.raw_data_preprocessor.ColumnNames attribute)
,
[1]
split_by_sentence() (in module core_utils.ctlr.article.article)
,
[1]
SQUAD (core_utils.llm.metrics.Metrics attribute)
,
[1]
STANZA_CONLLU (core_utils.ctlr.article.article.ArtifactType attribute)
,
[1]
T
take_exam() (lab_4_fill_words_by_ngrams.main.GeneratorRuleStudent method)
TARGET (core_utils.llm.raw_data_preprocessor.ColumnNames attribute)
,
[1]
target_modules (config.lab_settings.SFTParams attribute)
TaskDataset (class in lab_7_llm.main)
,
[1]
TaskEvaluator (class in lab_7_llm.main)
,
[1]
TextProcessor (class in lab_3_generate_by_ngrams.main)
timeout (core_utils.ctlr.config_dto.ConfigDTO attribute)
,
[1]
to() (core_utils.llm.llm_pipeline.HFModelLike method)
,
[1]
to_cleaned() (in module core_utils.ctlr.article.io)
,
[1]
to_meta() (in module core_utils.ctlr.article.io)
,
[1]
to_raw() (in module core_utils.ctlr.article.io)
,
[1]
tokenize() (in module lab_1_classify_by_unigrams.main)
,
[1]
(in module lab_2_retrieval_w_bm25.main)
(lab_3_ann_retriever.main.Tokenizer method)
tokenize_documents() (lab_3_ann_retriever.main.Tokenizer method)
tokenize_word() (in module lab_2_tokenize_by_bpe.main)
TokenizedCorpus (in module lab_4_retrieval_w_clustering.main)
Tokenizer (class in lab_3_ann_retriever.main)
top_p (lab_4_fill_words_by_ngrams.main.GeneratorTypes attribute)
TopPGenerator (class in lab_4_fill_words_by_ngrams.main)
total_articles (core_utils.ctlr.config_dto.ConfigDTO attribute)
,
[1]
train() (in module lab_2_tokenize_by_bpe.main)
(lab_4_retrieval_w_clustering.main.KMeans method)
transform() (core_utils.llm.raw_data_preprocessor.AbstractRawDataPreprocessor method)
,
[1]
(lab_7_llm.main.RawDataPreprocessor method)
,
[1]
U
UDPIPE_CONLLU (core_utils.ctlr.article.article.ArtifactType attribute)
,
[1]
unify_date_format() (lab_5_scraper.scraper.HTMLParser method)
url_pattern (lab_5_scraper.scraper.Crawler attribute)
V
Vector (in module lab_3_ann_retriever.main)
vector (lab_3_ann_retriever.main.Node attribute)
vector2tokens() (lab_3_ann_retriever.main.Vectorizer method)
VectorDBAdvancedSearchEngine (class in lab_4_retrieval_w_clustering.main)
VectorDBEngine (class in lab_4_retrieval_w_clustering.main)
VectorDBSearchEngine (class in lab_4_retrieval_w_clustering.main)
VectorDBTreeSearchEngine (class in lab_4_retrieval_w_clustering.main)
vectorize() (lab_3_ann_retriever.main.Vectorizer method)
(lab_4_retrieval_w_clustering.main.BM25Vectorizer method)
Vectorizer (class in lab_3_ann_retriever.main)
Vocabulary (словарь токенов)
W
WordProcessor (class in lab_4_fill_words_by_ngrams.main)
И
Идентификатор токена
П
Предобработанное слово
С
Слово
Т
Токен
Ч
Частота слова