2025
Pawar, Siddhesh; Arora, Arnav; Kaffee, Lucie-Aimée; Augenstein, Isabelle
Presumed Cultural Identity: How Names Shape LLM Responses Miscellaneous
2025, (arXiv:2502.11995 [cs]).
@misc{pawar_presumed_2025,
title = {Presumed Cultural Identity: How Names Shape {LLM} Responses},
author = {Siddhesh Pawar and Arnav Arora and Lucie-Aimée Kaffee and Isabelle Augenstein},
url = {http://arxiv.org/abs/2502.11995},
doi = {10.48550/arXiv.2502.11995},
eprint = {2502.11995},
eprinttype = {arXiv},
eprintclass = {cs},
year = {2025},
date = {2025-02-01},
urldate = {2025-02-20},
publisher = {arXiv},
abstract = {Names are deeply tied to human identity. They can serve as markers of individuality, cultural heritage, and personal history. However, using names as a core indicator of identity can lead to over-simplification of complex identities. When interacting with LLMs, user names are an important point of information for personalisation. Names can enter chatbot conversations through direct user input (requested by chatbots), as part of task contexts such as CV reviews, or as built-in memory features that store user information for personalisation. We study biases associated with names by measuring cultural presumptions in the responses generated by LLMs when presented with common suggestion-seeking queries, which might involve making assumptions about the user. Our analyses demonstrate strong assumptions about cultural identity associated with names present in LLM generations across multiple cultures. Our work has implications for designing more nuanced personalisation systems that avoid reinforcing stereotypes while maintaining meaningful customisation.},
note = {arXiv:2502.11995 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Choudhary, Tavishi
Political Bias in Large Language Models: A Comparative Analysis of ChatGPT-4, Perplexity, Google Gemini, and Claude Journal Article
In: IEEE Access, vol. 13, pp. 11341–11379, 2025, ISSN: 2169-3536.
@article{choudhary_political_2025,
title = {Political Bias in Large Language Models: A Comparative Analysis of {ChatGPT-4}, {Perplexity}, {Google Gemini}, and {Claude}},
author = {Tavishi Choudhary},
url = {https://ieeexplore.ieee.org/document/10817610/},
doi = {10.1109/ACCESS.2024.3523764},
issn = {2169-3536},
year = {2025},
date = {2025-01-01},
urldate = {2025-02-27},
journal = {IEEE Access},
volume = {13},
pages = {11341--11379},
abstract = {Artificial Intelligence large language models have rapidly gained widespread adoption, sparking discussions on their societal and political impact, especially for political bias and its far-reaching consequences on society and citizens. This study explores the political bias in large language models by conducting a comparative analysis across four popular AI models—ChatGPT-4, Perplexity, Google Gemini, and Claude. This research systematically evaluates their responses to politically charged prompts and questions from the Pew Research Center’s Political Typology Quiz, Political Compass Quiz, and ISideWith Quiz. The findings revealed that ChatGPT-4 and Claude exhibit a liberal bias, Perplexity is more conservative, while Google Gemini adopts more centrist stances based on their training data sets. The presence of such biases underscores the critical need for transparency in AI development and the incorporation of diverse training datasets, regular audits, and user education to mitigate any of these biases. The most significant question surrounding political bias in AI is its consequences, particularly its influence on public discourse, policy-making, and democratic processes. The results of this study advocate for ethical implications for the development of AI models and the need for transparency to build trust and integrity in AI models. Additionally, future research directions have been outlined to explore and address the complex AI bias issue.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
San Martín, Antonio
Optimizing Contextonymic Analysis for Terminological Definition Writing Journal Article
In: Information, vol. 16, no. 4, 2025.
@article{san_martin_optimizing_2025,
title = {Optimizing Contextonymic Analysis for Terminological Definition Writing},
author = {San Martín, Antonio},
url = {https://www.mdpi.com/2078-2489/16/4/257},
doi = {10.3390/info16040257},
year = {2025},
date = {2025-01-01},
journal = {Information},
volume = {16},
number = {4},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Auclair-Ouellet, Noémie; Lavoie, Alexandra; Bédard, Pascale; Barbeau-Morrison, Alexandra; Drouin, Patrick; Tremblay, Pascale
Expansion of the SyllabO+ corpus and database: Words, lemmas, and morphology Journal Article
In: Behavior Research Methods, vol. 57, no. 1, pp. 47, 2025.
@article{auclair-ouellet_expansion_2025,
title = {Expansion of the {SyllabO+} corpus and database: Words, lemmas, and morphology},
author = {Noémie Auclair-Ouellet and Alexandra Lavoie and Pascale Bédard and Alexandra Barbeau-Morrison and Patrick Drouin and Pascale Tremblay},
url = {https://doi.org/10.3758/s13428-024-02582-2},
doi = {10.3758/s13428-024-02582-2},
year = {2025},
date = {2025-01-01},
journal = {Behavior Research Methods},
volume = {57},
number = {1},
pages = {47},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2024
Cahyawijaya, Samuel; Chen, Delong; Bang, Yejin; Khalatbari, Leila; Wilie, Bryan; Ji, Ziwei; Ishii, Etsuko; Fung, Pascale
High-Dimension Human Value Representation in Large Language Models Miscellaneous
2024, (arXiv:2404.07900 [cs]).
@misc{cahyawijaya_high-dimension_2024,
title = {High-Dimension Human Value Representation in Large Language Models},
author = {Samuel Cahyawijaya and Delong Chen and Yejin Bang and Leila Khalatbari and Bryan Wilie and Ziwei Ji and Etsuko Ishii and Pascale Fung},
url = {http://arxiv.org/abs/2404.07900},
eprint = {2404.07900},
eprinttype = {arXiv},
eprintclass = {cs},
year = {2024},
date = {2024-10-01},
urldate = {2024-11-07},
publisher = {arXiv},
abstract = {The widespread application of Large Language Models (LLMs) across various tasks and fields has necessitated the alignment of these models with human values and preferences. Given various approaches of human value alignment, there is an urgent need to understand the scope and nature of human values injected into these LLMs before their deployment and adoption. We propose UniVaR, a high-dimensional neural representation of symbolic human value distributions in LLMs, orthogonal to model architecture and training data. This is a continuous and scalable representation, self-supervised from the value-relevant output of 8 LLMs and evaluated on 15 open-source and commercial LLMs. Through UniVaR, we visualize and explore how LLMs prioritize different values in 25 languages and cultures, shedding light on the complex interplay between human values and language modeling.},
note = {arXiv:2404.07900 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Buyl, Maarten; Rogiers, Alexander; Noels, Sander; Dominguez-Catena, Iris; Heiter, Edith; Romero, Raphael; Johary, Iman; Mara, Alexandru-Cristian; Lijffijt, Jefrey; De Bie, Tijl
Large Language Models Reflect the Ideology of their Creators Miscellaneous
2024, (arXiv:2410.18417 [cs]).
@misc{buyl_large_2024,
title = {Large Language Models Reflect the Ideology of their Creators},
author = {Maarten Buyl and Alexander Rogiers and Sander Noels and Iris Dominguez-Catena and Edith Heiter and Raphael Romero and Iman Johary and Alexandru-Cristian Mara and Jefrey Lijffijt and De Bie, Tijl},
url = {http://arxiv.org/abs/2410.18417},
eprint = {2410.18417},
eprinttype = {arXiv},
eprintclass = {cs},
year = {2024},
date = {2024-10-01},
urldate = {2024-10-28},
publisher = {arXiv},
abstract = {Large language models (LLMs) are trained on vast amounts of data to generate natural language, enabling them to perform tasks like text summarization and question answering. These models have become popular in artificial intelligence (AI) assistants like ChatGPT and already play an influential role in how humans access information. However, the behavior of LLMs varies depending on their design, training, and use. In this paper, we uncover notable diversity in the ideological stance exhibited across different LLMs and languages in which they are accessed. We do this by prompting a diverse panel of popular LLMs to describe a large number of prominent and controversial personalities from recent world history, both in English and in Chinese. By identifying and analyzing moral assessments reflected in the generated descriptions, we find consistent normative differences between how the same LLM responds in Chinese compared to English. Similarly, we identify normative disagreements between Western and non-Western LLMs about prominent actors in geopolitical conflicts. Furthermore, popularly hypothesized disparities in political goals among Western models are reflected in significant normative differences related to inclusion, social inequality, and political scandals. Our results show that the ideological stance of an LLM often reflects the worldview of its creators. This raises important concerns around technological and regulatory efforts with the stated aim of making LLMs ideologically `unbiased', and it poses risks for political instrumentalization.},
note = {arXiv:2410.18417 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
L'Homme, Marie-Claude
Predicative terms, frames and specialized knowledge Miscellaneous
2024.
@misc{marie-claude_lhomme_predicative_2024,
title = {Predicative terms, frames and specialized knowledge},
author = {Marie-Claude L'Homme},
url = {https://www.uibk.ac.at/congress/csft-2024/index.html.en},
year = {2024},
date = {2024-09-01},
address = {Innsbruck University, Innsbruck},
internal-note = {Apparent duplicate of entry lhomme_predicative_2024 (same title, author, date, URL) — verify and keep only one},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
L'Homme, Marie-Claude
Predicative terms, frames and specialized knowledge Miscellaneous
2024.
@misc{lhomme_predicative_2024,
title = {Predicative terms, frames and specialized knowledge},
author = {Marie-Claude L'Homme},
url = {https://www.uibk.ac.at/congress/csft-2024/index.html.en},
year = {2024},
date = {2024-09-01},
address = {Innsbruck University, Innsbruck},
internal-note = {Apparent duplicate of entry marie-claude_lhomme_predicative_2024 (same title, author, date, URL) — verify and keep only one},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Tao, Yan; Viberg, Olga; Baker, Ryan S; Kizilcec, René F
Cultural bias and cultural alignment of large language models Journal Article
In: PNAS Nexus, vol. 3, no. 9, pp. pgae346, 2024, ISSN: 2752-6542.
@article{tao_cultural_2024,
title = {Cultural bias and cultural alignment of large language models},
author = {Yan Tao and Olga Viberg and Ryan S. Baker and René F. Kizilcec},
editor = {Michael Muthukrishna},
url = {https://academic.oup.com/pnasnexus/article/doi/10.1093/pnasnexus/pgae346/7756548},
doi = {10.1093/pnasnexus/pgae346},
issn = {2752-6542},
year = {2024},
date = {2024-09-01},
urldate = {2025-03-06},
journal = {PNAS Nexus},
volume = {3},
number = {9},
pages = {pgae346},
abstract = {Culture fundamentally shapes people’s reasoning, behavior, and communication. As people increasingly use generative artificial intelligence (AI) to expedite and automate personal and professional tasks, cultural values embedded in AI models may bias people’s authentic expression and contribute to the dominance of certain cultures. We conduct a disaggregated evaluation of cultural bias for five widely used large language models (OpenAI’s GPT-4o/4-turbo/4/3.5-turbo/3) by comparing the models’ responses to nationally representative survey data. All models exhibit cultural values resembling English-speaking and Protestant European countries. We test cultural prompting as a control strategy to increase cultural alignment for each country/territory. For later models (GPT-4, 4turbo, 4o), this improves the cultural alignment of the models’ output for 71–81% of countries and territories. We suggest using cultural prompting and ongoing evaluation to reduce cultural bias in the output of generative AI.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Li, Huihan; Jiang, Liwei; Hwang, Jena D.; Kim, Hyunwoo; Santy, Sebastin; Sorensen, Taylor; Lin, Bill Yuchen; Dziri, Nouha; Ren, Xiang; Choi, Yejin
CULTURE-GEN: Revealing Global Cultural Perception in Language Models through Natural Language Prompting Miscellaneous
2024, (arXiv:2404.10199 [cs]).
@misc{li_culture-gen_2024,
title = {{CULTURE-GEN}: Revealing Global Cultural Perception in Language Models through Natural Language Prompting},
author = {Huihan Li and Liwei Jiang and Jena D. Hwang and Hyunwoo Kim and Sebastin Santy and Taylor Sorensen and Bill Yuchen Lin and Nouha Dziri and Xiang Ren and Yejin Choi},
url = {http://arxiv.org/abs/2404.10199},
eprint = {2404.10199},
eprinttype = {arXiv},
eprintclass = {cs},
year = {2024},
date = {2024-08-01},
urldate = {2024-11-07},
publisher = {arXiv},
abstract = {As the utilization of large language models (LLMs) has proliferated worldwide, it is crucial for them to have adequate knowledge and fair representation for diverse global cultures. In this work, we uncover culture perceptions of three SOTA models on 110 countries and regions on 8 culture-related topics through culture-conditioned generations, and extract symbols from these generations that are associated to each culture by the LLM. We discover that culture-conditioned generation consist of linguistic “markers” that distinguish marginalized cultures apart from default cultures. We also discover that LLMs have an uneven degree of diversity in the culture symbols, and that cultures from different geographic regions have different presence in LLMs’ culture-agnostic generation. Our findings promote further research in studying the knowledge and fairness of global culture perception in LLMs. Code and Data can be found here.},
note = {arXiv:2404.10199 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Gagné, Nancy; Casademont, Anna Joan
Enhancing Linguistic Awareness through Microlearning: a Comparative Study of Multilingual French and English Learners of Catalan Miscellaneous
2024.
@misc{gagne_enhancing_2024,
title = {Enhancing Linguistic Awareness through Microlearning: a Comparative Study of Multilingual {French} and {English} Learners of {Catalan}},
author = {Nancy Gagné and Anna Joan Casademont},
year = {2024},
date = {2024-07-01},
address = {London},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Rozado, David
The political preferences of LLMs Journal Article
In: PLOS ONE, vol. 19, no. 7, pp. e0306621, 2024, ISSN: 1932-6203.
@article{rozado_political_2024,
title = {The political preferences of {LLMs}},
author = {David Rozado},
editor = {Tianlin Zhang},
url = {https://dx.plos.org/10.1371/journal.pone.0306621},
doi = {10.1371/journal.pone.0306621},
issn = {1932-6203},
year = {2024},
date = {2024-07-01},
urldate = {2024-10-28},
journal = {PLOS ONE},
volume = {19},
number = {7},
pages = {e0306621},
abstract = {I report here a comprehensive analysis about the political preferences embedded in Large Language Models (LLMs). Namely, I administer 11 political orientation tests, designed to identify the political preferences of the test taker, to 24 state-of-the-art conversational LLMs, both closed and open source. When probed with questions/statements with political connotations, most conversational LLMs tend to generate responses that are diagnosed by most political test instruments as manifesting preferences for left-of-center viewpoints. This does not appear to be the case for five additional base (i.e. foundation) models upon which LLMs optimized for conversation with humans are built. However, the weak performance of the base models at coherently answering the tests’ questions makes this subset of results inconclusive. Finally, I demonstrate that LLMs can be steered towards specific locations in the political spectrum through Supervised Fine-Tuning (SFT) with only modest amounts of politically aligned data, suggesting SFT’s potential to embed political orientation in LLMs. With LLMs beginning to partially displace traditional information sources like search engines and Wikipedia, the societal implications of political biases embedded in LLMs are substantial.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Chang, Yupeng; Wang, Xu; Wang, Jindong; Wu, Yuan; Yang, Linyi; Zhu, Kaijie; Chen, Hao; Yi, Xiaoyuan; Wang, Cunxiang; Wang, Yidong; Ye, Wei; Zhang, Yue; Chang, Yi; Yu, Philip S.; Yang, Qiang; Xie, Xing
A Survey on Evaluation of Large Language Models Journal Article
In: ACM Transactions on Intelligent Systems and Technology, vol. 15, no. 3, pp. 1–45, 2024, ISSN: 2157-6904, 2157-6912.
@article{chang_survey_2024,
title = {A Survey on Evaluation of Large Language Models},
author = {Yupeng Chang and Xu Wang and Jindong Wang and Yuan Wu and Linyi Yang and Kaijie Zhu and Hao Chen and Xiaoyuan Yi and Cunxiang Wang and Yidong Wang and Wei Ye and Yue Zhang and Yi Chang and Philip S. Yu and Qiang Yang and Xing Xie},
url = {https://dl.acm.org/doi/10.1145/3641289},
doi = {10.1145/3641289},
issn = {2157-6904, 2157-6912},
year = {2024},
date = {2024-06-01},
urldate = {2024-11-11},
journal = {ACM Transactions on Intelligent Systems and Technology},
volume = {15},
number = {3},
pages = {1--45},
abstract = {Large language models (LLMs) are gaining increasing popularity in both academia and industry, owing to their unprecedented performance in various applications. As LLMs continue to play a vital role in both research and daily use, their evaluation becomes increasingly critical, not only at the task level, but also at the society level for better understanding of their potential risks. Over the past years, significant efforts have been made to examine LLMs from various perspectives. This paper presents a comprehensive review of these evaluation methods for LLMs, focusing on three key dimensions: what to evaluate, where to evaluate, and how to evaluate. Firstly, we provide an overview from the perspective of evaluation tasks, encompassing general natural language processing tasks, reasoning, medical usage, ethics, education, natural and social sciences, agent applications, and other areas. Secondly, we answer the ‘where’ and ‘how’ questions by diving into the evaluation methods and benchmarks, which serve as crucial components in assessing the performance of LLMs. Then, we summarize the success and failure cases of LLMs in different tasks. Finally, we shed light on several future challenges that lie ahead in LLMs evaluation. Our aim is to offer invaluable insights to researchers in the realm of LLMs evaluation, thereby aiding the development of more proficient LLMs. Our key point is that evaluation should be treated as an essential discipline to better assist the development of LLMs. We consistently maintain the related open-source materials at: https://github.com/MLGroupJLU/LLM-eval-survey},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
@comment{Removed a duplicated fragment of the chang_survey_2024 abstract that the export tool emitted a second time outside any entry (export artifact; the full abstract is preserved in the entry above).}
Marshman, Elizabeth
Translation programs in the age of genAI: Pivotal change, or plus ça change? Proceedings Article
In: Montreal, 2024.
@inproceedings{marshman_translation_2024,
title = {Translation programs in the age of {genAI}: Pivotal change, or plus ça change?},
author = {Elizabeth Marshman},
year = {2024},
date = {2024-06-01},
address = {Montreal},
internal-note = {Missing required booktitle (proceedings title) for @inproceedings — TODO confirm venue and add},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
L'Homme, Marie-Claude
Le DiCoEnviro : découverte et représentation de structures terminologiques dans le domaine de l’environnement Journal Article
In: Repères-Dorif, no. 30, 2024.
@article{lhomme_dicoenviro_2024,
title = {Le {DiCoEnviro} : découverte et représentation de structures terminologiques dans le domaine de l’environnement},
author = {Marie-Claude L'Homme},
url = {https://www.dorif.it/reperes/marie-claude-lhomme-le-dicoenviro-decouverte-et-representation-de-structures-terminologiques-dans-le-domaine-de-lenvironnement/},
year = {2024},
date = {2024-06-01},
journal = {Repères-Dorif},
number = {30},
abstract = {The design of terminology resources in a field such as the environment raises several difficulties and is based on different choices made by their designers. In this article, we address the issue of considering the linguistic aspects of terms (as opposed to the dominant approach in terminology, which focuses on the conceptual level). We describe some theoretical choices and the broad outlines of a method guiding the development of DiCoEnviro, Dictionnaire fondamental de l'environnement (2023). We also suggest ways of uncovering the terminological structure of the field and connecting the linguistic and conceptual levels, based on Frame Semantics (FILLMORE 1982; FILLMORE and BAKER 2010; RUPPENHOFER et al. 2016).
Résumé : La conception de ressources terminologiques dans un domaine comme l’environnement soulève un certain nombre de difficultés et repose sur différents choix faits par leurs concepteurs. Dans le présent article, nous abordons la question de la prise en compte des aspects linguistiques des termes (par opposition à l’approche dominante en terminologie qui se focalise sur le plan conceptuel). Nous décrivons quelques choix théoriques et les grandes lignes d’une méthode guidant l’élaboration du DiCoEnviro, Dictionnaire fondamental de l’environnement (2023). Nous proposons également des pistes afin de mettre au jour la structure terminologique du domaine et de connecter les plans linguistique et conceptuel en nous appuyant sur la Sémantique des cadres (FILLMORE 1982 ; FILLMORE et BAKER 2010 ; RUPPENHOFER et al. 2016).},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
@comment{Removed a duplicated fragment of the lhomme_dicoenviro_2024 abstract that the export tool emitted a second time outside any entry (export artifact; the full abstract is preserved in the entry above).}
Zhong, Qishuai; Yun, Yike; Sun, Aixin
Cultural Value Differences of LLMs: Prompt, Language, and Model Size Miscellaneous
2024, (arXiv:2407.16891 [cs]).
@misc{zhong_cultural_2024,
title = {Cultural Value Differences of {LLMs}: Prompt, Language, and Model Size},
author = {Qishuai Zhong and Yike Yun and Aixin Sun},
url = {http://arxiv.org/abs/2407.16891},
eprint = {2407.16891},
eprinttype = {arXiv},
eprintclass = {cs},
year = {2024},
date = {2024-06-01},
urldate = {2024-09-10},
publisher = {arXiv},
abstract = {Our study aims to identify behavior patterns in cultural values exhibited by large language models (LLMs). The studied variants include question ordering, prompting language, and model size. Our experiments reveal that each tested LLM can efficiently behave with different cultural values. More interestingly: (i) LLMs exhibit relatively consistent cultural values when presented with prompts in a single language. (ii) The prompting language e.g., Chinese or English, can influence the expression of cultural values. The same question can elicit divergent cultural values when the same LLM is queried in a different language. (iii) Differences in sizes of the same model (e.g., Llama2-7B vs 13B vs 70B) have a more significant impact on their demonstrated cultural values than model differences (e.g., Llama2 vs Mixtral). Our experiments reveal that query language and model size of LLM are the main factors resulting in cultural value differences.},
note = {arXiv:2407.16891 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Retzlaff, Niklas
Political Biases of ChatGPT in Different Languages Miscellaneous
2024.
@misc{retzlaff_political_2024,
title = {Political Biases of {ChatGPT} in Different Languages},
author = {Niklas Retzlaff},
url = {https://www.preprints.org/manuscript/202406.1224/v1},
doi = {10.20944/preprints202406.1224.v1},
year = {2024},
date = {2024-06-01},
urldate = {2024-10-28},
abstract = {Given the widespread use of AI language models such as ChatGPT, there is an urgent need to understand their political biases in different languages. This study aims to uncover such biases. To investigate this issue, we applied IDRLabs’ Political Coordinates Test to ChatGPT based on GPT-4 in four languages - English, French, German, and Italian. The results confirm previous studies that identified a left-libertarian bias in ChatGPT. There was remarkable consistency in political orientation across all four languages, although minor variations were found. Differences in ChatGPT response distributions across languages could indicate culturally determined differences in interpretation of political questions. The results highlight the need to diversify and optimize data collection and model training methods to ensure greater fairness and neutrality and minimize the influence of bias. Our study highlights the importance of cultural differences in the processing and interpretation of political information by AI models.},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Kharchenko, Julia; Roosta, Tanya; Chadha, Aman; Shah, Chirag
How Well Do LLMs Represent Values Across Cultures? Empirical Analysis of LLM Responses Based on Hofstede Cultural Dimensions Miscellaneous
2024, (arXiv:2406.14805 [cs]).
@misc{kharchenko_how_2024,
title = {How Well Do {LLMs} Represent Values Across Cultures? Empirical Analysis of {LLM} Responses Based on {Hofstede} Cultural Dimensions},
author = {Julia Kharchenko and Tanya Roosta and Aman Chadha and Chirag Shah},
url = {http://arxiv.org/abs/2406.14805},
eprint = {2406.14805},
eprinttype = {arXiv},
eprintclass = {cs},
year = {2024},
date = {2024-06-01},
urldate = {2024-09-10},
publisher = {arXiv},
abstract = {Large Language Models (LLMs) attempt to imitate human behavior by responding to humans in a way that pleases them, including by adhering to their values. However, humans come from diverse cultures with different values. It is critical to understand whether LLMs showcase different values to the user based on the stereotypical values of a user’s known country. We prompt different LLMs with a series of advice requests based on 5 Hofstede Cultural Dimensions – a quantifiable way of representing the values of a country. Throughout each prompt, we incorporate personas representing 36 different countries and, separately, languages predominantly tied to each country to analyze the consistency in the LLMs’ cultural understanding. Through our analysis of the responses, we found that LLMs can differentiate between one side of a value and another, as well as understand that countries have differing values, but will not always uphold the values when giving advice, and fail to understand the need to answer differently based on different cultural values. Rooted in these findings, we present recommendations for training value-aligned and culturally sensitive LLMs. More importantly, the methodology and the framework developed here can help further understand and mitigate culture and language alignment issues with LLMs.},
note = {arXiv:2406.14805 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Gazeau, Avril; Lareau, François
Flexible Lexicalization in Rule-based Text Realization Proceedings Article
In: Calzolari, Nicoletta; Kan, Min-Yen; Hoste, Veronique; Lenci, Alessandro; Sakti, Sakriani; Xue, Nianwen (Ed.): Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pp. 7366–7376, ELRA and ICCL, Torino, Italia, 2024.
@inproceedings{gazeau_flexible_2024,
title = {Flexible Lexicalization in Rule-based Text Realization},
author = {Avril Gazeau and François Lareau},
editor = {Nicoletta Calzolari and Min-Yen Kan and Veronique Hoste and Alessandro Lenci and Sakriani Sakti and Nianwen Xue},
url = {https://aclanthology.org/2024.lrec-main.649},
year = {2024},
date = {2024-05-01},
urldate = {2024-05-31},
booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
pages = {7366--7376},
publisher = {ELRA and ICCL},
address = {Torino, Italia},
abstract = {GenDR is a text realizer that takes as input a graph-based semantic representation and outputs the corresponding syntactic dependency trees. One of the tasks in this transduction is lexicalization, i.e., choosing the right lexical units to express a given semanteme. To do so, GenDR uses a semantic dictionary that maps semantemes to corresponding lexical units in a given language. This study aims to develop a flexible lexicalization module to automatically build a rich semantic dictionary for French. To achieve this, we tried two methods. The first one consisted in extracting information from the French Lexical Network, a large-scale French lexical resource, and adapting it to GenDR. The second one was to test a contextual neural language model's ability to generate potential additional lexicalizations. The first method significantly broadened the coverage of GenDR, while the additional lexicalizations produced by the language model turned out to be of limited use, which brings us to the conclusion that it is not suited to perform the task we've asked from it.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Liu, Li; Lareau, François
Assessing BERT's sensitivity to idiomaticity Proceedings Article
In: Bhatia, Archna; Bouma, Gosse; Dogruoz, A. Seza; Evang, Kilian; Garcia, Marcos; Giouli, Voula; Han, Lifeng; Nivre, Joakim; Rademaker, Alexandre (Ed.): Proceedings of the Joint Workshop on Multiword Expressions and Universal Dependencies (MWE-UD) @ LREC-COLING 2024, pp. 14–23, ELRA and ICCL, Torino, Italia, 2024.
@inproceedings{liu_assessing_2024,
title = {Assessing BERT's sensitivity to idiomaticity},
author = {Li Liu and François Lareau},
editor = {Archna Bhatia and Gosse Bouma and A. Seza Dogruoz and Kilian Evang and Marcos Garcia and Voula Giouli and Lifeng Han and Joakim Nivre and Alexandre Rademaker},
url = {https://aclanthology.org/2024.mwe-1.4},
year = {2024},
date = {2024-05-01},
urldate = {2024-05-31},
booktitle = {Proceedings of the Joint Workshop on Multiword Expressions and Universal Dependencies (MWE-UD) @ LREC-COLING 2024},
pages = {14--23},
publisher = {ELRA and ICCL},
address = {Torino, Italia},
abstract = {BERT-like language models have been demonstrated to capture the idiomatic meaning of multiword expressions. Linguists have also shown that idioms have varying degrees of idiomaticity. In this paper, we assess CamemBERT's sensitivity to the degree of idiomaticity within idioms, as well as the dependency of this sensitivity on part of speech and idiom length. We used a demasking task on tokens from 3127 idioms and 22551 tokens corresponding to simple lexemes taken from the French Lexical Network (LN-fr), and observed that CamemBERT performs distinctly on tokens embedded within idioms compared to simple ones. When demasking tokens within idioms, the model is not proficient in discerning their level of idiomaticity. Moreover, regardless of idiomaticity, CamemBERT excels at handling function words. The length of idioms also impacts CamemBERT's performance to a certain extent. The last two observations partly explain the difference between the model's performance on idioms versus simple lexemes. We conclude that the model treats idioms differently from simple lexemes, but that it does not capture the difference in compositionality between subclasses of idioms.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Durmus, Esin; Nguyen, Karina; Liao, Thomas I.; Schiefer, Nicholas; Askell, Amanda; Bakhtin, Anton; Chen, Carol; Hatfield-Dodds, Zac; Hernandez, Danny; Joseph, Nicholas; Lovitt, Liane; McCandlish, Sam; Sikder, Orowa; Tamkin, Alex; Thamkul, Janel; Kaplan, Jared; Clark, Jack; Ganguli, Deep
Towards Measuring the Representation of Subjective Global Opinions in Language Models Miscellaneous
2024, (arXiv:2306.16388 [cs]).
@misc{durmus_towards_2024,
title = {Towards Measuring the Representation of Subjective Global Opinions in Language Models},
author = {Esin Durmus and Karina Nguyen and Thomas I. Liao and Nicholas Schiefer and Amanda Askell and Anton Bakhtin and Carol Chen and Zac Hatfield-Dodds and Danny Hernandez and Nicholas Joseph and Liane Lovitt and Sam McCandlish and Orowa Sikder and Alex Tamkin and Janel Thamkul and Jared Kaplan and Jack Clark and Deep Ganguli},
url = {http://arxiv.org/abs/2306.16388},
doi = {10.48550/arXiv.2306.16388},
eprint = {2306.16388},
archiveprefix = {arXiv},
year = {2024},
date = {2024-04-01},
urldate = {2025-02-22},
publisher = {arXiv},
abstract = {Large language models (LLMs) may not equitably represent diverse global perspectives on societal issues. In this paper, we develop a quantitative framework to evaluate whose opinions model-generated responses are more similar to. We first build a dataset, GlobalOpinionQA, comprised of questions and answers from cross-national surveys designed to capture diverse opinions on global issues across different countries. Next, we define a metric that quantifies the similarity between LLM-generated survey responses and human responses, conditioned on country. With our framework, we run three experiments on an LLM trained to be helpful, honest, and harmless with Constitutional AI. By default, LLM responses tend to be more similar to the opinions of certain populations, such as those from the USA, and some European and South American countries, highlighting the potential for biases. When we prompt the model to consider a particular country’s perspective, responses shift to be more similar to the opinions of the prompted populations, but can reflect harmful cultural stereotypes. When we translate GlobalOpinionQA questions to a target language, the model’s responses do not necessarily become the most similar to the opinions of speakers of those languages. We release our dataset for others to use and build on.2 We also provide an interactive visualization at https://llmglobalvalues.anthropic.com.},
note = {arXiv:2306.16388 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
L'Homme, Marie-Claude
Du verbe à l’unité prédicative en terminologie Miscellaneous
2024.
@misc{lhomme_du_2024,
  title     = {Du verbe à l’unité prédicative en terminologie},
  author    = {Marie-Claude L'Homme},
  year      = {2024},
  date      = {2024-04-01},
  address   = {Pise, Italie},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc}
}
Frassi, Paolo; Pessot, Matteo; L'Homme, Marie-Claude
Les verbes phraséologiques et leur traitement terminographique. Quelques exemples du domaine de la mode Miscellaneous
2024.
@misc{frassi_verbes_2024,
  title     = {Les verbes phraséologiques et leur traitement terminographique. Quelques exemples du domaine de la mode},
  author    = {Paolo Frassi and Matteo Pessot and Marie-Claude L'Homme},
  year      = {2024},
  date      = {2024-04-01},
  address   = {Pise, Italie},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc}
}
Alfetlawi, Anwar Ghanim; Marshman, Elizabeth
Can generative AI help users overcome classic challenges for MT? A pilot study Proceedings Article
In: Doha, Qatar, 2024.
@inproceedings{alfetlawi_can_2024,
title = {Can generative AI help users overcome classic challenges for MT? A pilot study},
author = {Anwar Ghanim Alfetlawi and Elizabeth Marshman},
year = {2024},
date = {2024-02-01},
address = {Doha, Qatar},
internal-note = {NOTE(review): required booktitle field is missing for @inproceedings — supply the proceedings title; BibTeX will warn on this entry},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Joan Casademont, Anna
L'acquisition des langues additionnelles : langues minoritaires et ressources pédagogiques Miscellaneous
2024.
@misc{joan_casademont_acquisition_2024,
title = {L'acquisition des langues additionnelles : langues minoritaires et ressources pédagogiques},
author = {Joan Casademont, Anna},
year = {2024},
date = {2024-02-01},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Choenni, Rochelle; Lauscher, Anne; Shutova, Ekaterina
The Echoes of Multilinguality: Tracing Cultural Value Shifts during LM Fine-tuning Proceedings Article
In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 15042–15058, Association for Computational Linguistics, Bangkok, Thailand, 2024.
@inproceedings{choenni_echoes_2024,
title = {The Echoes of Multilinguality: Tracing Cultural Value Shifts during LM Fine-tuning},
author = {Rochelle Choenni and Anne Lauscher and Ekaterina Shutova},
url = {https://aclanthology.org/2024.acl-long.803/},
doi = {10.18653/v1/2024.acl-long.803},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
pages = {15042--15058},
publisher = {Association for Computational Linguistics},
address = {Bangkok, Thailand},
abstract = {Texts written in different languages reflect different culturally-dependent beliefs of their writers. Thus, we expect multilingual LMs (MLMs), that are jointly trained on a concatenation of text in multiple languages, to encode different cultural values for each language. Yet, as the ‘multilinguality’ of these LMs is driven by cross-lingual sharing, we also have reason to belief that cultural values bleed over from one language into another. This limits the use of MLMs in practice, as apart from being proficient in generating text in multiple languages, creating language technology that can serve a community also requires the output of LMs to be sensitive to their biases (Naous et al., 2023). Yet, little is known about how cultural values emerge and evolve in MLMs (Hershcovich et al., 2022a). We are the first to study how languages can exert influence on the cultural values encoded for different test languages, by studying how such values are revised during fine-tuning. Focusing on the finetuning stage allows us to study the interplay between value shifts when exposed to new linguistic experience from different data sources and languages. Lastly, we use a training data attribution method to find patterns in the finetuning examples, and the languages that they come from, that tend to instigate value shifts.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
AlKhamissi, Badr; ElNokrashy, Muhammad; Alkhamissi, Mai; Diab, Mona
Investigating Cultural Alignment of Large Language Models Proceedings Article
In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 12404–12422, Association for Computational Linguistics, Bangkok, Thailand, 2024.
@inproceedings{alkhamissi_investigating_2024,
title = {Investigating Cultural Alignment of Large Language Models},
author = {Badr AlKhamissi and Muhammad ElNokrashy and Mai Alkhamissi and Mona Diab},
url = {https://aclanthology.org/2024.acl-long.671},
doi = {10.18653/v1/2024.acl-long.671},
year = {2024},
date = {2024-01-01},
urldate = {2025-02-27},
booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
pages = {12404--12422},
publisher = {Association for Computational Linguistics},
address = {Bangkok, Thailand},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Agarwal, Utkarsh; Tanmay, Kumar; Khandelwal, Aditi; Choudhury, Monojit
Ethical Reasoning and Moral Value Alignment of LLMs Depend on the Language We Prompt Them in Proceedings Article
In: Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pp. 6330–6340, ELRA and ICCL, Torino, Italy, 2024.
@inproceedings{agarwal_ethical_2024,
title = {Ethical Reasoning and Moral Value Alignment of LLMs Depend on the Language We Prompt Them in},
author = {Utkarsh Agarwal and Kumar Tanmay and Aditi Khandelwal and Monojit Choudhury},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
pages = {6330--6340},
publisher = {ELRA and ICCL},
address = {Torino, Italy},
abstract = {Ethical reasoning is a crucial skill for Large Language Models (LLMs). However, moral values are not universal, but rather influenced by language and culture. This paper explores how three prominent LLMs – GPT-4, ChatGPT, and Llama2-70B-Chat – perform ethical reasoning in different languages and if their moral judgement depend on the language in which they are prompted. We extend the study of ethical reasoning of LLMs by Rao et al. (2023) to a multilingual setup following their framework of probing LLMs with ethical dilemmas and policies from three branches of normative ethics: deontology, virtue, and consequentialism. We experiment with six languages: English, Spanish, Russian, Chinese, Hindi, and Swahili. We find that GPT-4 is the most consistent and unbiased ethical reasoner across languages, while ChatGPT and Llama2-70B-Chat show significant moral value bias when we move to languages other than English. Interestingly, the nature of this bias significantly vary across languages for all LLMs, including GPT-4.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Beck, Tilman; Schuff, Hendrik; Lauscher, Anne; Gurevych, Iryna
Deconstructing the Effect of Sociodemographic Prompting Proceedings Article
In: Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics, pp. 2589–2615, Association for Computational Linguistics, St. Julian’s, Malta, 2024.
@inproceedings{beck_deconstructing_2024,
title = {Deconstructing the Effect of Sociodemographic Prompting},
author = {Tilman Beck and Hendrik Schuff and Anne Lauscher and Iryna Gurevych},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
pages = {2589--2615},
publisher = {Association for Computational Linguistics},
address = {St. Julian’s, Malta},
abstract = {Annotators’ sociodemographic backgrounds (i.e., the individual compositions of their gender, age, educational background, etc.) have a strong impact on their decisions when working on subjective NLP tasks, such as toxic language detection. Often, heterogeneous backgrounds result in high disagreements. To model this variation, recent work has explored sociodemographic prompting, a technique, which steers the output of prompt-based models towards answers that humans with specific sociodemographic profiles would give. However, the available NLP literature disagrees on the efficacy of this technique — it remains unclear for which tasks and scenarios it can help, and the role of the individual factors in sociodemographic prompting is still unexplored. We address this research gap by presenting the largest and most comprehensive study of sociodemographic prompting today. We use it to analyze its influence on model sensitivity, performance and robustness across seven datasets and six instruction-tuned model families. We show that sociodemographic information affects model predictions and can be beneficial for improving zero-shot learning in subjective NLP tasks.However, its outcomes largely vary for different model types, sizes, and datasets, and are subject to large variance with regards to prompt formulations. Most importantly, our results show that sociodemographic prompting should be used with care when used for data annotation or studying LLM alignment.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
San Martín, Antonio
What Generative Artificial Intelligence Means for Terminological Definitions Proceedings Article
In: Proceedings of the 3rd International Conference on Multilingual Digital Terminology Today (MDTT 2024), CEUR-WS, Granada, 2024, (arXiv:2402.16139 [cs]).
@inproceedings{san_martin_what_2024,
title = {What Generative Artificial Intelligence Means for Terminological Definitions},
author = {San Martín, Antonio},
url = {https://ceur-ws.org/Vol-3703/paper1.pdf},
year = {2024},
date = {2024-01-01},
urldate = {2024-03-26},
booktitle = {Proceedings of the 3rd International Conference on Multilingual Digital Terminology Today (MDTT 2024)},
publisher = {CEUR-WS},
address = {Granada},
abstract = {This paper examines the impact of Generative Artificial Intelligence (GenAI) on the creation and consumption of terminological definitions. GenAI tools like ChatGPT present a mix of benefits and drawbacks compared to traditional terminological resources. ChatGPT excels in providing context-specific meanings in an interactive and customized fashion but faces challenges with accuracy. Terminological definitions in recognized resources will likely survive because of their reliability. From the point of view of the terminologist, tools like ChatGPT enable AI-assisted terminography, including post-editing terminography, as an approach blending AI efficiency with human expertise for faster definition creation.},
note = {arXiv:2402.16139 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Adilazuarda, Muhammad Farid; Mukherjee, Sagnik; Lavania, Pradhyumna; Singh, Siddhant Shivdutt; Aji, Alham Fikri; O’Neill, Jacki; Modi, Ashutosh; Choudhury, Monojit
Towards Measuring and Modeling “Culture” in LLMs: A Survey Proceedings Article
In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pp. 15763–15784, Association for Computational Linguistics, Miami, Florida, USA, 2024.
@inproceedings{adilazuarda_towards_2024,
title = {Towards Measuring and Modeling “Culture” in LLMs: A Survey},
author = {Muhammad Farid Adilazuarda and Sagnik Mukherjee and Pradhyumna Lavania and Siddhant Shivdutt Singh and Alham Fikri Aji and Jacki O’Neill and Ashutosh Modi and Monojit Choudhury},
url = {https://aclanthology.org/2024.emnlp-main.882},
doi = {10.18653/v1/2024.emnlp-main.882},
year = {2024},
date = {2024-01-01},
urldate = {2025-02-27},
booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
pages = {15763--15784},
publisher = {Association for Computational Linguistics},
address = {Miami, Florida, USA},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
L'Homme, Marie-Claude
Managing polysemy in terminological resources Journal Article
In: Terminology, vol. 30, no. 2, 2024, ISSN: 0929-9971.
@article{marie-claude_lhomme_managing_2024,
title = {Managing polysemy in terminological resources},
author = {Marie-Claude L'Homme},
url = {https://benjamins.com/catalog/term.22017.lho},
doi = {10.1075/term.22017.lho},
issn = {0929-9971},
year = {2024},
date = {2024-01-01},
journal = {Terminology},
volume = {30},
number = {2},
abstract = {Polysemy, even when it is considered within specialized domains, is a recurrent phenomenon and the topic is debated from time to time in terminology literature. Part of this literature still advocates ways to prevent polysemy. Another portion recognizes the prevalence of polysemy, especially in specialized corpora, but considers it from the perspective of other phenomena, such as ambiguity, indeterminacy, categorization or variation. Although the number of perspectives on meaning have increased over the years, the treatment of polysemy in terminological resources is still unsatisfactory. This article first shows that polysemy is an integral part of specialized communication and that there are different kinds of domain-specific polysemy. Then, it reviews selected perspectives that have been taken on polysemy in terminology literature. The treatment of 45 polysemous lexical items in four specialized resources is then analysed. Finally, different methods based on lexical semantics are proposed to account for polysemy in terminological resources.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Mel'čuk, Igor Aleksandrovic
Nepričësannye mysli po povodu “Akademičeskoj grammatiki sovremennogo russkogo jazyka”: na primere čislenno-imennyx slovosočetanij. — Des pensées mal peignées au sujet de la “Grammaire académique du russe moderne”: l’exemple des syntagmes NUMÉRAL + NOM Journal Article
In: Russian Linguistics, vol. 48, no. 7, pp. 1–21, 2024.
@article{melcuk_nepricesannye_2024,
title = {Nepričësannye mysli po povodu “Akademičeskoj grammatiki sovremennogo russkogo jazyka”: na primere čislenno-imennyx slovosočetanij. — Des pensées mal peignées au sujet de la “Grammaire académique du russe moderne”: l’exemple des syntagmes NUMÉRAL + NOM},
author = {Igor Aleksandrovic Mel'čuk},
year = {2024},
date = {2024-01-01},
journal = {Russian Linguistics},
volume = {48},
number = {7},
pages = {1--21},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
L'Homme, Marie-Claude
Frame Semantics and domain-specific resources Book Section
In: Durán-Muñoz; Jiménez-Navarro, E. L. (Ed.): Exploring the Language of Adventure Tourism: A Corpus-assisted Approach, pp. 159–184, Peter Lang, 2024, ISBN: 978-3-631-88677-9.
@incollection{lhomme_frame_2024,
title = {Frame Semantics and domain-specific resources},
author = {Marie-Claude L'Homme},
editor = {Durán-Muñoz and Jiménez-Navarro, E. L.},
url = {https://www.peterlang.com/document/1354173},
isbn = {978-3-631-88677-9},
year = {2024},
date = {2024-01-01},
booktitle = {Exploring the Language of Adventure Tourism: A Corpus-assisted Approach},
pages = {159--184},
publisher = {Peter Lang},
abstract = {Designers of domain-specific resources have access to different methods and tools to identify terms and collect information from specialised texts as well as to various models to describe terms and represent relations between them. These methods and models (and the theoretical frameworks on which they are based) can raise questions about the linguistic content of resources, four of which are examined in this chapter: (1) kinds of terms to be taken into consideration, (2) the linguistic behaviour of terms, (3) capturing semantically related terms in ways that are meaningful to users of resources, and (4) connecting linguistic behaviour to knowledge. In this chapter, answers provided by Frame Semantics (Fillmore, 1976, 1982; Fillmore & Baker, 2010) and FrameNet (2023; Ruppenhofer et al., 2016) are examined as well as concrete implementations in two resources, namely DicoAdventure (2023), a resource that records terms in the field of adventure tourism, and DiCoEnviro (2022), a resource that contains environment terms.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
L'Homme, Marie-Claude
Polysemy and Representation of Meaning in Terminology Book Section
In: Plaza, S Molina; Maroto, N (Ed.): Aspects of Cognitive Terminology Studies. Theoretical Considerations and the Role of Metaphor in Terminology, vol. 55, pp. 73–94, De Gruyter, 2024, ISBN: 978-3-11-107314-9.
@incollection{lhomme_polysemy_2024,
title = {Polysemy and Representation of Meaning in Terminology},
author = {Marie-Claude L'Homme},
editor = {S Molina Plaza and N Maroto},
url = {https://www.degruyter.com/document/doi/10.1515/9783111073149-004/html},
isbn = {978-3-11-107314-9},
year = {2024},
date = {2024-01-01},
booktitle = {Aspects of Cognitive Terminology Studies. Theoretical Considerations and the Role of Metaphor in Terminology},
volume = {55},
pages = {73--94},
publisher = {De Gruyter},
series = {Applications of Cognitive Linguistics [ACL]},
abstract = {Even if polysemy is an important phenomenon in domain-specific corpora and specialized communication, terminology literature seldom offers ways to explain or handle it efficiently. The reliance on domain delimitation to distinguish meanings, and the recording a high number of multi-word terms in terminological resources contribute to give the impression that polysemy is a marginal phenomenon in specialized domains. Recent work recognizes the importance and prevalence of polysemy, but often considers it from the perspective of other phenomena, such as ambiguity, perspectivization or variation. This chapter argues that polysemy is an integral part of specialized communication and shows that it manifests itself in different forms. It then suggests a method for representing specialized meanings (inspired by Frame Semantics), which consists in situating specific meanings in broad conceptual scenarios.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Michot, Sandrine; Tremblay, Ophélie
Un carré, est-ce toujours un carré? : pistes pour travailler le lexique mathématique au primaire. Proceedings Article
In: Actes du colloque du GDM 2023, 2024.
@inproceedings{michot_carre_2024,
  title     = {Un carré, est-ce toujours un carré? : pistes pour travailler le lexique mathématique au primaire.},
  author    = {Sandrine Michot and Ophélie Tremblay},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Actes du colloque du GDM 2023},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Anctil, Dominic; Proulx, Caroline; Tremblay, Ophélie; Gagné, Andréanne
Repenser l’enseignement du vocabulaire à la maternelle en milieu défavorisé par une approche collaborative : quelles données et quelle portée pour les résultats ? Book Section
In: Recherche en didactique et en formation des enseignants, Presses universitaires de louvain, Louvain-La-Neuve, Belgique, 2024.
@incollection{anctil_repenser_2024,
title = {Repenser l’enseignement du vocabulaire à la maternelle en milieu défavorisé par une approche collaborative : quelles données et quelle portée pour les résultats ?},
author = {Dominic Anctil and Caroline Proulx and Ophélie Tremblay and Andréanne Gagné},
year = {2024},
date = {2024-01-01},
booktitle = {Recherche en didactique et en formation des enseignants},
publisher = {Presses universitaires de Louvain},
address = {Louvain-La-Neuve, Belgique},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Berthiaume, Rachel; Anctil, Dominic
Les effets d’un dispositif d’enseignement du vocabulaire pluridimensionnel et multimodal sur les connaissances lexicales d’élèves de la 4e année du primaire Journal Article
In: Lidil, vol. 62, 2024.
@article{berthiaume_effets_2024,
  title     = {Les effets d’un dispositif d’enseignement du vocabulaire pluridimensionnel et multimodal sur les connaissances lexicales d’élèves de la 4e année du primaire},
  author    = {Rachel Berthiaume and Dominic Anctil},
  url       = {http://journals.openedition.org/lidil/8502},
  year      = {2024},
  date      = {2024-01-01},
  journal   = {Lidil},
  volume    = {62},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Proulx, Caroline; Anctil, Dominic
Chronique Au fil des mots : Les dictionnaires : des alliés tout au long du processus d’écriture Journal Article
In: Vivre le primaire, vol. 37, no. 1, pp. 9–12, 2024.
@article{proulx_chronique_2024,
title = {Chronique Au fil des mots : Les dictionnaires : des alliés tout au long du processus d’écriture},
author = {Caroline Proulx and Dominic Anctil},
year = {2024},
date = {2024-01-01},
journal = {Vivre le primaire},
volume = {37},
number = {1},
pages = {9--12},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Anctil, Dominic; Proulx, Caroline
Chronique Au fil des mots : Quelques idées pour rendre votre classe plus lexicale Journal Article
In: Vivre le primaire, vol. 37, no. 3, pp. 9–12, 2024.
@article{anctil_chronique_2024,
title = {Chronique Au fil des mots : Quelques idées pour rendre votre classe plus lexicale},
author = {Dominic Anctil and Caroline Proulx},
year = {2024},
date = {2024-01-01},
journal = {Vivre le primaire},
volume = {37},
number = {3},
pages = {9--12},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Bouebdelli, Aya; Anctil, Dominic
Consolider le vocabulaire à l’éducation préscolaire en milieu défavorisé : la recherche collaborative comme moteur de changement de pratiques Journal Article
In: Action didactique, vol. 7, no. 1, pp. 64–91, 2024.
@article{bouebdelli_consolider_2024,
title = {Consolider le vocabulaire à l’éducation préscolaire en milieu défavorisé : la recherche collaborative comme moteur de changement de pratiques},
author = {Aya Bouebdelli and Dominic Anctil},
url = {https://www.asjp.cerist.dz/en/article/252120},
year = {2024},
date = {2024-01-01},
journal = {Action didactique},
volume = {7},
number = {1},
pages = {64--91},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Anctil, Dominic; Bouebdelli, Aya
Développer le vocabulaire à l’éducation préscolaire à partir de la littérature jeunesse : quelle place pour la multimodalité dans les activités que proposent les enseignantes ? Journal Article
In: Multimodalité(s), no. 19, pp. 221–251, 2024.
@article{anctil_developper_2024,
title = {Développer le vocabulaire à l’éducation préscolaire à partir de la littérature jeunesse : quelle place pour la multimodalité dans les activités que proposent les enseignantes ?},
author = {Dominic Anctil and Aya Bouebdelli},
doi = {10.7202/1112436ar},
year = {2024},
date = {2024-01-01},
journal = {Multimodalité(s)},
number = {19},
pages = {221--251},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Mc Kinley, Sarah Jane; Anctil, Dominic
Former à l’enseignement du vocabulaire à la maternelle par une approche collaborative : quelles traces de changement des conceptions et des pratiques des enseignantes ? Journal Article
In: Action didactique, vol. 7, no. 1, pp. 114–136, 2024.
@article{mc_kinley_former_2024,
title = {Former à l’enseignement du vocabulaire à la maternelle par une approche collaborative : quelles traces de changement des conceptions et des pratiques des enseignantes ?},
author = {Mc Kinley, Sarah Jane and Dominic Anctil},
url = {https://www.asjp.cerist.dz/en/article/252122},
year = {2024},
date = {2024-01-01},
journal = {Action didactique},
volume = {7},
number = {1},
pages = {114--136},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ftita, Amel; Anctil, Dominic; Cavalla, Cristelle
Introduction Journal Article
In: Action didactique, revue internationale en didactique du français, vol. 7, no. 1, pp. 8–20, 2024.
@article{ftita_introduction_2024,
title = {Introduction},
author = {Amel Ftita and Dominic Anctil and Cristelle Cavalla},
url = {https://www.asjp.cerist.dz/en/article/252117},
year = {2024},
date = {2024-01-01},
journal = {Action didactique, revue internationale en didactique du français},
volume = {7},
number = {1},
pages = {8--20},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Anctil, Dominic; Sauvageau, Claudine
Effets d’une recherche collaborative sur les pratiques d’enseignement de vocabulaire d’enseignantes de français de 4e et 5e secondaire intervenant auprès d’élèves éprouvant des difficultés langagières Journal Article
In: Action didactique, vol. 7, no. 1, pp. 22–43, 2024.
@article{anctil_effets_2024,
title = {Effets d’une recherche collaborative sur les pratiques d’enseignement de vocabulaire d’enseignantes de français de 4e et 5e secondaire intervenant auprès d’élèves éprouvant des difficultés langagières},
author = {Dominic Anctil and Claudine Sauvageau},
url = {https://www.asjp.cerist.dz/en/article/252118},
year = {2024},
date = {2024-01-01},
journal = {Action didactique},
volume = {7},
number = {1},
pages = {22--43},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Sauvageau, Claudine; Anctil, Dominic
L’obstacle comme levier de développement professionnel chez des enseignantes du 1er cycle du primaire en contexte d’enseignement direct de vocabulaire par l’oral réflexif Journal Article
In: Action didactique, vol. 7, no. 1, pp. 190–208, 2024.
@article{sauvageau_lobstacle_2024,
title = {L’obstacle comme levier de développement professionnel chez des enseignantes du 1er cycle du primaire en contexte d’enseignement direct de vocabulaire par l’oral réflexif},
author = {Claudine Sauvageau and Dominic Anctil},
url = {https://www.asjp.cerist.dz/en/article/252126},
year = {2024},
date = {2024-01-01},
journal = {Action didactique},
volume = {7},
number = {1},
pages = {190--208},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Escouflaire, Louis; Venant, Antoine; Descampes, Antoine; Fayron, Cédric
La subjectivité dans le journalisme québécois et belge : Transfert de connaissances inter-médias et inter-cultures Proceedings Article
In: JADT 2024: 17th International Conference on Statistical Analysis of Textual Data, Brussels, 2024.
@inproceedings{escouflaire_subjectivite_2024,
  title     = {La subjectivité dans le journalisme québécois et belge : Transfert de connaissances inter-médias et inter-cultures},
  author    = {Louis Escouflaire and Antoine Venant and Antoine Descampes and Cédric Fayron},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {JADT 2024: 17th International Conference on Statistical Analysis of Textual Data},
  address   = {Brussels},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ftita, Amel; Anctil, Dominic; Cavalla, Cristelle
Pratiques d’enseignement et de formation en didactique du lexique Journal Article
In: Action didactique, vol. 7, no. 1, pp. 08–20, 2024.
@article{ftita_pratiques_2024,
title = {Pratiques d’enseignement et de formation en didactique du lexique},
author = {Amel Ftita and Dominic Anctil and Cristelle Cavalla},
url = {https://www.asjp.cerist.dz/en/article/252117},
year = {2024},
date = {2024-01-01},
journal = {Action didactique},
volume = {7},
number = {1},
pages = {8--20},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Rigouts Terryn, Ayla; Lhoneux, Miryam
Exploratory Study on the Impact of English Bias of Generative Large Language Models in Dutch and French Proceedings Article
In: Proceedings of The 4th Workshop on Human Evaluation of NLP Systems (HumEval’24) at LREC-COLING, pp. 12–27, European Language Resources Association, Turin, Italy, 2024.
@inproceedings{rigouts_terryn_exploratory_2024,
title = {Exploratory Study on the Impact of English Bias of Generative Large Language Models in Dutch and French},
author = {Rigouts Terryn, Ayla and Lhoneux, Miryam},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of The 4th Workshop on Human Evaluation of NLP Systems (HumEval’24) at LREC-COLING},
pages = {12--27},
publisher = {European Language Resources Association},
address = {Turin, Italy},
abstract = {The most widely used LLMs like GPT4 and Llama 2 are trained on large amounts of data, mostly in English but are still able to deal with non-English languages. This English bias leads to lower performance in other languages, especially low-resource ones. This paper studies the linguistic quality of LLMs in two non-English high-resource languages: Dutch and French, with a focus on the influence of English. We first construct a comparable corpus of text generated by humans versus LLMs (GPT-4, Zephyr, and GEITje) in the news domain. We proceed to annotate linguistic issues in the LLM-generated texts, obtaining high inter-annotator agreement, and analyse these annotated issues.
We find a substantial influence of English for all models under all conditions: on average, 16\% of all annotations of linguistic errors or peculiarities had a clear link to English. Fine-tuning a LLM to a target language (GEITje is fine-tuned on Dutch) reduces the number of linguistic issues and probably also the influence of English. We further find that using a more elaborate prompt leads to linguistically better results than a concise prompt. Finally, increasing the temperature for one of the models leads to lower linguistic quality but does not alter the influence of English.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
We find a substantial influence of English for all models under all conditions: on average, 16% of all annotations of linguistic errors or peculiarities had a clear link to English. Fine-tuning a LLM to a target language (GEITje is fine-tuned on Dutch) reduces the number of linguistic issues and probably also the influence of English. We further find that using a more elaborate prompt leads to linguistically better results than a concise prompt. Finally, increasing the temperature for one of the models leads to lower linguistic quality but does not alter the influence of English.
Lefever, Els; Rigouts Terryn, Ayla
Computational Terminology Book Section
In: New Advances in Translation and Interpreting Technology: Theories, Applications and Training, Springer, 2024.
@incollection{lefever_computational_2024,
title = {Computational Terminology},
author = {Lefever, Els and Rigouts Terryn, Ayla},
year = {2024},
date = {2024-01-01},
booktitle = {New Advances in Translation and Interpreting Technology: Theories, Applications and Training},
publisher = {Springer},
series = {New Frontiers in Translation Studies},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}