-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathcc2026.bib
More file actions
159 lines (155 loc) · 14 KB
/
Copy pathcc2026.bib
File metadata and controls
159 lines (155 loc) · 14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
@Article{cc:PeiskerHoffmannMuttarak:2026:Climate-news,
title = "Climate news mediates extreme weather effects on climate change concern",
journal = "Climate Risk Management",
volume = "52",
pages = "100806",
year = "2026",
ISSN = "2212-0963",
DOI = "10.1016/j.crm.2026.100806",
URL = "https://www.sciencedirect.com/science/article/pii/S2212096326000197",
author = "Jonas Peisker and Roman Hoffmann and Raya Muttarak",
keywords = "Climate change concern, Extreme weather, News media, Issue attention, Mediation",
abstract = "As the severe impacts of climate change become increasingly apparent, concerns about climate-related
issues have grown in recent years. The news media plays an important role in disseminating information
about climate change and its consequences to the wider public and thus can influence public climate
concern. Here, we investigate how extreme weather affects issue attention to climate change in the
European online news media and how extreme weather and news coverage jointly shape changes in climate
change concern. For the analysis, we combine 12 harmonized Eurobarometer survey waves, measuring public
concerns about climate issues, with meteorological data and indices of environmental news coverage
based on publications from 2481 media outlets in 200 regions of 22 European countries. Using fixed
effects panel models, we estimate effects of temperature anomalies on climate news and climate concern
and explore the role of the news media in explaining changes in concerns in response to temperature
anomalies. The results indicate that unusually high temperatures exhibit a robust positive effect on
media attention, especially when they overlap with other events that draw attention to the climate
topic, such as major climate change conferences. We furthermore find evidence that the climate news in
national outlets increases public concern about climate change and show that reporting by such outlets
is likely to partly explain the effects of temperature anomalies on concerns. We do not find any
significant effects of climate reporting in regional news outlets on climate concern. Our results
suggest that the national news media partly mediates the effects of extreme weather on public climate
change concern. The findings also highlight that focusing events strongly influence issue attention of
the media, providing windows of opportunity to raise awareness about climate issues, while pointing to
challenges in sustaining attention to related topics beyond short-lived news cycles.",
cc-author-affiliation = "International Institute for Applied Systems Analysis, Population and Just Societies,
Laxenburg, Austria; University of Bologna, Italy",
cc-class = "climate change, climate risk management, news, websiteranking, domain-ranking, hyperlinkgraph",
cc-derived-dataset-used = "OpenPageRank",
cc-snippet = "The weights are provided by the Open Page Rank that is based on Common Crawl project, an open source
database of web crawl data (DomCop, 2022). The Open Page Rank is scaled to a range from 0 to 10. Fig. 1
shows the distribution of page ranks over the included articles.",
}
@Article{cc:JumeletWeissweilerNivreBisazza:2026:MultiBLiMP-1.0,
author = "Jumelet, Jaap and Weissweiler, Leonie and Nivre, Joakim and Bisazza, Arianna",
title = "{MultiBLiMP 1.0}: A Massively Multilingual Benchmark of Linguistic Minimal Pairs",
journal = "Transactions of the Association for Computational Linguistics",
volume = "14",
pages = "193--216",
year = "2026",
month = "01",
abstract = "We introduce MultiBLiMP 1.0, a massively multilingual benchmark of linguistic minimal pairs, covering
101 languages and 2 types of subject-verb agreement, containing more than 128,000 minimal pairs. Our
minimal pairs are created using a fully automated pipeline, leveraging the large-scale linguistic
resources of Universal Dependencies and UniMorph. MultiBLiMP 1.0 evaluates abilities of LLMs at an
unprecedented multilingual scale, and highlights the shortcomings of the current state-of-the-art in
modelling low-resource languages.",
ISSN = "2307-387X",
DOI = "10.1162/TACL.a.600",
URL = "https://doi.org/10.1162/TACL.a.600",
eprint = "https://direct.mit.edu/tacl/article-pdf/doi/10.1162/TACL.a.600/2577913/tacl.a.600.pdf",
cc-author-affiliation = "University of Groningen, The Netherlands; Uppsala University, Sweden",
cc-class = "linguistic minimal pairs, language frequency, nlp/corpus-construction, nlp/multi-lingual-corpus",
cc-derived-dataset-used = "GlotCC",
cc-snippet = "Since the training corpora of most LLMs are not publicly available, we estimate this distribution
based on the language frequencies of Kargaran et al. (2024), which were computed on a 3.9T token split
of the Common Crawl corpus. Common Crawl provides a good reflection of the language distribution of the
web-scraped data that is at the core of many LLM training corpora. [...] We also report results for
language subgroups split based on the Common Crawl language frequencies: [...]",
}
@Article{cc:HanleyLuPan:2025:Across-the-firewall,
author = "Hans W. A. Hanley and Yingdan Lu and Jennifer Pan",
title = "Across the firewall: Foreign media’s role in shaping Chinese social media narratives on the
Russo-Ukrainian War",
journal = "Proceedings of the National Academy of Sciences",
volume = "122",
number = "1",
pages = "e2420607122",
year = "2025",
DOI = "10.1073/pnas.2420607122",
URL = "https://www.pnas.org/doi/abs/10.1073/pnas.2420607122",
eprint = "https://www.pnas.org/doi/pdf/10.1073/pnas.2420607122",
abstract = "There is a widespread perception that China’s digital censorship distances its people from the
global internet, and the Chinese Communist Party, through state-controlled media, is the main
gatekeeper of information about foreign affairs. Our analysis of narratives about the Russo-Ukrainian
War circulating on the Chinese social media platform Weibo challenges this view. Comparing narratives
on Weibo with 8.26 million unique news articles from 2,500 of some of the most trafficked websites in
China, Russia, Ukraine, and the United States (totaling 10,000 sites), we find that Russian news
websites published more articles matching narratives found on Weibo than news websites from China,
Ukraine, or the United States. Similarly, a plurality of Weibo narratives were most associated with
narratives found on Russian news websites while less than ten percent were most associated with
narratives from Chinese news sites. Narratives later appearing on Weibo were more likely to first
appear on Russian rather than Chinese, Ukrainian, or US news websites, and Russian websites were highly
influential for narratives appearing on Weibo. Altogether, these results show that Chinese state media
was not the main gatekeeper of information about Russia’s invasion of Ukraine for Weibo users.",
cc-author-affiliation = "Stanford University, Stanford, CA, USA; Northwestern University, Evanston, IL, USA",
cc-class = "political science, news, news narratives, hyperlinkgraph, domain-ranks",
cc-dataset-used = "hyperlinkgraph, CC-MAIN-2023-06, CC-MAIN-2022-49, CC-MAIN-2022-40, CC-MAIN-2022-33, CC-MAIN-2022-27,
CC-MAIN-2022-21, CC-MAIN-2022-05",
cc-snippet = "News websites were identified using Amazon Alexa, Common Crawl, and Cloudflare data (SI Appendix,
section S2). [...] Namely, we collected the set of most popular websites ranked in Amazon Alexa’s top
one million websites and Common Crawl’s Domain Rank datasets from April 2022, which utilize the
top-level domain of each country we were interested in (i.e., .cn, .ua, and .ru).ˢ² [ˢ² We utilized
both Common Crawl and Amazon Alexa due to the paucity of Chinese, Ukrainian, and Russian domains in the
US-dominated Amazon Alexa list.] [...] From each news website in our dataset, we collected news
articles published between January 1, 2022, and June 1, 2022. To gather this data, we took two main
approaches: (1) gathering available web crawls from Common Crawl [22], and (2) extensively crawling
each website retrospectively between November 2022 and March 2023. Common Crawl is widely considered
the most complete public source of web crawl data. For each website, we downloaded Common Crawl indexed
pagesˢ³ from between January 1, 2022, and January 1, 2023 (CC-MAIN-2023-06, CC-MAIN-2022-49,
CC-MAIN-2022-40 CC-MAIN-2022-33, CC-MAIN-2022-27, CC-MAIN-2022-21, CC-MAIN-2022-05), identified the
publication date using the Python htmldate library, and included HTML pages published on their websites
between January 1, 2022, and June 1, 2022. To further expand the Common Crawl dataset, we performed a
breadth-first crawl (15 hops from the homepage) of each website to gather the set of HTML pages that
are missing from Common Crawl.",
}
@TechReport{cc:Di-PaoloLiberatiRubeo:2026:GreenWashing-climate-information-and-banking-policies,
year = "2026",
title = "{(Green)Washing} the Trust: Climate Information and Banking Policies",
author = "Di Paolo, Simone and Liberati, Danilo and Rubeo, Lorenzo",
URL = "https://www.bancaditalia.it/pubblicazioni/temi-discussione/2026/2026-1514/en_tema_1514.pdf",
journal = "Temi di discussione (Working Papers)",
institution = "Banca d'Italia",
type = "Temi di discussione (Working Papers)",
number = "1514",
abstract = "Greenwashing, that is, the deceptive self-portrayal of companies as sustainable and environmentally
friendly, is an increasingly relevant issue in finance. Identifying greenwashers is not a trivial task,
given the difficulty of assessing firms’ true environmental profiles, especially when relying on
traditional data sources that generally overlook communication strategies and mass perceptions. Using
granular credit data from the euro area banking system, we show that during the period 2019-2023,
greenwashers, initially identified by combining information on firms’ carbon emissions with an
assessment of the reliability of their reporting, were able to borrow at lower interest rates than
other companies. We then assess companies’ environmental profiles by extracting textual information
from newspapers and the internet. We find that sentiment scores based on firms’ own websites are
generally higher than those derived from newspapers, suggesting that companies use their communication
channels to place greater emphasis on their sustainable image than is reflected in external sources. By
integrating this textual metric with our initial proxy, we construct an alternative definition of
greenwashing. Based on a sample of Italian firms, results obtained from this combined proxy are
consistent with those derived from structured data alone. Finally, by introducing an unexpected
contractionary monetary policy shock into our framework, we confirm the operation of the credit risk
channel of monetary policy and find evidence of a reduction in the pricing benefits previously enjoyed
by greenwashers.",
cc-author-affiliation = "Banca d'Italia, Italy",
cc-class = "climate change, company websites, banking, policies",
cc-dataset-used = "CC-MAIN-2024-10",
cc-snippet = "We then developed a second Python script using the BeautifulSoup library to crawl these websites and
download their complete HTML content, following internal links up to three levels deep within the same
domain. When this automated approach failed (mostly for technical reasons, e.g. in the case of
single-page applications), we turned to Common Crawl, an open-access repository that regularly archives
vast portions of the internet. Common Crawl stores petabytes of raw web data, including HTML pages,
metadata, and text extracts, collected through periodic crawls of publicly accessible websites. Its
datasets are freely available and widely used in research for tasks such as text mining, search engine
development, and large-scale content analysis. However, Common Crawl is less suited for projects
focused on a restricted number of websites, since accessing a few domains of interest requires
downloading and processing very large amounts of data, often including irrelevant content. Moreover,
the temporal granularity of the snapshots and the potential incompleteness of some archived websites
may limit its reliability for capturing the most up-to-date corporate information. In our case, we
therefore relied on Common Crawl only as a complementary source, to obtain the HTML content for those
sites that our script was unable to fetch.¹⁸ [¹⁸We used the snapshot CC-MAIN-2024-10, the 10th
main crawl of 2024.]",
}