|
| 1 | +@Article{cc:PeiskerHoffmannMuttarak:2026:Climate-news, |
| 2 | + title = "Climate news mediates extreme weather effects on climate change concern", |
| 3 | + journal = "Climate Risk Management", |
| 4 | + volume = "52", |
| 5 | + pages = "100806", |
| 6 | + year = "2026", |
| 7 | + ISSN = "2212-0963", |
| 8 | + DOI = "10.1016/j.crm.2026.100806", |
| 9 | + URL = "https://www.sciencedirect.com/science/article/pii/S2212096326000197", |
| 10 | + author = "Jonas Peisker and Roman Hoffmann and Raya Muttarak", |
| 11 | + keywords = "Climate change concern, Extreme weather, News media, Issue attention, Mediation", |
| 12 | + abstract = "As the severe impacts of climate change become increasingly apparent, concerns about climate-related |
| 13 | + issues have grown in recent years. The news media plays an important role in disseminating information |
| 14 | + about climate change and its consequences to the wider public and thus can influence public climate |
| 15 | + concern. Here, we investigate how extreme weather affects issue attention to climate change in the |
| 16 | + European online news media and how extreme weather and news coverage jointly shape changes in climate |
| 17 | + change concern. For the analysis, we combine 12 harmonized Eurobarometer survey waves, measuring public |
| 18 | + concerns about climate issues, with meteorological data and indices of environmental news coverage |
| 19 | + based on publications from 2481 media outlets in 200 regions of 22 European countries. Using fixed |
| 20 | + effects panel models, we estimate effects of temperature anomalies on climate news and climate concern |
| 21 | + and explore the role of the news media in explaining changes in concerns in response to temperature |
| 22 | + anomalies. The results indicate that unusually high temperatures exhibit a robust positive effect on |
| 23 | + media attention, especially when they overlap with other events that draw attention to the climate |
| 24 | + topic, such as major climate change conferences. We furthermore find evidence that the climate news in |
| 25 | + national outlets increases public concern about climate change and show that reporting by such outlets |
| 26 | + is likely to partly explain the effects of temperature anomalies on concerns. We do not find any |
| 27 | + significant effects of climate reporting in regional news outlets on climate concern. Our results |
| 28 | + suggest that the national news media partly mediates the effects of extreme weather on public climate |
| 29 | + change concern. The findings also highlight that focusing events strongly influence issue attention of |
| 30 | + the media, providing windows of opportunity to raise awareness about climate issues, while pointing to |
| 31 | + challenges in sustaining attention to related topics beyond short-lived news cycles.", |
| 32 | + cc-author-affiliation = "International Institute for Applied Systems Analysis, Population and Just Societies, |
| 33 | + Laxenburg, Austria; University of Bologna, Italy", |
| 34 | + cc-class = "climate change, climate risk management, news, websiteranking, domain-ranking, hyperlinkgraph", |
| 35 | + cc-derived-dataset-used = "OpenPageRank", |
| 36 | + cc-snippet = "The weights are provided by the Open Page Rank that is based on Common Crawl project, an open source |
| 37 | + database of web crawl data (DomCop, 2022). The Open Page Rank is scaled to a range from 0 to 10. Fig. 1 |
| 38 | + shows the distribution of page ranks over the included articles.", |
| 39 | +} |
| 40 | + |
| 41 | +@Article{cc:JumeletWeissweilerNivreBisazza:2026:MultiBLiMP-1.0, |
| 42 | + author = "Jumelet, Jaap and Weissweiler, Leonie and Nivre, Joakim and Bisazza, Arianna", |
| 43 | + title = "{MultiBLiMP 1.0}: A Massively Multilingual Benchmark of Linguistic Minimal Pairs", |
| 44 | + journal = "Transactions of the Association for Computational Linguistics", |
| 45 | + volume = "14", |
| 46 | + pages = "193--216", |
| 47 | + year = "2026", |
| 48 | + month = jan, |
| 49 | + abstract = "We introduce MultiBLiMP 1.0, a massively multilingual benchmark of linguistic minimal pairs, covering |
| 50 | + 101 languages and 2 types of subject-verb agreement, containing more than 128,000 minimal pairs. Our |
| 51 | + minimal pairs are created using a fully automated pipeline, leveraging the large-scale linguistic |
| 52 | + resources of Universal Dependencies and UniMorph. MultiBLiMP 1.0 evaluates abilities of LLMs at an |
| 53 | + unprecedented multilingual scale, and highlights the shortcomings of the current state-of-the-art in |
| 54 | + modelling low-resource languages.", |
| 55 | + ISSN = "2307-387X", |
| 56 | + DOI = "10.1162/TACL.a.600", |
| 57 | + URL = "https://doi.org/10.1162/TACL.a.600", |
| 58 | + eprint = "https://direct.mit.edu/tacl/article-pdf/doi/10.1162/TACL.a.600/2577913/tacl.a.600.pdf", |
| 59 | + cc-author-affiliation = "University of Groningen, The Netherlands; Uppsala University, Sweden", |
| 60 | + cc-class = "linguistic minimal pairs, language frequency, nlp/corpus-construction, nlp/multi-lingual-corpus", |
| 61 | + cc-derived-dataset-used = "GlotCC", |
| 62 | + cc-snippet = "Since the training corpora of most LLMs are not publicly available, we estimate this distribution |
| 63 | + based on the language frequencies of Kargaran et al. (2024), which were computed on a 3.9T token split |
| 64 | + of the Common Crawl corpus. Common Crawl provides a good reflection of the language distribution of the |
| 65 | + web-scraped data that is at the core of many LLM training corpora. [...] We also report results for |
| 66 | + language subgroups split based on the Common Crawl language frequencies: [...]", |
| 67 | +} |
| 68 | + |
| 69 | +@Article{cc:HanleyLuPan:2025:Across-the-firewall, |
| 70 | + author = "Hans W. A. Hanley and Yingdan Lu and Jennifer Pan", |
| 71 | + title = "Across the firewall: Foreign media’s role in shaping {Chinese} social media narratives on the |
| 72 | + {Russo-Ukrainian War}", |
| 73 | + journal = "Proceedings of the National Academy of Sciences", |
| 74 | + volume = "122", |
| 75 | + number = "1", |
| 76 | + pages = "e2420607122", |
| 77 | + year = "2025", |
| 78 | + DOI = "10.1073/pnas.2420607122", |
| 79 | + URL = "https://www.pnas.org/doi/abs/10.1073/pnas.2420607122", |
| 80 | + eprint = "https://www.pnas.org/doi/pdf/10.1073/pnas.2420607122", |
| 81 | + abstract = "There is a widespread perception that China’s digital censorship distances its people from the |
| 82 | + global internet, and the Chinese Communist Party, through state-controlled media, is the main |
| 83 | + gatekeeper of information about foreign affairs. Our analysis of narratives about the Russo-Ukrainian |
| 84 | + War circulating on the Chinese social media platform Weibo challenges this view. Comparing narratives |
| 85 | + on Weibo with 8.26 million unique news articles from 2,500 of some of the most trafficked websites in |
| 86 | + China, Russia, Ukraine, and the United States (totaling 10,000 sites), we find that Russian news |
| 87 | + websites published more articles matching narratives found on Weibo than news websites from China, |
| 88 | + Ukraine, or the United States. Similarly, a plurality of Weibo narratives were most associated with |
| 89 | + narratives found on Russian news websites while less than ten percent were most associated with |
| 90 | + narratives from Chinese news sites. Narratives later appearing on Weibo were more likely to first |
| 91 | + appear on Russian rather than Chinese, Ukrainian, or US news websites, and Russian websites were highly |
| 92 | + influential for narratives appearing on Weibo. Altogether, these results show that Chinese state media |
| 93 | + was not the main gatekeeper of information about Russia’s invasion of Ukraine for Weibo users.", |
| 94 | + cc-author-affiliation = "Stanford University, Stanford, CA, USA; Northwestern University, Evanston, IL, USA", |
| 95 | + cc-class = "political science, news, news narratives, hyperlinkgraph, domain-ranks", |
| 96 | + cc-dataset-used = "hyperlinkgraph, CC-MAIN-2023-06, CC-MAIN-2022-49, CC-MAIN-2022-40, CC-MAIN-2022-33, CC-MAIN-2022-27, |
| 97 | + CC-MAIN-2022-21, CC-MAIN-2022-05", |
| 98 | + cc-snippet = "News websites were identified using Amazon Alexa, Common Crawl, and Cloudflare data (SI Appendix, |
| 99 | + section S2). [...] Namely, we collected the set of most popular websites ranked in Amazon Alexa’s top |
| 100 | + one million websites and Common Crawl’s Domain Rank datasets from April 2022, which utilize the |
| 101 | + top-level domain of each country we were interested in (i.e., .cn, .ua, and .ru).ˢ² [ˢ² We utilized |
| 102 | + both Common Crawl and Amazon Alexa due to the paucity of Chinese, Ukrainian, and Russian domains in the |
| 103 | + US-dominated Amazon Alexa list.] [...] From each news website in our dataset, we collected news |
| 104 | + articles published between January 1, 2022, and June 1, 2022. To gather this data, we took two main |
| 105 | + approaches: (1) gathering available web crawls from Common Crawl [22], and (2) extensively crawling |
| 106 | + each website retrospectively between November 2022 and March 2023. Common Crawl is widely considered |
| 107 | + the most complete public source of web crawl data. For each website, we downloaded Common Crawl indexed |
| 108 | + pagesˢ³ from between January 1, 2022, and January 1, 2023 (CC-MAIN-2023-06, CC-MAIN-2022-49, |
| 109 | + CC-MAIN-2022-40 CC-MAIN-2022-33, CC-MAIN-2022-27, CC-MAIN-2022-21, CC-MAIN-2022-05), identified the |
| 110 | + publication date using the Python htmldate library, and included HTML pages published on their websites |
| 111 | + between January 1, 2022, and June 1, 2022. To further expand the Common Crawl dataset, we performed a |
| 112 | + breadth-first crawl (15 hops from the homepage) of each website to gather the set of HTML pages that |
| 113 | + are missing from Common Crawl.", |
| 114 | +} |
| 115 | + |
| 116 | +@TechReport{cc:Di-PaoloLiberatiRubeo:2026:GreenWashing-climate-information-and-banking-policies, |
| 117 | + year = "2026", |
| 118 | + title = "{(Green)Washing} the Trust: Climate Information and Banking Policies", |
| 119 | + author = "Di Paolo, Simone and Liberati, Danilo and Rubeo, Lorenzo", |
| 120 | + URL = "https://www.bancaditalia.it/pubblicazioni/temi-discussione/2026/2026-1514/en_tema_1514.pdf", |
| 121 | + type = "Temi di discussione (Working Papers)", |
| 122 | + number = "1514", |
| 123 | + abstract = "Greenwashing, that is, the deceptive self-portrayal of companies as sustainable and environmentally |
| 124 | + friendly, is an increasingly relevant issue in finance. Identifying greenwashers is not a trivial task, |
| 125 | + given the difficulty of assessing firms’ true environmental profiles, especially when relying on |
| 126 | + traditional data sources that generally overlook communication strategies and mass perceptions. Using |
| 127 | + granular credit data from the euro area banking system, we show that during the period 2019-2023, |
| 128 | + greenwashers, initially identified by combining information on firms’ carbon emissions with an |
| 129 | + assessment of the reliability of their reporting, were able to borrow at lower interest rates than |
| 130 | + other companies. We then assess companies’ environmental profiles by extracting textual information |
| 131 | + from newspapers and the internet. We find that sentiment scores based on firms’ own websites are |
| 132 | + generally higher than those derived from newspapers, suggesting that companies use their communication |
| 133 | + channels to place greater emphasis on their sustainable image than is reflected in external sources. By |
| 134 | + integrating this textual metric with our initial proxy, we construct an alternative definition of |
| 135 | + greenwashing. Based on a sample of Italian firms, results obtained from this combined proxy are |
| 136 | + consistent with those derived from structured data alone. Finally, by introducing an unexpected |
| 137 | + contractionary monetary policy shock into our framework, we confirm the operation of the credit risk |
| 138 | + channel of monetary policy and find evidence of a reduction in the pricing benefits previously enjoyed |
| 139 | + by greenwashers.", |
| 140 | + cc-author-affiliation = "Banca d'Italia, Italy", |
| 141 | + cc-class = "climate change, company websites, banking, policies", |
| 142 | + cc-dataset-used = "CC-MAIN-2024-10", |
| 143 | + cc-snippet = "We then developed a second Python script using the BeautifulSoup library to crawl these websites and |
| 144 | + download their complete HTML content, following internal links up to three levels deep within the same |
| 145 | + domain. When this automated approach failed (mostly for technical reasons, e.g. in the case of |
| 146 | + single-page applications), we turned to Common Crawl, an open-access repository that regularly archives |
| 147 | + vast portions of the internet. Common Crawl stores petabytes of raw web data, including HTML pages, |
| 148 | + metadata, and text extracts, collected through periodic crawls of publicly accessible websites. Its |
| 149 | + datasets are freely available and widely used in research for tasks such as text mining, search engine |
| 150 | + development, and large-scale content analysis. However, Common Crawl is less suited for projects |
| 151 | + focused on a restricted number of websites, since accessing a few domains of interest requires |
| 152 | + downloading and processing very large amounts of data, often including irrelevant content. Moreover, |
| 153 | + the temporal granularity of the snapshots and the potential incompleteness of some archived websites |
| 154 | + may limit its reliability for capturing the most up-to-date corporate information. In our case, we |
| 155 | + therefore relied on Common Crawl only as a complementary source, to obtain the HTML content for those |
| 156 | + sites that our script was unable to fetch.¹⁸ [¹⁸We used the snapshot CC-MAIN-2024-10, the 10th |
| 157 | + main crawl of 2024.]", |
| 158 | +} |
| 159 | + |
0 commit comments