# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Project name and version; final.name is composed from both.
name=apache-nutch
version=1.21-SNAPSHOT
final.name=${name}-${version}
year=2024

# Source, library, configuration and plugin locations (relative to basedir).
basedir=./
src.dir=./src/java
lib.dir=./lib
conf.dir=./conf
plugins.dir=./src/plugin

# Build output locations; everything below is derived from build.dir.
build.dir=./build
build.classes=${build.dir}/classes
build.plugins=${build.dir}/plugins
build.javadoc=${build.dir}/docs/api
build.encoding=UTF-8
build.ivy.dir=${build.dir}/ivy
build.lib.dir=${build.dir}/lib

# Test sources and test build output; derived from test.build.dir.
test.src.dir=./src/test
test.build.dir=${build.dir}/test
test.build.lib.dir=${test.build.dir}/lib
test.build.data=${test.build.dir}/data
test.build.classes=${test.build.dir}/classes
test.build.javadoc=${test.build.dir}/docs/api
test.junit.output.format=plain

# Proxy Host and Port to use for building JavaDoc
javadoc.proxy.host=-J-DproxyHost=
javadoc.proxy.port=-J-DproxyPort=
# External API documentation linked from the generated JavaDoc.
javadoc.link.java=https://docs.oracle.com/en/java/javase/11/docs/api/
javadoc.link.hadoop=https://hadoop.apache.org/docs/r3.3.6/api/
javadoc.packages=org.apache.nutch.*

# Distribution output; one directory each for the -src and -bin packages.
dist.dir=./dist
src.dist.version.dir=${dist.dir}/${final.name}-src
bin.dist.version.dir=${dist.dir}/${final.name}-bin

# Java compiler settings.
javac.debug=on
javac.optimize=on
javac.deprecation=on
javac.version=11

# Runtime directories.
runtime.dir=./runtime
runtime.deploy=${runtime.dir}/deploy
runtime.local=${runtime.dir}/local

# Ivy: version, local paths and the URL the Ivy jar is fetched from.
ivy.version=2.5.2
ivy.dir=${basedir}/ivy
ivy.file=${ivy.dir}/ivy.xml
ivy.jar=${ivy.dir}/ivy-${ivy.version}.jar
ivy.repo.url=https://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar

# Layout of the Ivy "local" and "shared" repositories.
# NOTE(review): ivy.default.ivy.user.dir is not defined in this file;
# presumably it is supplied by Ivy itself -- confirm.
ivy.local.default.root=${ivy.default.ivy.user.dir}/local
ivy.local.default.ivy.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]
ivy.local.default.artifact.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]
ivy.shared.default.root=${ivy.default.ivy.user.dir}/shared
ivy.shared.default.ivy.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]
ivy.shared.default.artifact.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]

# work-around to fix failing dependency download of
#   javax.ws.rs-api.jar
# required by Tika (1.19 and higher)
# cf. (also affects ant/ivy)
#   https://github.com/eclipse-ee4j/jaxrs-api/issues/572
#   https://github.com/gradle/gradle/issues/3065
packaging.type=jar

# Each plugins.* property below is a ':'-separated list of package
# patterns, written one entry per line using backslash line continuation.
# Rules for editing these lists:
#   - every entry except the last ends in ':\'
#   - the last entry ends in neither ':' nor '\' (a trailing backslash
#     would pull the following line into the value)
#   - no comment lines inside a list (they would become part of the value)

#
# Plugins API
#
plugins.api=\
    org.apache.nutch.protocol.http.api*:\
    org.apache.nutch.urlfilter.api*

#
# Protocol Plugins
#
plugins.protocol=\
    org.apache.nutch.protocol.file*:\
    org.apache.nutch.protocol.ftp*:\
    org.apache.nutch.protocol.http*:\
    org.apache.nutch.protocol.httpclient*:\
    org.apache.nutch.protocol.interactiveselenium*:\
    org.apache.nutch.protocol.okhttp*:\
    org.apache.nutch.protocol.selenium*:\
    org.apache.nutch.protocol.htmlunit*

#
# URL Filter Plugins
#
plugins.urlfilter=\
    org.apache.nutch.urlfilter.automaton*:\
    org.apache.nutch.urlfilter.domain*:\
    org.apache.nutch.urlfilter.domaindenylist*:\
    org.apache.nutch.urlfilter.fast*:\
    org.apache.nutch.urlfilter.ignoreexempt*:\
    org.apache.nutch.urlfilter.prefix*:\
    org.apache.nutch.urlfilter.regex*:\
    org.apache.nutch.urlfilter.suffix*:\
    org.apache.nutch.urlfilter.validator*

#
# URL Normalizer Plugins
#
plugins.urlnormalizer=\
    org.apache.nutch.net.urlnormalizer.ajax*:\
    org.apache.nutch.net.urlnormalizer.basic*:\
    org.apache.nutch.net.urlnormalizer.host*:\
    org.apache.nutch.net.urlnormalizer.pass*:\
    org.apache.nutch.net.urlnormalizer.protocol*:\
    org.apache.nutch.net.urlnormalizer.querystring*:\
    org.apache.nutch.net.urlnormalizer.regex*:\
    org.apache.nutch.net.urlnormalizer.slash*

#
# Scoring Plugins
#
plugins.scoring=\
    org.apache.nutch.scoring.adaptive*:\
    org.apache.nutch.scoring.depth*:\
    org.apache.nutch.scoring.link*:\
    org.apache.nutch.scoring.opic*:\
    org.apache.nutch.scoring.orphan*:\
    org.apache.nutch.scoring.similarity*:\
    org.apache.nutch.scoring.urlmeta*:\
    org.apache.nutch.scoring.metadata*

#
# Parse Plugins
#
plugins.parse=\
    org.apache.nutch.parse.ext*:\
    org.apache.nutch.parse.feed*:\
    org.apache.nutch.parse.html*:\
    org.apache.nutch.parse.js:\
    org.apache.nutch.parse.replace*:\
    org.apache.nutch.parse.tika:\
    org.apache.nutch.parse.zip

#
# Parse Filter Plugins
#
plugins.parsefilter=\
    org.apache.nutch.parsefilter.debug*:\
    org.apache.nutch.parse.headings*:\
    org.apache.nutch.parsefilter.naivebayes*:\
    org.apache.nutch.parsefilter.regex*:\
    org.apache.nutch.parse.metatags*

#
# Publisher Plugins
#
plugins.publisher=\
    org.apache.nutch.publisher.rabbitmq*

#
# Exchange Plugins
#
plugins.exchange=\
    org.apache.nutch.exchange.jexl*

#
# Indexing Filter Plugins
#
plugins.index=\
    org.apache.nutch.indexer.anchor*:\
    org.apache.nutch.indexer.basic*:\
    org.apache.nutch.indexer.feed*:\
    org.apache.nutch.indexer.geoip*:\
    org.apache.nutch.indexer.jexl*:\
    org.apache.nutch.indexer.filter*:\
    org.apache.nutch.indexer.links*:\
    org.apache.nutch.indexer.metadata*:\
    org.apache.nutch.indexer.more*:\
    org.apache.nutch.indexer.replace*:\
    org.apache.nutch.indexer.staticfield*:\
    org.apache.nutch.indexer.subcollection*:\
    org.apache.nutch.indexer.tld*:\
    org.apache.nutch.indexer.urlmeta*

#
# Indexing Backend Plugins
#
plugins.indexer=\
    org.apache.nutch.indexwriter.cloudsearch*:\
    org.apache.nutch.indexwriter.csv*:\
    org.apache.nutch.indexwriter.dummy*:\
    org.apache.nutch.indexwriter.elastic*:\
    org.apache.nutch.indexwriter.rabbit*:\
    org.apache.nutch.indexwriter.kafka*:\
    org.apache.nutch.indexwriter.solr*

#
# Misc. Plugins
#
# (gathers plugins that cannot be dispatched
# in any category, mainly because they contain
# many extension points)
#
# NOTE(review): this value ends with a trailing ':' separator and no
# further entry; kept unchanged -- confirm whether an entry was removed
# and the separator can be dropped.
plugins.misc=\
    org.apache.nutch.collection*:\
    org.apache.nutch.analysis.lang*:\
    org.creativecommons.nutch*:\
    org.apache.nutch.microformats.reltag*: