instead of a defined output dataset name, one can specify a name extension that is appended to the input dataset name (e.g. '-transitive' -> instance-types-transitive)
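A minimal sketch of how such an extension could be applied (the helper name extendDatasetName is hypothetical, not part of the framework):

  // Sketch: derive an output dataset name by appending an extension to the input name.
  def extendDatasetName(inputDataset: String, extension: String): String =
    inputDataset + extension

  // extendDatasetName("instance-types", "-transitive") == "instance-types-transitive"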
The version string of the DBpedia release being extracted
base-dir gives either an absolute or a relative path to where all data is stored. Normally, the wiki dumps are downloaded here and the extracted data is saved next to them; the created folder structure is {{lang}}wiki/$date
DEV NOTE: 1. this must stay lazy, as it might not be used or creatable in the SPARK extraction; 2. Download.scala in core does the creation
DEFAULT ./wikidumps
TODO rename dumpDir to baseDir
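A minimal sketch of the folder layout described above, assuming the per-language dump directory resolves to base-dir/{{lang}}wiki/$date (the helper name dumpDirectory is hypothetical):

  import java.io.File

  // Sketch: resolve the per-language dump directory under base-dir.
  def dumpDirectory(baseDir: File, wikiCode: String, date: String): File =
    new File(baseDir, s"${wikiCode}wiki/$date")

  // dumpDirectory(new File("./wikidumps"), "en", "20230601") -> ./wikidumps/enwiki/20230601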
the extractor classes to be used when extracting the XML dumps
An array of input dataset names (e.g. 'instance-types' or 'mappingbased-literals'), separated by ','
the suffix of the files representing the input dataset (usually a combination of the RDF serialization extension and the compression used, e.g. '.ttl.bz2' for Turtle triples compressed with bzip2)
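A minimal sketch of how the dataset names and the suffix combine into the expected input file names (the helper inputFileNames is hypothetical):

  // Sketch: append the input suffix to every input dataset name.
  def inputFileNames(datasets: Seq[String], suffix: String): Seq[String] =
    datasets.map(_ + suffix)

  // inputFileNames(Seq("instance-types", "mappingbased-literals"), ".ttl.bz2")
  //   == Seq("instance-types.ttl.bz2", "mappingbased-literals.ttl.bz2")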
determines whether 1. the download has to be completed and, if so, 2. looks for the download-complete file
- the language for which to check
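A minimal sketch of such a check, assuming the marker file sits in the language's dump folder (the file name and layout used here are assumptions):

  import java.io.File

  // Sketch: return true if the download-complete marker exists for the given language and date.
  def downloadComplete(baseDir: File, wikiCode: String, date: String): Boolean =
    new File(baseDir, s"${wikiCode}wiki/$date/download-complete").exists()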
An array of languages, specified by the exact enumeration of language wiki codes (e.g. en,de,fr...), by article count ranges ('10000-20000', or '10000-' for all wiki languages with at least that many articles), by '@mappings' or '@chapters' when only mapping/chapter languages are of concern, by '@downloaded' if all downloaded languages (containing the download.complete file) are to be processed, or by '@abstracts' to only process languages which provide human-readable abstracts (thus not 'wikidata' and the like)
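A simplified sketch of how such a setting could be interpreted; the '@' keywords are resolved against wiki metadata in the framework and are only hinted at here, and selectLanguages is a hypothetical helper:

  // Sketch: interpret one language setting (explicit codes, article count range, or @keyword).
  def selectLanguages(setting: String, articleCounts: Map[String, Int]): Seq[String] =
    setting.trim match {
      case kw if kw.startsWith("@") =>
        sys.error(s"keyword $kw is resolved against wiki metadata (not shown here)")
      case range if range.matches("""\d+-\d*""") =>
        val parts = range.split("-", -1)
        val min = parts(0).toInt
        val max = if (parts(1).isEmpty) Int.MaxValue else parts(1).toInt
        articleCounts.collect { case (code, n) if n >= min && n <= max => code }.toSeq
      case codes =>
        codes.split(",").map(_.trim).toSeq
    }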
The directory where all log files will be stored
Local mappings files, downloaded for speed and reproducibility. Note: this is lazy to defer initialization until actually called (e.g. this class is not used directly in the distributed extraction framework; DistConfig.ExtractionConfig extends Config and overrides this val to null because it is not needed)
The namespaces to load, as defined by the languages in use (see languages)
Local ontology file, downloaded for speed and reproducibility. Note: this is lazy to defer initialization until actually called (e.g. this class is not used directly in the distributed extraction framework; DistConfig.ExtractionConfig extends Config and overrides this val to null because it is not needed)
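A minimal sketch of why these values are lazy; the class and file names below are placeholders standing in for Config and DistConfig.ExtractionConfig:

  import java.io.File

  class BaseConfig {
    // Deferred: the (potentially expensive) download/lookup only happens on first access.
    lazy val ontologyFile: File = new File("ontology.xml") // placeholder path
  }

  class DistributedConfig extends BaseConfig {
    // A subclass that never needs the local file can override it without triggering the download.
    override lazy val ontologyFile: File = null
  }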
A dataset name for the output file generated (e.g. 'instance-types' or 'mappingbased-literals')
Same as inputSuffix, but for the output dataset
Number of parallel processes allowed. Depends on the number of cores, the type of disk, and the I/O speed
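A minimal sketch of bounding the work by the configured value; the value and wiring below are assumptions:

  import java.util.concurrent.Executors
  import scala.concurrent.ExecutionContext

  // Sketch: derive a bounded thread pool from the configured number of parallel processes.
  val parallelProcesses: Int = 4 // e.g. read from the config; tune for cores, disk and I/O
  implicit val extractionContext: ExecutionContext =
    ExecutionContext.fromExecutorService(Executors.newFixedThreadPool(parallelProcesses))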
before processing a given language, check if the download.complete file is present
TODO experimental, ignore for now
Normally extraction jobs are run sequentially (one language after the other), but for some jobs it makes sense to run them in parallel. This should only be used if a single extraction job does not take up the available computing power.
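A minimal sketch of sequential versus parallel job execution; runJob and runAll are hypothetical stand-ins for the framework's job runner:

  import scala.concurrent.{Await, Future}
  import scala.concurrent.duration.Duration
  import scala.concurrent.ExecutionContext.Implicits.global

  def runJob(wikiCode: String): Unit = () // stand-in for one full extraction job

  // Sketch: run one job per language, either one after the other or concurrently.
  def runAll(languages: Seq[String], inParallel: Boolean): Unit =
    if (inParallel)
      Await.result(Future.sequence(languages.map(l => Future(runJob(l)))), Duration.Inf)
    else
      languages.foreach(runJob)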
If set, extraction summaries are forwarded via the Slack API, displaying messages on a dedicated channel. Related properties:
* the URL of the Slack webhook to be used
* the username under which all messages are posted (has to be registered for this webhook?)
* the threshold of extracted pages over which a summary of the current extraction is posted
* the threshold of exceptions over which an exception report is posted
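A minimal sketch of posting such a summary to a Slack incoming webhook; the helper and payload fields are placeholders, not the framework's actual reporting code:

  import java.net.{HttpURLConnection, URL}
  import java.nio.charset.StandardCharsets

  // Sketch: POST a JSON message to the configured webhook URL under the configured username.
  // Note: no JSON escaping is done here; illustration only.
  def postToSlack(webhookUrl: String, username: String, text: String): Int = {
    val payload = s"""{"username": "$username", "text": "$text"}"""
    val conn = new URL(webhookUrl).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod("POST")
    conn.setRequestProperty("Content-Type", "application/json")
    conn.setDoOutput(true)
    conn.getOutputStream.write(payload.getBytes(StandardCharsets.UTF_8))
    conn.getResponseCode
  }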
get all universal properties and check whether there is an override in the provided config file
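A minimal sketch of that layering with java.util.Properties (the loading details and paths are assumptions):

  import java.io.FileReader
  import java.util.Properties

  // Sketch: load the universal defaults first, then let the job-specific file override duplicates.
  def loadConfig(universalPath: String, jobSpecificPath: String): Properties = {
    val props = new Properties()
    props.load(new FileReader(universalPath))
    props.load(new FileReader(jobSpecificPath))
    props
  }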
Documentation of config values
TODO: universal.properties is loaded and then overwritten by the job-specific config; however, we are working on removing universal properties by setting (and documenting) sensible default values here, which CAN be overwritten in a job-specific config
Guideline:
* Use Java/Scaladoc always
* Parameters (lazy val) MUST be documented in the following manner:
TODO @Fabian please:
* go through universal properties and other configs and move all comments here
* after removing, place a comment in the property file referring to
* set default values according to universal.properties
* try to FOLLOW THE GUIDELINES above, add TODO if unclear
* if possible, move all def functions to ConfigUtils
* check the classes using the params for validation checks and move them here