Object

ksb.csle.didentification.utilities

OutlierManager

Related Doc: package utilities

Permalink

object OutlierManager extends Statistics with MethodString

This object provides some functions to manage outliers.

Linear Supertypes
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. OutlierManager
  2. MethodString
  3. Statistics
  4. DataFrameCheck
  5. AnyRef
  6. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  5. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  6. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  7. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  8. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  9. def getAvgValue(values: List[Long]): Double

    Permalink

    Gets the average value among the given list of values

    Gets the average value among the given list of values

    values

    the list of values

    returns

    Double The average value

    Definition Classes
    Statistics
    Annotations
    @throws( ... )
  10. def getAvgValue(src: DataFrame, columnName: String): String

    Permalink

    Gets the average value among this column

    Gets the average value among this column

    src

    Dataframe

    columnName

    Column

    returns

    String The string type of average value

    Definition Classes
    Statistics
  11. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  12. def getColumnName(src: DataFrame, columnId: Int): String

    Permalink

    Returns column name from src dataframe specified by the column ID defined by protobuf.

    Returns column name from src dataframe specified by the column ID defined by protobuf.

    src

    dataframe to get names of columns.

    columnId

    column ID to anonymize.

    returns

    String.

    Definition Classes
    DataFrameCheck
  13. def getColumnNames(src: DataFrame, columnIDs: Array[Int]): Array[String]

    Permalink

    Returns column names from src dataframe specified by column IDs.

    Returns column names from src dataframe specified by column IDs. Note that columns with invalid IDs are ignored.

    src

    dataframe to get names of columns.

    returns

    Array[String].

    Definition Classes
    DataFrameCheck
  14. def getCountValue(values: List[Long]): Double

    Permalink

    Gets the number of tuples among the given list of values

    Gets the number of tuples among the given list of values

    returns

    Double The number of tuples

    Definition Classes
    Statistics
  15. def getCountValue(src: DataFrame, columnName: String): String

    Permalink

    Gets the number of tuples among this column

    Gets the number of tuples among this column

    src

    Dataframe

    columnName

    Column

    returns

    String The string type of the number of tuples

    Definition Classes
    Statistics
  16. def getMaxValue(values: List[Double]): Double

    Permalink

    Gets the maximum value among the given list of values

    Gets the maximum value among the given list of values

    values

    the list of values

    returns

    Double Maximum value

    Definition Classes
    Statistics
  17. def getMaxValue(src: DataFrame, columnName: String): String

    Permalink

    Gets the maximum value among this column

    Gets the maximum value among this column

    src

    Dataframe

    columnName

    Column

    returns

    String the string type of Maximum value

    Definition Classes
    Statistics
  18. def getMethodString[T](method: T): String

    Permalink
    Definition Classes
    MethodString
  19. def getMinValue(values: List[Double]): Double

    Permalink

    Gets the minimum value among the given list of values

    Gets the minimum value among the given list of values

    values

    the list of values

    returns

    Double Minimum value

    Definition Classes
    Statistics
  20. def getMinValue(src: DataFrame, columnName: String): String

    Permalink

    Gets the minimum value among this column

    Gets the minimum value among this column

    src

    Dataframe

    columnName

    Column

    returns

    String the string type of Minimum value

    Definition Classes
    Statistics
  21. def getQuasiColumnIDs(fieldInfos: Array[FieldInfo]): Array[Int]

    Permalink
    Definition Classes
    DataFrameCheck
  22. def getSensColumnIDs(fieldInfos: Array[FieldInfo]): Array[Int]

    Permalink
    Definition Classes
    DataFrameCheck
  23. def getStdValue(values: List[Long]): Double

    Permalink

    Gets the standard deviation value among the given list of values

    Gets the standard deviation value among the given list of values

    values

    the list of values

    returns

    Double The standard deviation value

    Definition Classes
    Statistics
  24. def getStdValue(src: DataFrame, columnName: String): String

    Permalink

    Gets the standard deviation value among this column

    Gets the standard deviation value among this column

    src

    Dataframe

    columnName

    Column

    returns

    String The string type of standard deviation value

    Definition Classes
    Statistics
  25. def getValidColumnIDs(src: DataFrame, columnIDs: Array[Int]): Array[Int]

    Permalink
    Definition Classes
    DataFrameCheck
  26. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  27. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  28. def isValidColumnID(src: DataFrame, columnID: Int): Boolean

    Permalink

    Checks whether the given column ID is valid.

    Checks whether the given column ID is valid.

    src

    dataframe to get names of columns.

    returns

    Boolean.

    Definition Classes
    DataFrameCheck
  29. def isValidColumnName(src: DataFrame, columnName: String): Boolean

    Permalink

    Checks whether the given column name is valid.

    Checks whether the given column name is valid.

    src

    dataframe to get names of columns.

    columnName

    column Name.

    returns

    Boolean.

    Definition Classes
    DataFrameCheck
  30. def makeAgeOutlierMgmtTableBoxplot(src: DataFrame, columnName: String, method: String): Map[Interval, OutlierInfo]

    Permalink

    In the case of an age-related column, the outlier information may be decided by the 10s, 20s, and so on.

    In the case of an age-related column, the outlier information may be decided by the 10s, 20s, and so on.

    src

    Dataframe

    columnName

    Column

    method

    Methods to handle outliers. ex., min, max, avg, std, and count

    returns

    Map[Interval, Boxplot] Outlier management table (the Boxplot information in an interval)

  31. def makeAgeOutlierMgmtTableBoxplot[T](src: DataFrame, columnName: String, method: T): Map[Interval, OutlierInfo]

    Permalink

    Same as makeAgeOutlierMgmtTableBoxplot(src: DataFrame, columnName: String, method: String), but the method is of type AggregationMethod.

    Same as makeAgeOutlierMgmtTableBoxplot(src: DataFrame, columnName: String, method: String), but the method is of type AggregationMethod.

    src

    Dataframe

    columnName

    Column

    method

    Methods to handle outliers. ex., min, max, avg, std, and count

    returns

    Map[Interval, Boxplot] Outlier management table (the Boxplot information in an interval)

  32. def makeAgeOutlierMgmtTableZscore(src: DataFrame, columnName: String, method: String): Map[Interval, OutlierInfo]

    Permalink

    In the case of an age-related column, the outlier information may be decided by the 10s, 20s, and so on.

    In the case of an age-related column, the outlier information may be decided by the 10s, 20s, and so on.

    src

    Dataframe

    columnName

    Column

    method

    Methods to handle outliers. ex., min, max, avg, std, and count

    returns

    Map[Interval, Zscore] Outlier management table (the Z-score information in an interval)

  33. def makeAgeOutlierMgmtTableZscore[T](src: DataFrame, columnName: String, method: T): Map[Interval, OutlierInfo]

    Permalink

    Same as makeAgeOutlierMgmtTableZscore(src: DataFrame, columnName: String, method: String), but the method is of type AggregationMethod.

    Same as makeAgeOutlierMgmtTableZscore(src: DataFrame, columnName: String, method: String), but the method is of type AggregationMethod.

    src

    Dataframe

    columnName

    Column

    method

    Methods to handle outliers. ex., min, max, avg, std, and count

    returns

    Map[Interval, Zscore] Outlier management table (the Z-score information in an interval)

  34. def makeOutlierMgmtTableBoxplot(src: DataFrame, columnName: String, method: String, nSteps: Int): Map[Interval, OutlierInfo]

    Permalink

    Makes the outlier management table based on the boxplot technique which includes outlier information about some numerical interval as a form of map [numerical interval, outlier management info].

    Makes the outlier management table based on the boxplot technique which includes outlier information about some numerical interval as a form of map [numerical interval, outlier management info].

    src

    Dataframe

    columnName

    Column

    method

    Methods to handle outliers. ex., min, max, avg, std, and count

    returns

    Map[Interval, Boxplot] Outlier management table (the Boxplot information in an interval)

  35. def makeOutlierMgmtTableBoxplot[T](src: DataFrame, columnName: String, method: T, nSteps: Int): Map[Interval, OutlierInfo]

    Permalink
  36. def makeOutlierMgmtTableBoxplot(src: DataFrame, columnName: String, method: String): Map[Interval, OutlierInfo]

    Permalink

    Makes the outlier management table based on the boxplot technique which includes outlier information about some numerical interval as a form of map [numerical interval, outlier management info].

    Makes the outlier management table based on the boxplot technique which includes outlier information about some numerical interval as a form of map [numerical interval, outlier management info]. The default interval is set so that the number of intervals is 10.

    src

    Dataframe

    columnName

    Column

    method

    Methods to handle outliers. ex., min, max, avg, std, and count

    returns

    Map[Interval, Boxplot] Outlier management table (the Boxplot information in an interval)

  37. def makeOutlierMgmtTableBoxplot[T](src: DataFrame, columnName: String, method: T): Map[Interval, OutlierInfo]

    Permalink

    Same as makeOutlierMgmtTableBoxplot(src: DataFrame, columnName: String, method: String), but the method is of type AggregationMethod.

    Same as makeOutlierMgmtTableBoxplot(src: DataFrame, columnName: String, method: String), but the method is of type AggregationMethod.

    src

    Dataframe

    columnName

    Column

    method

    Methods to handle outliers. ex., min, max, avg, std, and count

    returns

    Map[Interval, Boxplot] Outlier management table (the Boxplot information in an interval)

  38. def makeOutlierMgmtTableZscore(src: DataFrame, columnName: String, method: String, nSteps: Int): Map[Interval, OutlierInfo]

    Permalink

    Makes the outlier management table based on the z-score technique which includes outlier information about some numerical interval as a form of map [numerical interval, outlier management info].

    Makes the outlier management table based on the z-score technique which includes outlier information about some numerical interval as a form of map [numerical interval, outlier management info].

    src

    Dataframe

    columnName

    Column

    method

    Methods to handle outliers. ex., min, max, avg, std, and count

    returns

    Map[Interval, Zscore] Outlier management table (the Z-score information in an interval)

  39. def makeOutlierMgmtTableZscore[T](src: DataFrame, columnName: String, method: T, nSteps: Int): Map[Interval, OutlierInfo]

    Permalink
  40. def makeOutlierMgmtTableZscore(src: DataFrame, columnName: String, method: String): Map[Interval, OutlierInfo]

    Permalink

    Makes the outlier management table based on the z-score technique which includes outlier information about some numerical interval as a form of map [numerical interval, outlier management info].

    Makes the outlier management table based on the z-score technique which includes outlier information about some numerical interval as a form of map [numerical interval, outlier management info]. The default interval is set so that the number of intervals is 10.

    src

    Dataframe

    columnName

    Column

    method

    Methods to handle outliers. ex., min, max, avg, std, and count

    returns

    Map[Interval, Zscore] Outlier management table (the Z-score information in an interval)

  41. def makeOutlierMgmtTableZscore[T](src: DataFrame, columnName: String, method: T): Map[Interval, OutlierInfo]

    Permalink

    Same as makeOutlierMgmtTableZscore(src: DataFrame, columnName: String, method: String), but the method is of type AggregationMethod.

    Same as makeOutlierMgmtTableZscore(src: DataFrame, columnName: String, method: String), but the method is of type AggregationMethod.

    src

    Dataframe

    columnName

    Column

    method

    Methods to handle outliers. ex., min, max, avg, std, and count

    returns

    Map[Interval, Zscore] Outlier management table (the Z-score information in an interval)

  42. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  43. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  44. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  45. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  46. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  47. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  48. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  49. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Inherited from MethodString

Inherited from Statistics

Inherited from DataFrameCheck

Inherited from AnyRef

Inherited from Any

Ungrouped