error: object apache is not a member of package org


Question

I am learning Scala in a Docker container that doesn't have sbt or Maven installed. I am getting the error below, and all of the solutions I can find online involve sbt or Maven. Can this be handled without sbt or Maven?

I wanted to create the jar using:

scalac problem1.scala -d problem1.jar

Error:
problem1.scala:3: error: object apache is not a member of package org
import org.apache.spark.SparkContext

Code:

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.log4j.{Logger,Level}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions._
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{StructType, StructField,  LongType, StringType}
//import org.apache.parquet.format.StringType

object problem1 {
  def main(args: Array[String]) {
    Logger.getLogger("org").setLevel(Level.OFF)
    //Create conf object
    val conf = new SparkConf().setMaster("local[2]").setAppName("loadData")
    //create spark context object
    val sc = new SparkContext(conf)

    val SQLContext = new SQLContext(sc)
    import SQLContext.implicits._

    //Define the schema and read the file into a DataFrame
    val table_schema = StructType(Seq(
      StructField("TransID", LongType, true),
      StructField("CustID", LongType, true),
      StructField("TransTotal", LongType, true),
      StructField("TransNumItems", LongType, true),
      StructField("TransDesc", StringType, true)
    ))
    val T = SQLContext.read
      .format("csv")
      .schema(table_schema)
      .option("header","false")
      .option("nullValue","NA")
      .option("delimiter",",")
      .load(args(0))
    //    T.show(5)

    val T1 = T.filter($"TransTotal" >= 200)
    //    T1.show(5)
    val T2 = T1.groupBy("TransNumItems").agg(sum("TransTotal"), avg("TransTotal"),
      min("TransTotal"), max("TransTotal"))
    //    T2.show(500)
    T2.show()
    val T3 =  T1.groupBy("CustID").agg(count("TransID").as("number_of_transactions_T3"))
    //    T3.show(50)
    val T4 = T.filter($"TransTotal" >= 600)
    //   T4.show(5)
    val T5 = T4.groupBy("CustID").agg(count("TransID").as("number_of_transactions_T5"))
    //    T5.show(50)
    val temp = T3.as("T3").join(T5.as("T5"), ($"T3.CustID" === $"T5.CustID") )
    //    T6.show(5)
    //    print(T6.count())
    val T6 = temp.where(($"number_of_transactions_T5")*5 < $"number_of_transactions_T3")
    //    T6.show(5)
    T6.show()
    sc.stop
  }
}

Answer 1

Score: 1

  • Why not choose a Docker image that already has sbt? For example:
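
A minimal sketch of that route (sbtscala/scala-sbt is the community Scala + sbt image; the tag below is a placeholder to replace with a current one from Docker Hub, and a build.sbt declaring the Spark dependencies is assumed to exist in the mounted directory):

docker run -it --rm -v "$PWD":/work -w /work sbtscala/scala-sbt:<tag> sbt package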

  • Anyway, yes, you can certainly create a jar from the command line with plain Scala and no sbt. You need the dependency jars (spark-core, spark-catalyst, spark-sql, log4j, and possibly a few others) and have to specify the classpath manually:

scalac -cp /path/to/spark-core_2.13-3.3.1.jar:/path/to/spark-catalyst_2.13/3.3.1/spark-catalyst_2.13-3.3.1.jar:/path/to/spark-sql_2.13/3.3.1/spark-sql_2.13-3.3.1.jar:/path/to/log4j-1.2-api-2.17.2.jar -d problem1.jar problem1.scala 

For example, for me the path/to is the following:

scalac -cp /home/dmitin/.cache/coursier/v1/https/repo1.maven.org/maven2/org/apache/spark/spark-core_2.13/3.3.1/spark-core_2.13-3.3.1.jar:/home/dmitin/.cache/coursier/v1/https/repo1.maven.org/maven2/org/apache/spark/spark-catalyst_2.13/3.3.1/spark-catalyst_2.13-3.3.1.jar:/home/dmitin/.cache/coursier/v1/https/repo1.maven.org/maven2/org/apache/spark/spark-sql_2.13/3.3.1/spark-sql_2.13-3.3.1.jar:/home/dmitin/.cache/coursier/v1/https/repo1.maven.org/maven2/org/apache/logging/log4j/log4j-1.2-api/2.17.2/log4j-1.2-api-2.17.2.jar -d problem1.jar problem1.scala 
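
Note that compiling is only half of the story: to run the resulting jar you also need Spark's transitive dependencies on the runtime classpath. If you can install the coursier launcher (cs), it can resolve and print that classpath for you; for example (only a sketch; the coordinates mirror the jars above, and the scala runner must be a matching 2.13.x):

scala -cp "problem1.jar:$(cs fetch --classpath org.apache.spark:spark-sql_2.13:3.3.1 org.apache.logging.log4j:log4j-1.2-api:2.17.2)" problem1 /path/to/input.csv
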
  • Alternatively, on a machine where you do have sbt, you can create a fat jar (sbt assembly) with all the dependencies (or even with your application plus all the dependencies) and use it (a minimal build sketch follows the link below):

scalac -cp fat-jar.jar -d problem1.jar problem1.scala

https://github.com/sbt/sbt-assembly
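
For reference, a minimal build sketch for this route (the plugin and library versions below are assumptions, adjust them to your Scala/Spark setup):

// project/plugins.sbt
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.1")

// build.sbt
ThisBuild / scalaVersion := "2.13.10"
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-sql" % "3.3.1",
  "org.apache.logging.log4j" % "log4j-1.2-api" % "2.17.2"
)
// Spark's transitive jars ship conflicting META-INF entries, so a merge strategy is needed
assembly / assemblyMergeStrategy := {
  case PathList("META-INF", _*) => MergeStrategy.discard
  case _                        => MergeStrategy.first
}

Running sbt assembly then writes the fat jar under target/scala-2.13/.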

  • One more option is to create an sbt launcher for your application:

https://www.scala-sbt.org/1.x/docs/Sbt-Launcher.html

https://stackoverflow.com/questions/74440324/sbt-gives-java-lang-nullpointerexception-when-trying-to-run-spark

The sbt launcher helps run an application in environments where only Java is installed.

  • Another option is to manage dependencies with Coursier programmatically (see the sketch after the links below):

https://stackoverflow.com/questions/74311373/can-you-import-a-separate-version-of-the-same-dependency-into-one-build-file-for

https://stackoverflow.com/questions/70945320/how-to-compile-and-execute-scala-code-at-run-time-in-scala3

https://stackoverflow.com/questions/73911801/how-can-i-run-generated-code-during-script-runtime
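
A minimal sketch of the programmatic route, using coursier's small "interface" API (the artifact io.get-coursier:interface; the exact method names below should be double-checked against the coursier documentation):

// Resolve Spark (with transitive dependencies) and print a classpath usable with scalac -cp / scala -cp
import coursierapi.{Dependency, Fetch}
import scala.jdk.CollectionConverters._

object FetchSparkDeps {
  def main(args: Array[String]): Unit = {
    val jars = Fetch.create()
      .addDependencies(
        Dependency.of("org.apache.spark", "spark-sql_2.13", "3.3.1"),
        Dependency.of("org.apache.logging.log4j", "log4j-1.2-api", "2.17.2")
      )
      .fetch()                 // downloads into the local coursier cache
      .asScala

    println(jars.map(_.getAbsolutePath).mkString(java.io.File.pathSeparator))
  }
}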
