From d7670536bf454f046e3496e619341f32904a7a29 Mon Sep 17 00:00:00 2001
From: xiongyutian
Date: Thu, 28 Dec 2023 20:04:32 +0800
Subject: [PATCH] Update version to 3.0.0

---
 README.md                                          |  10 +-
 graph-accelerator/pom.xml                          |  18 +-
 .../org/apache/spark/graphx/GraphOps.scala         | 465 ++++++++++++++++++
 graph-core/pom.xml                                 |  17 +-
 .../scala/spark/mllib/feature/Word2Vec.scala       |   4 +-
 graph-kernel/pom.xml                               |   9 +-
 pom.xml                                            |   6 +-
 scalastyle-config.xml                              | 457 +++++++++--------
 8 files changed, 726 insertions(+), 260 deletions(-)
 create mode 100644 graph-accelerator/src/main/scala/org/apache/spark/graphx/GraphOps.scala

diff --git a/README.md b/README.md
index 9f4d90c..4ffe10c 100644
--- a/README.md
+++ b/README.md
@@ -5,9 +5,9 @@
 Introduction
 ============
 
-The graph algorithm library running on Kunpeng processors is an acceleration library that provides a rich set of high-level tools for graph algorithms. It is developed based on original APIs of Apache [Spark 3.1.1](https://github.com/apache/spark/tree/v3.1.1). The acceleration library greatly improves the computing power in big data scenarios. Additionally, it provides multiple APIs in addition to the original APIs if the Apache Spark graph library.
+The graph algorithm library running on Kunpeng processors is an acceleration library that provides a rich set of high-level tools for graph algorithms. It is developed based on the original APIs of Apache [Spark 3.3.1](https://github.com/apache/spark/tree/v3.3.1). The acceleration library greatly improves the computing power in big data scenarios, and it provides multiple APIs beyond the original APIs of the Apache Spark graph library.
 
-The library provides 16 graph algorithms: triangle count (TC), weak clique enumeration (WCE), maximal clique enumeration (MCE), modualrity, cycle detection (CD), label propagation algorithm (LPA), Louvain, PageRank, Multiple Source shortest path(MSSP), strongly connected components (SCC), K-core decomposition (KCore), breadth-first-search (BFS), ClusteringCoefficient, PersonalizedPageRank, Betweenness and SubgraphMatching. You can find the latest documentation on the project web page. This README file contains only basic setup instructions.
+The library provides 4 graph algorithms: maximal clique enumeration (MCE), multiple source shortest path (MSSP), PageRank and Betweenness. You can find the latest documentation on the project web page. This README file contains only basic setup instructions.
@@ -20,11 +20,11 @@ Building
 
     mvn package
 
-Obtain "boostkit-graph-acc_2.12-2.2.0-spark3.1.1.jar" from the "Spark-graph-algo-lib/graph-accelerator/target/" directory
+Obtain "boostkit-graph-acc_2.12-3.0.0-spark3.3.1.jar" from the "Spark-graph-algo-lib/graph-accelerator/target/" directory
 
-Obtain "boostkit-graph-core_2.12-2.2.0-Spark3.1.1.jar" from the "Spark-graph-algo-lib/graph-core/target/" directory
+Obtain "boostkit-graph-core_2.12-3.0.0-Spark3.3.1.jar" from the "Spark-graph-algo-lib/graph-core/target/" directory
 
-Obtain "boostkit-graph-kernel-clinet_2.12-2.2.0-Spark3.1.1.jar" from the "Spark-graph-algo-lib/graph-kernel/target/" directory
+Obtain "boostkit-graph-kernel-client_2.12-3.0.0-Spark3.3.1.jar" from the "Spark-graph-algo-lib/graph-kernel/target/" directory
diff --git a/graph-accelerator/pom.xml b/graph-accelerator/pom.xml
index 5ff7084..81de0a5 100644
--- a/graph-accelerator/pom.xml
+++ b/graph-accelerator/pom.xml
@@ -2,33 +2,35 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>boostkit-graph</artifactId>
-    <version>2.2.0</version>
+    <version>3.0.0</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.apache.spark.graphx.lib</groupId>
   <artifactId>boostkit-graph-acc_2.12</artifactId>
-  <version>2.2.0</version>
+  <version>3.0.0</version>
   <name>${project.artifactId}</name>
   <description>Spark graph algo accelerator</description>
-
+
   <dependencies>
     <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-graphx_2.12</artifactId>
-     <version>3.1.1</version>
+     <version>3.3.1</version>
+     <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark.graphx.lib</groupId>
      <artifactId>boostkit-graph-kernel-client_2.12</artifactId>
-     <version>2.2.0</version>
+     <version>3.0.0</version>
      <classifier>${spark.version}</classifier>
-     <scope>compile</scope>
+     <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>it.unimi.dsi</groupId>
      <artifactId>fastutil</artifactId>
      <version>8.3.1</version>
+     <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.scalatest</groupId>
@@ -40,11 +42,13 @@
      <groupId>org.mockito</groupId>
      <artifactId>mockito-core</artifactId>
      <version>1.10.19</version>
+     <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-mllib_2.12</artifactId>
-     <version>3.1.1</version>
+     <version>3.3.1</version>
+     <scope>provided</scope>
    </dependency>
  </dependencies>
diff --git a/graph-accelerator/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graph-accelerator/src/main/scala/org/apache/spark/graphx/GraphOps.scala
new file mode 100644
index 0000000..be2451c
--- /dev/null
+++ b/graph-accelerator/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -0,0 +1,465 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graphx
+
+import scala.reflect.ClassTag
+import scala.util.Random
+
+import org.apache.spark.SparkException
+import org.apache.spark.graphx.lib._
+import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.rdd.RDD
+
+/**
+ * Contains additional functionality for [[Graph]]. All operations are expressed in terms of the
+ * efficient GraphX API. This class is implicitly constructed for each Graph object.
+ *
+ * @tparam VD the vertex attribute type
+ * @tparam ED the edge attribute type
+ */
+class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Serializable {
+
+  /** The number of edges in the graph. */
+  @transient lazy val numEdges: Long = graph.edges.count()
+
+  /** The number of vertices in the graph. */
+  @transient lazy val numVertices: Long = graph.vertices.count()
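+  // A minimal usage sketch (not part of the original API surface; assumes an
+  // active SparkContext `sc` and the implicit Graph-to-GraphOps conversion):
+  //
+  //   val g = Graph.fromEdgeTuples(sc.parallelize(Seq((1L, 2L), (2L, 3L))), defaultValue = 0)
+  //   g.numVertices          // 3
+  //   g.inDegrees.collect()  // Array((2,1), (3,1)) -- vertex 1 has no in-edges, so it is absent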
+
+  /**
+   * The in-degree of each vertex in the graph.
+   * @note Vertices with no in-edges are not returned in the resulting RDD.
+   */
+  @transient lazy val inDegrees: VertexRDD[Int] =
+    degreesRDD(EdgeDirection.In).setName("GraphOps.inDegrees")
+
+  /**
+   * The out-degree of each vertex in the graph.
+   * @note Vertices with no out-edges are not returned in the resulting RDD.
+   */
+  @transient lazy val outDegrees: VertexRDD[Int] =
+    degreesRDD(EdgeDirection.Out).setName("GraphOps.outDegrees")
+
+  /**
+   * The degree of each vertex in the graph.
+   * @note Vertices with no edges are not returned in the resulting RDD.
+   */
+  @transient lazy val degrees: VertexRDD[Int] =
+    degreesRDD(EdgeDirection.Either).setName("GraphOps.degrees")
+
+  /**
+   * Computes the neighboring vertex degrees.
+   *
+   * @param edgeDirection the direction along which to collect neighboring vertex attributes
+   */
+  private def degreesRDD(edgeDirection: EdgeDirection): VertexRDD[Int] = {
+    if (edgeDirection == EdgeDirection.In) {
+      DegreeUtil.inDegreesRDD(graph)
+    } else if (edgeDirection == EdgeDirection.Out) {
+      DegreeUtil.outDegreesRDD(graph)
+    } else {
+      DegreeUtil.degreesRDD(graph)
+    }
+  }
+
+  /**
+   * Collect the neighbor vertex ids for each vertex.
+   *
+   * @param edgeDirection the direction along which to collect
+   * neighboring vertices
+   *
+   * @return the set of neighboring ids for each vertex
+   */
+  def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexId]] = {
+    val nbrs =
+      if (edgeDirection == EdgeDirection.Either) {
+        graph.aggregateMessages[Array[VertexId]](
+          ctx => { ctx.sendToSrc(Array(ctx.dstId)); ctx.sendToDst(Array(ctx.srcId)) },
+          _ ++ _, TripletFields.None)
+      } else if (edgeDirection == EdgeDirection.Out) {
+        graph.aggregateMessages[Array[VertexId]](
+          ctx => ctx.sendToSrc(Array(ctx.dstId)),
+          _ ++ _, TripletFields.None)
+      } else if (edgeDirection == EdgeDirection.In) {
+        graph.aggregateMessages[Array[VertexId]](
+          ctx => ctx.sendToDst(Array(ctx.srcId)),
+          _ ++ _, TripletFields.None)
+      } else {
+        throw new SparkException("It doesn't make sense to collect neighbor ids without a " +
+          "direction. (EdgeDirection.Both is not supported; use EdgeDirection.Either instead.)")
+      }
+    graph.vertices.leftZipJoin(nbrs) { (vid, vdata, nbrsOpt) =>
+      nbrsOpt.getOrElse(Array.empty[VertexId])
+    }
+  } // end of collectNeighborIds
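+  // Sketch (continuing the hypothetical graph `g` above, with edges 1->2 and 2->3):
+  //
+  //   g.collectNeighborIds(EdgeDirection.Either).lookup(2L)
+  //     // Seq(Array(1L, 3L)) -- both endpoints count as neighbors of vertex 2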
+
+  /**
+   * Collect the neighbor vertex attributes for each vertex.
+   *
+   * @note This function could be highly inefficient on power-law
+   * graphs where high degree vertices may force a large amount of
+   * information to be collected to a single location.
+   *
+   * @param edgeDirection the direction along which to collect
+   * neighboring vertices
+   *
+   * @return the vertex set of neighboring vertex attributes for each vertex
+   */
+  def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[Array[(VertexId, VD)]] = {
+    val nbrs = edgeDirection match {
+      case EdgeDirection.Either =>
+        graph.aggregateMessages[Array[(VertexId, VD)]](
+          ctx => {
+            ctx.sendToSrc(Array((ctx.dstId, ctx.dstAttr)))
+            ctx.sendToDst(Array((ctx.srcId, ctx.srcAttr)))
+          },
+          (a, b) => a ++ b, TripletFields.All)
+      case EdgeDirection.In =>
+        graph.aggregateMessages[Array[(VertexId, VD)]](
+          ctx => ctx.sendToDst(Array((ctx.srcId, ctx.srcAttr))),
+          (a, b) => a ++ b, TripletFields.Src)
+      case EdgeDirection.Out =>
+        graph.aggregateMessages[Array[(VertexId, VD)]](
+          ctx => ctx.sendToSrc(Array((ctx.dstId, ctx.dstAttr))),
+          (a, b) => a ++ b, TripletFields.Dst)
+      case EdgeDirection.Both =>
+        throw new SparkException("collectNeighbors does not support EdgeDirection.Both. Use " +
+          "EdgeDirection.Either instead.")
+    }
+    graph.vertices.leftJoin(nbrs) { (vid, vdata, nbrsOpt) =>
+      nbrsOpt.getOrElse(Array.empty[(VertexId, VD)])
+    }
+  } // end of collectNeighbors
+
+  /**
+   * Returns an RDD that contains for each vertex v its local edges,
+   * i.e., the edges that are incident on v, in the user-specified direction.
+   * Warning: singleton vertices (those with no edges in the given
+   * direction) will not be part of the return value.
+   *
+   * @note This function could be highly inefficient on power-law
+   * graphs where high degree vertices may force a large amount of
+   * information to be collected to a single location.
+   *
+   * @param edgeDirection the direction along which to collect
+   * the local edges of vertices
+   *
+   * @return the local edges for each vertex
+   */
+  def collectEdges(edgeDirection: EdgeDirection): VertexRDD[Array[Edge[ED]]] = {
+    edgeDirection match {
+      case EdgeDirection.Either =>
+        graph.aggregateMessages[Array[Edge[ED]]](
+          ctx => {
+            ctx.sendToSrc(Array(new Edge(ctx.srcId, ctx.dstId, ctx.attr)))
+            ctx.sendToDst(Array(new Edge(ctx.srcId, ctx.dstId, ctx.attr)))
+          },
+          (a, b) => a ++ b, TripletFields.EdgeOnly)
+      case EdgeDirection.In =>
+        graph.aggregateMessages[Array[Edge[ED]]](
+          ctx => ctx.sendToDst(Array(new Edge(ctx.srcId, ctx.dstId, ctx.attr))),
+          (a, b) => a ++ b, TripletFields.EdgeOnly)
+      case EdgeDirection.Out =>
+        graph.aggregateMessages[Array[Edge[ED]]](
+          ctx => ctx.sendToSrc(Array(new Edge(ctx.srcId, ctx.dstId, ctx.attr))),
+          (a, b) => a ++ b, TripletFields.EdgeOnly)
+      case EdgeDirection.Both =>
+        throw new SparkException("collectEdges does not support EdgeDirection.Both. Use " +
+          "EdgeDirection.Either instead.")
+    }
+  }
+
+  /**
+   * Remove self edges.
+   *
+   * @return a graph with all self edges removed
+   */
+  def removeSelfEdges(): Graph[VD, ED] = {
+    graph.subgraph(epred = e => e.srcId != e.dstId)
+  }
+
+  /**
+   * Join the vertices with an RDD and then apply a function from the
+   * vertex and RDD entry to a new vertex value. The input table
+   * should contain at most one entry for each vertex. If no entry is
+   * provided the map function is skipped and the old value is used.
+   *
+   * @tparam U the type of entry in the table of updates
+   * @param table the table to join with the vertices in the graph.
+   * The table should contain at most one entry for each vertex.
+   * @param mapFunc the function used to compute the new vertex
+   * values. The map function is invoked only for vertices with a
+   * corresponding entry in the table; otherwise the old vertex value
+   * is used.
+   *
+   * @example This function is used to update the vertices with new
+   * values based on external data. For example we could add the out
+   * degree to each vertex record:
+   *
+   * {{{
+   * val rawGraph: Graph[Int, Int] = GraphLoader.edgeListFile(sc, "webgraph")
+   *   .mapVertices((_, _) => 0)
+   * val outDeg = rawGraph.outDegrees
+   * val graph = rawGraph.joinVertices[Int](outDeg)((_, _, outDeg) => outDeg)
+   * }}}
+   *
+   */
+  def joinVertices[U: ClassTag](table: RDD[(VertexId, U)])(mapFunc: (VertexId, VD, U) => VD)
+    : Graph[VD, ED] = {
+    val uf = (id: VertexId, data: VD, o: Option[U]) => {
+      o match {
+        case Some(u) => mapFunc(id, data, u)
+        case None => data
+      }
+    }
+    graph.outerJoinVertices(table)(uf)
+  }
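+  // Sketch: per-vertex incident edges (same hypothetical `g`, edges 1->2 and 2->3):
+  //
+  //   g.collectEdges(EdgeDirection.Out).lookup(1L)
+  //     // Seq(Array(Edge(1,2,...))) -- vertex 3 has no out-edges, so it is absent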
+
+  /**
+   * Filter the graph by computing some values to filter on, and applying the predicates.
+   *
+   * @param preprocess a function to compute new vertex and edge data before filtering
+   * @param epred edge pred to filter on after preprocess, see more details under
+   * [[org.apache.spark.graphx.Graph#subgraph]]
+   * @param vpred vertex pred to filter on after preprocess, see more details under
+   * [[org.apache.spark.graphx.Graph#subgraph]]
+   * @tparam VD2 vertex type the vpred operates on
+   * @tparam ED2 edge type the epred operates on
+   * @return a subgraph of the original graph, with its data unchanged
+   *
+   * @example This function can be used to filter the graph based on some property, without
+   * changing the vertex and edge values in your program. For example, we could remove the vertices
+   * in a graph with 0 outdegree:
+   *
+   * {{{
+   * graph.filter(
+   *   graph => {
+   *     val degrees: VertexRDD[Int] = graph.outDegrees
+   *     graph.outerJoinVertices(degrees) {(vid, data, deg) => deg.getOrElse(0)}
+   *   },
+   *   vpred = (vid: VertexId, deg: Int) => deg > 0
+   * )
+   * }}}
+   *
+   */
+  def filter[VD2: ClassTag, ED2: ClassTag](
+      preprocess: Graph[VD, ED] => Graph[VD2, ED2],
+      epred: (EdgeTriplet[VD2, ED2]) => Boolean = (x: EdgeTriplet[VD2, ED2]) => true,
+      vpred: (VertexId, VD2) => Boolean = (v: VertexId, d: VD2) => true): Graph[VD, ED] = {
+    graph.mask(preprocess(graph).subgraph(epred, vpred))
+  }
+
+  /**
+   * Picks a random vertex from the graph and returns its ID.
+   */
+  def pickRandomVertex(): VertexId = {
+    val probability = 50.0 / graph.numVertices
+    var found = false
+    var retVal: VertexId = null.asInstanceOf[VertexId]
+    while (!found) {
+      val selectedVertices = graph.vertices.flatMap { vidVvals =>
+        if (Random.nextDouble() < probability) { Some(vidVvals._1) }
+        else { None }
+      }
+      if (selectedVertices.count > 0) {
+        found = true
+        val collectedVertices = selectedVertices.collect()
+        retVal = collectedVertices(Random.nextInt(collectedVertices.length))
+      }
+    }
+    retVal
+  }
+
+  /**
+   * Convert bi-directional edges into uni-directional ones.
+   * Some graph algorithms (e.g., TriangleCount) assume that an input graph
+   * has its edges in canonical direction.
+   * This function rewrites the vertex ids of edges so that srcIds are smaller
+   * than dstIds, and merges the duplicated edges.
+   *
+   * @param mergeFunc the user defined reduce function which should
+   * be commutative and associative and is used to combine the output
+   * of the map phase
+   *
+   * @return the resulting graph with canonical edges
+   */
+  def convertToCanonicalEdges(
+      mergeFunc: (ED, ED) => ED = (e1, e2) => e1): Graph[VD, ED] = {
+    val newEdges =
+      graph.edges
+        .map {
+          case e if e.srcId < e.dstId => ((e.srcId, e.dstId), e.attr)
+          case e => ((e.dstId, e.srcId), e.attr)
+        }
+        .reduceByKey(mergeFunc)
+        .map(e => new Edge(e._1._1, e._1._2, e._2))
+    Graph(graph.vertices, newEdges)
+  }
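+  // Sketch: assuming a hypothetical Graph[Int, String] with edges (2,1,"a") and (1,2,"b"),
+  //
+  //   g.convertToCanonicalEdges((e1, e2) => e1 + e2).edges.collect()
+  //     // Array(Edge(1,2,"ab")) or Array(Edge(1,2,"ba")), depending on encounter order,
+  //     // which is why mergeFunc should be commutative and associative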
+
+  /**
+   * Execute a Pregel-like iterative vertex-parallel abstraction. The
+   * user-defined vertex-program `vprog` is executed in parallel on
+   * each vertex receiving any inbound messages and computing a new
+   * value for the vertex. The `sendMsg` function is then invoked on
+   * all out-edges and is used to compute an optional message to the
+   * destination vertex. The `mergeMsg` function is a commutative
+   * associative function used to combine messages destined to the
+   * same vertex.
+   *
+   * On the first iteration all vertices receive the `initialMsg` and
+   * on subsequent iterations if a vertex does not receive a message
+   * then the vertex-program is not invoked.
+   *
+   * This function iterates until there are no remaining messages, or
+   * for `maxIterations` iterations.
+   *
+   * @tparam A the Pregel message type
+   *
+   * @param initialMsg the message each vertex will receive on
+   * the first iteration
+   *
+   * @param maxIterations the maximum number of iterations to run for
+   *
+   * @param activeDirection the direction of edges incident to a vertex that received a message in
+   * the previous round on which to run `sendMsg`. For example, if this is `EdgeDirection.Out`, only
+   * out-edges of vertices that received a message in the previous round will run.
+   *
+   * @param vprog the user-defined vertex program which runs on each
+   * vertex and receives the inbound message and computes a new vertex
+   * value. On the first iteration the vertex program is invoked on
+   * all vertices and is passed the default message. On subsequent
+   * iterations the vertex program is only invoked on those vertices
+   * that receive messages.
+   *
+   * @param sendMsg a user supplied function that is applied to out
+   * edges of vertices that received messages in the current
+   * iteration
+   *
+   * @param mergeMsg a user supplied function that takes two incoming
+   * messages of type A and merges them into a single message of type
+   * A. ''This function must be commutative and associative and
+   * ideally the size of A should not increase.''
+   *
+   * @return the resulting graph at the end of the computation
+   *
+   */
+  def pregel[A: ClassTag](
+      initialMsg: A,
+      maxIterations: Int = Int.MaxValue,
+      activeDirection: EdgeDirection = EdgeDirection.Either)(
+      vprog: (VertexId, VD, A) => VD,
+      sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
+      mergeMsg: (A, A) => A)
+    : Graph[VD, ED] = {
+    Pregel(graph, initialMsg, maxIterations, activeDirection)(vprog, sendMsg, mergeMsg)
+  }
+
+  /**
+   * Run a dynamic version of PageRank returning a graph with vertex attributes containing the
+   * PageRank and edge attributes containing the normalized edge weight.
+   *
+   * @see [[org.apache.spark.graphx.lib.PageRank$#runUntilConvergence]]
+   */
+  def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double] = {
+    PageRank.runUntilConvergence(graph, tol, resetProb)
+  }
+
+  /**
+   * Run personalized PageRank for a given vertex, such that all random walks
+   * are started relative to the source node.
+   *
+   * @see [[org.apache.spark.graphx.lib.PageRank$#runUntilConvergenceWithOptions]]
+   */
+  def personalizedPageRank(src: VertexId, tol: Double,
+      resetProb: Double = 0.15): Graph[Double, Double] = {
+    PageRank.runUntilConvergenceWithOptions(graph, tol, resetProb, Some(src))
+  }
+
+  /**
+   * Run parallel personalized PageRank for a given array of source vertices, such
+   * that all random walks are started relative to the source vertices.
+   */
+  def staticParallelPersonalizedPageRank(sources: Array[VertexId], numIter: Int,
+      resetProb: Double = 0.15): Graph[Vector, Double] = {
+    PageRank.runParallelPersonalizedPageRank(graph, numIter, resetProb, sources)
+  }
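+  // Sketch: single-source shortest paths via pregel, the classic GraphX pattern
+  // (assumes a Graph with Double edge attributes and a source id `src`; not part
+  // of this file's API):
+  //
+  //   val init = g.mapVertices((id, _) => if (id == src) 0.0 else Double.PositiveInfinity)
+  //   val sssp = init.pregel(Double.PositiveInfinity)(
+  //     (id, dist, msg) => math.min(dist, msg),                 // vprog: keep the shorter distance
+  //     t => if (t.srcAttr + t.attr < t.dstAttr) {              // sendMsg: relax the edge
+  //       Iterator((t.dstId, t.srcAttr + t.attr))
+  //     } else Iterator.empty,
+  //     (a, b) => math.min(a, b))                               // mergeMsg: min of candidates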
+
+  /**
+   * Run personalized PageRank for a fixed number of iterations, with all iterations
+   * originating at the source node, returning a graph with vertex attributes
+   * containing the PageRank and edge attributes the normalized edge weight.
+   *
+   * @see [[org.apache.spark.graphx.lib.PageRank$#runWithOptions]]
+   */
+  def staticPersonalizedPageRank(src: VertexId, numIter: Int,
+      resetProb: Double = 0.15): Graph[Double, Double] = {
+    PageRank.runWithOptions(graph, numIter, resetProb, Some(src))
+  }
+
+  /**
+   * Run PageRank for a fixed number of iterations returning a graph with vertex attributes
+   * containing the PageRank and edge attributes the normalized edge weight.
+   *
+   * @see [[org.apache.spark.graphx.lib.PageRank$#run]]
+   */
+  def staticPageRank(numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] = {
+    PageRank.run(graph, numIter, resetProb)
+  }
+
+  /**
+   * Compute the connected component membership of each vertex and return a graph with the vertex
+   * value containing the lowest vertex id in the connected component containing that vertex.
+   *
+   * @see `org.apache.spark.graphx.lib.ConnectedComponents.run`
+   */
+  def connectedComponents(): Graph[VertexId, ED] = {
+    ConnectedComponents.run(graph)
+  }
+
+  /**
+   * Compute the connected component membership of each vertex and return a graph with the vertex
+   * value containing the lowest vertex id in the connected component containing that vertex.
+   *
+   * @see `org.apache.spark.graphx.lib.ConnectedComponents.run`
+   */
+  def connectedComponents(maxIterations: Int): Graph[VertexId, ED] = {
+    ConnectedComponents.run(graph, maxIterations)
+  }
+
+  /**
+   * Compute the number of triangles passing through each vertex.
+   *
+   * @see [[org.apache.spark.graphx.lib.TriangleCount$#run]]
+   */
+  def triangleCount(): Graph[Int, ED] = {
+    TriangleCount.run(graph)
+  }
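+  // Sketch: the fixed-iteration variants compose directly on the same hypothetical `g`:
+  //
+  //   val ranks = g.staticPageRank(numIter = 10).vertices   // (VertexId, Double) rank pairs
+  //   val comps = g.connectedComponents().vertices          // (VertexId, lowest id in component)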
+
+  /**
+   * Compute the strongly connected component (SCC) of each vertex and return a graph with the
+   * vertex value containing the lowest vertex id in the SCC containing that vertex.
+   *
+   * @see [[org.apache.spark.graphx.lib.StronglyConnectedComponents$#run]]
+   */
+  def stronglyConnectedComponents(numIter: Int): Graph[VertexId, ED] = {
+    StronglyConnectedComponents.run(graph, numIter)
+  }
+} // end of GraphOps
diff --git a/graph-core/pom.xml b/graph-core/pom.xml
index edc08a8..b143306 100644
--- a/graph-core/pom.xml
+++ b/graph-core/pom.xml
@@ -2,13 +2,13 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>boostkit-graph</artifactId>
-    <version>2.2.0</version>
+    <version>3.0.0</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <artifactId>boostkit-graph-core_2.12</artifactId>
-  <version>2.2.0</version>
+  <version>3.0.0</version>
   <name>${project.artifactId}</name>
   <description>Spark graph core</description>
@@ -16,7 +16,8 @@
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-graphx_2.12</artifactId>
-     <version>3.1.1</version>
+     <version>3.3.1</version>
+     <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.scalatest</groupId>
      <artifactId>scalatest_2.12</artifactId>
      <version>3.0.5</version>
      <scope>test</scope>
    </dependency>
+   <dependency>
+     <groupId>com.fasterxml.jackson.core</groupId>
+     <artifactId>jackson-core</artifactId>
+     <version>2.13.0</version>
+     <scope>provided</scope>
+   </dependency>
    <dependency>
      <groupId>org.mockito</groupId>
      <artifactId>mockito-core</artifactId>
      <version>1.10.19</version>
+     <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-mllib_2.12</artifactId>
-     <version>3.1.1</version>
+     <version>3.3.1</version>
+     <scope>provided</scope>
    </dependency>
diff --git a/graph-core/src/main/scala/spark/mllib/feature/Word2Vec.scala b/graph-core/src/main/scala/spark/mllib/feature/Word2Vec.scala
index ae3ba98..8212d90 100644
--- a/graph-core/src/main/scala/spark/mllib/feature/Word2Vec.scala
+++ b/graph-core/src/main/scala/spark/mllib/feature/Word2Vec.scala
@@ -705,7 +705,7 @@ class Word2VecModel private[spark](
 }
 
 @Since("1.4.0")
-object Word2VecModel extends Loader[Word2VecModel] {
+object Word2VecModel extends Loader[Word2VecModel]{
 
   private def buildWordIndex(model: Map[String, Array[Float]]): Map[String, Int] = {
     model.keys.zipWithIndex.toMap
@@ -746,7 +746,7 @@ object Word2VecModel extends Loader[Word2VecModel] {
 
     def save(sc: SparkContext, path: String, model: Map[String, Array[Float]]): Unit = {
       val spark = SparkSession.builder().sparkContext(sc).getOrCreate()
-      val vectorSize = model.values.head.length
+      val vectorSize = model.values.toArray.head.length
       val numWords = model.size
       val metadata = compact(render(
         ("class" -> classNameV1_0) ~ ("version" -> formatVersionV1_0) ~
diff --git a/graph-kernel/pom.xml b/graph-kernel/pom.xml
index 6bd7513..b6f2275 100644
--- a/graph-kernel/pom.xml
+++ b/graph-kernel/pom.xml
@@ -2,15 +2,15 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>boostkit-graph</artifactId>
-    <version>2.2.0</version>
+    <version>3.0.0</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.apache.spark.graphx.lib</groupId>
   <artifactId>boostkit-graph-kernel-client_2.12</artifactId>
-  <version>2.2.0</version>
+  <version>3.0.0</version>
   <name>${project.artifactId}</name>
   <description>Spark graph algo client</description>
-  <inceptionYear>2020</inceptionYear>
+  <inceptionYear>2022</inceptionYear>
   <properties>
     <maven.compiler.source>1.8</maven.compiler.source>
     <maven.compiler.target>1.8</maven.compiler.target>
@@ -21,7 +21,8 @@
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-graphx_2.12</artifactId>
-     <version>3.1.1</version>
+     <version>3.3.1</version>
+     <scope>provided</scope>
    </dependency>
diff --git a/pom.xml b/pom.xml
index 112de3a..c0e87b0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,16 +3,16 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.apache.spark</groupId>
   <artifactId>boostkit-graph</artifactId>
-  <version>2.2.0</version>
+  <version>3.0.0</version>
   <name>${project.artifactId}</name>
   <description>Spark graph algo</description>
-  <inceptionYear>2020</inceptionYear>
+  <inceptionYear>2022</inceptionYear>
   <packaging>pom</packaging>
   <properties>
     <maven.compiler.source>1.8</maven.compiler.source>
     <maven.compiler.target>1.8</maven.compiler.target>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-    <spark.version>spark3.1.1</spark.version>
+    <spark.version>spark3.3.1</spark.version>
   </properties>
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 866e938..10b2bcd 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -38,17 +38,17 @@ This file is divided into 3 sections:
 -->
[The scalastyle-config.xml hunks (457 lines changed) are not recoverable here: the XML tags were
stripped when this patch was captured. What survives shows the standard Spark Scalastyle
configuration being re-indented, with its usual checks (SparkFunSuite, println,
@VisibleForTesting, Runtime.getRuntime.addShutdownHook, mutable.SynchronizedBuffer, Class.forName,
Await.result, Await.ready, JavaConversions, Commons Lang 2, import order, whitespace, and file,
method, and parameter length limits) otherwise unchanged, except that the extractOpt message now
reads "Use Utils.jsonOption(x).map(.extract[T]) instead of .extractOpt[T], as the latter is
slower."]
-- Gitee