diff --git a/README.md b/README.md index 3639e62d34..ecb10916c4 100644 --- a/README.md +++ b/README.md @@ -11,10 +11,10 @@ SynapseML requires Scala 2.12, Spark 3.4+, and Python 3.8+. | Topics | Links | | :------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | Build | [![Build Status](https://msdata.visualstudio.com/A365/_apis/build/status/microsoft.SynapseML?branchName=master)](https://msdata.visualstudio.com/A365/_build/latest?definitionId=17563&branchName=master) [![codecov](https://codecov.io/gh/Microsoft/SynapseML/branch/master/graph/badge.svg)](https://codecov.io/gh/Microsoft/SynapseML) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) | -| Version | [![Version](https://img.shields.io/badge/version-1.0.7-blue)](https://github.com/Microsoft/SynapseML/releases) [![Release Notes](https://img.shields.io/badge/release-notes-blue)](https://github.com/Microsoft/SynapseML/releases) [![Snapshot Version](https://mmlspark.blob.core.windows.net/icons/badges/master_version3.svg)](#sbt) | -| Docs | [![Website](https://img.shields.io/badge/SynapseML-Website-blue)](https://aka.ms/spark) [![Scala Docs](https://img.shields.io/static/v1?label=api%20docs&message=scala&color=blue&logo=scala)](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/index.html#package) [![PySpark Docs](https://img.shields.io/static/v1?label=api%20docs&message=python&color=blue&logo=python)](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/index.html) [![Academic 
Paper](https://img.shields.io/badge/academic-paper-7fdcf7)](https://arxiv.org/abs/1810.08744) | +| Version | [![Version](https://img.shields.io/badge/version-1.0.8-blue)](https://github.com/Microsoft/SynapseML/releases) [![Release Notes](https://img.shields.io/badge/release-notes-blue)](https://github.com/Microsoft/SynapseML/releases) [![Snapshot Version](https://mmlspark.blob.core.windows.net/icons/badges/master_version3.svg)](#sbt) | +| Docs | [![Website](https://img.shields.io/badge/SynapseML-Website-blue)](https://aka.ms/spark) [![Scala Docs](https://img.shields.io/static/v1?label=api%20docs&message=scala&color=blue&logo=scala)](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/index.html#package) [![PySpark Docs](https://img.shields.io/static/v1?label=api%20docs&message=python&color=blue&logo=python)](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/index.html) [![Academic Paper](https://img.shields.io/badge/academic-paper-7fdcf7)](https://arxiv.org/abs/1810.08744) | | Support | [![Gitter](https://badges.gitter.im/Microsoft/MMLSpark.svg)](https://gitter.im/Microsoft/MMLSpark?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) [![Mail](https://img.shields.io/badge/mail-synapseml--support-brightgreen)](mailto:synapseml-support@microsoft.com) | -| Binder | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/microsoft/SynapseML/v1.0.7?labpath=notebooks%2Ffeatures) | +| Binder | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/microsoft/SynapseML/v1.0.8?labpath=notebooks%2Ffeatures) | | Usage | [![Downloads](https://static.pepy.tech/badge/synapseml)](https://pepy.tech/project/synapseml) |
@@ -119,7 +119,7 @@ In Azure Synapse notebooks please place the following in the first cell of your { "name": "synapseml", "conf": { - "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.7", + "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.8", "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", @@ -155,7 +155,7 @@ cloud](http://community.cloud.databricks.com), create a new [library from Maven coordinates](https://docs.databricks.com/user-guide/libraries.html#libraries-from-maven-pypi-or-spark-packages) in your workspace. -For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.7` +For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.8` with the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is attached to your target cluster(s). @@ -163,7 +163,7 @@ Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. I You can use SynapseML in both your Scala and PySpark notebooks. 
To get started with our example notebooks import the following databricks archive: -`https://mmlspark.blob.core.windows.net/dbcs/SynapseMLExamplesv1.0.7.dbc` +`https://mmlspark.blob.core.windows.net/dbcs/SynapseMLExamplesv1.0.8.dbc` ### Python Standalone @@ -174,7 +174,7 @@ the above example, or from python: ```python import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ - .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.7") \ + .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.8") \ .getOrCreate() import synapse.ml ``` @@ -185,9 +185,9 @@ SynapseML can be conveniently installed on existing Spark clusters via the `--packages` option, examples: ```bash -spark-shell --packages com.microsoft.azure:synapseml_2.12:1.0.7 -pyspark --packages com.microsoft.azure:synapseml_2.12:1.0.7 -spark-submit --packages com.microsoft.azure:synapseml_2.12:1.0.7 MyApp.jar +spark-shell --packages com.microsoft.azure:synapseml_2.12:1.0.8 +pyspark --packages com.microsoft.azure:synapseml_2.12:1.0.8 +spark-submit --packages com.microsoft.azure:synapseml_2.12:1.0.8 MyApp.jar ``` ### SBT @@ -196,7 +196,7 @@ If you are building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.7" +libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.8" ``` ### Apache Livy and HDInsight @@ -210,7 +210,7 @@ Excluding certain packages from the library may be necessary due to current issu { "name": "synapseml", "conf": { - "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.7", + "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.8", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind" } } diff --git 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala index a65714dfe2..786b84825d 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala @@ -19,7 +19,7 @@ object PackageUtils { val PackageName = s"synapseml_$ScalaVersionSuffix" val PackageMavenCoordinate = s"$PackageGroup:$PackageName:${BuildInfo.version}" // Use a fixed version for local testing - // val PackageMavenCoordinate = s"$PackageGroup:$PackageName:1.0.7" + // val PackageMavenCoordinate = s"$PackageGroup:$PackageName:1.0.8" private val AvroCoordinate = "org.apache.spark:spark-avro_2.12:3.4.1" val PackageRepository: String = SparkMLRepository diff --git a/docs/Explore Algorithms/AI Services/Overview.ipynb b/docs/Explore Algorithms/AI Services/Overview.ipynb index c6bf18bd0a..8c5b337342 100644 --- a/docs/Explore Algorithms/AI Services/Overview.ipynb +++ b/docs/Explore Algorithms/AI Services/Overview.ipynb @@ -85,66 +85,66 @@ "\n", "### Vision\n", "[**Computer Vision**](https://azure.microsoft.com/services/cognitive-services/computer-vision/)\n", - "- Describe: provides description of an image in human readable language ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/vision/DescribeImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.DescribeImage))\n", - "- Analyze (color, image type, face, adult/racy content): analyzes visual features of an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/vision/AnalyzeImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.AnalyzeImage))\n", - "- 
OCR: reads text from an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/vision/OCR.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.OCR))\n", - "- Recognize Text: reads text from an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/vision/RecognizeText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.RecognizeText))\n", - "- Thumbnail: generates a thumbnail of user-specified size from the image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/vision/GenerateThumbnails.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.GenerateThumbnails))\n", - "- Recognize domain-specific content: recognizes domain-specific content (celebrity, landmark) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/vision/RecognizeDomainSpecificContent.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.RecognizeDomainSpecificContent))\n", - "- Tag: identifies list of words that are relevant to the input image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/vision/TagImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.TagImage))\n", + "- Describe: provides description of an image in human readable language ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/vision/DescribeImage.html), 
[Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.DescribeImage))\n", + "- Analyze (color, image type, face, adult/racy content): analyzes visual features of an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/vision/AnalyzeImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.AnalyzeImage))\n", + "- OCR: reads text from an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/vision/OCR.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.OCR))\n", + "- Recognize Text: reads text from an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/vision/RecognizeText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.RecognizeText))\n", + "- Thumbnail: generates a thumbnail of user-specified size from the image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/vision/GenerateThumbnails.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.GenerateThumbnails))\n", + "- Recognize domain-specific content: recognizes domain-specific content (celebrity, landmark) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/vision/RecognizeDomainSpecificContent.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.RecognizeDomainSpecificContent))\n", + "- Tag: identifies list of words that are 
relevant to the input image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/vision/TagImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.vision.html#module-synapse.ml.services.vision.TagImage))\n", "\n", "[**Face**](https://azure.microsoft.com/services/cognitive-services/face/)\n", - "- Detect: detects human faces in an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/face/DetectFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.face.html#module-synapse.ml.services.face.DetectFace))\n", - "- Verify: verifies whether two faces belong to a same person, or a face belongs to a person ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/face/VerifyFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.face.html#module-synapse.ml.services.face.VerifyFaces))\n", - "- Identify: finds the closest matches of the specific query person face from a person group ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/face/IdentifyFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.face.html#module-synapse.ml.services.face.IdentifyFaces))\n", - "- Find similar: finds similar faces to the query face in a face list ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/face/FindSimilarFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.face.html#module-synapse.ml.services.face.FindSimilarFace))\n", - "- Group: divides a group of faces into disjoint groups based on similarity 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/face/GroupFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.face.html#module-synapse.ml.services.face.GroupFaces))\n", + "- Detect: detects human faces in an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/face/DetectFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.face.html#module-synapse.ml.services.face.DetectFace))\n", + "- Verify: verifies whether two faces belong to a same person, or a face belongs to a person ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/face/VerifyFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.face.html#module-synapse.ml.services.face.VerifyFaces))\n", + "- Identify: finds the closest matches of the specific query person face from a person group ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/face/IdentifyFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.face.html#module-synapse.ml.services.face.IdentifyFaces))\n", + "- Find similar: finds similar faces to the query face in a face list ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/face/FindSimilarFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.face.html#module-synapse.ml.services.face.FindSimilarFace))\n", + "- Group: divides a group of faces into disjoint groups based on similarity ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/face/GroupFaces.html), 
[Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.face.html#module-synapse.ml.services.face.GroupFaces))\n", "\n", "### Speech\n", "[**Speech Services**](https://azure.microsoft.com/products/ai-services/ai-speech)\n", - "- Speech-to-text: transcribes audio streams ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/speech/SpeechToText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.speech.html#module-synapse.ml.services.speech.SpeechToText))\n", - "- Conversation Transcription: transcribes audio streams into live transcripts with identified speakers. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/speech/ConversationTranscription.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.speech.html#module-synapse.ml.services.speech.ConversationTranscription))\n", - "- Text to Speech: Converts text to realistic audio ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/speech/TextToSpeech.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.speech.html#module-synapse.ml.services.speech.TextToSpeech))\n", + "- Speech-to-text: transcribes audio streams ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/speech/SpeechToText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.speech.html#module-synapse.ml.services.speech.SpeechToText))\n", + "- Conversation Transcription: transcribes audio streams into live transcripts with identified speakers. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/speech/ConversationTranscription.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.speech.html#module-synapse.ml.services.speech.ConversationTranscription))\n", + "- Text to Speech: Converts text to realistic audio ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/speech/TextToSpeech.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.speech.html#module-synapse.ml.services.speech.TextToSpeech))\n", "\n", "\n", "### Language\n", "[**AI Language**](https://azure.microsoft.com/products/ai-services/ai-language)\n", - "- Language detection: detects language of the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/text/LanguageDetector.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.text.html#module-synapse.ml.services.text.LanguageDetector))\n", - "- Key phrase extraction: identifies the key talking points in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/text/KeyPhraseExtractor.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.text.html#module-synapse.ml.services.text.KeyPhraseExtractor))\n", - "- Named entity recognition: identifies known entities and general named entities in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/text/NER.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.text.html#module-synapse.ml.services.text.NER))\n", - "- Sentiment analysis: returns a score between 0 and 1 indicating the sentiment in the input text 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/text/TextSentiment.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.text.html#module-synapse.ml.services.text.TextSentiment))\n", - "- Healthcare Entity Extraction: Extracts medical entities and relationships from text. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/text/AnalyzeHealthText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.text.html#module-synapse.ml.services.text.AnalyzeHealthText))\n", + "- Language detection: detects language of the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/text/LanguageDetector.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.text.html#module-synapse.ml.services.text.LanguageDetector))\n", + "- Key phrase extraction: identifies the key talking points in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/text/KeyPhraseExtractor.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.text.html#module-synapse.ml.services.text.KeyPhraseExtractor))\n", + "- Named entity recognition: identifies known entities and general named entities in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/text/NER.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.text.html#module-synapse.ml.services.text.NER))\n", + "- Sentiment analysis: returns a score between 0 and 1 indicating the sentiment in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/text/TextSentiment.html), 
[Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.text.html#module-synapse.ml.services.text.TextSentiment))\n", + "- Healthcare Entity Extraction: Extracts medical entities and relationships from text. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/text/AnalyzeHealthText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.text.html#module-synapse.ml.services.text.AnalyzeHealthText))\n", "\n", "\n", "### Translation\n", "[**Translator**](https://azure.microsoft.com/products/ai-services/translator)\n", - "- Translate: Translates text. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/translate/Translate.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.Translate))\n", - "- Transliterate: Converts text in one language from one script to another script. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/translate/Transliterate.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.Transliterate))\n", - "- Detect: Identifies the language of a piece of text. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/translate/Detect.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.Detect))\n", - "- BreakSentence: Identifies the positioning of sentence boundaries in a piece of text. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/translate/BreakSentence.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.BreakSentence))\n", - "- Dictionary Lookup: Provides alternative translations for a word and a small number of idiomatic phrases. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/translate/DictionaryLookup.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.DictionaryLookup))\n", - "- Dictionary Examples: Provides examples that show how terms in the dictionary are used in context. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/translate/DictionaryExamples.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.DictionaryExamples))\n", - "- Document Translation: Translates documents across all supported languages and dialects while preserving document structure and data format. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/translate/DocumentTranslator.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.DocumentTranslator))\n", + "- Translate: Translates text. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/translate/Translate.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.Translate))\n", + "- Transliterate: Converts text in one language from one script to another script. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/translate/Transliterate.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.Transliterate))\n", + "- Detect: Identifies the language of a piece of text. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/translate/Detect.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.Detect))\n", + "- BreakSentence: Identifies the positioning of sentence boundaries in a piece of text. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/translate/BreakSentence.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.BreakSentence))\n", + "- Dictionary Lookup: Provides alternative translations for a word and a small number of idiomatic phrases. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/translate/DictionaryLookup.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.DictionaryLookup))\n", + "- Dictionary Examples: Provides examples that show how terms in the dictionary are used in context. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/translate/DictionaryExamples.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.DictionaryExamples))\n", + "- Document Translation: Translates documents across all supported languages and dialects while preserving document structure and data format. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/translate/DocumentTranslator.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.translate.html#module-synapse.ml.services.translate.DocumentTranslator))\n", "\n", "### Document Intelligence\n", "[**Document Intelligence**](https://azure.microsoft.com/products/ai-services/ai-document-intelligence/)\n", - "- Analyze Layout: Extract text and layout information from a given document. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeLayout.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeLayout))\n", - "- Analyze Receipts: Detects and extracts data from receipts using optical character recognition (OCR) and our receipt model, enabling you to easily extract structured data from receipts such as merchant name, merchant phone number, transaction date, transaction total, and more. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeReceipts.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeReceipts))\n", - "- Analyze Business Cards: Detects and extracts data from business cards using optical character recognition (OCR) and our business card model, enabling you to easily extract structured data from business cards such as contact names, company names, phone numbers, emails, and more. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeBusinessCards.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeBusinessCards))\n", - "- Analyze Invoices: Detects and extracts data from invoices using optical character recognition (OCR) and our invoice understanding deep learning models, enabling you to easily extract structured data from invoices such as customer, vendor, invoice ID, invoice due date, total, invoice amount due, tax amount, ship to, bill to, line items and more. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeInvoices.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeInvoices))\n", - "- Analyze ID Documents: Detects and extracts data from identification documents using optical character recognition (OCR) and our ID document model, enabling you to easily extract structured data from ID documents such as first name, last name, date of birth, document number, and more. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeIDDocuments.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeIDDocuments))\n", - "- Analyze Custom Form: Extracts information from forms (PDFs and images) into structured data based on a model created from a set of representative training forms. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeCustomModel.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeCustomModel))\n", - "- Get Custom Model: Get detailed information about a custom model. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/form/GetCustomModel.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/form/ListCustomModels.html))\n", - "- List Custom Models: Get information about all custom models. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/form/ListCustomModels.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.ListCustomModels))\n", + "- Analyze Layout: Extract text and layout information from a given document. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeLayout.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeLayout))\n", + "- Analyze Receipts: Detects and extracts data from receipts using optical character recognition (OCR) and our receipt model, enabling you to easily extract structured data from receipts such as merchant name, merchant phone number, transaction date, transaction total, and more. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeReceipts.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeReceipts))\n", + "- Analyze Business Cards: Detects and extracts data from business cards using optical character recognition (OCR) and our business card model, enabling you to easily extract structured data from business cards such as contact names, company names, phone numbers, emails, and more. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeBusinessCards.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeBusinessCards))\n", + "- Analyze Invoices: Detects and extracts data from invoices using optical character recognition (OCR) and our invoice understanding deep learning models, enabling you to easily extract structured data from invoices such as customer, vendor, invoice ID, invoice due date, total, invoice amount due, tax amount, ship to, bill to, line items and more. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeInvoices.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeInvoices))\n", + "- Analyze ID Documents: Detects and extracts data from identification documents using optical character recognition (OCR) and our ID document model, enabling you to easily extract structured data from ID documents such as first name, last name, date of birth, document number, and more. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeIDDocuments.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeIDDocuments))\n", + "- Analyze Custom Form: Extracts information from forms (PDFs and images) into structured data based on a model created from a set of representative training forms. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/form/AnalyzeCustomModel.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.AnalyzeCustomModel))\n", + "- Get Custom Model: Get detailed information about a custom model. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/form/GetCustomModel.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.GetCustomModel))\n", + "- List Custom Models: Get information about all custom models. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/form/ListCustomModels.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.ListCustomModels))\n", "\n", "### Decision\n", "[**Anomaly Detector**](https://azure.microsoft.com/products/ai-services/ai-anomaly-detector)\n", - "- Anomaly status of latest point: generates a model using preceding points and determines whether the latest point is anomalous ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/anomaly/DetectLastAnomaly.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.anomaly.html#module-synapse.ml.services.anomaly.DetectLastAnomaly))\n", - "- Find anomalies: generates a model using an entire series and finds anomalies in the series ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/anomaly/DetectAnomalies.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.anomaly.html#module-synapse.ml.services.anomaly.DetectAnomalies))\n", + "- Anomaly status of latest point: generates a model using preceding points and determines whether the latest point is anomalous ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/anomaly/DetectLastAnomaly.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.anomaly.html#module-synapse.ml.services.anomaly.DetectLastAnomaly))\n", + "- Find anomalies: generates a model using an entire series and finds anomalies in the series ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/anomaly/DetectAnomalies.html), 
[Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.anomaly.html#module-synapse.ml.services.anomaly.DetectAnomalies))\n", "\n", "### Search\n", - "- [**Bing Image search**](https://azure.microsoft.com/services/services-services/bing-image-search-api/) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/bing/BingImageSearch.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.bing.html#module-synapse.ml.services.bing.BingImageSearch))\n", - "- [**Azure Cognitive search**](https://docs.microsoft.com/azure/search/search-what-is-azure-search) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/com/microsoft/azure/synapse/ml/services/search/AzureSearchWriter$.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.search.html#module-synapse.ml.services.search.AzureSearchWriter))" + "- [**Bing Image search**](https://azure.microsoft.com/services/services-services/bing-image-search-api/) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/bing/BingImageSearch.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.bing.html#module-synapse.ml.services.bing.BingImageSearch))\n", + "- [**Azure Cognitive search**](https://docs.microsoft.com/azure/search/search-what-is-azure-search) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/com/microsoft/azure/synapse/ml/services/search/AzureSearchWriter$.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.search.html#module-synapse.ml.services.search.AzureSearchWriter))" ] }, { diff --git a/docs/Explore Algorithms/Deep Learning/Getting Started.md b/docs/Explore Algorithms/Deep Learning/Getting Started.md index 3ae4371d32..d23cc8330d 100644 --- a/docs/Explore Algorithms/Deep Learning/Getting Started.md +++ 
b/docs/Explore Algorithms/Deep Learning/Getting Started.md @@ -21,12 +21,12 @@ Restarting the cluster automatically installs horovod v0.25.0 with pytorch_light You could install the single synapseml-deep-learning wheel package to get the full functionality of deep vision classification. Run the following command: ```powershell -pip install synapseml==1.0.7 +pip install synapseml==1.0.8 ``` An alternative is installing the SynapseML jar package in library management section, by adding: ``` -Coordinate: com.microsoft.azure:synapseml_2.12:1.0.7 +Coordinate: com.microsoft.azure:synapseml_2.12:1.0.8 Repository: https://mmlspark.azureedge.net/maven ``` :::note diff --git a/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb b/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb index 90533ce675..bc7af7ecaa 100644 --- a/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb +++ b/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.ipynb @@ -266,7 +266,7 @@ "source": [ "## 4 - Simplify form recognition output\n", "\n", - "This code uses the [FormOntologyLearner](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.FormOntologyTransformer), a transformer that analyzes the output of Form Recognizer transformers (for Azure AI Document Intelligence) and infers a tabular data structure. The output of AnalyzeInvoices is dynamic and varies based on the features detected in your content.\n", + "This code uses the [FormOntologyLearner](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.FormOntologyTransformer), a transformer that analyzes the output of Form Recognizer transformers (for Azure AI Document Intelligence) and infers a tabular data structure. 
The output of AnalyzeInvoices is dynamic and varies based on the features detected in your content.\n", "\n", "FormOntologyLearner extends the utility of the AnalyzeInvoices transformer by looking for patterns that can be used to create a tabular data structure. Organizing the output into multiple columns and rows makes for simpler downstream analysis." ] diff --git a/docs/Explore Algorithms/Other Algorithms/Cyber ML.md b/docs/Explore Algorithms/Other Algorithms/Cyber ML.md index fbe0144764..8f501f1371 100644 --- a/docs/Explore Algorithms/Other Algorithms/Cyber ML.md +++ b/docs/Explore Algorithms/Other Algorithms/Cyber ML.md @@ -18,50 +18,50 @@ sidebar_label: CyberML In other words, it returns a sample from the complement set. ## feature engineering: [indexers.py](https://github.com/microsoft/SynapseML/blob/master/core/src/main/python/synapse/ml/cyber/feature/indexers.py) -1. [IdIndexer](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.IdIndexer) +1. [IdIndexer](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.IdIndexer) is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html). Given a dataframe, it creates an IdIndexerModel (described next) for categorical features. The model maps each partition and column seen in the given dataframe to an ID, for each partition or one consecutive range for all partition and column values. -2. [IdIndexerModel](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.IdIndexerModel) +2. [IdIndexerModel](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.IdIndexerModel) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). 
Given a dataframe maps each partition and column field to a consecutive integer ID. Partitions or column values not encountered in the estimator are mapped to 0. The model can operate in two modes, either create consecutive integer ID independently -3. [MultiIndexer](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.MultiIndexer) +3. [MultiIndexer](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.MultiIndexer) is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html). Uses multiple IdIndexers to generate a MultiIndexerModel (described next) for categorical features. The model contains multiple IdIndexers for multiple partitions and columns. -4. [MultiIndexerModel](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.MultiIndexerModel) +4. [MultiIndexerModel](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.MultiIndexerModel) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). Given a dataframe maps each partition and column field to a consecutive integer ID. Partitions or column values not encountered in the estimator are mapped to 0. The model can operate in two modes, either create consecutive integer ID independently ## feature engineering: [scalers.py](https://github.com/microsoft/SynapseML/blob/master/core/src/main/python/synapse/ml/cyber/feature/scalers.py) -1. [StandardScalarScaler](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.StandardScalarScaler) +1. 
[StandardScalarScaler](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.StandardScalarScaler) is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html). Given a dataframe it creates a StandardScalarScalerModel (described next) which normalizes any given dataframe according to the mean and standard deviation calculated on the dataframe given to the estimator. -2. [StandardScalarScalerModel](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.StandardScalarScalerModel) +2. [StandardScalarScalerModel](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.StandardScalarScalerModel) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). Given a dataframe with a value column x, the transformer changes its value as follows: x'=(x-mean)/stddev. That is, if the transformer is given the same dataframe the estimator was given then the value column will have a mean of 0.0 and a standard deviation of 1.0. -3. [LinearScalarScaler](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.LinearScalarScaler) +3. [LinearScalarScaler](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.LinearScalarScaler) is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html). Given a dataframe it creates a LinearScalarScalerModel (described next) which normalizes any given dataframe according to the minimum and maximum values calculated on the dataframe given to the estimator. -4. 
[LinearScalarScalerModel](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.LinearScalarScalerModel) +4. [LinearScalarScalerModel](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.LinearScalarScalerModel) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). Given a dataframe with a value column x, the transformer changes its value such that if the transformer is given the same dataframe the estimator was given then the value column will be scaled linearly to the given ranges. ## access anomalies: [collaborative_filtering.py](https://github.com/microsoft/SynapseML/blob/master/core/src/main/python/synapse/ml/cyber/anomaly/collaborative_filtering.py) -1. [AccessAnomaly](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomaly) +1. [AccessAnomaly](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomaly) is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html). Given a dataframe, the estimator generates an AccessAnomalyModel (described next). The model can detect anomalous access of users to resources where the access @@ -69,14 +69,14 @@ sidebar_label: CyberML a resource from Finance. This result is based solely on access patterns rather than explicit features. Internally, the code is based on Collaborative Filtering as implemented in Spark, using Matrix Factorization with Alternating Least Squares. -2. [AccessAnomalyModel](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomalyModel) +2. 
[AccessAnomalyModel](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomalyModel) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). Given a dataframe the transformer computes a value between (-inf, inf) where positive values indicate an anomaly score. Anomaly scores are computed to have a mean of 1.0 and a standard deviation of 1.0 over the original dataframe given to the estimator. -3. [ModelNormalizeTransformer](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.ModelNormalizeTransformer) +3. [ModelNormalizeTransformer](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.ModelNormalizeTransformer) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). This transformer is used internally by AccessAnomaly to normalize a model to generate anomaly scores with mean 0.0 and standard deviation of 1.0. -4. [AccessAnomalyConfig](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomalyConfig) +4. [AccessAnomalyConfig](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomalyConfig) contains the default values for AccessAnomaly. 
diff --git a/docs/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.ipynb b/docs/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.ipynb index b76215648a..1dc88b7d1f 100644 --- a/docs/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.ipynb +++ b/docs/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.ipynb @@ -34,7 +34,7 @@ "# Create an Azure Databricks cluster and install the following libs\n", "\n", "1. In Cluster Libraries install from library source Maven:\n", - "Coordinates: com.microsoft.azure:synapseml_2.12:1.0.7\n", + "Coordinates: com.microsoft.azure:synapseml_2.12:1.0.8\n", "Repository: https://mmlspark.azureedge.net/maven\n", "\n", "2. In Cluster Libraries install from PyPI the library called plotly" diff --git a/docs/Explore Algorithms/Regression/Quickstart - Data Cleaning.ipynb b/docs/Explore Algorithms/Regression/Quickstart - Data Cleaning.ipynb index 0fd2d92eff..617cdc6305 100644 --- a/docs/Explore Algorithms/Regression/Quickstart - Data Cleaning.ipynb +++ b/docs/Explore Algorithms/Regression/Quickstart - Data Cleaning.ipynb @@ -16,11 +16,11 @@ "\n", "This sample demonstrates how to use the following APIs:\n", "- [`TrainRegressor`\n", - " ](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.train.html?#module-synapse.ml.train.TrainRegressor)\n", + " ](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.train.html?#module-synapse.ml.train.TrainRegressor)\n", "- [`ComputePerInstanceStatistics`\n", - " ](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.train.html?#module-synapse.ml.train.ComputePerInstanceStatistics)\n", + " ](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.train.html?#module-synapse.ml.train.ComputePerInstanceStatistics)\n", "- [`DataConversion`\n", - " 
](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.featurize.html?#module-synapse.ml.featurize.DataConversion)" + " ](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.featurize.html?#module-synapse.ml.featurize.DataConversion)" ] }, { diff --git a/docs/Explore Algorithms/Regression/Quickstart - Train Regressor.ipynb b/docs/Explore Algorithms/Regression/Quickstart - Train Regressor.ipynb index f70ea6145e..d794bbca19 100644 --- a/docs/Explore Algorithms/Regression/Quickstart - Train Regressor.ipynb +++ b/docs/Explore Algorithms/Regression/Quickstart - Train Regressor.ipynb @@ -15,15 +15,15 @@ "\n", "This sample demonstrates the use of several members of the synapseml library:\n", "- [`TrainRegressor`\n", - " ](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.train.html?#module-synapse.ml.train.TrainRegressor)\n", + " ](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.train.html?#module-synapse.ml.train.TrainRegressor)\n", "- [`SummarizeData`\n", - " ](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.stages.html?#module-synapse.ml.stages.SummarizeData)\n", + " ](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.stages.html?#module-synapse.ml.stages.SummarizeData)\n", "- [`CleanMissingData`\n", - " ](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.featurize.html?#module-synapse.ml.featurize.CleanMissingData)\n", + " ](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.featurize.html?#module-synapse.ml.featurize.CleanMissingData)\n", "- [`ComputeModelStatistics`\n", - " ](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.train.html?#module-synapse.ml.train.ComputeModelStatistics)\n", + " ](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.train.html?#module-synapse.ml.train.ComputeModelStatistics)\n", "- [`FindBestModel`\n", - " 
](https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/synapse.ml.automl.html?#module-synapse.ml.automl.FindBestModel)\n", + " ](https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/synapse.ml.automl.html?#module-synapse.ml.automl.FindBestModel)\n", "\n", "First, import the pandas package so that we can read and parse the datafile\n", "using `pandas.read_csv()`" diff --git a/docs/Get Started/Install SynapseML.md b/docs/Get Started/Install SynapseML.md index deb7dc7d75..394d45daca 100644 --- a/docs/Get Started/Install SynapseML.md +++ b/docs/Get Started/Install SynapseML.md @@ -32,7 +32,7 @@ For Spark3.4 pools { "name": "synapseml", "conf": { - "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.7", + "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.8", "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", @@ -64,8 +64,8 @@ installed via pip with `pip install pyspark`. 
```python import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ - # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.7 version for Spark3.4 - .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.7") \ + # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.8 version for Spark3.4 + .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.8") \ .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ .getOrCreate() import synapse.ml @@ -78,8 +78,8 @@ your `build.sbt`: ```scala resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" -// Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.7 version for Spark3.4 -libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.7" +// Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.8 version for Spark3.4 +libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.8" ``` ## Spark package @@ -88,10 +88,10 @@ SynapseML can be conveniently installed on existing Spark clusters via the `--packages` option, examples: ```bash -# Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.7 version for Spark3.4 -spark-shell --packages com.microsoft.azure:synapseml_2.12:1.0.7 -pyspark --packages com.microsoft.azure:synapseml_2.12:1.0.7 -spark-submit --packages com.microsoft.azure:synapseml_2.12:1.0.7 MyApp.jar +# Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.8 version for Spark3.4 +spark-shell --packages com.microsoft.azure:synapseml_2.12:1.0.8 +pyspark --packages com.microsoft.azure:synapseml_2.12:1.0.8 +spark-submit --packages com.microsoft.azure:synapseml_2.12:1.0.8 MyApp.jar ``` A similar technique can be used in other Spark contexts too. For example, you can use SynapseML @@ -106,7 +106,7 @@ cloud](http://community.cloud.databricks.com), create a new [library from Maven coordinates](https://docs.databricks.com/user-guide/libraries.html#libraries-from-maven-pypi-or-spark-packages) in your workspace. 
-For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.7` for Spark3.4 Cluster and +For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.8` for Spark3.4 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3` for Spark3.3 Cluster; Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is attached to your target cluster(s). @@ -115,7 +115,7 @@ Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. You can use SynapseML in both your Scala and PySpark notebooks. To get started with our example notebooks, import the following databricks archive: -`https://mmlspark.blob.core.windows.net/dbcs/SynapseMLExamplesv1.0.7.dbc` +`https://mmlspark.blob.core.windows.net/dbcs/SynapseMLExamplesv1.0.8.dbc` ## Apache Livy and HDInsight @@ -128,8 +128,8 @@ Excluding certain packages from the library may be necessary due to current issu { "name": "synapseml", "conf": { - # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.7 version for Spark3.4 - "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.7", + # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.8 version for Spark3.4 + "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.8", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind" } } @@ -142,8 +142,8 @@ In Azure Synapse, "spark.yarn.user.classpath.first" should be set to "true" to o { "name": "synapseml", "conf": { - # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.7 version for Spark3.4 - "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.7", + # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.8 version for Spark3.4 + "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.8", "spark.jars.excludes": 
"org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true" } diff --git a/docs/Reference/Docker Setup.md b/docs/Reference/Docker Setup.md index fb0c587614..ce21547f41 100644 --- a/docs/Reference/Docker Setup.md +++ b/docs/Reference/Docker Setup.md @@ -32,7 +32,7 @@ You can now select one of the sample notebooks and run it, or create your own. In the preceding docker command, `mcr.microsoft.com/mmlspark/release` specifies the project and image name that you want to run. There's another component implicit here: the _tag_ (= version) that you want to use. Specifying it explicitly looks like -`mcr.microsoft.com/mmlspark/release:1.0.7` for the `1.0.7` tag. +`mcr.microsoft.com/mmlspark/release:1.0.8` for the `1.0.8` tag. Leaving `mcr.microsoft.com/mmlspark/release` by itself has an implicit `latest` tag, so it's equivalent to `mcr.microsoft.com/mmlspark/release:latest`. The `latest` tag is identical to the @@ -48,7 +48,7 @@ that you'll probably want to use can look as follows: docker run -it --rm \ -p 127.0.0.1:80:8888 \ -v ~/myfiles:/notebooks/myfiles \ - mcr.microsoft.com/mmlspark/release:1.0.7 + mcr.microsoft.com/mmlspark/release:1.0.8 ``` In this example, backslashes are for readability; you @@ -58,7 +58,7 @@ path and line breaks looks a little different: docker run -it --rm ` -p 127.0.0.1:80:8888 ` -v C:\myfiles:/notebooks/myfiles ` - mcr.microsoft.com/mmlspark/release:1.0.7 + mcr.microsoft.com/mmlspark/release:1.0.8 Let's break this command and go over the meaning of each part: @@ -141,7 +141,7 @@ Let's break this command and go over the meaning of each part: model.write().overwrite().save('myfiles/myTrainedModel.mml') ``` -- **`mcr.microsoft.com/mmlspark/release:1.0.7`** +- **`mcr.microsoft.com/mmlspark/release:1.0.8`** Finally, this argument specifies an explicit version tag for the image that we want to run. 
diff --git a/docs/Reference/Dotnet Setup.md b/docs/Reference/Dotnet Setup.md index f100f093b7..bca947399e 100644 --- a/docs/Reference/Dotnet Setup.md +++ b/docs/Reference/Dotnet Setup.md @@ -37,7 +37,7 @@ for a Windows x64 machine or jdk-8u231-macosx-x64.dmg for macOS. Then, use the c ### 3. Install Apache Spark [Download and install Apache Spark](https://spark.apache.org/downloads.html) with version >= 3.2.0. -(SynapseML v1.0.7 only supports spark version >= 3.2.0) +(SynapseML v1.0.8 only supports spark version >= 3.2.0) Extract downloaded zipped files (with 7-Zip app on Windows or `tar` on linux) and remember the location of extracted files, we take `~/bin/spark-3.2.0-bin-hadoop3.2/` as an example here. @@ -127,7 +127,7 @@ In your command prompt or terminal, run the following command: dotnet add package Microsoft.Spark --version 2.1.1 ``` :::note -This tutorial uses Microsoft.Spark version 2.1.1 as SynapseML 1.0.7 depends on it. +This tutorial uses Microsoft.Spark version 2.1.1 as SynapseML 1.0.8 depends on it. Change to corresponding version if necessary. ::: @@ -137,7 +137,7 @@ In your command prompt or terminal, run the following command: ```powershell # Update Nuget Config to include SynapseML Feed dotnet nuget add source https://mmlspark.blob.core.windows.net/synapsemlnuget/index.json -n SynapseMLFeed -dotnet add package SynapseML.Cognitive --version 1.0.7 +dotnet add package SynapseML.Cognitive --version 1.0.8 ``` The `dotnet nuget add` command adds SynapseML's resolver to the source, so that our package can be found. @@ -202,7 +202,7 @@ namespace SynapseMLApp of Apache Spark applications, which manages the context and information of your application. A DataFrame is a way of organizing data into a set of named columns. 
-Create a [TextSentiment](https://mmlspark.blob.core.windows.net/docs/1.0.7/dotnet/classSynapse_1_1ML_1_1Cognitive_1_1TextSentiment.html) +Create a [TextSentiment](https://mmlspark.blob.core.windows.net/docs/1.0.8/dotnet/classSynapse_1_1ML_1_1Cognitive_1_1TextSentiment.html) instance, set corresponding subscription key and other configurations. Then, apply transformation to the dataframe, which analyzes the sentiment based on each row, and stores result into output column. @@ -218,9 +218,9 @@ dotnet build Navigate to your build output directory. For example, in Windows you could run `cd bin\Debug\net5.0`. Use the spark-submit command to submit your application to run on Apache Spark. ```powershell -spark-submit --class org.apache.spark.deploy.dotnet.DotnetRunner --packages com.microsoft.azure:synapseml_2.12:1.0.7 --master local microsoft-spark-3-2_2.12-2.1.1.jar dotnet SynapseMLApp.dll +spark-submit --class org.apache.spark.deploy.dotnet.DotnetRunner --packages com.microsoft.azure:synapseml_2.12:1.0.8 --master local microsoft-spark-3-2_2.12-2.1.1.jar dotnet SynapseMLApp.dll ``` -`--packages com.microsoft.azure:synapseml_2.12:1.0.7` specifies the dependency on synapseml_2.12 version 1.0.7; +`--packages com.microsoft.azure:synapseml_2.12:1.0.8` specifies the dependency on synapseml_2.12 version 1.0.8; `microsoft-spark-3-2_2.12-2.1.1.jar` specifies Microsoft.Spark version 2.1.1 and Spark version 3.2 :::note This command assumes you have downloaded Apache Spark and added it to your PATH environment variable so that you can use spark-submit. @@ -238,7 +238,7 @@ When your app runs, the sentiment analysis result is written to the console. +-----------------------------------------+--------+-----+--------------------------------------------------+ ``` Congratulations! You successfully authored and ran a .NET for SynapseML app. -Refer to the [developer docs](https://mmlspark.blob.core.windows.net/docs/1.0.7/dotnet/index.html) for API guidance. 
+Refer to the [developer docs](https://mmlspark.blob.core.windows.net/docs/1.0.8/dotnet/index.html) for API guidance. ## Next diff --git a/docs/Reference/Quickstart - LightGBM in Dotnet.md b/docs/Reference/Quickstart - LightGBM in Dotnet.md index 816919d79d..50988edb37 100644 --- a/docs/Reference/Quickstart - LightGBM in Dotnet.md +++ b/docs/Reference/Quickstart - LightGBM in Dotnet.md @@ -13,8 +13,8 @@ Make sure you have followed the guidance in [.NET installation](../Dotnet%20Setu Install NuGet packages by running following command: ```powershell dotnet add package Microsoft.Spark --version 2.1.1 -dotnet add package SynapseML.Lightgbm --version 1.0.7 -dotnet add package SynapseML.Core --version 1.0.7 +dotnet add package SynapseML.Lightgbm --version 1.0.8 +dotnet add package SynapseML.Core --version 1.0.8 ``` Use the following code in your main program file: @@ -91,7 +91,7 @@ namespace SynapseMLApp Run `dotnet build` to build the project. Then navigate to build output directory, and run following command: ```powershell -spark-submit --class org.apache.spark.deploy.dotnet.DotnetRunner --packages com.microsoft.azure:synapseml_2.12:1.0.7,org.apache.hadoop:hadoop-azure:3.3.1 --master local microsoft-spark-3-2_2.12-2.1.1.jar dotnet SynapseMLApp.dll +spark-submit --class org.apache.spark.deploy.dotnet.DotnetRunner --packages com.microsoft.azure:synapseml_2.12:1.0.8,org.apache.hadoop:hadoop-azure:3.3.1 --master local microsoft-spark-3-2_2.12-2.1.1.jar dotnet SynapseMLApp.dll ``` :::note Here we added two packages: synapseml_2.12 for SynapseML's scala source, and hadoop-azure to support reading files from ADLS. diff --git a/docs/Reference/R Setup.md b/docs/Reference/R Setup.md index 142b371591..d7588702db 100644 --- a/docs/Reference/R Setup.md +++ b/docs/Reference/R Setup.md @@ -55,7 +55,7 @@ Installing all dependencies may be time-consuming. 
When complete, run: library(sparklyr) library(dplyr) config <- spark_config() -config$sparklyr.defaultPackages <- "com.microsoft.azure:synapseml_2.12:1.0.7" +config$sparklyr.defaultPackages <- "com.microsoft.azure:synapseml_2.12:1.0.8" sc <- spark_connect(master = "local", config = config) ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.7.zip") +devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.8.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") diff --git a/start b/start index caa815a7b1..b3e69253b5 100644 --- a/start +++ b/start @@ -4,7 +4,7 @@ export OPENMPI_VERSION="3.1.2" export SPARK_VERSION="3.4.1" export HADOOP_VERSION="3.3" -export SYNAPSEML_VERSION="1.0.7" # Binder compatibility version +export SYNAPSEML_VERSION="1.0.8" # Binder compatibility version echo "Beginning Spark Session..." exec "$@" diff --git a/tools/docker/demo/Dockerfile b/tools/docker/demo/Dockerfile index 623b08869b..c426146ace 100644 --- a/tools/docker/demo/Dockerfile +++ b/tools/docker/demo/Dockerfile @@ -1,6 +1,6 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04 -ARG SYNAPSEML_VERSION=1.0.7 +ARG SYNAPSEML_VERSION=1.0.8 ARG DEBIAN_FRONTEND=noninteractive ENV SPARK_VERSION=3.4.1 diff --git a/tools/docker/demo/README.md b/tools/docker/demo/README.md index 1bb3fed807..167298b08d 100644 --- a/tools/docker/demo/README.md +++ b/tools/docker/demo/README.md @@ -15,9 +15,9 @@ docker build . --build-arg SYNAPSEML_VERSION= -f tools/docker eg. -For building image with SynapseML version 1.0.7, run: +For building image with SynapseML version 1.0.8, run: ``` -docker build . --build-arg SYNAPSEML_VERSION=1.0.7 -f tools/docker/demo/Dockerfile -t synapseml:1.0.7 +docker build . 
--build-arg SYNAPSEML_VERSION=1.0.8 -f tools/docker/demo/Dockerfile -t synapseml:1.0.8 ``` # Run the image diff --git a/tools/docker/demo/init_notebook.py b/tools/docker/demo/init_notebook.py index bf95d9725c..4d28f684d1 100644 --- a/tools/docker/demo/init_notebook.py +++ b/tools/docker/demo/init_notebook.py @@ -27,7 +27,7 @@ ( "spark.jars.packages", "com.microsoft.azure:synapseml_2.12:" - + os.getenv("SYNAPSEML_VERSION", "1.0.7") + + os.getenv("SYNAPSEML_VERSION", "1.0.8") + ",org.apache.hadoop:hadoop-azure:2.7.0,org.apache.hadoop:hadoop-common:2.7.0,com.microsoft.azure:azure-storage:2.0.0", ), ( diff --git a/tools/docker/minimal/Dockerfile b/tools/docker/minimal/Dockerfile index 682a73a2db..aa78c5c5c5 100644 --- a/tools/docker/minimal/Dockerfile +++ b/tools/docker/minimal/Dockerfile @@ -1,6 +1,6 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04 -ARG SYNAPSEML_VERSION=1.0.7 +ARG SYNAPSEML_VERSION=1.0.8 ARG DEBIAN_FRONTEND=noninteractive ENV SPARK_VERSION=3.4.1 diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index e86ea6164a..850c5125fa 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -1,7 +1,7 @@ const math = require('remark-math') const katex = require('rehype-katex') const path = require('path'); -let version = "1.0.7"; +let version = "1.0.8"; module.exports = { title: 'SynapseML', @@ -13,7 +13,7 @@ module.exports = { projectName: 'SynapseML', trailingSlash: true, customFields: { - version: "1.0.7", + version: "1.0.8", }, stylesheets: [ { @@ -88,11 +88,11 @@ module.exports = { }, { label: 'Python API Reference', - to: 'https://mmlspark.blob.core.windows.net/docs/1.0.7/pyspark/index.html', + to: 'https://mmlspark.blob.core.windows.net/docs/1.0.8/pyspark/index.html', }, { label: 'Scala API Reference', - to: 'https://mmlspark.blob.core.windows.net/docs/1.0.7/scala/index.html', + to: 'https://mmlspark.blob.core.windows.net/docs/1.0.8/scala/index.html', }, ], }, diff --git 
a/website/src/pages/index.js b/website/src/pages/index.js index 9cca14e404..8ee4f90856 100644 --- a/website/src/pages/index.js +++ b/website/src/pages/index.js @@ -268,7 +268,7 @@ function Home() { { "name": "synapseml", "conf": { - "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.7", + "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.8", "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", @@ -314,9 +314,9 @@ function Home() { SynapseML can be conveniently installed on existing Spark clusters via the --packages option, examples: This can be used in other Spark contexts too. For example, you @@ -344,7 +344,7 @@ spark-submit --packages com.microsoft.azure:synapseml_2.12:1.0.7 MyApp.jar `}

For the coordinates:

Spark 3.4 Cluster: Spark 3.3 Cluster: @@ -367,7 +367,7 @@ spark-submit --packages com.microsoft.azure:synapseml_2.12:1.0.7 MyApp.jar `} notebooks. To get started with our example notebooks import the following databricks archive: @@ -405,7 +405,7 @@ spark-submit --packages com.microsoft.azure:synapseml_2.12:1.0.7 MyApp.jar `}