diff --git a/cookbook/company-info/scrapegraph_sdk.ipynb b/cookbook/company-info/scrapegraph_sdk.ipynb index 50ee223..78a3122 100644 --- a/cookbook/company-info/scrapegraph_sdk.ipynb +++ b/cookbook/company-info/scrapegraph_sdk.ipynb @@ -6,7 +6,7 @@ "id": "jEkuKbcRrPcK" }, "source": [ - "## ๐Ÿ•ท๏ธ Extract Company Info with Official Scrapegraph SDK\n" + "## \ud83d\udd77\ufe0f Extract Company Info with Official Scrapegraph SDK\n" ] }, { @@ -24,7 +24,7 @@ "id": "IzsyDXEWwPVt" }, "source": [ - "### ๐Ÿ”ง Install `dependencies`" + "### \ud83d\udd27 Install `dependencies`" ] }, { @@ -45,7 +45,7 @@ "id": "apBsL-L2KzM7" }, "source": [ - "### ๐Ÿ”‘ Import `ScrapeGraph` API key" + "### \ud83d\udd11 Import `ScrapeGraph` API key" ] }, { @@ -54,7 +54,7 @@ "id": "ol9gQbAFkh9b" }, "source": [ - "You can find the Scrapegraph API key [here](https://dashboard.scrapegraphai.com/)" + "You can find the Scrapegraph API key [here](https://scrapegraphai.com/dashboard)" ] }, { @@ -83,7 +83,7 @@ "output_type": "stream", "text": [ "SGAI_API_KEY not found in environment.\n", - "Please enter your SGAI_API_KEY: ยทยทยทยทยทยทยทยทยทยท\n", + "Please enter your SGAI_API_KEY: \u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\n", "SGAI_API_KEY has been set in the environment.\n" ] } @@ -102,7 +102,7 @@ "id": "jnqMB2-xVYQ7" }, "source": [ - "### ๐Ÿ“ Defining an `Output Schema` for Webpage Content Extraction\n" + "### \ud83d\udcdd Defining an `Output Schema` for Webpage Content Extraction\n" ] }, { @@ -237,7 +237,7 @@ "id": "cDGH0b2DkY63" }, "source": [ - "### ๐Ÿš€ Initialize `SGAI Client` and start extraction" + "### \ud83d\ude80 Initialize `SGAI Client` and start extraction" ] }, { @@ -246,7 +246,7 @@ "id": "4SLJgXgcob6L" }, "source": [ - "Initialize the client for scraping (there's also an async version [here](https://github.com/ScrapeGraphAI/scrapegraph-sdk/blob/main/scrapegraph-py/examples/async_smartscraper_example.py))" + "Initialize the client for scraping (an async version using `AsyncScrapeGraphAI` is available [here](https://github.com/ScrapeGraphAI/scrapegraph-py/blob/main/examples/extract/extract_basic_async.py))." ] }, { @@ -257,10 +257,9 @@ }, "outputs": [], "source": [ - "from scrapegraph_py import Client\n", + "from scrapegraph_py import ScrapeGraphAI\n", "\n", - "# Initialize the client with explicit API key\n", - "sgai_client = Client(api_key=sgai_api_key)" + "sgai_client = ScrapeGraphAI()" ] }, { @@ -269,13 +268,7 @@ "id": "M1KSXffZopUD" }, "source": [ - "Here we use `Smartscraper` service to extract structured data using AI from a webpage.\n", - "\n", - "\n", - "> If you already have an HTML file, you can upload it and use `Localscraper` instead.\n", - "\n", - "\n", - "\n" + "Use the `extract` method to pull structured data from a URL with AI. The same method also accepts raw `html=` or `markdown=` if you already have the page content." ] }, { @@ -286,11 +279,10 @@ }, "outputs": [], "source": [ - "# Request for Trending Repositories\n", - "repo_response = sgai_client.smartscraper(\n", - " website_url=\"https://scrapegraphai.com/\",\n", - " user_prompt=\"Extract info about the company\",\n", - " output_schema=CompanyInfoSchema,\n", + "repo_response = sgai_client.extract(\n", + " \"Extract info about the company\",\n", + " url=\"https://scrapegraphai.com/\",\n", + " schema=CompanyInfoSchema.model_json_schema(),\n", ")" ] }, @@ -323,91 +315,16 @@ "id": "F1VfD8B4LPc8", "outputId": "8d7b2955-1569-4b3a-8ffe-014a8442dd12" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Request ID: 87a7ea1a-9dd4-4d1d-ae76-b419ead57c11\n", - "Company Info:\n", - "{\n", - " \"company_name\": \"ScrapeGraphAI\",\n", - " \"description\": \"ScrapeGraphAI is a powerful AI scraping API designed for efficient web data extraction to power LLM applications and AI agents. It enables developers to perform intelligent AI scraping and extract structured information from websites using advanced AI techniques.\",\n", - " \"founders\": [\n", - " {\n", - " \"name\": \"\",\n", - " \"role\": \"Founder & Technical Lead\",\n", - " \"linkedin\": \"https://www.linkedin.com/in/perinim/\"\n", - " },\n", - " {\n", - " \"name\": \"Marco Vinciguerra\",\n", - " \"role\": \"Founder & Software Engineer\",\n", - " \"linkedin\": \"https://www.linkedin.com/in/marco-vinciguerra-7ba365242/\"\n", - " },\n", - " {\n", - " \"name\": \"Lorenzo Padoan\",\n", - " \"role\": \"Founder & Product Engineer\",\n", - " \"linkedin\": \"https://www.linkedin.com/in/lorenzo-padoan-4521a2154/\"\n", - " }\n", - " ],\n", - " \"logo\": \"https://scrapegraphai.com/images/scrapegraphai_logo.svg\",\n", - " \"partners\": [\n", - " \"PostHog\",\n", - " \"AWS\",\n", - " \"NVIDIA\",\n", - " \"JinaAI\",\n", - " \"DagWorks\",\n", - " \"Browserbase\",\n", - " \"ScrapeDo\",\n", - " \"HackerNews\",\n", - " \"Medium\",\n", - " \"HackADay\"\n", - " ],\n", - " \"pricing_plans\": [\n", - " {\n", - " \"tier\": \"Free\",\n", - " \"price\": \"$0\",\n", - " \"credits\": 100\n", - " },\n", - " {\n", - " \"tier\": \"Starter\",\n", - " \"price\": \"$20/month\",\n", - " \"credits\": 5000\n", - " },\n", - " {\n", - " \"tier\": \"Growth\",\n", - " \"price\": \"$100/month\",\n", - " \"credits\": 40000\n", - " },\n", - " {\n", - " \"tier\": \"Pro\",\n", - " \"price\": \"$500/month\",\n", - " \"credits\": 250000\n", - " }\n", - " ],\n", - " \"contact_emails\": [\n", - " \"contact@scrapegraphai.com\"\n", - " ],\n", - " \"social_links\": {\n", - " \"linkedin\": \"https://www.linkedin.com/company/101881123\",\n", - " \"twitter\": \"https://x.com/scrapegraphai\",\n", - " \"github\": \"https://github.com/ScrapeGraphAI/Scrapegraph-ai\"\n", - " },\n", - " \"privacy_policy\": \"https://scrapegraphai.com/privacy\",\n", - " \"terms_of_service\": \"https://scrapegraphai.com/terms\",\n", - " \"api_status\": \"https://scrapegraphapi.openstatus.dev\"\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "import json\n", "\n", - "# Print the response\n", - "request_id = repo_response['request_id']\n", - "result = repo_response['result']\n", + "if repo_response.status != \"success\":\n", + " raise RuntimeError(repo_response.error)\n", + "\n", + "result = repo_response.data.json_data\n", "\n", - "print(f\"Request ID: {request_id}\")\n", + "print(\"Tokens used:\", repo_response.data.usage)\n", "print(\"Company Info:\")\n", "print(json.dumps(result, indent=2))" ] @@ -418,7 +335,7 @@ "id": "2as65QLypwdb" }, "source": [ - "### ๐Ÿ’พ Save the output to a `CSV` file" + "### \ud83d\udcbe Save the output to a `CSV` file" ] }, { @@ -1883,7 +1800,7 @@ "id": "-1SZT8VzTZNd" }, "source": [ - "## ๐Ÿ”— Resources" + "## \ud83d\udd17 Resources" ] }, { @@ -1893,13 +1810,13 @@ }, "source": [ "\n", - "- ๐Ÿš€ **Get your API Key:** [ScrapeGraphAI Dashboard](https://dashboard.scrapegraphai.com) \n", - "- ๐Ÿ™ **GitHub:** [ScrapeGraphAI GitHub](https://github.com/scrapegraphai) \n", - "- ๐Ÿ’ผ **LinkedIn:** [ScrapeGraphAI LinkedIn](https://www.linkedin.com/company/scrapegraphai/) \n", - "- ๐Ÿฆ **Twitter:** [ScrapeGraphAI Twitter](https://twitter.com/scrapegraphai) \n", - "- ๐Ÿ’ฌ **Discord:** [Join our Discord Community](https://discord.gg/uJN7TYcpNa) \n", + "- \ud83d\ude80 **Get your API Key:** [ScrapeGraphAI Dashboard](https://scrapegraphai.com/dashboard) \n", + "- \ud83d\udc19 **GitHub:** [ScrapeGraphAI GitHub](https://github.com/scrapegraphai) \n", + "- \ud83d\udcbc **LinkedIn:** [ScrapeGraphAI LinkedIn](https://www.linkedin.com/company/scrapegraphai/) \n", + "- \ud83d\udc26 **Twitter:** [ScrapeGraphAI Twitter](https://twitter.com/scrapegraphai) \n", + "- \ud83d\udcac **Discord:** [Join our Discord Community](https://discord.gg/uJN7TYcpNa) \n", "\n", - "Made with โค๏ธ by the [ScrapeGraphAI](https://scrapegraphai.com) Team \n" + "Made with \u2764\ufe0f by the [ScrapeGraphAI](https://scrapegraphai.com) Team \n" ] } ],