Free Google SERP scraper in Python

15.02.2025

Tags: For Linux, For Windows, Terminal user interface, Scraper

Description

This scraper is written in Python with minimal dependencies. It retrieves Google search results through the official Custom Search JSON API.
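
Besides the standard library, the script only needs requests and pandas; pandas writes .xlsx files through an engine such as openpyxl, so a typical install looks like this:

pip install requests pandas openpyxl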

Configuration file, config.json

{
  "key": "AIzaSyDmt2BUl9gwkrw4iOaObCdosnQcjH4M9B4",
  "cx": "43ed4817eb4d8481a",
  "save_to": "excel",
  "title": true,
  "description": false,
  "url": true,
  "depth": 1
}
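
Here key is a Google API key with the Custom Search API enabled, and cx is the ID of a Programmable Search Engine. A quick way to check that both values work before running the full script is a single request against the same endpoint the scraper uses, roughly like this (a minimal sketch that assumes the config is saved as config.json next to it):

import json
import requests

# Load the same config the scraper uses
with open('config.json', 'r', encoding='utf-8') as file:
    config = json.load(file)

# Ask for a single result; a 200 response with an 'items' list means the credentials are fine
response = requests.get(
    'https://www.googleapis.com/customsearch/v1',
    params={'key': config['key'], 'cx': config['cx'], 'q': 'test', 'num': 1},
)
print(response.status_code)
items = response.json().get('items') or [{}]
print(items[0].get('link', 'no results'))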

Script file, main.py

import os
import json
import argparse
import requests
import pandas
from urllib.parse import quote, unquote


def save_to_json(path, data):
    # Dump the collected results (or a raw API response) to a JSON file
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=2, ensure_ascii=False)

def save_to_excel(path, data):
    # Build the table straight from the list of result dicts and write it as .xlsx
    frame = pandas.DataFrame({
        'title': [entry['title'] for entry in data],
        'link': [entry['url'] for entry in data],
        'description': [entry['description'] for entry in data],
    })
    frame.to_excel(path, index=False)

def serp_page_scrape(query: str, options: dict) -> list:
    data = []
    for i in range(0, options['depth']):
        try:
            # Read the raw API response saved earlier by serp_scrape_init()
            with open(f'./data/temp/{query}_{i*10 + 1}-{i*10 + 10}.json', 'r', encoding='utf-8') as file:
                data_temp = json.load(file)
            for item in data_temp.get('items', []):
                title = item['title'] if options['title'] else None
                description = item['snippet'] if options['description'] else None
                url = item['link'] if options['url'] else None

                data.append({
                    'title': title,
                    'description': description,
                    'url': url,
                })
        except (OSError, KeyError, json.JSONDecodeError) as error:
            print(f'WARNING: skipping results page {i + 1}: {error}')

    if options['save_to'] == 'json':
        save_to_json(f'./data/serp/{query}.json', data)
    else:
        save_to_excel(f'./data/serp/{query}.xlsx', data)

    return data

def serp_scrape_init(query: str, options: dict) -> None:
    print(f'Query: {unquote(query)},\nOptions: title={options["title"]} | description={options["description"]} | '
          f'urls={options["url"]} | depth={options["depth"]} | save to={options["save_to"]}')
    for i in range(0, options['depth']):
        # Each API call returns at most 10 results, so page through with the start parameter
        response = requests.get(f'https://www.googleapis.com/customsearch/v1?key={options["key"]}&cx={options["cx"]}&q={query}&num=10&start={i * 10 + 1}')
        save_to_json(f'./data/temp/{query}_{i*10 + 1}-{i*10 + 10}.json', response.json())

def run():
    # This only runs in the standalone script:
    # get the options and the query from the CLI
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument('-q', type=str, help='Query to parse', metavar='QUERY', required=True, nargs='*')
    parser.add_argument('-C', type=str, help='Path to config, in json format', metavar='CONFIG_FILE', required=True, nargs=1)
    args = parser.parse_args()
    # build the query string and URL-encode it
    raw_query = ' '.join(args.q)
    if not raw_query:
        return
    query = quote(raw_query)
    # read the config and keep only the expected keys
    options = {
        'key': '',
        'cx': '',
        'save_to': '',
        'title': '',
        'description': '',
        'url': '',
        'depth': ''
    }
    with open(args.C[0], 'r') as file:
        data = json.loads(file.read())
    for key in data:
        if options.get(key) is not None:
            options[key] = data[key]
        else:
            print(f'ERROR: unknown key in your config file: {key}')
            return False

    # check depth: the API only exposes the first 100 results (10 pages of 10)
    if options['depth'] > 10:
        print('WARNING: the Google Search API only makes the first 100 search results available')
        options['depth'] = 10

    # make sure the output directories exist
    os.makedirs('./data/temp', exist_ok=True)
    os.makedirs('./data/serp', exist_ok=True)

    serp_scrape_init(query, options)
    serp_page_scrape(query, options)

if __name__ == "__main__":
    run()
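
Usage

Assuming the script is saved as main.py next to config.json, a run looks like this (the words after -q are joined into one search string):

python main.py -q free serp scraper -C config.json

Depending on the save_to option, the results end up in ./data/serp/ as either a .json or an .xlsx file; the raw API responses are kept in ./data/temp/.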

Media files

  1. Simple Google SERP scraper in Python
