Free Google SERP scraper in Python
15.02.2025
Tags: For Linux, For Windows, Terminal user interface, Scraper
Description
This scraper is written in Python with minimal dependencies (essentially requests and pandas). It retrieves Google search results through the official Custom Search JSON API rather than by scraping the result pages themselves.
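For context, the script below talks to the Custom Search JSON API endpoint https://www.googleapis.com/customsearch/v1 with an API key and a Programmable Search Engine ID (cx). A minimal request looks roughly like this; the key, cx, and query values here are placeholders, not working credentials:

import requests

# Hypothetical placeholder credentials: substitute your own API key and
# Programmable Search Engine ID (cx)
API_KEY = 'YOUR_API_KEY'
CX = 'YOUR_CX_ID'

params = {
    'key': API_KEY,
    'cx': CX,
    'q': 'example query',   # the search query
    'num': 10,              # results per page (the API allows at most 10)
    'start': 1,             # 1-based offset of the first result
}
response = requests.get('https://www.googleapis.com/customsearch/v1', params=params)
for item in response.json().get('items', []):
    print(item['title'], item['link'])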
Configuration file, config.json
{
  "key": "AIzaSyDmt2BUl9gwkrw4iOaObCdosnQcjH4M9B4",
  "cx": "43ed4817eb4d8481a",
  "save_to": "exel",
  "title": true,
  "description": false,
  "url": true,
  "depth": 1
}
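In this config, key is the Google API key, cx is the Programmable Search Engine ID, and title, description, and url are booleans that select which fields end up in the output. save_to chooses the output format: "json" writes a JSON file, while any other value (including "exel" as written here) produces an .xlsx spreadsheet. depth is the number of result pages, 10 results each, and the script caps it at 10 pages. As a quick illustration of how depth maps to API pagination (this mirrors the loop in main.py below):

# Each page of depth asks the API for 10 results, using a 1-based start offset
depth = 3
for i in range(depth):
    start = i * 10 + 1
    print(f'page {i + 1}: results {start}-{start + 9}')
# page 1: results 1-10
# page 2: results 11-20
# page 3: results 21-30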
Script file, main.py
import json
import argparse
import requests
import pandas
from urllib.parse import quote, unquote


def save_to_json(path, data):
    # Dump a Python object to a JSON file, keeping non-ASCII characters readable
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=2, ensure_ascii=False)


def save_to_exel(path, data):
    # Build a DataFrame row by row and write it out as an .xlsx spreadsheet
    frame = pandas.DataFrame({'title': [], 'link': [], 'description': []})
    for indx, entry in enumerate(data):
        frame.at[indx, 'title'] = entry['title']
        frame.at[indx, 'link'] = entry['url']
        frame.at[indx, 'description'] = entry['description']
    frame.to_excel(path, index=False)


def serp_page_scrape(query: str, options: dict) -> list:
    # Read the raw API responses cached by serp_scrape_init and keep only the
    # fields enabled in the config (title, description, url)
    data = []
    for i in range(0, options['depth']):
        try:
            with open(f'./data/temp/{query}_{i*10 + 1}-{i*10 + 10}.json', 'r', encoding='utf-8') as file:
                data_temp = json.loads(file.read())
                for item in data_temp['items']:
                    title = None
                    if options['title']:
                        title = item['title']
                    description = None
                    if options['description']:
                        description = item['snippet']
                    url = None
                    if options['url']:
                        url = item['link']
                    data.append({
                        'title': title,
                        'description': description,
                        'url': url,
                    })
        except (OSError, KeyError, json.JSONDecodeError):
            # A page may be missing or contain no 'items' (e.g. a quota error); skip it
            pass
    if options['save_to'] == 'json':
        save_to_json(f'./data/serp/{query}.json', data)
    else:
        save_to_exel(f'./data/serp/{query}.xlsx', data)
    return data


def serp_scrape_init(query: str, options: dict) -> None:
    # Request each page of results from the Custom Search API and cache the raw JSON
    print(f'Query: {unquote(query)},\n'
          f'Options: title={options["title"]} | description={options["description"]} | '
          f'urls={options["url"]} | depth={options["depth"]} | save to={options["save_to"]}')
    for i in range(0, options['depth']):
        response = requests.get(
            f'https://www.googleapis.com/customsearch/v1'
            f'?key={options["key"]}&cx={options["cx"]}&q={query}&num=10&start={i * 10 + 1}'
        )
        save_to_json(f'./data/temp/{query}_{i*10 + 1}-{i*10 + 10}.json', response.json())


def run():
    # This is going to be only in standalone script
    # Get the options and query from CLI
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument('-q', type=str, help='Query to parse', metavar='QUERY',
                        required=True, nargs='*')
    parser.add_argument('-C', type=str, help='Path to config, in json format',
                        metavar='CONFIG_FILE', required=True, nargs=1)
    args = parser.parse_args()
    # query: join the words back into one string and URL-encode it
    raw_query = ' '.join(args.q)
    if not raw_query:
        return
    query = quote(raw_query)
    # check that the config exists and contains only known keys
    options = {
        'key': '',
        'cx': '',
        'save_to': '',
        'title': '',
        'description': '',
        'url': '',
        'depth': ''
    }
    with open(args.C[0], 'r') as file:
        data = json.loads(file.read())
        for key in data:
            if options.get(key) is not None:
                options[key] = data[key]
            else:
                print(f'ERROR: Something went wrong in your config file, {key}')
                return False
    # check depth: the API exposes at most 100 results, i.e. 10 pages of 10
    if options['depth'] > 10:
        print('WARNING: Google Search API only makes 100 search results available')
        options['depth'] = 10
    serp_scrape_init(query, options)
    serp_page_scrape(query, options)


if __name__ == "__main__":
    run()
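To run the scraper, pass the query words with -q and the path to the config with -C, for example python main.py -q best mechanical keyboards -C config.json (the query here is just an example). Note that the script writes its intermediate and final files to ./data/temp/ and ./data/serp/ without creating those directories, so they need to exist first, and pandas needs an Excel writer such as openpyxl installed for the .xlsx output. A small preparation sketch:

# One-time setup: create the directories the script expects
# (it does not create them itself)
import os

os.makedirs('./data/temp', exist_ok=True)
os.makedirs('./data/serp', exist_ok=True)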
Media files
[Screenshot: Simple Google SERP scraper in Python]