I'm writing a script to pull data from the Google Analytics Reporting API v4. The script runs fine. However, when I validate the fetched data against what GA shows, I see some discrepancies. The numbers are not far off, but I don't understand why they are not identical.
I should mention that my script uses a dynamic segment with exactly the same condition as the segment I have in my GA view. The segment simply filters out spam traffic by including only traffic where session duration > 1 sec.
Here is the structure I'm pulling:
# Request body for a single report: ga:sessions for 2018-12-16..2018-12-20,
# broken down by date, source/medium, campaign, ad content, channel grouping
# and segment, restricted to the dynamic segment "sessions_no_spam".
# NOTE(review): no pageSize/pageToken is set in this fragment; confirm how
# results beyond the first page of rows are fetched, since missing pages
# would make the stored totals lower than what the GA UI shows.
body={
"reportRequests":[
{
"viewId": view_id,
# ga:segment is included as a dimension alongside the "segments" entry below.
"dimensions":[{"name": "ga:date"},{"name": "ga:sourceMedium"},{"name": "ga:campaign"},{"name": "ga:adContent"},{"name": "ga:channelGrouping"},{"name": "ga:segment"}],
"dateRanges":[
{
"startDate":"2018-12-16",
"endDate":"2018-12-20"
}],
"metrics":[{"expression":"ga:sessions","alias":"sessions"}],
"segments":[
{
"dynamicSegment":
{
"name": "sessions_no_spam",
# NOTE(review): the filter is declared under "userSegment". If the segment
# defined in the GA view is session-scoped, "sessionSegment" may be the
# matching key here - confirm against the Reporting API v4 segment docs.
"userSegment":
{
"segmentFilters":[
{
"simpleSegment":
{
"orFiltersForSegment":
{
"segmentFilterClauses": [
{
# Single condition: ga:sessionDuration GREATER_THAN 1.
# NOTE(review): no explicit "scope" is given for this metric filter;
# verify which scope the API applies by default.
"metricFilter":
{
"metricName":"ga:sessionDuration",
"operator":"GREATER_THAN",
"comparisonValue":"1"
}
}]
}
}
}]
}
}
}]
}]
}).execute()
I'm not sure whether the answer to my question is more conceptual than technical, but just in case, I'm also including the function that bulk-loads the results into my database:
def print_results(no_spam_traffic):
    """Upsert the session counts of a GA report response into GA_no_spam_traffic.

    Each report row is expected to carry its dimensions in the order
    date, source/medium, campaign, ad content, channel grouping (any further
    dimensions are ignored) and the session count as the first metric value.

    A row already present in the table (same date, source/medium, campaign,
    ad content and channel) has its ``sessions`` value overwritten; otherwise
    a new row is inserted.

    Args:
        no_spam_traffic: Response dict of the report request; reads
            ``reports[].data.rows[].dimensions`` and
            ``reports[].data.rows[].metrics[0].values[0]``.

    Raises:
        psycopg2.Error: If connecting or any statement fails. Nothing from
            the current call is committed in that case.
    """
    # The match key covers every dimension that is stored. Leaving "channel"
    # out would let two GA rows that differ only by channel grouping overwrite
    # each other's session count.
    update_sql = (
        "UPDATE GA_no_spam_traffic set sessions = %s "
        "where gadate = %s AND sourcemedium = %s AND campaign = %s "
        "AND adcontent = %s AND channel = %s"
    )
    insert_sql = (
        "INSERT INTO GA_no_spam_traffic "
        "(gadate,sourcemedium,campaign,adcontent,channel,sessions) "
        "VALUES (%s,%s,%s,%s,%s,%s)"
    )

    connection = psycopg2.connect(database = 'web_insights_data', user = 'XXXX', password = 'XXXXX', host = 'XXX', port = 'XXXXX')
    try:
        with connection.cursor() as cursor:
            for report in no_spam_traffic.get('reports', []):
                for row in report.get('data', {}).get('rows', []):
                    dimensions = row['dimensions']
                    raw_date = dimensions[0]
                    # GA returns dates as YYYYMMDD; store them as YYYY/MM/DD.
                    ga_date = raw_date[0:4] + '/' + raw_date[4:6] + '/' + raw_date[6:8]
                    source_medium = dimensions[1]
                    campaign = dimensions[2]
                    ad_content = dimensions[3]
                    channel = dimensions[4]
                    sessions = row['metrics'][0]['values'][0]

                    key = (
                        str(ga_date),
                        str(source_medium),
                        str(campaign),
                        str(ad_content),
                        str(channel),
                    )
                    # Try the update first; rowcount tells us whether the row
                    # existed, so no separate SELECT round trip is needed.
                    cursor.execute(update_sql, (str(sessions),) + key)
                    if cursor.rowcount == 0:
                        cursor.execute(
                            insert_sql,
                            (ga_date, source_medium, campaign, ad_content, channel, sessions),
                        )
        # One commit for the whole response: the load is all-or-nothing.
        connection.commit()
    finally:
        # Always release the connection, even when a statement raised.
        connection.close()
Any ideas what the issue might be? Thanks!!