Skip to content

Commit

Permalink
Add AnalyzedAd model to track ads
Browse files Browse the repository at this point in the history
This mirrors the AnalyzedUrl model,
but tracks ads against a specific advertiser.

The first step in doing targeting based on embedding data
is to keep track of analyzed ad data.
  • Loading branch information
ericholscher committed Mar 14, 2024
1 parent 3202a68 commit 8aef840
Show file tree
Hide file tree
Showing 8 changed files with 241 additions and 35 deletions.
22 changes: 22 additions & 0 deletions adserver/analyzer/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from django.contrib import admin
from simple_history.admin import SimpleHistoryAdmin

from .models import AnalyzedAd
from .models import AnalyzedUrl


Expand All @@ -24,3 +25,24 @@ class AnalyzedUrlAdmin(SimpleHistoryAdmin):
search_fields = ("url", "keywords")

# Note: may need to use the estimated count paginator if this gets large


@admin.register(AnalyzedAd)
class AnalyzedAdAdmin(admin.ModelAdmin):
    """
    Django admin configuration for analyzed ads.

    Lists each analyzed URL with its advertiser, keywords and analysis
    bookkeeping, and allows searching by URL or keyword.
    """

    # A plain ``ModelAdmin`` is used instead of ``SimpleHistoryAdmin``: the
    # migration that creates ``AnalyzedAd`` does not create a historical
    # table for it, so the simple-history "History" view would have no
    # history manager to read from.
    list_display = (
        "url",
        "advertiser",
        "keywords",
        "last_analyzed_date",
        "visits_since_last_analyzed",
    )
    list_per_page = 500
    list_filter = ("last_analyzed_date", "last_ad_served_date", "advertiser")
    # "advertiser" is shown in the changelist, so join it in the list query
    # rather than fetching it once per row.
    list_select_related = ("advertiser",)
    # Render the foreign key as an ID input instead of a select box.
    raw_id_fields = ("advertiser",)
    search_fields = ("url", "keywords")

    # Note: may need to use the estimated count paginator if this gets large
102 changes: 102 additions & 0 deletions adserver/analyzer/migrations/0006_add_analyzedad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Generated by Django 4.2.11 on 2024-03-14 21:03
import django.db.models.deletion
import django_extensions.db.fields
import jsonfield.fields
from django.db import migrations
from django.db import models

import adserver.analyzer.validators


# Creates the ``AnalyzedAd`` table (analyzed URL data keyed on an
# advertisement) and deletes the ``HistoricalAnalyzedUrl`` model.
class Migration(migrations.Migration):

dependencies = [
("adserver", "0093_publisher_ignore_mobile_traffic"),
("adserver_analyzer", "0005_remove_embedding"),
]

operations = [
migrations.CreateModel(
name="AnalyzedAd",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"created",
django_extensions.db.fields.CreationDateTimeField(
auto_now_add=True, verbose_name="created"
),
),
(
"modified",
django_extensions.db.fields.ModificationDateTimeField(
auto_now=True, verbose_name="modified"
),
),
(
"url",
models.URLField(
db_index=True,
help_text="URL of the page being analyzed after certain query parameters are stripped away",
max_length=1024,
),
),
# Nullable JSON keywords, validated by KeywordsValidator.
(
"keywords",
jsonfield.fields.JSONField(
blank=True,
null=True,
validators=[adserver.analyzer.validators.KeywordsValidator()],
verbose_name="Keywords for this URL",
),
),
(
"last_analyzed_date",
models.DateTimeField(
blank=True,
db_index=True,
default=None,
help_text="Last time the ad server analyzed this URL",
null=True,
),
),
(
"last_ad_served_date",
models.DateField(
blank=True,
default=None,
help_text="Last date an ad was served for this URL",
null=True,
),
),
(
"visits_since_last_analyzed",
models.PositiveIntegerField(
default=0,
help_text="Number of times ads have been served for this URL since it was last analyzed",
),
),
# Rows are removed together with their advertisement (CASCADE).
(
"ad",
models.ForeignKey(
help_text="Ad with the URL",
on_delete=django.db.models.deletion.CASCADE,
to="adserver.advertisement",
),
),
],
# One row per (url, ad) pair.
options={
"unique_together": {("url", "ad")},
},
),
# NOTE(review): this removes the HistoricalAnalyzedUrl model; presumably
# its stored history rows are dropped with it -- confirm this is intended.
migrations.DeleteModel(
name="HistoricalAnalyzedUrl",
),
]
22 changes: 22 additions & 0 deletions adserver/analyzer/migrations/0007_rename_ad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.2.11 on 2024-03-14 21:27
from django.db import migrations


# Renames ``AnalyzedAd.ad`` to ``AnalyzedAd.advertisement`` and updates the
# uniqueness constraint to reference the new field name.
class Migration(migrations.Migration):

dependencies = [
("adserver", "0093_publisher_ignore_mobile_traffic"),
("adserver_analyzer", "0006_add_analyzedad"),
]

operations = [
migrations.RenameField(
model_name="analyzedad",
old_name="ad",
new_name="advertisement",
),
# Same pairing as before the rename, expressed with the new field name.
migrations.AlterUniqueTogether(
name="analyzedad",
unique_together={("url", "advertisement")},
),
]
38 changes: 38 additions & 0 deletions adserver/analyzer/migrations/0008_rename_ad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 4.2.11 on 2024-03-14 21:45
import django.db.models.deletion
from django.db import migrations
from django.db import models


# Replaces the ``advertisement`` foreign key on ``AnalyzedAd`` with an
# ``advertiser`` foreign key and makes (url, advertiser) the unique pair.
class Migration(migrations.Migration):

dependencies = [
("adserver", "0093_publisher_ignore_mobile_traffic"),
("adserver_analyzer", "0007_rename_ad"),
]

operations = [
# Clear the (url, advertisement) constraint before that field is removed
# further down.
migrations.AlterUniqueTogether(
name="analyzedad",
unique_together=set(),
),
# ``default=1`` with ``preserve_default=False`` is a one-off value used only
# to fill the new column during this migration; it is not kept on the field.
# NOTE(review): assumes an Advertiser with pk=1 exists if any AnalyzedAd
# rows are present when this runs -- confirm.
migrations.AddField(
model_name="analyzedad",
name="advertiser",
field=models.ForeignKey(
default=1,
help_text="Advertiser with the URL",
on_delete=django.db.models.deletion.CASCADE,
to="adserver.advertiser",
),
preserve_default=False,
),
migrations.AlterUniqueTogether(
name="analyzedad",
unique_together={("url", "advertiser")},
),
migrations.RemoveField(
model_name="analyzedad",
name="advertisement",
),
]
38 changes: 28 additions & 10 deletions adserver/analyzer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,14 @@
from django.utils.translation import gettext_lazy as _
from django_extensions.db.models import TimeStampedModel
from jsonfield import JSONField
from pgvector.django import VectorField
from simple_history.models import HistoricalRecords

from ..models import Advertiser
from ..models import Publisher
from .validators import KeywordsValidator


class AnalyzedUrl(TimeStampedModel):

"""Analyzed keywords for a given URL."""
class BaseAnalyzedUrl(TimeStampedModel):

url = models.URLField(
db_index=True,
Expand All @@ -22,12 +20,6 @@ class AnalyzedUrl(TimeStampedModel):
),
)

publisher = models.ForeignKey(
Publisher,
help_text=_("Publisher where this URL appears"),
on_delete=models.CASCADE,
)

# Fields below are updated by the analyzer
keywords = JSONField(
_("Keywords for this URL"),
Expand Down Expand Up @@ -66,5 +58,31 @@ def save(self, *args, **kwargs):
self.full_clean()
return super().save(*args, **kwargs)

class Meta:
abstract = True


class AnalyzedUrl(BaseAnalyzedUrl):
"""
Analyzed keywords for a given URL on a publisher's site.

Adds the publisher foreign key to the fields inherited from
``BaseAnalyzedUrl``; a URL is stored at most once per publisher.
"""

# Deleting the publisher deletes its analyzed URLs as well (CASCADE).
publisher = models.ForeignKey(
Publisher,
help_text=_("Publisher where this URL appears"),
on_delete=models.CASCADE,
)

class Meta:
unique_together = ("url", "publisher")


class AnalyzedAd(BaseAnalyzedUrl):
"""
Analyzed keywords for a URL associated with an advertiser.

Adds the advertiser foreign key to the fields inherited from
``BaseAnalyzedUrl``; a URL is stored at most once per advertiser.
"""

# Deleting the advertiser deletes its analyzed ad URLs as well (CASCADE).
advertiser = models.ForeignKey(
Advertiser,
help_text=_("Advertiser with the URL"),
on_delete=models.CASCADE,
)

class Meta:
unique_together = ("url", "advertiser")
46 changes: 23 additions & 23 deletions adserver/analyzer/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,28 +66,22 @@ def analyze_url(url, publisher_slug, force=False):
response = backend_instance.fetch()

analyzed_keywords = backend_instance.analyze(response) # Can be None
log.debug("Keywords from '%s': %s", backend.__name__, analyzed_keywords)
if analyzed_keywords:
log.debug("Keywords from '%s': %s", backend.__name__, analyzed_keywords)

for kw in analyzed_keywords:
keywords.add(kw)

analyzed_embedding = backend_instance.embedding(response) # Can be None
if analyzed_embedding:
log.debug(
"Embedding from '%s': %s", backend.__name__, len(analyzed_embedding)
)

if analyzed_keywords:
for kw in analyzed_keywords:
keywords.add(kw)

if analyzed_embedding:
embeddings.append(analyzed_embedding)
model = getattr(backend_instance, "MODEL_NAME", None)
embeddings.append([analyzed_embedding, model])

log.debug("Keywords found : %s", keywords)

if len(embeddings) > 1:
log.warning("Multiple embeddings found for URL: %s", normalized_url)

embedding = embeddings[0] if embeddings else None

keywords = list(keywords)
url_obj, created = AnalyzedUrl.objects.get_or_create(
url=normalized_url,
Expand All @@ -105,16 +99,22 @@ def analyze_url(url, publisher_slug, force=False):
url_obj.save()

if "ethicalads_ext" in settings.INSTALLED_APPS:
embedding_obj, embedding_created = Embedding.objects.get_or_create(
url=url_obj,
model="v1",
defaults={
"embedding": embedding,
},
)
if not embedding_created:
embedding_obj.embedding = embedding
embedding_obj.save()

if len(embeddings) > 1:
log.warning("Multiple embeddings found for URL: %s", normalized_url)

if embeddings:
embedding, model = embeddings[0]
embedding_obj, embedding_created = Embedding.objects.get_or_create(
url=url_obj,
model=model,
defaults={
"vector": embedding,
},
)
if not embedding_created:
embedding_obj.vector = embedding
embedding_obj.save()


@app.task
Expand Down
4 changes: 2 additions & 2 deletions adserver/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
router.register(r"publishers", PublisherViewSet, basename="publishers")

if "ethicalads_ext.embedding" in settings.INSTALLED_APPS:
from ethicalads_ext.embedding.views import EmbeddingViewSet
from ethicalads_ext.embedding import urls as embedding_urls

urlpatterns += [path(r"similar/", EmbeddingViewSet.as_view(), name="similar")]
urlpatterns += embedding_urls.urlpatterns


urlpatterns += router.urls
4 changes: 4 additions & 0 deletions adserver/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1644,6 +1644,10 @@ def get_absolute_url(self):
},
)

@property
def advertiser(self):
    """Return the advertiser reached through this object's flight and campaign."""
    owning_campaign = self.flight.campaign
    return owning_campaign.advertiser

def incr(self, impression_type, publisher):
"""
Add to the number of times this action has been performed, stored in the DB.
Expand Down

0 comments on commit 8aef840

Please sign in to comment.