shit and i'm lazy

This commit is contained in:
Jurn Wubben 2025-06-22 19:42:20 +02:00
parent 1f269afce5
commit 5dafe72895
14 changed files with 367 additions and 109 deletions

View file

@ -14,5 +14,6 @@ class Config(object):
class DevelopmentConfig(Config):
    """Base configuration with ``DEBUG`` switched on."""

    DEBUG = True
class TestingConfig(Config):
    """Base configuration with ``TESTING`` switched on."""

    TESTING = True

View file

@ -3,19 +3,19 @@ from flask_wtf import FlaskForm
from wtforms import (
StringField,
SubmitField,
IntegerField,
HiddenField,
FloatField,
URLField,
TextAreaField,
)
from wtforms.validators import DataRequired
class NewWishlist(FlaskForm):
title = StringField("Title:", validators=[DataRequired()])
description = StringField("Description:", validators=[DataRequired()])
submit = SubmitField("Submit")
title = StringField("Title", validators=[DataRequired()])
description = TextAreaField("Description", validators=[DataRequired()])
submit = SubmitField("Create")
# Each submit needs a different name for it to work on the same page.
@ -25,7 +25,7 @@ class DeleteWishlist(FlaskForm):
class EditWishlistInfo(FlaskForm):
title = StringField("Title", validators=[DataRequired()])
description = StringField("Description", validators=[DataRequired()])
description = TextAreaField("Description", validators=[DataRequired()])
wl_edit_submit = SubmitField("Submit")
@ -34,7 +34,7 @@ class ResetWishlistUrls(FlaskForm):
class NewItem(FlaskForm):
title = StringField("Title", validators=[DataRequired()])
it_new_title = StringField("Title", validators=[DataRequired()])
description = StringField("Description", validators=[DataRequired()])
price = FloatField("Price", validators=[DataRequired()])
url = URLField("Url", validators=[DataRequired()])

162
app/scrapers.py Normal file
View file

@ -0,0 +1,162 @@
from abc import ABC, abstractmethod
from typing import Callable, override
from bs4 import BeautifulSoup
from requests import get
from re import findall, match, search
def noReturnLambda(x: str) -> str:
    """Return ``x`` unchanged (identity function for strings)."""
    return x
class ScrapeError(Exception):
    """Error raised when scraping a page fails."""
class ScraperResult:
def __init__(self, name: str, price: float, image: str):
self.name = name
self.price = price
self.image = image
@override
def __repr__(self) -> str:
return (
f"<ScraperResult name:{self.name} price:{ self.price } image:{self.image}>"
)
name: str
price: float
image: str
class ScraperLike(ABC):
    """Abstract interface for a site scraper."""

    # Display name of the scraper.
    name: str
    # Regular expression describing the URLs this scraper applies to.
    urlRegex: str

    @abstractmethod
    def scrape(self, url: str) -> ScraperResult:
        """Scrape ``url`` and return the extracted result."""
        ...
class GenericScraper(ScraperLike):
    """Scraper configured with CSS selectors for name, price and image.

    Args:
        name: Display name of the shop.
        baseUrl: Regular expression stored as ``urlRegex``.
        nameQuery: CSS selector of the element whose text is the product name.
        priceQuery: CSS selector of the element whose text contains the price.
        imageQuery: CSS selector of the element whose ``src`` is the image.
        priceParser: Applied to the raw price text before the number is
            extracted. Defaults to the identity function.
        imageParser: Applied to the ``src`` value before it is returned.
            Defaults to the identity function.
    """

    name: str
    urlRegex: str
    _nameQuery: str
    _priceQuery: str
    _imageQuery: str
    priceParser: Callable[[str], str]
    imageParser: Callable[[str], str]
    # Seconds to wait for the remote site. Without a timeout, requests can
    # block indefinitely on an unresponsive host.
    requestTimeout: float = 10.0

    def __init__(
        self,
        name: str,
        baseUrl: str,
        nameQuery: str,
        priceQuery: str,
        imageQuery: str,
        priceParser: Callable[[str], str] = noReturnLambda,
        imageParser: Callable[[str], str] = noReturnLambda,
    ):
        self.name = name
        self.urlRegex = baseUrl
        self._nameQuery = nameQuery
        self._priceQuery = priceQuery
        self._imageQuery = imageQuery
        self.priceParser = priceParser
        self.imageParser = imageParser

    @staticmethod
    def _parsePrice(text: str) -> float:
        """Extract the first number in ``text`` as a float.

        Both ``.`` and ``,`` are accepted as separators. The last separator
        is the decimal point unless it is followed by exactly three digits,
        in which case every separator is treated as a thousands separator
        (``"1.299,00"`` -> 1299.0, ``"1.299"`` -> 1299.0, ``"12,99"`` -> 12.99).

        Raises:
            ValueError: If ``text`` contains no digits.
        """
        found = search(r"[0-9][0-9.,]*", text)
        if not found:
            raise ValueError(f"no number found in {text!r}")
        # Trailing separators come from notations such as "12,-".
        token = found.group(0).rstrip(".,")
        lastSep = max(token.rfind("."), token.rfind(","))
        if lastSep == -1:
            return float(token)
        whole = token[:lastSep].replace(".", "").replace(",", "")
        fraction = token[lastSep + 1 :]
        if len(fraction) == 3:
            # Three digits after the last separator: a thousands group.
            return float(whole + fraction)
        return float(f"{whole}.{fraction}")

    @override
    def scrape(self, url: str) -> ScraperResult:
        """Fetch ``url`` and extract the product name, price and image.

        Raises:
            ScrapeError: If the page cannot be fetched, the response status
                is not 200, one of the selectors matches nothing, or the
                price or image cannot be parsed.
        """
        # Local import: keeps the file's top-level import block untouched.
        from requests import RequestException

        try:
            res = get(
                url,
                headers={
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:138.0) Gecko/20100101 Firefox/138.0"
                },
                timeout=self.requestTimeout,
            )
        except RequestException as err:
            # Connection errors and timeouts surface as the same error type
            # as every other scraping failure.
            raise ScrapeError("Failed to fetch page.") from err
        if res.status_code != 200:
            raise ScrapeError("Failed to fetch page.")

        soup = BeautifulSoup(res.text, features="html.parser")
        nameTag = soup.select_one(self._nameQuery)
        priceTag = soup.select_one(self._priceQuery)
        imageTag = soup.select_one(self._imageQuery)
        if nameTag is None or priceTag is None or imageTag is None:
            raise ScrapeError(
                f"Failed to scrape site. Invalid webpage or queries: N:{nameTag},P:{priceTag},I:{imageTag}"
            )

        try:
            price = self._parsePrice(self.priceParser(priceTag.text))
        except ValueError as err:
            raise ScrapeError(
                "Failed to scrape site. Error while parsing price."
            ) from err

        # ``get`` may return None or a list for a missing/multi-valued
        # attribute; only a plain string is usable as an image URL.
        image = imageTag.get("src")
        if not isinstance(image, str):
            raise ScrapeError("Failed to scrape site. Error while parsing image.")

        return ScraperResult(nameTag.text.strip(), price, self.imageParser(image))
def scrapeSite(url: str) -> ScraperResult | None:
    """Scrape ``url`` with the first scraper whose ``urlRegex`` matches it.

    Scanning stops at the first match, so a URL is fetched at most once even
    if several patterns would match it.

    Returns:
        The scraped result, or None when no scraper in ``scrapers`` matches.

    Raises:
        Whatever the selected scraper's ``scrape`` raises.
    """
    for scraper in scrapers:
        if match(scraper.urlRegex, url) is not None:
            return scraper.scrape(url)
    return None
# Registry of supported shops. Literal dots in the host names are escaped so
# that "." does not match an arbitrary character (e.g. "bolXcom").
scrapers = [
    GenericScraper(
        "Amazon",
        r"^https?:\/\/(www\.)?((amazon)|(amzn))\.\w*",
        "#productTitle",
        "#corePrice_feature_div > div:nth-child(1) > div:nth-child(1) > span:nth-child(1) > span:nth-child(1)",
        "#landingImage",
    ),
    GenericScraper(
        "Bol.com",
        r"^https?:\/\/(www\.)?bol\.com",
        ".page-heading > span:nth-child(1)",
        ".promo-price",
        "div.container-item:nth-child(2) > wsp-selected-item-image-zoom-modal-application:nth-child(1) > button:nth-child(2) > img:nth-child(1)",
        # Joins the two parts of the price text with a decimal point.
        priceParser=lambda x: x.replace("\n ", "."),
    ),
    GenericScraper(
        "MediaMarkt",
        r"^https?:\/\/(www\.)?mediamarkt\.\w*",
        "h1.sc-d571b66f-0",
        ".sc-6db49389-0 > span:nth-child(2)",
        "div.sc-hLBbgP:nth-child(2) > div:nth-child(3) > ul:nth-child(1) > li:nth-child(1) > div:nth-child(1) > div:nth-child(1) > button:nth-child(1) > img:nth-child(1)",
        # NOTE(review): as written this replace is a no-op; the character it
        # was meant to strip may have been lost -- confirm against the site.
        priceParser=lambda x: x.replace("", ""),
    ),
    GenericScraper(
        "Coolblue",
        r"^https?:\/\/(www\.)?coolblue\.\w*",
        ".css-1o2kclk",
        ".css-puih25 > span:nth-child(1)",
        ".css-ptvba5",
    ),
    GenericScraper(
        "Megekko",
        r"^https?:\/\/(www\.)?megekko\.nl",
        "#prd_title",
        "a.prsPrice:nth-child(1) > div:nth-child(1)",
        "#prd_afbeeldingen > div:nth-child(1) > img:nth-child(1)",
        # Prefixes the scraped ``src`` with the shop's origin.
        imageParser=lambda x: f"https://www.megekko.nl/{x}",
    ),
]

View file

@ -0,0 +1,5 @@
{% macro mainCenter() %}
<main class="w-full h-screen flex justify-center items-center">
{{ caller() }}
</main>
{% endmacro %}

View file

@ -1,73 +1,59 @@
{% set cpath = url_for("edit", id=wishlist.editId) %}
<h1>Metadata</h1>
<form action="{{ cpath }}" method="POST">
<main>
<h1>Edit '{{wishlist.title}}'</h1>
<sub>Manage your wishlist details and items</sub>
<br>
<form action="{{ cpath }}" method="POST">
{{ form_wl_editinfo.hidden_tag() }}
{{ form_wl_editinfo.title.label }}
{{ form_wl_editinfo.title(placeholder=wishlist.title) }}
<!-- <br> -->
{{ form_wl_editinfo.description.label }}
{{ form_wl_editinfo.description(placeholder=wishlist.description) }}
<!-- <br> -->
{{ form_wl_editinfo.wl_edit_submit() }}
</form>
<br>
<h1>Urls</h1>
<ul>
</form>
<br>
<h1>Urls</h1>
<ul>
<li>
View: <a href={{ url_for("view", id=wishlist.viewId) }}>{{ wishlist.viewId }}</a>
</li>
<li>
Edit: <a href={{ url_for("edit", id=wishlist.editId) }}>{{ wishlist.editId }}</a>
</li>
</ul>
<form action="{{ cpath }}" method="POST">
</ul>
<form action="{{ cpath }}" method="POST">
{{ form_wl_reseturls.hidden_tag() }}
{{ form_wl_reseturls.wl_reset_submit() }}
</form>
<br>
<h1>New item</h1>
<form action="{{ cpath }}" method="POST">
</form>
<br>
<h1>New item</h1>
<form action="{{ cpath }}" method="POST">
{{ form_it_new.hidden_tag() }}
<!-- <br> -->
{{ form_it_new.title.label }}
{{ form_it_new.title() }}
{{ form_it_new.it_new_title.label }}
{{ form_it_new.it_new_title() }}
<!-- <br> -->
{{ form_it_new.description.label }}
{{ form_it_new.description() }}
<!-- <br> -->
{{ form_it_new.price.label }}
{{ form_it_new.price() }}
<!-- <br> -->
{{ form_it_new.url.label }}
{{ form_it_new.url() }}
<!-- <br> -->
{{ form_it_new.image.label }}
{{ form_it_new.image() }}
<!-- <br> -->
{{ form_it_new.it_new_submit() }}
</form>
<br>
<h1>Delete items</h1>
{% if wishlist.items|length == 0 %}
<p>No items yet</p>
{% endif %}
<ul>
<button id="scrape">Scrape</button>
</form>
<br>
<h1>Delete items</h1>
{% if wishlist.items|length == 0 %}<p>No items yet</p>{% endif %}
<ul>
{% for value in wishlist.items %}
<li>
<form action="{{ cpath }}" method="POST">
@ -78,16 +64,14 @@
{{ value.title }}
</li>
{% endfor %}
</ul>
<br>
<h1>Delete wishlist</h1>
<form action="{{ cpath }}" method="POST">
</ul>
<br>
<h1>Delete wishlist</h1>
<form action="{{ cpath }}" method="POST">
{{ form_wl_delete.hidden_tag() }}
{{ form_wl_delete.wl_del_submit() }}
</form>
<style>
</form>
<style>
form {
display:grid;
grid-template-columns: max-content max-content;
@ -95,4 +79,43 @@
}
form label { text-align:right; }
form label:after { content: ":"; }
</style>
</style>
</main>
<script>
// Fills the "new item" form from the /scrape endpoint when the Scrape
// button is clicked.
const $q = (...i) => document.querySelector(...i);
const title = $q("#it_new_title");
const price = $q("#price");
const url = $q("#url");
const image = $q("#image");
// const description = $q("#description")

$q("#scrape").addEventListener("click", async (e) => {
    // The button sits inside the form; stop it from submitting.
    e.preventDefault();

    const tUrl = url.value.trim();
    if (!tUrl) {
        alert("Please provide a valid url."); //TODO: Replace with daisyui modal
        return;
    }

    let json;
    try {
        const res = await fetch(
            "/scrape?" + new URLSearchParams({ url: tUrl }).toString(),
            { method: "get" }
        );
        if (res.status !== 200) {
            alert("Failed to scrape site.");
            return;
        }
        json = await res.json();
    } catch (err) {
        // fetch() rejects on network failure and res.json() on an invalid
        // body; report both the same way as a non-200 response.
        alert("Failed to scrape site.");
        return;
    }

    title.value = json.name;
    image.value = json.image;
    price.value = json.price;
});
</script>

View file

@ -0,0 +1,2 @@
</body>
</html>

11
app/templates/header.html Normal file
View file

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Wishthat</title>
<link href="https://cdn.jsdelivr.net/npm/daisyui@5" rel="stylesheet" type="text/css" />
<script src="https://cdn.jsdelivr.net/npm/@tailwindcss/browser@4"></script>
</head>
<body class="m-0 p-0 w-full h-screen bg-base-200">

0
app/templates/index.html Normal file
View file

View file

@ -1,11 +1,20 @@
<form action="{{ url_for("new") }}" method="POST">
{% include 'header.html' %}
<main class="w-full h-screen flex justify-center items-end md:items-center">
<form action="{{ url_for("new") }}" method="POST" class="border-2 border-base-300 w-full md:w-md p-6 m-4 rounded-lg shadow-lg bg-base-100">
{{ form.hidden_tag() }}
{{ form.title.label }}
{{ form.title() }}
<h1 class="text-3xl font-semibold text-center text-info-content mb-4">New wishlist</h1>
{{ form.description.label }}
{{ form.description() }}
<legend class="fieldset-legend">{{ form.title.label.text }}</legend>
{{ form.title(class="w-full input validator mt-1 mb-4", placeholder="Wishlist Title") }}
{{ form.submit() }}
</form>
<legend class="fieldset-legend">{{ form.description.label.text }}</legend>
{{ form.description(class="w-full textarea validator mt-1 mb-2", placeholder="Wishlist Description") }}
<div class="validator-hint">Please make sure that both inputs are filled.</div>
{{ form.submit(class="btn btn-soft w-full") }}
</form>
</main>
{% include 'footer.html' %}

View file

@ -0,0 +1,14 @@
{% include 'header.html' %}
<dialog class="modal modal-bottom md:modal-middle" open>
<div class="modal-box">
<h3 class="text-lg font-bold">Before you continue</h3>
<p class="py-4">Wishthat operates using two distinct passcodes: one for viewing the wishlist and another for editing it. Think of it as a combination of a username and password. Please navigate to the edit page and <b>bookmark the URL</b>, as you will <b>not be able to edit your wishlist if you lose this passcode</b>. You can also find the passcode for viewing the wishlist on the edit page.</p>
<div class="modal-action">
<a class="btn" href="/view/{{viewId}}">View</a>
<a class="btn btn-primary" href="/edit/{{editId}}">Edit</a>
</div>
</div>
</dialog>
{% include 'footer.html' %}

View file

@ -1,3 +1,5 @@
{% include 'header.html' %}
<h1>{{wishlist.title}}</h1>
<sub>{{wishlist.description}}</sub>
@ -54,3 +56,5 @@
}
</script>
{% include 'footer.html' %}

View file

@ -1,4 +1,5 @@
from flask import url_for, redirect, render_template, abort
import json
from flask import request, url_for, redirect, render_template, abort
from app import app, db
from app.forms import (
NewWishlist,
@ -12,6 +13,9 @@ from app.forms import (
)
from app.models import Wishlist, Item
from uuid import UUID, uuid4 as uuid
from json import JSONEncoder
from app.scrapers import scrapeSite
@app.route("/")
@ -22,16 +26,25 @@ def index():
@app.route("/new", methods=["GET", "POST"])
def new():
form = NewWishlist()
if form.validate_on_submit():
wishlist = Wishlist(str(form.title.data), str(form.description.data))
db.session.add(wishlist)
db.session.commit()
return redirect(url_for("view", id=wishlist.viewId))
return redirect(
url_for("postNew", viewId=wishlist.viewId, editId=wishlist.editId)
)
return render_template("new.html", form=form)
@app.route("/post_new/<viewId>/<editId>")
def postNew(viewId: str, editId: str):
    """Render ``post_new.html`` with the given view and edit ids."""
    context = {"viewId": viewId, "editId": editId}
    return render_template("post_new.html", **context)
@app.route("/edit/<id>", methods=["GET", "POST"])
def edit(id: str):
wishlist: Wishlist = db.one_or_404(
@ -74,7 +87,7 @@ def edit(id: str):
item = Item(
str(
f.title.data,
f.it_new_title.data,
),
str(
f.description.data,
@ -118,6 +131,7 @@ def view(id: str):
db.select(Wishlist).filter_by(viewId=UUID(id)),
description="Failed to get wishlist. Are you sure this is the correct url?",
)
checkform = CheckItem()
checkform.num
if checkform.validate_on_submit():
@ -131,3 +145,16 @@ def view(id: str):
return redirect(url_for("view", id=id))
return render_template("view.html", wishlist=wishlist, form=checkform)
@app.route("/scrape", methods=["GET"])
def scrape():
    """Scrape the page given in the ``url`` query parameter.

    Responds with the scraped result's attributes as a JSON object, or with
    400 when ``url`` is missing or empty, 404 when no scraper handles the
    URL, and 502 when scraping fails.
    """
    # Local import: keeps the file's top-level import block untouched.
    from app.scrapers import ScrapeError

    url = request.args.get("url")
    if not url:
        abort(400)

    try:
        scraped = scrapeSite(url)
    except ScrapeError as err:
        # Log the detail server-side; the client only needs the status.
        app.logger.warning("Scraping %s failed: %s", url, err)
        abort(502)

    if scraped is None:
        abort(404)

    # Explicit mimetype: a bare string would be served as text/html.
    return app.response_class(
        json.dumps(scraped.__dict__), mimetype="application/json"
    )

View file

@ -14,7 +14,7 @@
{
nativeBuildInputs = [
(pkgs.python3.withPackages
(x: [x.flask x.flask-wtf x.wtforms x.flask-sqlalchemy]))
(x: [x.flask x.flask-wtf x.wtforms x.flask-sqlalchemy x.beautifulsoup4 x.types-beautifulsoup4 x.requests]))
pkgs.entr
];
};

Binary file not shown.