From 13d63245ed20e847d84ce7201a20180e2ad17733 Mon Sep 17 00:00:00 2001 From: Jurn Wubben Date: Sun, 22 Jun 2025 19:42:20 +0200 Subject: [PATCH] Updated forms. Added scrapers. Updated edit page for scrapers. Made new wishlist look better and added warning to save uuids somewhere safe. Updated nix flake. --- app/configuration.py | 1 + app/forms.py | 12 +- app/scrapers.py | 162 +++++++++++++++++++++++++++ app/templates/components.html | 5 + app/templates/edit.html | 205 +++++++++++++++++++--------------- app/templates/footer.html | 2 + app/templates/header.html | 11 ++ app/templates/index.html | 0 app/templates/new.html | 25 +++-- app/templates/post_new.html | 14 +++ app/templates/view.html | 4 + app/views.py | 33 +++++- flake.nix | 2 +- instance/application.db | Bin 12288 -> 12288 bytes 14 files changed, 367 insertions(+), 109 deletions(-) create mode 100644 app/scrapers.py create mode 100644 app/templates/components.html create mode 100644 app/templates/footer.html create mode 100644 app/templates/header.html create mode 100644 app/templates/index.html create mode 100644 app/templates/post_new.html diff --git a/app/configuration.py b/app/configuration.py index 092b4a8..150124b 100644 --- a/app/configuration.py +++ b/app/configuration.py @@ -14,5 +14,6 @@ class Config(object): class DevelopmentConfig(Config): DEBUG = True + class TestingConfig(Config): TESTING = True diff --git a/app/forms.py b/app/forms.py index 5cf3bcd..f8c70dd 100644 --- a/app/forms.py +++ b/app/forms.py @@ -3,19 +3,19 @@ from flask_wtf import FlaskForm from wtforms import ( StringField, SubmitField, - IntegerField, HiddenField, FloatField, URLField, + TextAreaField, ) from wtforms.validators import DataRequired class NewWishlist(FlaskForm): - title = StringField("Title:", validators=[DataRequired()]) - description = StringField("Description:", validators=[DataRequired()]) - submit = SubmitField("Submit") + title = StringField("Title", validators=[DataRequired()]) + description = TextAreaField("Description", validators=[DataRequired()]) + submit = SubmitField("Create") # Each submit needs a different page fot it to work on the same page. @@ -25,7 +25,7 @@ class DeleteWishlist(FlaskForm): class EditWishlistInfo(FlaskForm): title = StringField("Title", validators=[DataRequired()]) - description = StringField("Description", validators=[DataRequired()]) + description = TextAreaField("Description", validators=[DataRequired()]) wl_edit_submit = SubmitField("Submit") @@ -34,7 +34,7 @@ class ResetWishlistUrls(FlaskForm): class NewItem(FlaskForm): - title = StringField("Title", validators=[DataRequired()]) + it_new_title = StringField("Title", validators=[DataRequired()]) description = StringField("Description", validators=[DataRequired()]) price = FloatField("Price", validators=[DataRequired()]) url = URLField("Url", validators=[DataRequired()]) diff --git a/app/scrapers.py b/app/scrapers.py new file mode 100644 index 0000000..ca99499 --- /dev/null +++ b/app/scrapers.py @@ -0,0 +1,162 @@ +from abc import ABC, abstractmethod +from typing import Callable, override +from bs4 import BeautifulSoup +from requests import get +from re import findall, match, search + +noReturnLambda: Callable[[str], str] = lambda x: x + + +class ScrapeError(Exception): + pass + + +class ScraperResult: + def __init__(self, name: str, price: float, image: str): + self.name = name + self.price = price + self.image = image + + @override + def __repr__(self) -> str: + return ( + f"" + ) + + name: str + price: float + image: str + + +class ScraperLike(ABC): + name: str + urlRegex: str + + @abstractmethod + def scrape(self, url: str) -> ScraperResult: + pass + + +class GenericScraper(ScraperLike): + name: str + urlRegex: str + _nameQuery: str + _priceQuery: str + _imageQuery: str + priceParser: Callable[[str], str] + imageParser: Callable[[str], str] + + def __init__( + self, + name: str, + baseUrl: str, + nameQuery: str, + priceQuery: str, + imageQuery: str, + priceParser: Callable[[str], str] = noReturnLambda, + imageParser: Callable[[str], str] = noReturnLambda, + ): + self.name = name + self.urlRegex = baseUrl + self._nameQuery = nameQuery + self._priceQuery = priceQuery + self._imageQuery = imageQuery + self.priceParser = priceParser + self.imageParser = imageParser + + @override + def scrape(self, url: str) -> ScraperResult: + res = get( + url, + headers={ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:138.0) Gecko/20100101 Firefox/138.0" + }, + ) + if res.status_code != 200: + raise ScrapeError("Failed to fetch page.") + + soup = BeautifulSoup(res.text, features="html.parser") + name = soup.select_one(self._nameQuery) + price = soup.select_one(self._priceQuery) + image = soup.select_one(self._imageQuery) + + if name is None or price is None or image is None: + raise ScrapeError( + f"Failed to scrape site. Invalid webpage or queries: N:{name},P:{price},I:{image}" + ) + + name = name.text.strip() + image = image.get("src") + try: + x = self.priceParser(price.text) + reg = search(r"([0-9]+)(?:(?:\.|,)([0-9]+))?", x) + if not reg: + raise ValueError + x = reg.group(1) + + g2 = reg.group(2) + if g2: + x += "." + g2 + + price = float(x) + except ValueError: + print(price) + raise ScrapeError(f"Failed to scrape site. Error while parsing price.") + if not isinstance(image, str): + raise ScrapeError(f"Failed to scrape site. Error while parsing image.") + + return ScraperResult(name, price, self.imageParser(image)) + + +def scrapeSite(url: str) -> ScraperResult | None: + scraped: ScraperResult | None = None + + for i in scrapers: + if match(i.urlRegex, url) is None: + continue + + scraped = i.scrape(url) + + return scraped + + +scrapers = [ + GenericScraper( + "Amazon", + r"^https?:\/\/(www\.)?((amazon)|(amzn))\.\w*", + "#productTitle", + "#corePrice_feature_div > div:nth-child(1) > div:nth-child(1) > span:nth-child(1) > span:nth-child(1)", + "#landingImage", + ), + GenericScraper( + "Bol.com", + r"^https?:\/\/(www\.)?bol.com", + ".page-heading > span:nth-child(1)", + ".promo-price", + "div.container-item:nth-child(2) > wsp-selected-item-image-zoom-modal-application:nth-child(1) > button:nth-child(2) > img:nth-child(1)", + priceParser=lambda x: x.replace("\n ", "."), + ), + GenericScraper( + "MediaMarkt", + r"^https?:\/\/(www\.)?mediamarkt.\w*", + "h1.sc-d571b66f-0", + ".sc-6db49389-0 > span:nth-child(2)", + "div.sc-hLBbgP:nth-child(2) > div:nth-child(3) > ul:nth-child(1) > li:nth-child(1) > div:nth-child(1) > div:nth-child(1) > button:nth-child(1) > img:nth-child(1)", + priceParser=lambda x: x.replace("€", ""), + ), + GenericScraper( + "Coolblue", + r"^https?:\/\/(www\.)?coolblue.\w*", + ".css-1o2kclk", + ".css-puih25 > span:nth-child(1)", + ".css-ptvba5", + ), + GenericScraper( + "Megekko", + r"^https?:\/\/(www\.)?megekko.nl", + "#prd_title", + "a.prsPrice:nth-child(1) > div:nth-child(1)", + "#prd_afbeeldingen > div:nth-child(1) > img:nth-child(1)", + imageParser=lambda x: f"https://www.megekko.nl/{x}", + ), +] diff --git a/app/templates/components.html b/app/templates/components.html new file mode 100644 index 0000000..c3a910c --- /dev/null +++ b/app/templates/components.html @@ -0,0 +1,5 @@ +{% macro mainCenter() %} +
+ {{ caller() }} +
+{% endmacro %} diff --git a/app/templates/edit.html b/app/templates/edit.html index 263a54e..61e8b90 100644 --- a/app/templates/edit.html +++ b/app/templates/edit.html @@ -1,98 +1,121 @@ {% set cpath = url_for("edit", id=wishlist.editId) %} +
+

Edit '{{wishlist.title}}'

+ Manage your wishlist details and items +
+
+ {{ form_wl_editinfo.hidden_tag() }} + {{ form_wl_editinfo.title.label }} + {{ form_wl_editinfo.title(placeholder=wishlist.title) }} + + {{ form_wl_editinfo.description.label }} + {{ form_wl_editinfo.description(placeholder=wishlist.description) }} + + {{ form_wl_editinfo.wl_edit_submit() }} +
+
+

Urls

+ +
+ {{ form_wl_reseturls.hidden_tag() }} + {{ form_wl_reseturls.wl_reset_submit() }} +
+
+

New item

+
+ {{ form_it_new.hidden_tag() }} + + {{ form_it_new.it_new_title.label }} + {{ form_it_new.it_new_title() }} + + {{ form_it_new.description.label }} + {{ form_it_new.description() }} + + {{ form_it_new.price.label }} + {{ form_it_new.price() }} + + {{ form_it_new.url.label }} + {{ form_it_new.url() }} + + {{ form_it_new.image.label }} + {{ form_it_new.image() }} + + {{ form_it_new.it_new_submit() }} + +
+
+

Delete items

+ {% if wishlist.items|length == 0 %}

No items yet

{% endif %} +
    + {% for value in wishlist.items %} +
  • +
    + {{ form_it_delete.csrf_token }} + {{ form_it_delete.index(value=loop.index) }} + {{ form_it_delete.it_del_submit() }} +
    + {{ value.title }} +
  • + {% endfor %} +
+
+

Delete wishlist

+
+ {{ form_wl_delete.hidden_tag() }} + {{ form_wl_delete.wl_del_submit() }} +
+ +
-

Metadata

-
- {{ form_wl_editinfo.hidden_tag() }} + diff --git a/app/templates/footer.html b/app/templates/footer.html new file mode 100644 index 0000000..b605728 --- /dev/null +++ b/app/templates/footer.html @@ -0,0 +1,2 @@ + + diff --git a/app/templates/header.html b/app/templates/header.html new file mode 100644 index 0000000..e3dce15 --- /dev/null +++ b/app/templates/header.html @@ -0,0 +1,11 @@ + + + + + + Wishthat + + + + + diff --git a/app/templates/index.html b/app/templates/index.html new file mode 100644 index 0000000..e69de29 diff --git a/app/templates/new.html b/app/templates/new.html index 1979102..54184df 100644 --- a/app/templates/new.html +++ b/app/templates/new.html @@ -1,11 +1,20 @@ - - {{ form.hidden_tag() }} +{% include 'header.html' %} - {{ form.title.label }} - {{ form.title() }} +
+ + {{ form.hidden_tag() }} - {{ form.description.label }} - {{ form.description() }} +

New wishlist

- {{ form.submit() }} - + {{ form.title.label.text }} + {{ form.title(class="w-full input validator mt-1 mb-4", placeholder="Wishlist Title") }} + + {{ form.description.label.text }} + {{ form.description(class="w-full textarea validator mt-1 mb-2", placeholder="Wishlist Description") }} +
Please make sure that both inputs are filled.
+ + {{ form.submit(class="btn btn-soft w-full") }} + +
+ +{% include 'footer.html' %} diff --git a/app/templates/post_new.html b/app/templates/post_new.html new file mode 100644 index 0000000..d8df650 --- /dev/null +++ b/app/templates/post_new.html @@ -0,0 +1,14 @@ +{% include 'header.html' %} + + + + + +{% include 'footer.html' %} diff --git a/app/templates/view.html b/app/templates/view.html index 5df40ec..742ebe1 100644 --- a/app/templates/view.html +++ b/app/templates/view.html @@ -1,3 +1,5 @@ +{% include 'header.html' %} +

{{wishlist.title}}

{{wishlist.description}} @@ -54,3 +56,5 @@ } + +{% include 'footer.html' %} diff --git a/app/views.py b/app/views.py index c168c6a..e1d55c8 100644 --- a/app/views.py +++ b/app/views.py @@ -1,4 +1,5 @@ -from flask import url_for, redirect, render_template, abort +import json +from flask import request, url_for, redirect, render_template, abort from app import app, db from app.forms import ( NewWishlist, @@ -12,6 +13,9 @@ from app.forms import ( ) from app.models import Wishlist, Item from uuid import UUID, uuid4 as uuid +from json import JSONEncoder + +from app.scrapers import scrapeSite @app.route("/") @@ -22,16 +26,25 @@ def index(): @app.route("/new", methods=["GET", "POST"]) def new(): form = NewWishlist() + if form.validate_on_submit(): wishlist = Wishlist(str(form.title.data), str(form.description.data)) db.session.add(wishlist) db.session.commit() - return redirect(url_for("view", id=wishlist.viewId)) + return redirect( + url_for("postNew", viewId=wishlist.viewId, editId=wishlist.editId) + ) return render_template("new.html", form=form) +@app.route("/post_new//") +def postNew(viewId: str, editId: str): + + return render_template("post_new.html", viewId=viewId, editId=editId) + + @app.route("/edit/", methods=["GET", "POST"]) def edit(id: str): wishlist: Wishlist = db.one_or_404( @@ -74,7 +87,7 @@ def edit(id: str): item = Item( str( - f.title.data, + f.it_new_title.data, ), str( f.description.data, @@ -118,6 +131,7 @@ def view(id: str): db.select(Wishlist).filter_by(viewId=UUID(id)), description="Failed to get wishlist. Are you sure this is the correct url?", ) + checkform = CheckItem() checkform.num if checkform.validate_on_submit(): @@ -131,3 +145,16 @@ def view(id: str): return redirect(url_for("view", id=id)) return render_template("view.html", wishlist=wishlist, form=checkform) + + +@app.route("/scrape", methods=["GET"]) +def scrape(): + url = request.args.get("url") + if url is None: + abort(400) + + scraped = scrapeSite(url) + if scraped is None: + abort(404) + + return json.dumps(scraped.__dict__) diff --git a/flake.nix b/flake.nix index 6fa056c..029325b 100644 --- a/flake.nix +++ b/flake.nix @@ -14,7 +14,7 @@ { nativeBuildInputs = [ (pkgs.python3.withPackages - (x: [x.flask x.flask-wtf x.wtforms x.flask-sqlalchemy])) + (x: [x.flask x.flask-wtf x.wtforms x.flask-sqlalchemy x.beautifulsoup4 x.types-beautifulsoup4 x.requests])) pkgs.entr ]; }; diff --git a/instance/application.db b/instance/application.db index f0c5b911dcbb9a98c52cca9331c38198a4695c9f..5e34ed5a3baa084af424db9310e9144f4cf2de15 100644 GIT binary patch delta 1636 zcmZojXh@hK%_uri#+gxcW5N=C86NI84E!tkZTMdCP2zLp{lGhm*M;XD&r}{8?l&6? z&v4iK@vt%Y`br9>n5HF~S(+GGB&VjBB%7yNSQ?sJ8mAbT85pIe7@Hd;TO=8oSR^MJ znG)px$uQxCwAVsct)Vw#1exuHc$N}7pTTB1d=rA3OFrGc4oa&n@vrG=@Xg=MOV zrJ+$`TB1pkL9&H~nPEzrQL4FtL29yri7AS~^&pc&I6)?>3K|0t_q*x{=r&^jABpN5DnkAd1rWmBACYxB8m|Iw;Bqyer7^EZ_o1~f>rh-h) zN-YArIU_TK17fO4vRPV+QEFmJijkQ~l7)Fva+;B;rGZ6~nT3I2eOjuesi8%pvAJoA znYp2fiKT&IqD6{HqLEpWk%>u?k*S#lnz8Hw3#4J|E9%uLeK43d&lO-u|; z6OGa=QVonuEmO^s49ra}%~LFmjZDld%QPc%3k%b<6w{PM^HfVCOAC`^6EjdU zF-x^DNlr7cFf=tcfI2oK)1MV$V48W7fr*7dnkgs@O-xcj-cCsbMV3*jrD45=VVa?# znVEU2fsv)Lv5B#%S(1sRNve^7g}F(xMQTzKEL1Zx!&ty3%3CBE8d;c`nV6(nn3*J} znph-Sm>QdyryNwhFbG)^-#vouIDNi#P}N=Y+M zF-u8k}63JK0A51P=?(8V3Hg{2Ba8 zJZm-<&f%$VDQ4keU~Cc%Wp7PwRODcZ%rDK&%t=)+GEfL`boN&8%}g%JR|rndNX$;n zNlYwKaCBBMbW|`jGF1puFb>GfQwR=l2~qI#SI}1Qa|SCc%F9eGQt(L4OG(X9@XgOn z%_~VPs#I_(D$UN$%P)W$>6)3BQk+~5H@7T5zevHcD5W$rFCT1aad~D*az<*gLP@?t zX>qDTT7Hp2ZlyvZM2Vh*(4qbqjjx9@N=gcft@QQF%ggl=a}%rb^Yrp^^r7C+MK~PH z1UpR^?j~K=dax^Cj?hmj(04L0k92YKF!wOFFGz_mN{KJGEi5vvtWrzP&nrpIE78qN zu}#da$}`j}uFTalGBh1X)HPuZtO|jHXGfgzJNKP>^O*KnWHFkt+ zNY+bEv^23Wur$<7G&4!jtv4|>NY%A4OEK3?HZ(I#PBb<)Ha1UED@X%bU0_R$`B240 zwnnBQf$j#5;V$N3j!wb8Q4t}Lk!ny+q}wKWMj9GJxka|7hNhMVX_ob-x@jgB2D&C@ zsRp`9smZCjrUn)ksY%A>=4NJwP}P~Xh(Lh_cdlM;YD#9JE;PuK^KVW_N delta 59 zcmZojXh@hK&B!@X#+i|GW5N=C4kmsZ2L6@&HX92ocsDleV4Q3te}aRFKZAjPEq?|? L@CoDO4E-4Z4^