====Linux====

<code>
wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.10.0-amd64.deb
sudo dpkg -i elasticsearch-7.10.0-amd64.deb
sudo apt --fix-broken install
</code>

<code>
wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add -
</code>

<code>
# sudo nano /etc/elasticsearch/elasticsearch.yml   (example values below)
cluster.name: my-cluster
network.host: localhost
http.port: 9200
</code>

<code>
sudo systemctl start elasticsearch
sudo systemctl enable elasticsearch
curl -X GET "http://localhost:9200"
</code>

====Docker====

<code yaml>
version: '3'
services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2   # example version tag
    container_name: elasticsearch
    environment:
      - discovery.type=single-node
      - ES_JAVA_OPTS=-Xms512m -Xmx512m
      - ELASTIC_PASSWORD=dein_sicheres_passwort
    ports:
      - 9200:9200
      - 9300:9300
    volumes:
      - esdata:/usr/share/elasticsearch/data
    networks:
      - es-net

volumes:
  esdata:
    driver: local

networks:
  es-net:
    driver: bridge
</code>

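With the compose file saved (e.g. as docker-compose.yml), the container can be started in the background and its logs followed; this assumes a Docker installation with the compose plugin. Afterwards the reachability check below can be run.

<code>
docker compose up -d
docker compose logs -f elasticsearch
</code>
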
<code>
sudo curl -X GET "http://localhost:9200" -u elastic:dein_sicheres_passwort
</code>

=====Service=====
<code>
# example host, index and user names assumed (localhost:9200, testindex, testuser)
#add user
curl -X POST "http://localhost:9200/_security/user/testuser" -H 'Content-Type: application/json' -d '{"password":"changeme","roles":["superuser"]}'
#delete user
curl -X DELETE "http://localhost:9200/_security/user/testuser"
# create index
curl -X PUT "http://localhost:9200/testindex"
#list indices
curl -X GET "http://localhost:9200/_cat/indices?v"
#delete index
curl -X DELETE "http://localhost:9200/testindex"
#add entry
curl -X POST "http://localhost:9200/testindex/_doc" -H 'Content-Type: application/json' -d '{"name":"test"}'
#search entry
curl -X GET "http://localhost:9200/testindex/_search?q=name:test"
curl -X GET "http://localhost:9200/testindex/_search?q=*:*"
# As Text
curl -X GET "http://localhost:9200/testindex/_search?q=name:test&pretty"
# As JSON
curl -X GET "http://localhost:9200/testindex/_search" -H 'Content-Type: application/json' -d '{"query":{"match":{"name":"test"}}}'
# For LIKE the field must be a "text" field (wildcard query)
curl -X GET "http://localhost:9200/testindex/_search" -H 'Content-Type: application/json' -d '{"query":{"wildcard":{"name":"te*"}}}'
</code>

=====Python=====

<code python>
import logging
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup

logging.basicConfig(
    format='%(asctime)s %(levelname)s:%(message)s',
    level=logging.INFO)

class Crawler:

    def __init__(self, urls=[]):
        # URLs already processed and URLs still queued for crawling
        self.visited_urls = []
        self.urls_to_visit = urls

    def download_url(self, url):
        return requests.get(url).text

    def get_linked_urls(self, url, html):
        # extract all links from the page and resolve relative paths
        soup = BeautifulSoup(html, 'html.parser')
        for link in soup.find_all('a'):
            path = link.get('href')
            if path and path.startswith('/'):
                path = urljoin(url, path)
            yield path

    def add_url_to_visit(self, url):
        if url not in self.visited_urls and url not in self.urls_to_visit:
            self.urls_to_visit.append(url)

    def crawl(self, url):
        html = self.download_url(url)
        for url in self.get_linked_urls(url, html):
            self.add_url_to_visit(url)

    def run(self):
        while self.urls_to_visit:
            url = self.urls_to_visit.pop(0)
            logging.info(f'Crawling: {url}')
            try:
                self.crawl(url)
            except Exception:
                logging.exception(f'Failed to crawl: {url}')
            finally:
                self.visited_urls.append(url)

if __name__ == '__main__':
    Crawler(urls=['https://www.example.com/']).run()  # example seed URL
</code>
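
The crawler above only collects URLs. A minimal sketch of how each downloaded page could be pushed into Elasticsearch is shown below; it extends the Crawler class defined above and assumes the client settings and the "test_index" mapping from the following sections (host, credentials and field names are examples).

<code python>
from elasticsearch import Elasticsearch
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# example connection settings, matching the sections below
es = Elasticsearch(
    ['https://localhost:9200'],
    basic_auth=("elastic", "dein_sicheres_passwort"),
    verify_certs=False
)

class IndexingCrawler(Crawler):
    # Crawler is the class defined in the block above;
    # every downloaded page is stored in Elasticsearch while crawling
    def crawl(self, url):
        html = self.download_url(url)
        es.index(index="test_index", document={"url": url, "content": html})
        for linked_url in self.get_linked_urls(url, html):
            self.add_url_to_visit(linked_url)

if __name__ == '__main__':
    IndexingCrawler(urls=['https://www.example.com/']).run()
</code>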

====Create Index====
<code python>
import os
from elasticsearch import Elasticsearch
import urllib3

# suppress certificate warnings for the self-signed cluster certificate
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

es = Elasticsearch(
    ['https://localhost:9200'],          # example host
    basic_auth=("elastic", "dein_sicheres_passwort"),
    verify_certs=False
)

index_name = "test_index"                # example index name

if not es.indices.exists(index=index_name):
    # example mapping (field names assumed)
    es.indices.create(index=index_name, body={
        "mappings": {
            "properties": {
                "url": {"type": "text"},
                "content": {"type": "text"}
            }
        }
    })
    print(f"Index '{index_name}' created.")
else:
    print(f"Index '{index_name}' already exists.")
</code>

====Insert Data====
<code python>
import os
from elasticsearch import Elasticsearch
from elasticsearch.helpers import streaming_bulk
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

es = Elasticsearch(
    ['https://localhost:9200'],          # example host
    basic_auth=("elastic", "dein_sicheres_passwort"),
    verify_certs=False
)

index_name = "test_index"                # example index name

# Test entries as documents (example field values)
documents = [
    {"url": "https://www.example.com/a", "content": "first test document"},
    {"url": "https://www.example.com/b", "content": "second test document"}
]

# Insert the documents into the index
for doc in documents:
    es.index(index=index_name, document=doc)
    print(f"Inserted: {doc}")

print("Done.")
</code>
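
The script above imports streaming_bulk but indexes each document individually; for larger batches the helper reduces the number of round trips. A minimal sketch, reusing the es client, index_name and documents from the block above:

<code python>
from elasticsearch.helpers import streaming_bulk

# wrap each document in a bulk action for the target index
actions = ({"_index": index_name, "_source": doc} for doc in documents)

# streaming_bulk yields an (ok, result) tuple per action
for ok, result in streaming_bulk(es, actions, chunk_size=500):
    if not ok:
        print("Failed:", result)
</code>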

====Query Data====
<code python>
from elasticsearch import Elasticsearch
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

es = Elasticsearch(
    ['https://localhost:9200'],          # example host
    basic_auth=("elastic", "dein_sicheres_passwort"),
    verify_certs=False
)

# example query: match on the "content" field (field name and search term assumed)
data = {
    "query": {
        "match": {
            "content": "test"
        }
    },
    "size": 100
}

response = es.search(index="test_index", body=data, scroll='2m')

if 'hits' not in response or not response['hits']['hits']:
    print("No results found.")
    exit(1)

scroll_id = response['_scroll_id']
res_c = 0

for hit in response['hits']['hits']:
    res_c += 1
    print("Result:", hit['_source'])

while True:
    response = es.scroll(body={"scroll_id": scroll_id, "scroll": "2m"})
    print("Fetching next batch ...")
    if 'hits' not in response or not response['hits']['hits']:
        break

    for hit in response['hits']['hits']:
        res_c += 1
        print("Result:", hit['_source'])

es.clear_scroll(body={"scroll_id": scroll_id})

print("Total results:", res_c)
</code>

====Delete Index====

<code python>
import csv
import glob
import os
from elasticsearch import Elasticsearch
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

es = Elasticsearch(
    ['https://localhost:9200'],          # example host
    basic_auth=("elastic", "dein_sicheres_passwort"),
    verify_certs=False
)

index_name = input('Name of the index to delete: ')

# Check whether the index exists and delete it
if es.indices.exists(index=index_name):
    es.indices.delete(index=index_name)
    print(f"Index '{index_name}' deleted.")
else:
    print(f"Index '{index_name}' does not exist.")
</code>

====Scroll Query====