cursor使用方式介绍

# 使用cursor的技巧

总是在一个空的文件夹开启项目

表述需求时尽量明确但谨慎

让Cursor从项目一开始就写readme文档

让Cursor写代码时写清楚各个代码块的注释

用git做版本管理

使用command加i调出composer开始一个项目

使用composer和chat功能时多用@codebase（引用整个代码库作为上下文）

常用的文档链接加入到docs中

善用setting里的rules for AI

注册账号后免费期限用完的解决方法

# 例子一：使用python爬取网页数据

新建一个a.txt文件，表明需求

我的目标：
    爬取https://www.mianshiya.com/?category=operation
   将每个标题的题目和答案分别写入md文件中
工具要求：
   python，selenium，requests，bs4，os，time，chrome浏览器，操作已经打开的浏览器
步骤：
    1. 打开浏览器，进入https://www.mianshiya.com/?category=operation
    2. 获取每个标题的题目和答案
    3. 将题目和答案分别写入md文件中
    4. 关闭浏览器
你理解的有误，我需要的是爬取https://www.mianshiya.com/?category=operation
     1.获取每个类型的标题作为md文件的文件名，点击标题获取标题里面的题目问题输出到md文件中去
     1.1标题的元素是
     <a href="/bank/1791003439968264194"><div class="ant-card-meta"><div class="ant-card-meta-avatar"><span class="ant-avatar ant-avatar-lg ant-avatar-square css-m4timi"><span class="ant-avatar-string" style="transform: scale(1);"></span></span></div><div class="ant-card-meta-detail"><div class="ant-card-meta-title">MySQL 面试题</div><div class="ant-card-meta-description"><div class="ant-typography ant-typography-secondary ant-typography-ellipsis ant-typography-ellipsis-single-line css-m4timi" style="margin-bottom: 0px;">MySQL 是主流的开源关系型数据库管理系统，是后端开发者必备的技能。MySQL 面试的重点包括数据表设计、SQL 查询、索引优化、事务管理、锁机制、存储引擎（如 InnoDB）、高并发处理和性能调优等。</div></div></div></div></a>
    1.2题目的问题元素是
    <a target="_blank" href="/bank/1791003439968264194/question/1780933295526146049">MySQL 中的数据排序是怎么实现的？</a>

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
import time
import os

def create_md_file(title, questions, folder="interview_questions"):
    """Write one category's questions to a Markdown file.

    The file is created at ``<folder>/<safe name>.md`` and overwritten if it
    already exists. It contains the category title as a level-1 heading
    followed by one level-2 heading per question.

    Args:
        title: Category title. Used verbatim as the level-1 heading; a
            sanitized copy is used as the file name.
        questions: Iterable of question strings, written in iteration order.
        folder: Output directory; created (including parents) if missing.
    """
    os.makedirs(folder, exist_ok=True)

    # Only the file name needs sanitizing: keep alphanumerics, spaces,
    # hyphens and underscores so the name is valid on common filesystems.
    safe_name = "".join(
        ch for ch in title if ch.isalnum() or ch in (' ', '-', '_')
    ).strip()
    if not safe_name:
        # A title made only of stripped characters would otherwise yield ".md".
        safe_name = "untitled"
    file_path = os.path.join(folder, f"{safe_name}.md")

    with open(file_path, 'w', encoding='utf-8') as f:
        # Keep the original title in the heading so characters that are only
        # illegal in file names (e.g. the "++" in "C++") are not lost.
        f.write(f"# {title}\n\n")
        f.writelines(f"## {question}\n\n" for question in questions)

def _current_url_or_placeholder(driver):
    """Return ``driver.current_url``, or a placeholder string if reading it raises."""
    try:
        return driver.current_url
    except Exception as url_error:
        return f"<无法获取: {url_error}>"


def main():
    """Scrape question titles per category and save them as Markdown files.

    Attaches to a Chrome instance listening on the remote-debugging address
    127.0.0.1:9223, opens the category listing page, collects every
    ``/bank/`` link that carries a card title, then visits each category and
    writes its question link texts via ``create_md_file``.

    Failures are reported with ``print`` and never propagate: a connection
    failure prints start-up instructions and returns; a failure inside one
    category is logged and the loop moves on to the next category.
    """
    options = webdriver.ChromeOptions()
    # Attach to an existing browser instead of launching a new one.
    options.add_experimental_option("debuggerAddress", "127.0.0.1:9223")
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    service = Service()

    try:
        driver = webdriver.Chrome(service=service, options=options)
    except Exception as e:
        print("\n=== Chrome 连接失败 ===")
        print("1. 请确保已经关闭所有 Chrome 进程:")
        print("   pkill -9 Chrome")
        print("\n2. 然后使用以下命令启动 Chrome:")
        print("   /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9223 --user-data-dir=/tmp/chrome-debug")
        print(f"\n错误详情: {str(e)}")
        return

    # NOTE(review): the driver is never quit, matching the original code;
    # presumably this keeps the attached, already-open browser running.
    try:
        print("正在访问网站...")
        driver.get("https://www.mianshiya.com/?category=operation")

        print("等待页面加载...")
        time.sleep(3)

        # Collect (title, url) for every category link first, because
        # navigating away invalidates the element handles found on this page.
        categories = []
        category_elements = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, "a[href^='/bank/']")
            )
        )

        for element in category_elements:
            try:
                title = element.find_element(By.CSS_SELECTOR, ".ant-card-meta-title").text.strip()
                url = element.get_attribute('href')
                if title and url:
                    categories.append((title, url))
            except Exception as e:
                # Links without a card title are reported and skipped.
                print(f"获取分类信息失败: {str(e)}")
                continue

        print(f"找到 {len(categories)} 个分类")

        for index, (category_title, category_url) in enumerate(categories, 1):
            try:
                print(f"\n[{index}/{len(categories)}] 正在处理分类: {category_title}")
                print(f"访问URL: {category_url}")

                driver.get(category_url)
                time.sleep(2)

                question_elements = WebDriverWait(driver, 10).until(
                    EC.presence_of_all_elements_located(
                        (By.CSS_SELECTOR, "a[target='_blank'][href*='/question/']")
                    )
                )

                print(f"找到 {len(question_elements)} 个题目")

                # Keep only links that actually have visible text.
                questions = []
                for q in question_elements:
                    question_text = q.text.strip()
                    if question_text:
                        questions.append(question_text)

                if questions:
                    create_md_file(category_title, questions)
                    print(f"已保存 {category_title} 的 {len(questions)} 个题目")
                else:
                    print(f"警告: {category_title} 没有找到任何题目")

            except Exception as e:
                print(f"处理分类 {category_title} 时出错: {str(e)}")
                # Reading the URL must not raise here, otherwise one broken
                # category would abort every remaining category.
                print(f"当前URL: {_current_url_or_placeholder(driver)}")
                continue

        print("\n爬取完成！")

    except Exception as e:
        print(f"运行过程中出错: {str(e)}")
        print("当前页面 URL:", _current_url_or_placeholder(driver))
        try:
            print("页面源码:", driver.page_source[:1000])
        except Exception as source_error:
            # Diagnostics are best-effort; report instead of silently passing.
            print(f"无法获取页面源码: {source_error}")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121

最终爬取结果

76edb51cfa129843png

# 根据爬取的题目使用deepseek生成答案

我想让你帮我生成一个脚本，这个脚本运行之后： 1、读取interview_questions目录下的md文件里的二级标题 2、帮我一行一行读取二级标题名，分别发送给deepseek，让deepseek来撰写标题名的答案 3、把deepseek返回的答案保存到对应文件的二级标题下 4、deepseek的调用方式参考：https://api-docs.deepseek.com/

import os
import markdown
from bs4 import BeautifulSoup
import time

# Initialize the DeepSeek client.
# NOTE(review): `OpenAI` is not imported anywhere in the visible source, so this
# statement raises NameError as written — presumably `from openai import OpenAI`
# belongs with the imports above; confirm and add it.
client = OpenAI(
    api_key='sk-xxxxxxx',  # replace with your DeepSeek API key
    base_url="https://api.deepseek.com"  # base URL of the DeepSeek API
)

def get_h2_titles(markdown_content):
    """Return the text of every level-2 heading in a Markdown string.

    The Markdown is rendered to HTML, the HTML is parsed, and the text of
    each ``<h2>`` element found is collected into a list.
    """
    rendered_html = markdown.markdown(markdown_content)
    document = BeautifulSoup(rendered_html, 'html.parser')

    headings = []
    for heading_tag in document.find_all('h2'):
        headings.append(heading_tag.text)
    return headings

def get_ai_response(title):
    """Ask the DeepSeek chat model to answer one interview question.

    Args:
        title: The question text, inserted into the user message.

    Returns:
        The message content of the first returned choice, or ``None`` if
        anything raised while building or sending the request (the error is
        printed).
    """
    try:
        conversation = [
            {"role": "system", "content": "你是一个面试专家，请详细回答以下面试问题。"},
            {"role": "user", "content": f"请回答这个面试问题：{title}"},
        ]
        completion = client.chat.completions.create(
            model="deepseek-chat",
            messages=conversation,
            stream=False,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as error:
        print(f"调用 DeepSeek API 时出错: {error}")
        return None

def _find_heading_line(lines, title_marker):
    """Return the index of the first line that is exactly ``title_marker``, else None."""
    for index, line in enumerate(lines):
        if line.rstrip() == title_marker:
            return index
    return None


def process_markdown_file(file_path):
    """Insert an AI-generated answer under each level-2 heading of one file.

    Reads the file, extracts its level-2 headings with ``get_h2_titles``,
    asks ``get_ai_response`` for an answer to each, inserts the answer
    directly below the matching ``## <title>`` line, and rewrites the file
    if at least one answer was inserted.

    Matching is done on whole lines, so a heading whose text is a prefix of
    another heading is never spliced into the longer one, and each answer is
    inserted exactly once. Headings that cannot be located as a plain
    ``## <title>`` line are skipped before any API call is made.

    NOTE: nothing checks whether a heading already has an answer, so running
    this twice on the same file inserts answers twice.

    Args:
        file_path: Path of the Markdown file to update in place.

    Returns:
        None. All failures are reported with ``print`` and swallowed.
    """
    print(f"开始处理文件: {file_path}")

    try:
        if not os.path.exists(file_path):
            print(f"错误：文件不存在 {file_path}")
            return

        if not os.access(file_path, os.R_OK | os.W_OK):
            print(f"错误：没有文件的读写权限 {file_path}")
            return

        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        print(f"成功读取文件，内容长度: {len(content)}")
        print(f"文件内容前100个字符: {content[:100]}")

        titles = get_h2_titles(content)
        print(f"找到的二级标题: {titles}")

        # Work on a list of lines. Once an answer is appended to a heading's
        # entry, that entry no longer equals the bare marker, so neither a
        # duplicate title nor heading-like text inside an answer re-matches.
        lines = content.split("\n")
        content_modified = False

        for title in titles:
            print(f"\n正在处理标题: {title}")

            title_marker = f"## {title}"
            heading_index = _find_heading_line(lines, title_marker)
            if heading_index is None:
                # Checked before the API call so no request is spent on a
                # heading that cannot be placed.
                print(f"警告：在文件中找不到标题 '{title_marker}'")
                continue

            answer = get_ai_response(title)
            if answer is None:
                print(f"获取答案失败，跳过标题: {title}")
                continue

            print(f"获取到答案，长度: {len(answer)}")
            print(f"答案前100个字符: {answer[:100]}")

            answer_text = f"\n\n{answer}\n"
            lines[heading_index] = f"{lines[heading_index]}{answer_text}"
            content_modified = True
            print("内容已更新")

            # Pause between requests to stay under API rate limits.
            time.sleep(1)

        if content_modified:
            content = "\n".join(lines)
            try:
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)
                print(f"文件已成功保存: {file_path}")
                print(f"保存的内容长度: {len(content)}")
            except Exception as e:
                print(f"保存文件时出错: {e}")
        else:
            print("内容未发生变化，不需要保存文件")

    except Exception as e:
        print(f"处理文件时发生错误: {e}")

def main():
    """Run ``process_markdown_file`` on every ``.md`` file in ``interview_questions``.

    Prints a message and returns early when the directory does not exist.
    """
    target_dir = "interview_questions"

    # Nothing to do without the directory of scraped questions.
    if not os.path.exists(target_dir):
        print(f"目录 {target_dir} 不存在")
        return

    # Collect the Markdown files found directly inside the directory.
    md_files = []
    for entry in os.listdir(target_dir):
        if entry.endswith(".md"):
            md_files.append(entry)
    print(f"找到的 MD 文件: {md_files}")

    for md_name in md_files:
        md_path = os.path.join(target_dir, md_name)
        print(f"\n开始处理文件: {md_name}")
        process_markdown_file(md_path)
        print(f"文件 {md_name} 处理完成")

# Run the answer generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()