From 9d8e223df8ccdd9adc8df59331a883dc8b430ccd Mon Sep 17 00:00:00 2001 From: aaron <> Date: Fri, 15 May 2026 11:52:50 +0800 Subject: [PATCH] 1 --- .../app/__pycache__/config.cpython-313.pyc | Bin 6883 -> 7689 bytes backend/app/config.py | 9 +- .../tushare_client.cpython-313.pyc | Bin 15936 -> 18405 bytes backend/app/data/tushare_client.py | 9 +- .../db/__pycache__/database.cpython-313.pyc | Bin 6815 -> 7969 bytes .../app/db/__pycache__/tables.cpython-313.pyc | Bin 7583 -> 10447 bytes backend/app/news/collector.py | 239 +++++++++++++++++- backend/requirements.txt | 1 + 8 files changed, 253 insertions(+), 5 deletions(-) diff --git a/backend/app/__pycache__/config.cpython-313.pyc b/backend/app/__pycache__/config.cpython-313.pyc index 987cbf32c6a90052e66c3c3ce666b6a8fc582d8c..46d4202c7859c965522e05327acb46c3e64bac1e 100644 GIT binary patch delta 2442 zcmZ{k+fx%)9LL$PL6nP8xf{YY2>}H);x(XvB7%w-Z)obeBnydbvhdqQ3f0EZX~#~d z4xOFp(5L#)r@pnc|3qin((;nYOxuS(ba?F4na=d7=l5j+ow0fN=6ufYdVXhj|LFcG zQvP>Y*{%Zd8-G^xV7U70@-j(!WqYMuwvwu2J7Y!B`k=5ztEmR$12s@>Y_G62f|{rf z)C_8&{h(HmpS+*|C`k3l(&ms46arOJ1E?L;k)=ii9snMsCQv7+i<&`)K!>RXbOaQp z)>w&X?I@_2{Ge`-ivlsXupI*(ryvTR0G*^Z&?zS_8$#e|C$JrX5l|0xfO89H^8IfzE>l=`iR5NTMTI;1JM7VHCUw8m6P55zr;-23-bS zp<{>}1?{8bn9NmBF`Yo8V;~ouMBFt{Ih{gW6m*?V$Fh9)H-I-O0@quh+tdRZ2i>7w z&|OfB&Vb$l4bWL6y$4dL54QUtmHI&w4h?|fpah+ZxwE3Y2CSv?NHYlv(;!YU1xnHd z*ixV%4S^njTyzmhr$IUm!M(612 z)WgvH%9y-cl8S8AT^YV3-jsYrT+Jj>3Dwe+8N<*OJ(W&bd5;>mQbtCd&^2eLWL~x6 z$(3K-?Nau7zB;4Ln@Zf!bq$V+2H!-!0tRc&Osb^)&#r0AQCu_EC9dy%Bfv2!Ga4y0 zmmwFs)V$%6P_rq0L7AO1EHz*IhDp^mvJ{QTAT#fQiA+;*k}kGus%fQ-jJBYV==@RA zvS!RvU0n>FTei-)k?wM$=e3E=Xy;_=w3*DlcC_`1?GD+-Cet$F)6UrTql--ofX@b( z^%{=dIKreRrmN_NF+HJ*;G`IlncoO@Onfhxi`j$utsmj&x{dr9RbN`d`<`=8=pq%r&ZonLl(MJlRi@v$_pG_o|Yw6L_Y_*nw> zC%c;MZ+H2!-HgXrPOzM0In5Gb>0#+*Im2?6rH`ebWq{=z%XyYTmLZmlEW<1#ESFiX zu#B>dv0P(`7KkUDZt(IZ%Pp4MEaNQvaMN9um>nqzN%!r^l1`VW(EhCCMVt4@=u&h| z_U0O!SLK@JmNmIK=dE9rE0*GGa(%A4=E=y?zBReVzE%1->kqBU`XYg>nKkZM1+;6L%FE%Zjv{Gq}IKt#lm6)K=yvH&U>VjgCn$Q&f zZqf&0`*S;7bx>S|T6NF0XRFBOEbY~rit)D{k}rw2PKr$V9i+|ZYFrbYeaPnRE9JxI z-KcMh!SM`muRXQR?vDk3VJB)ACEfnLw!bIIp?ul656p!$M(&o_o9?slQ?PNc;z-&^ i%;{P$E!p?#8up|lsZV4n!O+FS2Tb}Ny@V3Q>#yTW+EGOl)EJkNg>b=jNr-}8Gu&vVXsF#5RP z{nX_;W>Me7kALX9jo-UnLiEUh*eeCeo2*Sb5+~b~HxVKqC=7~_AJhVh((B3BRAehC zMkhe0Ky4HNwS(dm1a*Kq=_IP0u2Klp1!^i$7}yOwLlGqQfX-42=p3k*qM$xdKed7e zKvs$+?P_a-p!0MJGz1!^HqZ!Yl-kj64CJ9w9A(CVPU=Y3sWKBFD|I4q66B`SNSp#q zQx|9kbb-1Nc@cDp&VXh?Z%_~DP0(9(7W6jgGMy`-%pC9v^};z1x=MYZYgOt8Er1d< z09pivXb@H20bQr_7~%%#T^fRK3Dil$pqn5ojU>~2Z0`Y+Gz#bYRp%I5ELUk9l&aDM z=mXGonnc4iXeE(JWV@|2mC$HN!)RR8G(9=DveHf&Y8*=`EFB$ZQg6s)ietIb>BYq;uuGqeGOh z)vgHpzE8Pi`PhB2C`amk5i`NTY@xuT(wsa&7PKfPDc`KR%trgt2%rC2_ejoV+=t>QFHS~c~&Mx0>??m#Iy?r*k# zsMZhUlD|vcz-RttYw4)wf6hslYEu=_MNSpH7_WU<6I>M?%|%?e@(O7ixpYQT``S|c z!l)IySH={R&O2{9$^e|%`c;>niPQs4N+ X@IB{&!zuc=TMsQxAs+@OY+Um~`&0^t_(lb?(o;?<4+cow?gd ze!Khj?c29+-`joPrCn3KNO>zJMoy7ZyGQ)w>4H9GG8*m6-m}Oep;gIL24#_^P!?H< z09YuBRyV6Ns1m+I(FqoLbD|}>IjLA=QKV2$is)Hw0#i)iXCuv~s7?i83%}NPv*)?c z`F-INM>=JM5Ioa6bzpzE|CPJFv%SHA^PS?Ev6I7m^W|sA49CnuTopajxpfKmqbpGducG|hD-Q{$9!wLREQ4&Il{zszTL`duZ zt9U&^!+5oH8_M*5DBU7NnRvcpfjFJ0NgJ?7@eDHgZz*0Bh|7VJi;u?6&sYe}MF8^w ziUGiytk_|9Y<9NLr8pd0g_ihp;tay*RWPgqScx0dtC1DIt~R4={FT~-s{6AOjb6Lg z<)CwcV<{)a>uGl}UQ^V}EErgYmnN)6wf>%jHNwI=LJ9welyEGx(tu$OwEj(KCJAi^ zf)Jnypc%&`zl=8Gw~{|Y6?l5y)A+XLjJN`ZtoZE|%`9j#;2S<4tp;)f0X7f&v|4;S zMTK@^xi$|~<1(#*pJu~0?IN_z|B`k;%H2koO#)g8a}@2a4wu&k>f2iAE<$=xF0Rwf zNBi+!U7`5rWRQLukLoJWUVK}(ifpbbtpugP=4dBC4S}5sm%GKe!`5PVwRf56HMo00 zGoDUgP`e*EECA~OtOO!TMBg`MSVZ4vf8d0PjEI1bl$q_~s^VGrdA$xD#i#XUX5MPR z419DoKor0(0>fhdSU3f0focQbR#}Nt49ifDf1}|Ns;nnuR^;v4;(S0mnRhc|<1{4t z_Q6!}jR(yHGw0#T>?E9$CC9rnRp?dRmsym0Pm>LV#U$i4;e^=mSDDr5CBHFiJ4)po z1nvXghH_Z)0ShL~t}Z+XB4*BwQ;OrDUm)Zk+lP>dJ5*+C@iDy$jo{JQWpO_?Qh&A< z_2AU(CFm3$DSgEMdiF3{z!~ZOv*A?faCU5R(qwHeDuCy~?Q_SmB~ye8=1TBMlM0RD z3#L5PASgVuXHKvU|J_uH{QlCpK7_X6(VR5ZLtHMDQ;t5t8My^<_slm$L=oq+=RS%) z^1q$ifV7<3y#!>=?PQGC<9N!}(Hil&o%ttnz+%G@i*8LyotA>_T^x%vxh&=r4eVGlB# z10fK2^sfOPvg?Y1Sy(;KfWF2h^X>-QQ}b4$N&ma^HldvRqc)m5dD|vCL++u6$f_zX zMt{cJit{2tJ6v3e!v1fHx6g!YD=5K-D{r^^oNYs9Cyava=j{O8=I#|@8gD0ae@xMz;wQ=zPy;9BP`M6C@SEiYF`U25 zIGGIx%a=~KWTI7^MWs9ic%V9 zPo$`(xQNJl6^>6zRc28T%6zF$z!yhP9*4Z=2l7bWr0dt5)00qBFr&xQxa zrY^n}dgsg^eS78d$bj3qozVw--k9or{l@6wsUwF%@BV74_d<}%UI^vJsPEhN4^5ps z7kc*%lI4Uh9-Vrz`4hG1P>e! z_M8qKJ`()Ro|~s%AWMhNyumLwH98tPwTI+LB*VHf`kuZ>AAI}O0zS1O)8zB`K9^j9 z3NH=?M^1(=o(MnJeF#)NdHCkh(_!B)0G~S?I&&=S8w>rmkFbXKT%6iR7(i!!=;qj| zn`0xv^9Mq{f$+Y=;n!XUBkYWV-IH669KlYhvC-IJY%#8^+Zk_c%rkc68C&$mm1bkD z*=R9|SRq3@W**&c!cTuez_p&Zb|)NX#GL%bLuW1o&+X+E(DMPe*vOdmMut_8_=TLz z21|Dm2l+py?i85yCKWA)Wh)7E(^_bP=kTeA2!zqx?V1b3Jb-+F zrr09N*xQ}t%5~DThi2FWn~UMpvu*A4c2FtI-8xM#lqT{bd>n7Bi1GUqlcv;4T zBJR3EcU7SqSEO_8)T@fraYfohqGn)y-}-?KeH$hcQYN&<>)QOQ+WbJl(remf6B@&H z&D^V+xq+ODYnmk!N!n={m6SP6MI|W@bXQL+sJNtqyN8larJhQioTVAq)xQhx4#XKJ z74f~Ro?msuby_{H$m5i_GZ~wFixSB)CKamQx`TBA?TSm=#uaNPV`p8DEh4{hbKT{w z*Uf9Mn%7RoWesf%lr{z)e+BK$~QWms+&eq4-5=VNmeLA=pCM-Pvc39+cogu>tk zq}st%G&hotUols9bAENtg~|y7@qRDO3SDk5_k6P=c#Zg6dn*~HcLO{N&S-~RsGF8ROG>H9`9Mofkx}3xCnQ(&rXEba9Rodz01dY|4KB$FojV+~ z5igWzy5Z^Gh>lTB#N+s4r6Lz~2WkTCxMD@c$b!f>mb3{nW3H(0en>`UWRbM8&T&^1 z;Q|5hsdNh>vjSyOdh&`|DK)i{XyS?QUMGOS*+w0U(o?nM3o)87v{9wPRz!0jH(@er z60(B)LRRvWvunGDZeb-IPKL3!IT;YH*Yl}7D_hj&bUSx!p)1(zhlOzdqIQqN-p*9y z-(@r-a+fh+mpfq+Zvkb^=S8cB+6J?~j5UzfKe%N0nfL@ma?dHk#R~22E;hwhx1zyf zTWejjuFAHezJ5);ZS{)AhBfueEhMr1hVaqf65tYBeI@^$@q3^Glo8`Fu(LZpYEl{h z=#k79Hv1l#j8={-YrZI2eN9<&ExINks^Lz{l*~$Owib`WW~1S=l$Er5+S*+1HrfR8 zG(Zw>vAbLB44sE9iyKS620C}w-GUZ(+RDf^=ioksx%-e6ZSr{9IgRKr{@vna`M(H1 z(T!qS@n05~mQLuJ0#wRv@kSBK_>SUmsf<#k<8+^LS}Z`dXwWrHL4A7p_gu|s{9g&< Brj!5x delta 1989 zcmZ{ldu&s66vuno?(RKj_h`HBLEEvmTWjmawo=9(5Gb&VUJml4t_{&TR*S%=^cu_nvcp z=XZYJd(Q3E*Wt|0qML<cZV%m-iMDpVl;%826&I9YL0SW2!w>@XR1bSu(Z86RgQWL}F~OpTDl!zLH6=UbCG9UZ_w zO>HnDyDRsCbA$&v)U1gAqzdA(ftVDeI)f3motqv?_e)h1?7^~XkFbOC@5Juvc9_6@ z)lWbVj=7BT_3BEfUPHbJp_^kzL2NJ*-5!jDV)3MpJ&7kDTaUXJw5-`f5zi8O34I*1 zh2+m!GOcBEbf!pJC~7&z`89h|!Mq3%mzcf2v{)2KF>D1Pi!jD9Eu{5VHt!*8fS~N) z$K&SZuwTAmJ_7~MaC1s4C5NJ1-OsHC{TmcVFZ&@)u31~*2-emN$;er z3*d0=N|=&At{sI+(r1Q-E3HqSv{`&mo%iZi!X*AtS8m)#`#p42Zc4fEhP51c;^_u-jR!o2RSBoQvTco^ra2 z?<@EYPsRJ|mUA_HA$Pc*2b0oOQ)E`UDp!;e;jO`eP3e~Ut6`G6rZbJ4JCGQRCO5?s zqij9}@sissT;}m&9X@R{qTOSFA?)%PeCsIx{VJ$q_#obsfKkV!azrV__=HFg^bUG@ z;G%rRa{$aAP!<|2c9Jm9kuSOR*xK|6o@&&=6}-@RZ=mipu7V$Bf73?r%$>}9WqN`e zLt-pEPd=*3jlt$B{CklOu3>JoYj#50nmge)c~|pjW-g;tQF<=Z3eKHN2b0K6QnGD? z`PAcfzS(Jy0Z+8*;U7HT>K0z3;Qf4lj9BKahg))|_f1%=%uv4aIpLB?BjVg4XJ6Br zM>#~`m>pz(o5{Y=rw0L-`rL&|Uu8NmfJt8msPPk@7r*MVLlm!f>7fb>mUV8mP(~-g zMWF9`N+U`vrHKtn%49#PcfxH_kg;KUk MSA#VZMas(m05Ci9%>V!Z diff --git a/backend/app/data/tushare_client.py b/backend/app/data/tushare_client.py index f2218e56..2f16a5c3 100644 --- a/backend/app/data/tushare_client.py +++ b/backend/app/data/tushare_client.py @@ -14,6 +14,7 @@ from app.data.cache import cache from app.db.error_logger import log_error_background logger = logging.getLogger(__name__) +_NEWS_QUOTA_ERROR_KEYWORDS = ("频率超限", "每分钟", "每小时", "每天", "权限") class TushareClient: @@ -300,8 +301,14 @@ class TushareClient: return result except Exception as e: logger.warning("Tushare 新闻请求失败 source=%s: %s", source, e) - log_error_background("tushare_news", f"Tushare 新闻请求失败 source={source}: {e}") + if settings.news_tushare_alert_errors and not _is_news_quota_error(e): + log_error_background("tushare_news", f"Tushare 新闻请求失败 source={source}: {e}") return pd.DataFrame() tushare_client = TushareClient() + + +def _is_news_quota_error(exc: Exception) -> bool: + message = str(exc) + return any(keyword in message for keyword in _NEWS_QUOTA_ERROR_KEYWORDS) diff --git a/backend/app/db/__pycache__/database.cpython-313.pyc b/backend/app/db/__pycache__/database.cpython-313.pyc index 0cd65a8260f49894657c898b224c3cd7f00511c9..5a4f4d2a9183d2f62702358cec24df7377f2535a 100644 GIT binary patch delta 1464 zcmbW0(Qn&C9LMi$*G{~qP17b>(~X)%QxlXnMeQhCveK+}OGKJ#Ya(?*!g7;K>&>w{ z+Zk&n8i}WBJWR+BfP{oLK!O)gRU~+WHt7p*qYs0K2mXYH@=zv(IHy%fOOX&Ko&3G; z?|z@V-<|KHOOM7o_FOI&;P#}@cI)f&>mARCfr)6=K)Ml?BwbUZhE~#ZNX%(@6qO89 z%iSK77LjU3H=Lw{fZuF*+Ri|Bj_wApUIIWRt@XA9BrTD?V0q~zWU!ic{^9`XZ*hNM zeM$Q}zXzZIu9E$r0I|1Vdn1pJd9OSVUiaABvKs>=5&#bLgLJYbFJb*oUxp5Ago(q1 z)o2fIe87y-aN4@=4p?s1bs>*(vLS1#XzEf9aT#Gg!|}pYBAd!^1NYw=C=XhRj^IYa z^_8Pio=&7P!VNByxRw$)1810MaRpbc7UV^POYl55Ih)GPq&aj~&Lb6{$Sp%tPl4Y2 z&!9p9?ZoCc=7O| zm2|}hANrl95jqG&I2;LE>%NaCDZLLri#mn>PPNYDG)2T`RCmg19^JF0LE$imo8D#5 zV`Nn}#r%REvTfXrQE0v6A9h}YXW6ys-6_JV`Um4l!XEEs2r9s?C3jiU<^9nYcu#p( z`Q*xH%1$u$G&u1jIPqQYPVluIU+Sql^@BUraN*gzJ=VT|c=!$h+|NB+eX#nIW4Mtb zh%V3mZ#3vRzXzbpTX*;DlKA_q;Q?JUaR2A-bJoP!Q04wX(!pOUsLx& z46XICe3+?SY&s!m>u^)!1%`;)5X;BfYA+LZ?iFb3STi~f@haPZ7W~XM4FzZWHq&lv rs+$o4)@bh+=j_Y*9O&Knb8u~rcu~uj6f~}nSbz4u@01|yYKH#;Om(ga delta 536 zcmYk2O=}ZT6o&7ak9%h(leDQqYbGC~A{CkF7&SIF7UTxRMcbu|LQ73s45SI2s8Esk zfs0avEbhjgRPDlzAQ3_ACb|9rm*x)$t|}^sbm6(_#si1@ocBHFUEH0S7nc%0<8g;z zE$)4<6_343{M8`UJ*Qh5q`OmQ48%uz4XEhQ6C%S-*}E2*dx||THrV)u4k2sgs-7cj zfLd^zER3lvRg5g_g!m*!=8FIAoE4u~37GiJ%6jSk4e`aE>@HXrDdffD(UaYS#Cyvf zIIWHPa=m(cb$wI3PQALUM;;~)jB2yd7_e%yzJv=cj3>VDM@i+eTBCMnY2{WlsZ{LN zHzKyZ(7H0=Y+c{K2SR#zYZGwXuV^AjhZD0J8O?Uu+wHGb_~QX+$+17bC^?!D&(fzK z-aIrr?XC7fYY+3DtA@eq^KIRRHj^K0nWmb8;xpKF@IujMdqwI5g J9w0fi{{i#Rg4+N9 diff --git a/backend/app/db/__pycache__/tables.cpython-313.pyc b/backend/app/db/__pycache__/tables.cpython-313.pyc index a7eda3d2fc9c8b15ae1944387ad777a2849d1e84..126a9b0fdfc0d8a589315b82631f9e0ff7a2c280 100644 GIT binary patch literal 10447 zcmbVR>318~bqC1-v63JuuA-=gq83`VXvv}^YK?7qqn9Km)wng&=@1xzV*)eqETZT% zt(z`!lBQ*vx<%6^NVgze9M*1YxBVxG!!OKy%W0hxe}l`@ldt{VH*Z!7KCr1HaPIHj z_ugH8_uhfD-`AIp;pe~P_-oVWW3m4x!Tp)y5Bo0+#A0v9mSTErscWhG`L4xoy?e1o z?^zUdVKJ`97kl;I#e|+%OzO$Sl%85l>*?dMmG0@-JU>Futn^IB=3+}dU+7xN;IA+p zTM|~{OZ2CALEuPYC9#xTN-dBUX`~}^W+gG-?HtK9vMaq2r0)RIe*noHKn4yVgDbu8 zcF42kP(&U+uw&!^GJ3$?u>(5e2at&a$l(L$Od>Kz0@h7IJ{pjZMb?i;1c|NfJ z@_~q5SxK#=UmkbK?f>>!x^$JYeWmwuOz$JUE-kENqD*D-HQdH{%B1Td=?0{mWC>fl z1&QT}Nw-7N=O8hk+1lfN)|vc7M1FE5?lPUN--)b0<$KK5pN{B16Oo^d=sy>c7bE9C zACX^x^?nRGjPCrX6L-Cz#rMTF@paCQFTJ!NFke0&vHc4XIU1WUM%G`B$V(AlmLqbf z-ekEqIp)pXHs1VFvOjmRqz`O6XcS5eulkFiLKD@kO{W#_Mj zBq_ws)sU2jRPgq78lNE^p8*yhxlMeEEFONgOA&dE$(Rbv0|nA+o+az>Anr+JtohZ} zp_S~^N@v|+KEpj4WFDz)JaRm6E!INzSDEzslFqqptfT^UWQH=*p$GFRZq*R8%4%bk zV}~E-d!cBBBe?=5(rD{Z{<-W}N4-kSrH zejT2E)$`^zLiy;>;`tO$Ec_jJIx_3Snd#8p z&lKSi=G*Uf*YAt6{#7{WZ_xKMUFTJxJ`WiAC|xJL`c=x6>HAwL8wd`-?W-{bk7hsCR!ATK%lv zb!xHO;U}2I-*01aJ!BJ2#~*kxrd;_vAm4oHuaP-_8vjj5`deuF*$C(6??R_^=nKo# z->;M#yoXFJ0D{`WS1Bj$lT(>E1A>-0^^XlP}7qbVo8JJH9NfmSwy5Nv&*ER6FsUYRV;9x8qNj zHOaJvW%<5oC!Wx>vMi}~5_{xjr6Sux(NgnvPM7mqr6Q{Z$y7Aeu)CGQ@@%)Asp?8a z(l^C*dDHHbEK^g|ye>l#hna?$*9!8}vt4#lwaR5?fS^h^wbzjIriR;%yr#?cAkJ8q zP0^GqRndX6js}mAnTWGOTK~yeSssili1CEnJ%@$VJI2oAwaw z%*&#bC+qnk{Z-9S0&?!2RJNFVT%p%cN~%=0Gw=p3vLo!QDd{Dc>PRWW)T&}xGYmVS z%aTE%N$HBQE~=U-+c7)CudJ3O)#^o3e*VL+efZ<=+k>XAln{SJNG&ic>`_I91&R~l zAZ}RDG)%c5dKOpnrah?3_Y`?Ul!_+KDM*{fM?JW}9w;kCIlq}N%c5aQrUi@g2nro6 z+x_^I#G)=6YpM*Zv*mI{l;Gv2p%`}3Fm-5^Htl{>N3z9LIJK@>xXg)x6A{tetb!S; zrpk8KSumA~qL%Cw9mf**lAQr(q;gp_rIPVR_p4u~B*;aj%&e93ih)$v13u=&&(0RL zyk)>uMKulB%QZzoSFtO0*S1Lo5SY48m;7evWK#`_IfzenNViO+BvtwG0j z1!AQqFg~VNHAycxe#Ol-5ZF$#U!qd5)6Nr9>~o%xCkY1FX(U3-uSseNp>;W`sKv6j zVfU926H~OR$fbpPNRthmT5)6sTXK{&O0}t|mWE7aD{^I(v4VE=qZSJYEApu2*O5oM zjU@&&v~*P?Po;Y$d!QmIs#w%?a2yF#G}x?51sSYB&W8fkC1csr<=Jk1khs!osmkjX zob|0NXd5cG7)&A$0!X?d-G?>A3Olo^YwNN~7b2Qey8Li^EZi+v>eh)Kplq;>IKf3; z*BwLq@|F(Txy98_?iO{eA~KHIDY~539oWVn`w7)$iib#LfmqxZ3%azy%qC=lMcF`S zLUcj~ASdw8sc5^8p8a~x=26zPfvTFw%!-YbbuX{0UMlW;vK_a;I#dt+Vl0WH4Jl>2 zuPPbFh6ZA-NyeHj=xDCCKw5T6F<3Xk?g7-jV#sO>{*aD+*qH5`A3rL|ZmlSjfo#$3RObJfQ_p1u3osOcL3wAbeondEiQeIc8bgrGG z&_T)gsEh3-eV4p$m91{tIYvjZphzVZH&c;1kMOBNr4{G-@?blMqjYgYGV^O?7>RDl z02CLx2+@Zvi6vdLs&+zKg*GCS&TEECh9Pe=Z&$cC>tj@=sH1$@Vojk}D%?ZEw-bg{ zq4{L@6)}MPMweqf3dt?XARH9?+#cw7kTUXXa=|K7GGtxXboBU=fw84os4+WUmhZ`B z_(@B4qJmaL<=k7qv_;+r+%`(3T~x3!;~_9Kfn+i(GtRR38ZoRFlgq-i>2unNOT0et zu0SR_TK#en&7D}sD#PxtqCTqVLHwdLmuVg+0GddJJ?7ocFLbVeS^O^6&l6R2e5%sy zDta3+&M{KOjB$ljK`9owy}UBg5+<+WkTnfmLj;*Ch8;H*6Ljyf^s=3;TC2!ALP|VL z6%g7gdu7Og(KLHZY6TgVgEzR6yJ8?Bt`eU+xP*-=SuL4scFLb-M1i6cF4TjxffowA z)ud@Cacgr<|8`HLX)Pm}5&}GX)J}+Kc6m{>)5hy%sSFNOHgy`Rxi`N~xI46mE=DD zz~>gnE}CUkE*2-M@(8=^s~Qz8ABztwQ~7 zQ#jX35R@e7^46vLsittYl_Drj(ABNwUEy#mLqL{*D_c(y&__T&fSt_IdVE(n-O3R% zKq|Lt^Si={)*t~x1l+D+&vfe$0mE$1k(#_K%(O-b8D+@v`l;=~rtny6jIeQr9jlLQ zcQu8JtqH;oGiTNmY8P6M z5O$tnC+o}G7n{Q6)*NAva_sK*>}Tra6<4HVV4*-RWlo}Hid=OJYkm^Hd$M4 z;Jov#D}-GI28S*+hMK~|tp$Rv5p-?K+&MK*S6nCT2HQ&J6`R6*>n35hLR)VW^tmp= z5RSS@$35Of;1diacZH_#Nb5-k-XU-&bF4nx&?!exG4yE%C9_{{3Xis)Vd%5$05Woz z?0$}ci;gOJbBR=+XXp#ADrM|N2ED|f8@2K6nfK4Wd-lDDn!?Th{X8x0#6~_|?uv~Z z-V+A5uKpxMb&|;Bk)|*geZEl}u5WN0ajL`Av!p{foOy_zZ`bZLP%>=qjoQera17{9 z#^E1-{L-#)%Gt-)Q66`8g$cHNq*g**(DJR?6>ygCK|M_qc)cd<3RBK9)eBpms#)HV z!;LtCMG9fp_s@-v!1F;c>BxyaVZ6pn_iNnQf!*7Mrf{94Yg-$R`8a9Jy=t4K^ z)v$iX*~iy;%@F2PkFy#~!TwnG2<#X~8J;-3Crs7ulTOGEs)xj`7hBIEry)Q+5Kmr% zbcR!n;bpRt$7UyUx{>$*&4M`xml|kVkJ855wPzaWX^+vfTN3n4Jm^NRqi1xq`c-h7 zo+E)hIdlRJO`+g(!+S#Jtyf4V(sm z*Ak5Pz{v}zGDsmb##86YiAl4 zxIUE`h7aqv6u22qk@gAXal>rfpX_(K@bUI!33k08H-pc?yXLoM>gfM8f-x7a;?3^{;S9I4m~M5a z(Sxm|_?_lc<=U22m%OPGuY^HXzi`)!aV?cQw;l;@%DH*JeEwLQQl5I(ml*M?FWXrPb7~(k20*Ob4q8PSdCj`Q#2nd@+ zSk105Rn**=N>P-!33Px1S1xeCQ@T)`;L52*E*yEUHH&AeC|u=||L=A8|G(EQ|9bJu z(!ibm{yq!*&XZo9?oP<5rcZvaF>r6)Z?HoY5VlFekHAsrF{a^KDfow>9g84>B~&3Z|w@G zC$*EWLL!Hzo`RbG)|jgYZiwB$)A*X%#Joy|A(r79W?1iesvgf69G_+CIqrGoE??k1 z`(8rTFcZB~kK*EIF4xgzyLYbMY0Z$=*vpH&gL_Hq>}62;tzd#lt#$)yy85rK)prtL zd0xRh!Y}UV=&O+7HSMg|4V~-VxRq{#Z1)?u+qxqx_GOE8Ll19)x~ZM^7Vq4b%-G-m zWx6x^E2RhWhd+(s{;Je@y)12NDYLMl9LzZ2G`T(-%6Z{ZrA8EVwK7C4`2bufF z=wAE;*6Qw`g8EF`*)Y^@fik^6{QsF(r#F0MGW&V^hVKts`&vLEwrsfl)5v{R&o`Dk z==*zE_&JjBHsMbz{9gBYgnuIGdV0k68wpW6eh$ak=bxl+H`f@Jwth&;WSKgk)OA`M z-rZtjOWKslQR+hEMpS6zq?AlD)PtxG(R`!QA&00J!2twwja3Bw2nGP`d1I2JLk`oy z^vJBO>%%omV!lJh=@72*BUlz;(*zA52(nF4al1p3G?W(PXaI9}!;HtJP|GTl89IXO z5MvQ>vqKUzf+V_s_b8GW5H)5T!5D(FnCg%*I*wohfs!eKAc>$PLhKk#0jPHmBY}x@ zNQ9gk{t8II>B`5>ll#Ba;HX3)z&y zHd`ymrjgAkdtI%ex=d#2EW#XvqhhW3kW7j+kF3C0M65L7z)3oXY#tc2lxzBBa*P%c zEg)KG)b>(&^e7=)WLq)cWtrq@8QGFy>oTJH2`fUFh*ZP#R;)a`DwLMFyyU`BDJx;f5@m0TuxBf367w1I) diff --git a/backend/app/news/collector.py b/backend/app/news/collector.py index 1c2f80a9..611558a5 100644 --- a/backend/app/news/collector.py +++ b/backend/app/news/collector.py @@ -7,20 +7,29 @@ import html import logging import re import xml.etree.ElementTree as ET -from datetime import datetime, timedelta +from datetime import date, datetime, timedelta +from importlib import import_module import httpx +import pandas as pd +from sqlalchemy import text from app.config import settings from app.data.tushare_client import tushare_client +from app.db.database import get_db from app.news.models import NewsItem logger = logging.getLogger(__name__) _tushare_source_cursor = 0 +_tushare_calls_by_day: dict[str, int] = {} RSS_HEADERS = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", } +WEB_HEADERS = { + "Referer": "https://www.eastmoney.com/", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", +} async def collect_news_sources( @@ -38,6 +47,15 @@ async def collect_news_sources( except Exception as e: logger.warning("Tushare 新闻源采集失败 source=%s error=%s", source, e) + web_sources = _parse_named_url_sources(settings.news_web_sources) + if web_sources: + async with httpx.AsyncClient(headers=WEB_HEADERS, timeout=10.0, follow_redirects=True) as client: + for name, url in web_sources: + try: + items.extend(await _collect_web_page(client, name, url, lookback_hours, limit_per_source)) + except Exception as e: + logger.warning("网页新闻源采集失败 source=%s url=%s error=%s", name, url, e) + rss_sources = _parse_rss_sources(settings.news_rss_sources) if rss_sources: async with httpx.AsyncClient(headers=RSS_HEADERS, timeout=10.0, follow_redirects=True) as client: @@ -47,6 +65,12 @@ async def collect_news_sources( except Exception as e: logger.warning("RSS 新闻源采集失败 source=%s url=%s error=%s", name, url, e) + if settings.news_akshare_enabled: + try: + items.extend(await _collect_akshare_stock_news(lookback_hours, limit_per_source)) + except Exception as e: + logger.warning("AKShare 个股新闻采集失败: %s", e) + return _dedup_in_memory(items) @@ -77,6 +101,23 @@ async def _collect_tushare_news(source: str, lookback_hours: int, limit: int) -> return items[:limit] +async def _collect_web_page( + client: httpx.AsyncClient, + source: str, + url: str, + lookback_hours: int, + limit: int, +) -> list[NewsItem]: + resp = await client.get(url) + resp.raise_for_status() + text = resp.text + + if source == "eastmoney_roll": + return _parse_eastmoney_roll(text, limit) + + return _parse_generic_links(text, source=source, limit=limit) + + async def _collect_rss( client: httpx.AsyncClient, source: str, @@ -112,11 +153,124 @@ async def _collect_rss( return items +async def _collect_akshare_stock_news(lookback_hours: int, limit_per_source: int) -> list[NewsItem]: + """采集推荐池/关注池中标的的东方财富个股新闻。 + + AKShare 是可选增强依赖:安装时启用,未安装时安静跳过,避免影响主服务。 + """ + ak = _load_akshare() + if ak is None: + logger.info("未安装 AKShare,跳过个股新闻补充源") + return [] + + symbols = await _load_focus_stock_symbols(limit=settings.news_akshare_stock_limit) + if not symbols: + return [] + + items: list[NewsItem] = [] + per_stock = max(1, min(settings.news_akshare_news_per_stock, limit_per_source)) + cutoff = datetime.now() - timedelta(hours=lookback_hours) + for ts_code, name in symbols: + try: + df = await _call_akshare_stock_news(ak, _strip_market(ts_code)) + except Exception as e: + logger.debug("AKShare 个股新闻失败 ts_code=%s error=%s", ts_code, e) + continue + items.extend(_parse_akshare_stock_news(df, ts_code=ts_code, name=name, cutoff=cutoff, limit=per_stock)) + return items + + +def _load_akshare(): + try: + return import_module("akshare") + except Exception: + return None + + +async def _call_akshare_stock_news(ak, symbol: str) -> pd.DataFrame: + import asyncio + + return await asyncio.to_thread(ak.stock_news_em, symbol=symbol) + + +async def _load_focus_stock_symbols(limit: int) -> list[tuple[str, str]]: + async with get_db() as db: + result = await db.execute( + text( + "SELECT ts_code, name FROM recommendations " + "WHERE action_plan IN ('可操作', '重点关注', '观察') " + "ORDER BY created_at DESC, score DESC LIMIT :limit" + ), + {"limit": limit}, + ) + rows = result.fetchall() + + seen: set[str] = set() + symbols: list[tuple[str, str]] = [] + for row in rows: + ts_code = str(row._mapping["ts_code"] or "") + if not ts_code or ts_code in seen: + continue + seen.add(ts_code) + symbols.append((ts_code, str(row._mapping["name"] or ts_code))) + return symbols + + +def _parse_akshare_stock_news( + df: pd.DataFrame, + ts_code: str, + name: str, + cutoff: datetime, + limit: int, +) -> list[NewsItem]: + if df is None or df.empty: + return [] + + items: list[NewsItem] = [] + for _, row in df.head(max(limit * 3, limit)).iterrows(): + title = _clean_text(_pick_row_value(row, "新闻标题", "title", "标题")) + if not _is_useful_title(title): + continue + published_at = _parse_datetime(_pick_row_value(row, "发布时间", "datetime", "time", "日期")) + if published_at and published_at < cutoff: + continue + content = _clean_text(_pick_row_value(row, "新闻内容", "content", "内容", "摘要")) + source = _clean_text(_pick_row_value(row, "文章来源", "source", "来源")) or "东方财富" + url = str(_pick_row_value(row, "新闻链接", "url", "链接") or "") + summary = content[:240] if content else title + items.append(NewsItem( + title=f"{name}: {title}" if name and name not in title else title, + content=content or title, + summary=summary, + source=f"akshare:{source}", + url=url, + published_at=published_at, + )) + if len(items) >= limit: + break + return items + + +def _pick_row_value(row, *keys: str): + for key in keys: + try: + value = row.get(key) + except Exception: + value = None + if value is not None and str(value).strip() and str(value).lower() != "nan": + return value + return "" + + def _split_csv(value: str) -> list[str]: return [item.strip() for item in (value or "").split(",") if item.strip()] def _parse_rss_sources(value: str) -> list[tuple[str, str]]: + return _parse_named_url_sources(value) + + +def _parse_named_url_sources(value: str) -> list[tuple[str, str]]: result: list[tuple[str, str]] = [] for chunk in _split_csv(value): if "|" not in chunk: @@ -130,21 +284,100 @@ def _parse_rss_sources(value: str) -> list[tuple[str, str]]: def _select_tushare_sources_for_run() -> list[str]: - """Tushare news 免费/低权限账号通常限制 1 次/分钟,每轮只取少量源。""" + """Tushare news 默认关闭,仅在显式配置时少量使用。""" global _tushare_source_cursor + if not settings.news_tushare_enabled: + return [] + sources = _split_csv(settings.news_tushare_sources) if not sources: return [] - limit = max(1, min(int(settings.news_tushare_sources_per_run or 1), len(sources))) + remaining_quota = _remaining_tushare_quota() + if remaining_quota <= 0: + logger.info("Tushare 新闻日额度已用尽,跳过本轮采集") + return [] + + limit = max(1, min(int(settings.news_tushare_sources_per_run or 1), len(sources), remaining_quota)) selected: list[str] = [] for offset in range(limit): selected.append(sources[(_tushare_source_cursor + offset) % len(sources)]) _tushare_source_cursor = (_tushare_source_cursor + limit) % len(sources) + _consume_tushare_quota(len(selected)) return selected +def _remaining_tushare_quota() -> int: + quota = int(settings.news_tushare_daily_quota or 0) + if quota <= 0: + return 0 + today = date.today().isoformat() + return max(0, quota - int(_tushare_calls_by_day.get(today, 0))) + + +def _consume_tushare_quota(count: int) -> None: + if count <= 0: + return + today = date.today().isoformat() + _tushare_calls_by_day[today] = int(_tushare_calls_by_day.get(today, 0)) + count + + +def _parse_eastmoney_roll(text: str, limit: int) -> list[NewsItem]: + result: list[NewsItem] = [] + pattern = re.compile( + r']+href=["\'](?Phttps?://(?:finance|stock|kuaixun)\.eastmoney\.com/[^"\']+)["\'][^>]*>(?P.*?)</a>', + re.I | re.S, + ) + for match in pattern.finditer(text or ""): + title = _clean_text(match.group("title")) + url = _clean_text(match.group("url")) + if not _is_useful_title(title): + continue + result.append(NewsItem( + title=title, + content=title, + summary=title, + source="web:eastmoney_roll", + url=url, + published_at=None, + )) + if len(result) >= limit: + break + return result + + +def _parse_generic_links(text: str, source: str, limit: int) -> list[NewsItem]: + result: list[NewsItem] = [] + pattern = re.compile(r'<a[^>]+href=["\'](?P<url>https?://[^"\']+)["\'][^>]*>(?P<title>.*?)</a>', re.I | re.S) + for match in pattern.finditer(text or ""): + title = _clean_text(match.group("title")) + if not _is_useful_title(title): + continue + result.append(NewsItem( + title=title, + content=title, + summary=title, + source=f"web:{source}", + url=_clean_text(match.group("url")), + published_at=None, + )) + if len(result) >= limit: + break + return result + + +def _is_useful_title(title: str) -> bool: + if len(title) < settings.news_min_title_length: + return False + return not any(token in title for token in ("广告", "下载APP", "扫一扫", "关于我们", "联系我们")) + + +def _strip_market(ts_code: str) -> str: + text = str(ts_code or "").strip() + return text.split(".", 1)[0] if "." in text else text + + def _xml_text(item: ET.Element, tag: str) -> str: node = item.find(tag) return node.text if node is not None and node.text else "" diff --git a/backend/requirements.txt b/backend/requirements.txt index e9e658bd..2f533f3c 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -6,6 +6,7 @@ sqlalchemy==2.0.36 aiosqlite==0.20.0 greenlet==3.3.2 tushare==1.4.20 +akshare==1.18.40 pandas==2.2.3 numpy==2.2.1 apscheduler==3.10.4